From 019a221686d8f71ae5bd097fbb043986650083e9 Mon Sep 17 00:00:00 2001 From: Tom Callaway Date: Tue, 4 Jun 2013 12:01:26 -0400 Subject: [PATCH] add metapackage, update to svn218 --- gperftools-2.0-svn190-to-svn218.patch | 1972 +++++++++++++++++++++++++ gperftools.spec | 23 +- 2 files changed, 1992 insertions(+), 3 deletions(-) create mode 100644 gperftools-2.0-svn190-to-svn218.patch diff --git a/gperftools-2.0-svn190-to-svn218.patch b/gperftools-2.0-svn190-to-svn218.patch new file mode 100644 index 0000000..771e61a --- /dev/null +++ b/gperftools-2.0-svn190-to-svn218.patch @@ -0,0 +1,1972 @@ +Only in gperftools-2.0: aclocal.m4 +Only in gperftools-2.0: aclocal.m4.svn-r190 +diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh +--- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400 ++++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400 +@@ -1,54 +1,3 @@ + #!/bin/sh + +-# Before using, you should figure out all the .m4 macros that your +-# configure.m4 script needs and make sure they exist in the m4/ +-# directory. +-# +-# These are the files that this script might edit: +-# aclocal.m4 configure Makefile.in src/config.h.in \ +-# depcomp config.guess config.sub install-sh missing mkinstalldirs \ +-# ltmain.sh +-# +-# Here's a command you can run to see what files aclocal will import: +-# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p' +- +-set -ex +-rm -rf autom4te.cache +- +-trap 'rm -f aclocal.m4.tmp' EXIT +- +-# Returns the first binary in $* that exists, or the last arg, if none exists. +-WhichOf() { +- for candidate in "$@"; do +- if "$candidate" --version >/dev/null 2>&1; then +- echo "$candidate" +- return +- fi +- done +- echo "$candidate" # the last one in $@ +-} +- +-# Use version 1.9 of aclocal and automake if available. +-ACLOCAL=`WhichOf aclocal-1.9 aclocal` +-AUTOMAKE=`WhichOf automake-1.9 automake` +-LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize` +- +-# aclocal tries to overwrite aclocal.m4 even if the contents haven't +-# changed, which is annoying when the file is not open for edit (in +-# p4). We work around this by writing to a temp file and just +-# updating the timestamp if the file hasn't change. +-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp +-if cmp aclocal.m4.tmp aclocal.m4; then +- touch aclocal.m4 # pretend that we regenerated the file +- rm -f aclocal.m4.tmp +-else +- mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails +-fi +- +-grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f +-autoconf -f -W all,no-obsolete +-autoheader -f -W all +-"$AUTOMAKE" -a -c -f -W all +- +-rm -rf autom4te.cache +-exit 0 ++autoreconf -i +Only in gperftools-2.0: autogen.sh.svn-r190 +Only in gperftools-2.0: compile +Only in gperftools-2.0: config.guess +Only in gperftools-2.0: config.sub +Only in gperftools-2.0: configure +diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac +--- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400 ++++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400 +@@ -99,28 +99,7 @@ + [gpt_cv_objcopy_weaken=no]) + AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes) + +-case $host_os in +- *mingw*) +- # Disabling fast install keeps libtool from creating wrapper scripts +- # around the executables it builds. Such scripts have caused failures on +- # MinGW. Using this option means an extra link step is executed during +- # "make install". 
+-    _LT_SET_OPTION([LT_INIT],[disable-fast-install])
+-AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `disable-fast-install' option into LT_INIT's first parameter.])
+-
+-    ;;
+-  *)
+-    _LT_SET_OPTION([LT_INIT],[fast-install])
+-AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `fast-install' option into LT_INIT's first parameter.])
+-
+-    ;;
+-esac
+-
+-LT_INIT
+-AC_SUBST(LIBTOOL_DEPS)
+-AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x")
++LT_INIT([])
+ 
+ AC_C_INLINE
+ AX_C___ATTRIBUTE__
+@@ -134,6 +113,7 @@
+ AC_CHECK_TYPES([Elf32_Versym],,, [#include <elf.h>]) # for vdso_support.h
+ AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory
+ AC_CHECK_FUNCS(geteuid) # for turning off services when run as root
++AC_CHECK_FUNCS(fork) # for the pthread_atfork setup
+ AC_CHECK_HEADERS(features.h) # for vdso_support.h
+ AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not
+ AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h
+@@ -183,6 +163,11 @@
+ # This workaround comes from
+ # http://cygwin.com/ml/cygwin/2004-11/msg00138.html
+ case "$host" in
++ *-*-mingw*)
++ dnl mingw doesn't have mmap, so it's not worth
++ dnl checking, especially given that mingw can be a
++ dnl cross-compiler
++ ;;
+ *-*-cygwin*)
+ ac_cv_func_mmap_fixed_mapped=yes
+ AC_DEFINE(HAVE_MMAP, 1,
+@@ -310,10 +295,18 @@
+ # Note, however, that our code tickles a bug in gcc < 4.1.2
+ # involving TLS and -fPIC (which our libraries will use) on x86:
+ # http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
++#
++# And mingw also compiles __thread, but the resulting code actually
++# fails to work correctly in at least some not-so-ancient versions:
++# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html
+ AC_MSG_CHECKING([for __thread])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2))
+ #error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
+-#endif], [static __thread int p = 0])],
++#endif
++#if defined(__MINGW32__)
++#error mingw doesn't really support tls
++#endif
++], [static __thread int p = 0])],
+ [AC_DEFINE(HAVE_TLS, 1,
+ Define to 1 if compiler supports __thread)
+ AC_MSG_RESULT([yes])],
+Only in gperftools-2.0: configure.ac.svn-r190
+Only in gperftools-2.0: configure.svn-r190
+Only in gperftools-2.0: depcomp
+Only in gperftools-2.0/doc: cpuprofile.html.svn-r190
+Only in gperftools-2.0/doc: heapprofile.html.svn-r190
+Only in gperftools-2.0/doc: pprof.see_also.svn-r190
+diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL
+--- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500
++++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400
+@@ -8,6 +8,28 @@
+ Perftools-Specific Install Notes
+ ================================
+ 
++*** Building from source repository
++
++As of 2.1, gperftools does not have configure and other autotools
++products checked into its source repository. This is common practice
++for projects using autotools.
++
++NOTE: Source releases (the .tar.gz files you download from
++code.google.com/p/gperftools) still have all required files just as
++before. Nothing has changed w.r.t. building from .tar.gz releases.
++
++But in order to build gperftools checked out from the subversion
++repository you need to have autoconf, automake and libtool installed.
++And before running ./configure you have to generate it (and a bunch
++of other files) by running the ./autogen.sh script. That script takes
++care of calling the correct autotools programs in the correct order.
++
++If you're a maintainer then it's business as usual too. Just run make
++dist (or, preferably, make distcheck) and it'll produce a .tar.gz or
++.tar.bz2 with all the autotools magic already included, so that users
++can build our software without having autotools.
++
++
+ *** NOTE FOR 64-BIT LINUX SYSTEMS
+ 
+ The glibc built-in stack-unwinder on 64-bit systems has some problems
+Only in gperftools-2.0: install-sh
+Only in gperftools-2.0: libtool
+Only in gperftools-2.0: ltmain.sh
+Only in gperftools-2.0/m4: libtool.m4
+Only in gperftools-2.0/m4: libtool.m4.svn-r190
+Only in gperftools-2.0/m4: lt~obsolete.m4
+Only in gperftools-2.0/m4: ltoptions.m4
+Only in gperftools-2.0/m4: ltsugar.m4
+Only in gperftools-2.0/m4: ltversion.m4
+diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am
+--- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400
++++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400
+@@ -221,7 +221,7 @@
+ src/windows/preamble_patcher.cc \
+ src/windows/preamble_patcher_with_stub.cc
+ # patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
+-libwindows_la_LIBADD = -lPsapi
++libwindows_la_LIBADD = -lpsapi
+ 
+ SPINLOCK_INCLUDES = src/base/spinlock.h \
+ src/base/spinlock_internal.h \
+@@ -238,6 +238,7 @@
+ noinst_LTLIBRARIES += libspinlock.la
+ libspinlock_la_SOURCES = src/base/spinlock.cc \
+ src/base/spinlock_internal.cc \
++ src/base/atomicops-internals-x86.cc \
+ $(SPINLOCK_INCLUDES)
+ 
+ LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la
+@@ -355,7 +356,7 @@
+ $(STACKTRACE_INCLUDES)
+ libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK)
+ STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)'
+-libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS)
++libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS)
+ 
+ ### Unittests
+ TESTS += stacktrace_unittest
+@@ -468,7 +469,7 @@
+ -DNO_HEAP_CHECK \
+ $(PTHREAD_CFLAGS) -DNDEBUG \
+ $(AM_CXXFLAGS) $(NO_EXCEPTIONS)
+-libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
++libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
+ libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
+ 
+ lib_LTLIBRARIES += libtcmalloc_minimal.la
+@@ -477,7 +478,7 @@
+ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
+ $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
+ # -version-info gets passed to libtool
+-libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
++libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS)
+ libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
+ 
+ # For windows, we're playing around with trying to do some stacktrace
+@@ -539,6 +540,12 @@
+ tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+ tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+ 
++TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest
++tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
++tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) ++tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) ++tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) ++ + # This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc) + # In theory this should work under mingw, but mingw has trouble running + # shell scripts that end in .exe. And it doesn't seem to build shared +@@ -898,8 +905,16 @@ + + ### Unittests + +-TESTS += tcmalloc_unittest +-TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \ ++TESTS += tcmalloc_unittest.sh$(EXEEXT) ++tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh ++noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES) ++tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \ ++ tcmalloc_unittest ++ rm -f $@ ++ cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@ ++ ++noinst_PROGRAMS += tcmalloc_unittest ++tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \ + src/gperftools/malloc_extension.h + tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ + src/tcmalloc.h \ +@@ -956,6 +971,12 @@ + tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) + tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS) + ++TESTS += tcmalloc_large_heap_fragmentation_unittest ++tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc ++tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) ++tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) ++tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS) ++ + TESTS += raw_printer_test + raw_printer_test_SOURCES = src/tests/raw_printer_test.cc + raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) +Only in gperftools-2.0: Makefile.am.svn-r190 +Only in gperftools-2.0: Makefile.in +Only in gperftools-2.0: Makefile.in.svn-r190 +Only in gperftools-2.0: missing +Only in gperftools-2.0: mkinstalldirs +Only in gperftools-2.0: NEWS.svn-r190 +diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h +--- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500 ++++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400 +@@ -50,6 +50,16 @@ + // implementations on other archtectures will cause your code to break. If you + // do not know what you are doing, avoid these routines, and use a Mutex. + // ++// These following lower-level operations are typically useful only to people ++// implementing higher-level synchronization operations like spinlocks, ++// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or ++// a store with appropriate memory-ordering instructions. "Acquire" operations ++// ensure that no later memory access can be reordered ahead of the operation. ++// "Release" operations ensure that no previous memory access can be reordered ++// after the operation. "Barrier" operations have both "Acquire" and "Release" ++// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory ++// access. ++// + // It is incorrect to make direct assignments to/from an atomic variable. + // You should use one of the Load or Store routines. 
The NoBarrier + // versions are provided when no barriers are needed: +@@ -95,10 +105,10 @@ + #include "base/atomicops-internals-arm-v6plus.h" + #elif defined(ARMV3) + #include "base/atomicops-internals-arm-generic.h" +-#elif defined(_WIN32) +-#include "base/atomicops-internals-windows.h" + #elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) + #include "base/atomicops-internals-x86.h" ++#elif defined(_WIN32) ++#include "base/atomicops-internals-windows.h" + #elif defined(__linux__) && defined(__PPC__) + #include "base/atomicops-internals-linuxppc.h" + #else +@@ -149,6 +159,18 @@ + reinterpret_cast(ptr), new_value); + } + ++AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, ++ AtomicWord new_value) { ++ return Acquire_AtomicExchange( ++ reinterpret_cast(ptr), new_value); ++} ++ ++AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, ++ AtomicWord new_value) { ++ return Release_AtomicExchange( ++ reinterpret_cast(ptr), new_value); ++} ++ + // Atomically increment *ptr by "increment". Returns the new value of + // *ptr with the increment applied. This routine implies no memory + // barriers. +@@ -164,17 +186,6 @@ + reinterpret_cast(ptr), increment); + } + +-// ------------------------------------------------------------------------ +-// These following lower-level operations are typically useful only to people +-// implementing higher-level synchronization operations like spinlocks, +-// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or +-// a store with appropriate memory-ordering instructions. "Acquire" operations +-// ensure that no later memory access can be reordered ahead of the operation. +-// "Release" operations ensure that no previous memory access can be reordered +-// after the operation. "Barrier" operations have both "Acquire" and "Release" +-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +-// access. 
+-// ------------------------------------------------------------------------ + inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, + AtomicWord old_value, + AtomicWord new_value) { +@@ -250,6 +261,8 @@ + Atomic32 old_value, + Atomic32 new_value); + Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); ++Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); ++Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); + Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment); + Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment); +@@ -271,6 +284,8 @@ + Atomic64 old_value, + Atomic64 new_value); + Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); ++Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); ++Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); + Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); + Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); + +diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h +--- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500 ++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400 +@@ -89,6 +89,18 @@ + return old_value; + } + ++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ ++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + for (;;) { +@@ -176,6 +188,18 @@ + return 0; + } + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics. 
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h
+--- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400
+@@ -94,6 +94,28 @@
+ return old;
+ }
+ 
++inline void MemoryBarrier() {
++#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)
++ uint32_t dest = 0;
++ __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
++#else
++ __asm__ __volatile__("dmb" : : : "memory");
++#endif
++}
++
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -110,10 +132,6 @@
+ return res;
+ }
+ 
+-inline void MemoryBarrier() {
+- __asm__ __volatile__("dmb" : : : "memory");
+-}
+-
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -220,6 +238,19 @@
+ return old;
+ }
+ 
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ int store_failed;
+@@ -303,6 +334,18 @@
+ return 0;
+ }
+ 
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Acquire_AtomicExchange");
++ return 0;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Release_AtomicExchange");
++ return 0;
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h
+--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400
++++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400
+@@ -163,6 +163,26 @@
+ return old_value;
+ }
+ 
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
++ const_cast<Atomic32*>(ptr)));
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while
(!OSAtomicCompareAndSwap32Release(old_value, new_value, ++ const_cast(ptr))); ++ return old_value; ++} ++ + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast(ptr)); +@@ -294,6 +314,26 @@ + return old_value; + } + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, ++ Atomic64 new_value) { ++ Atomic64 old_value; ++ do { ++ old_value = *ptr; ++ } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value, ++ const_cast(ptr))); ++ return old_value; ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, ++ Atomic64 new_value) { ++ Atomic64 old_value; ++ do { ++ old_value = *ptr; ++ } while (!OSAtomicCompareAndSwap64Release(old_value, new_value, ++ const_cast(ptr))); ++ return old_value; ++} ++ + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64(increment, const_cast(ptr)); +Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190 +diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h +--- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500 ++++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400 +@@ -132,6 +132,21 @@ + return old_value; + } + ++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, ++ Atomic32 new_value) { ++ Atomic32 old_value; ++ do { ++ old_value = *ptr; ++ } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value, ++ const_cast(ptr))); ++ return old_value; ++} ++ ++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, ++ Atomic32 new_value) { ++ return Acquire_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, + Atomic32 increment) { + return OSAtomicAdd32(increment, const_cast(ptr)); +@@ -217,6 +232,21 @@ + return old_value; + } + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, ++ Atomic64 new_value) { ++ Atomic64 old_value; ++ do { ++ old_value = *ptr; ++ } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value, ++ const_cast(ptr))); ++ return old_value; ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, ++ Atomic64 new_value) { ++ return Acquire_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, + Atomic64 increment) { + return OSAtomicAdd64(increment, const_cast(ptr)); +diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h +--- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400 ++++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400 +@@ -137,6 +137,18 @@ + return static_cast(result); + } + ++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ // FastInterlockedExchange has both acquire and release memory barriers. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ ++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ // FastInterlockedExchange has both acquire and release memory barriers. 
++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + return FastInterlockedExchangeAdd( +@@ -188,8 +200,7 @@ + } + + inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { +- NoBarrier_AtomicExchange(ptr, value); +- // acts as a barrier in this implementation ++ Acquire_AtomicExchange(ptr, value); + } + + inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { +@@ -478,6 +489,18 @@ + #endif // defined(_WIN64) || defined(__MINGW64__) + + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ // FastInterlockedExchange has both acquire and release memory barriers. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ // FastInterlockedExchange has both acquire and release memory barriers. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, + Atomic64 old_value, + Atomic64 new_value) { +Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190 +diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h +--- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500 ++++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400 +@@ -89,6 +89,21 @@ + return new_value; // Now it's the previous value. + } + ++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); ++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { ++ __asm__ __volatile__("lfence" : : : "memory"); ++ } ++ return old_val; ++} ++ ++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, ++ Atomic32 new_value) { ++ // xchgl already has release memory barrier semantics. ++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, + Atomic32 increment) { + Atomic32 temp = increment; +@@ -152,7 +167,7 @@ + __asm__ __volatile__("mfence" : : : "memory"); + } else { // mfence is faster but not present on PIII + Atomic32 x = 0; +- NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII ++ Acquire_AtomicExchange(&x, 0); + } + } + +@@ -161,8 +176,7 @@ + *ptr = value; + __asm__ __volatile__("mfence" : : : "memory"); + } else { +- NoBarrier_AtomicExchange(ptr, value); +- // acts as a barrier on PIII ++ Acquire_AtomicExchange(ptr, value); + } + } + #endif +@@ -213,6 +227,21 @@ + return new_value; // Now it's the previous value. + } + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); ++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { ++ __asm__ __volatile__("lfence" : : : "memory"); ++ } ++ return old_val; ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_value) { ++ // xchgq already has release memory barrier semantics. 
++ return NoBarrier_AtomicExchange(ptr, new_value); ++} ++ + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + Atomic64 temp = increment; +@@ -334,6 +363,20 @@ + return old_val; + } + ++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_val) { ++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); ++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { ++ __asm__ __volatile__("lfence" : : : "memory"); ++ } ++ return old_val; ++} ++ ++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, ++ Atomic64 new_val) { ++ return NoBarrier_AtomicExchange(ptr, new_val); ++} ++ + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, + Atomic64 increment) { + Atomic64 old_val, new_val; +diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h +--- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400 ++++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400 +@@ -334,10 +334,13 @@ + #if defined(HAVE___ATTRIBUTE__) + # if (defined(__i386__) || defined(__x86_64__)) + # define CACHELINE_ALIGNED __attribute__((aligned(64))) +-# elif defined(__arm__) +-# define CACHELINE_ALIGNED __attribute__((aligned(32))) + # elif (defined(__PPC__) || defined(__PPC64__)) + # define CACHELINE_ALIGNED __attribute__((aligned(16))) ++# elif (defined(__arm__)) ++# define CACHELINE_ALIGNED __attribute__((aligned(64))) ++ // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned ++# else ++# error Could not determine cache line length - unknown architecture + # endif + #else + # define CACHELINE_ALIGNED +Only in gperftools-2.0/src/base: basictypes.h.svn-r190 +Only in gperftools-2.0/src/base: cycleclock.h.svn-r190 +diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h +--- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400 ++++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400 +@@ -148,6 +148,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -404,24 +406,24 @@ + }; + #elif defined(__x86_64__) + struct kernel_stat { +- unsigned long st_dev; +- unsigned long st_ino; +- unsigned long st_nlink; ++ uint64_t st_dev; ++ uint64_t st_ino; ++ uint64_t st_nlink; + unsigned st_mode; + unsigned st_uid; + unsigned st_gid; + unsigned __pad0; +- unsigned long st_rdev; +- long st_size; +- long st_blksize; +- long st_blocks; +- unsigned long st_atime_; +- unsigned long st_atime_nsec_; +- unsigned long st_mtime_; +- unsigned long st_mtime_nsec_; +- unsigned long st_ctime_; +- unsigned long st_ctime_nsec_; +- long __unused[3]; ++ uint64_t st_rdev; ++ int64_t st_size; ++ int64_t st_blksize; ++ int64_t st_blocks; ++ uint64_t st_atime_; ++ uint64_t st_atime_nsec_; ++ uint64_t st_mtime_; ++ uint64_t st_mtime_nsec_; ++ uint64_t st_ctime_; ++ uint64_t st_ctime_nsec_; ++ int64_t __unused[3]; + }; + #elif defined(__PPC__) + struct kernel_stat { +@@ -1013,74 +1015,141 @@ + * location (e.g. when using the clone() system call with the CLONE_VM + * option). + */ ++ #undef LSS_ENTRYPOINT ++ #define LSS_ENTRYPOINT "syscall\n" ++ ++ /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. ++ * We need to explicitly cast to an unsigned 64 bit type to avoid implicit ++ * sign extension. 
We can't cast pointers directly because those are ++ * 32 bits, and gcc will dump ugly warnings about casting from a pointer ++ * to an integer of a different size. ++ */ ++ #undef LSS_SYSCALL_ARG ++ #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) ++ #undef _LSS_RETURN ++ #define _LSS_RETURN(type, res, cast) \ ++ do { \ ++ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ ++ LSS_ERRNO = -(res); \ ++ res = -1; \ ++ } \ ++ return (type)(cast)(res); \ ++ } while (0) ++ #undef LSS_RETURN ++ #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) ++ ++ #undef _LSS_BODY ++ #define _LSS_BODY(nr, type, name, cast, ...) \ ++ long long __res; \ ++ __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ ++ : "=a" (__res) \ ++ : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ ++ : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ ++ _LSS_RETURN(type, __res, cast) + #undef LSS_BODY +- #define LSS_BODY(type,name, ...) \ +- long __res; \ +- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ +- ##__VA_ARGS__ : "r11", "rcx", "memory"); \ +- LSS_RETURN(type, __res) ++ #define LSS_BODY(nr, type, name, args...) \ ++ _LSS_BODY(nr, type, name, uintptr_t, ## args) ++ ++ #undef LSS_BODY_ASM0 ++ #undef LSS_BODY_ASM1 ++ #undef LSS_BODY_ASM2 ++ #undef LSS_BODY_ASM3 ++ #undef LSS_BODY_ASM4 ++ #undef LSS_BODY_ASM5 ++ #undef LSS_BODY_ASM6 ++ #define LSS_BODY_ASM0 ++ #define LSS_BODY_ASM1 LSS_BODY_ASM0 ++ #define LSS_BODY_ASM2 LSS_BODY_ASM1 ++ #define LSS_BODY_ASM3 LSS_BODY_ASM2 ++ #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" ++ #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" ++ #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" ++ ++ #undef LSS_BODY_CLOBBER0 ++ #undef LSS_BODY_CLOBBER1 ++ #undef LSS_BODY_CLOBBER2 ++ #undef LSS_BODY_CLOBBER3 ++ #undef LSS_BODY_CLOBBER4 ++ #undef LSS_BODY_CLOBBER5 ++ #undef LSS_BODY_CLOBBER6 ++ #define LSS_BODY_CLOBBER0 ++ #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 ++ #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 ++ #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 ++ #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", ++ #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", ++ #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", ++ ++ #undef LSS_BODY_ARG0 ++ #undef LSS_BODY_ARG1 ++ #undef LSS_BODY_ARG2 ++ #undef LSS_BODY_ARG3 ++ #undef LSS_BODY_ARG4 ++ #undef LSS_BODY_ARG5 ++ #undef LSS_BODY_ARG6 ++ #define LSS_BODY_ARG0() ++ #define LSS_BODY_ARG1(arg1) \ ++ LSS_BODY_ARG0(), "D" (arg1) ++ #define LSS_BODY_ARG2(arg1, arg2) \ ++ LSS_BODY_ARG1(arg1), "S" (arg2) ++ #define LSS_BODY_ARG3(arg1, arg2, arg3) \ ++ LSS_BODY_ARG2(arg1, arg2), "d" (arg3) ++ #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ ++ LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) ++ #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ ++ LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) ++ #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ ++ LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) ++ + #undef _syscall0 + #define _syscall0(type,name) \ + type LSS_NAME(name)() { \ +- LSS_BODY(type, name); \ ++ LSS_BODY(0, type, name); \ + } + #undef _syscall1 + #define _syscall1(type,name,type1,arg1) \ + type LSS_NAME(name)(type1 arg1) { \ +- LSS_BODY(type, name, "D" ((long)(arg1))); \ ++ LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ + } + #undef _syscall2 + #define _syscall2(type,name,type1,arg1,type2,arg2) \ + type LSS_NAME(name)(type1 arg1, type2 arg2) { \ +- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ ++ LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), 
LSS_SYSCALL_ARG(arg2));\ + } + #undef _syscall3 + #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ +- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ +- "d" ((long)(arg3))); \ ++ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ ++ LSS_SYSCALL_ARG(arg3)); \ + } + #undef _syscall4 + #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ +- long __res; \ +- __asm__ __volatile__("movq %5,%%r10; syscall" : \ +- "=a" (__res) : "0" (__NR_##name), \ +- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ +- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \ +- LSS_RETURN(type, __res); \ ++ LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ ++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ + } + #undef _syscall5 + #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5) { \ +- long __res; \ +- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ +- "=a" (__res) : "0" (__NR_##name), \ +- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ +- "r" ((long)(arg4)), "r" ((long)(arg5)) : \ +- "r8", "r10", "r11", "rcx", "memory"); \ +- LSS_RETURN(type, __res); \ ++ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ ++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ ++ LSS_SYSCALL_ARG(arg5)); \ + } + #undef _syscall6 + #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ + type5,arg5,type6,arg6) \ + type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ + type5 arg5, type6 arg6) { \ +- long __res; \ +- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ +- "syscall" : \ +- "=a" (__res) : "0" (__NR_##name), \ +- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ +- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ +- "r8", "r9", "r10", "r11", "rcx", "memory"); \ +- LSS_RETURN(type, __res); \ ++ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ ++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ ++ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ + } + LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, + int flags, void *arg, int *parent_tidptr, + void *newtls, int *child_tidptr) { +- long __res; ++ long long __res; + { + __asm__ __volatile__(/* if (fn == NULL) + * return -EINVAL; +@@ -1145,8 +1214,13 @@ + "1:\n" + : "=a" (__res) + : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), +- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), +- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr) ++ "r"(LSS_SYSCALL_ARG(fn)), ++ "S"(LSS_SYSCALL_ARG(child_stack)), ++ "D"(LSS_SYSCALL_ARG(flags)), ++ "r"(LSS_SYSCALL_ARG(arg)), ++ "d"(LSS_SYSCALL_ARG(parent_tidptr)), ++ "r"(LSS_SYSCALL_ARG(newtls)), ++ "r"(LSS_SYSCALL_ARG(child_tidptr)) + : "rsp", "memory", "r8", "r10", "r11", "rcx"); + } + LSS_RETURN(int, __res); +@@ -1159,7 +1233,7 @@ + * Unfortunately, we cannot just reference the glibc version of this + * function, as glibc goes out of its way to make it inaccessible. 
+ */ +- void (*res)(void); ++ long long res; + __asm__ __volatile__("call 2f\n" + "0:.align 16\n" + "1:movq %1,%%rax\n" +@@ -1168,7 +1242,7 @@ + "addq $(1b-0b),%0\n" + : "=a" (res) + : "i" (__NR_rt_sigreturn)); +- return res; ++ return (void (*)(void))(uintptr_t)res; + } + #elif defined(__arm__) + /* Most definitions of _syscallX() neglect to mark "memory" as being +@@ -1797,8 +1871,16 @@ + LSS_INLINE _syscall0(pid_t, _gettid) + LSS_INLINE _syscall2(int, kill, pid_t, p, + int, s) +- LSS_INLINE _syscall3(off_t, lseek, int, f, +- off_t, o, int, w) ++ #if defined(__x86_64__) ++ /* Need to make sure off_t isn't truncated to 32-bits under x32. */ ++ LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { ++ _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), ++ LSS_SYSCALL_ARG(w)); ++ } ++ #else ++ LSS_INLINE _syscall3(off_t, lseek, int, f, ++ off_t, o, int, w) ++ #endif + LSS_INLINE _syscall2(int, munmap, void*, s, + size_t, l) + LSS_INLINE _syscall5(void*, _mremap, void*, o, +@@ -1835,10 +1917,13 @@ + int, t, int, p) + #endif + #if defined(__x86_64__) +- LSS_INLINE _syscall6(void*, mmap, void*, s, +- size_t, l, int, p, +- int, f, int, d, +- __off64_t, o) ++ /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */ ++ LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, ++ __off64_t o) { ++ LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), ++ LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), ++ LSS_SYSCALL_ARG(d), (uint64_t)(o)); ++ } + + LSS_INLINE int LSS_NAME(sigaction)(int signum, + const struct kernel_sigaction *act, +Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190 +Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190 +diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h +--- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500 ++++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400 +@@ -31,11 +31,6 @@ + * Author: Sanjay Ghemawat + */ + +-// +-// Fast spinlocks (at least on x86, a lock/unlock pair is approximately +-// half the cost of a Mutex because the unlock just does a store instead +-// of a compare-and-swap which is expensive). +- + // SpinLock is async signal safe. + // If used within a signal handler, all lock holders + // should block the signal even outside the signal handler. +@@ -95,10 +90,9 @@ + // TODO(csilvers): uncomment the annotation when we figure out how to + // support this macro with 0 args (see thread_annotations.h) + inline void Unlock() /*UNLOCK_FUNCTION()*/ { +- uint64 wait_cycles = +- static_cast(base::subtle::NoBarrier_Load(&lockword_)); + ANNOTATE_RWLOCK_RELEASED(this, 1); +- base::subtle::Release_Store(&lockword_, kSpinLockFree); ++ uint64 wait_cycles = static_cast( ++ base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree)); + if (wait_cycles != kSpinLockHeld) { + // Collect contentionz profile info, and speed the wakeup of any waiter. 
+ // The wait_cycles value indicates how long this thread spent waiting
+Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190
+Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190
+diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h
+--- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400
+@@ -38,7 +38,7 @@
+ #include 
+ #if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+ #include <windows.h> // for DWORD
+-#include <TlHelp32.h> // for CreateToolhelp32Snapshot
++#include <tlhelp32.h> // for CreateToolhelp32Snapshot
+ #endif
+ #ifdef HAVE_UNISTD_H
+ #include <unistd.h> // for pid_t
+diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h
+--- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400
+@@ -79,6 +79,16 @@
+ // page full of 5-byte objects would have 2 bytes memory overhead).
+ size_t OverheadBytes();
+ 
++ // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
++ // to set the lock in a consistent state before the fork.
++ void Lock() {
++ lock_.Lock();
++ }
++
++ void Unlock() {
++ lock_.Unlock();
++ }
++
+ private:
+ // TransferCache is used to cache transfers of
+ // sizemap.num_objects_to_move(size_class) back and forth between
+diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc
+--- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400
+@@ -30,12 +30,32 @@
+ // ---
+ // Author: Sanjay Ghemawat
+ 
++#include <stdlib.h> // for getenv and strtol
+ #include "config.h"
+ #include "common.h"
+ #include "system-alloc.h"
++#include "base/spinlock.h"
+ 
+ namespace tcmalloc {
+ 
++// Define the maximum number of objects per class type to transfer between
++// thread and central caches.
++static int32 FLAGS_tcmalloc_transfer_num_objects;
++
++static const int32 kDefaultTransferNumObjecs = 32768;
++
++// The init function is provided to explicitly initialize the variable value
++// from the env. var to avoid C++ global construction that might defer its
++// initialization until after a malloc/new call.
++static inline void InitTCMallocTransferNumObjects()
++{
++ if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
++ const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ");
++ FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
++ strtol(envval, NULL, 10);
++ }
++}
++
+ // Note: the following only works for "n"s that fit in 32-bits, but
+ // that is fine since we only use it for small sizes.
+ static inline int LgFloor(size_t n) {
+@@ -90,13 +110,16 @@
+ // - We go to the central freelist too often and we have to acquire
+ // its lock each time.
+ // This value strikes a balance between the constraints above.
+- if (num > 32) num = 32;
++ if (num > FLAGS_tcmalloc_transfer_num_objects)
++ num = FLAGS_tcmalloc_transfer_num_objects;
+ 
+ return num;
+ }
+ 
+ // Initialize the mapping arrays
+ void SizeMap::Init() {
++ InitTCMallocTransferNumObjects();
++
+ // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
+ if (ClassIndex(0) < 0) {
+ Log(kCrash, __FILE__, __LINE__,
+@@ -189,12 +212,56 @@
+ 
+ // Metadata allocator -- keeps stats about how many bytes allocated.
+ static uint64_t metadata_system_bytes_ = 0;
++static const size_t kMetadataAllocChunkSize = 8*1024*1024;
++static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
++// usually malloc uses larger alignments, but because metadata cannot
++// have any fancy SIMD types, aligning on pointer size seems fine
++static const size_t kMetadataAllignment = sizeof(void *);
++
++static char *metadata_chunk_alloc_;
++static size_t metadata_chunk_avail_;
++
++static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
++
+ void* MetaDataAlloc(size_t bytes) {
+- void* result = TCMalloc_SystemAlloc(bytes, NULL);
+- if (result != NULL) {
+- metadata_system_bytes_ += bytes;
++ if (bytes >= kMetadataAllocChunkSize) {
++ void *rv = TCMalloc_SystemAlloc(bytes,
++ NULL, kMetadataAllignment);
++ if (rv != NULL) {
++ metadata_system_bytes_ += bytes;
++ }
++ return rv;
+ }
+- return result;
++
++ SpinLockHolder h(&metadata_alloc_lock);
++
++ // the following works by essentially turning the address into an
++ // integer and negating it modulo kMetadataAllignment; i.e. the
++ // negated value + the original value is 0 modulo kMetadataAllignment,
++ // which is what we want. Note, we negate before masking the higher
++ // bits off, otherwise we'd have to mask them off after negation anyway.
++ intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
++
++ if (metadata_chunk_avail_ < bytes + alignment) {
++ size_t real_size;
++ void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
++ &real_size, kMetadataAllignment);
++ if (ptr == NULL) {
++ return NULL;
++ }
++
++ metadata_chunk_alloc_ = static_cast<char *>(ptr);
++ metadata_chunk_avail_ = real_size;
++
++ alignment = 0;
++ }
++
++ void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
++ bytes += alignment;
++ metadata_chunk_alloc_ += bytes;
++ metadata_chunk_avail_ -= bytes;
++ metadata_system_bytes_ += bytes;
++ return rv;
+ }
+ 
+ uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
+Only in gperftools-2.0/src: common.cc.svn-r190
+diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h
+--- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400
+@@ -80,7 +80,7 @@
+ static const size_t kMinAlign = 16;
+ #elif defined(TCMALLOC_ALIGN_8BYTES)
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 93;
++static const size_t kNumClasses = 95;
+ // Unless we force to use 8 bytes alignment we use an alignment of
+ // at least 16 bytes to statisfy requirements for some SSE types.
+ // Keep in mind when using the 16 bytes alignment you can have a space
+@@ -88,7 +88,7 @@
+ static const size_t kMinAlign = 8;
+ #else
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 86;
++static const size_t kNumClasses = 88;
+ static const size_t kMinAlign = 16;
+ #endif
+ static const size_t kMaxThreadCacheSize = 4 << 20;
+Only in gperftools-2.0/src: common.h.svn-r190
+diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in
+--- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400
+@@ -56,6 +56,9 @@
+ /* Define to 1 if you have the <features.h> header file. */
+ #undef HAVE_FEATURES_H
+ 
++/* Define to 1 if you have the `fork' function. */
++#undef HAVE_FORK
++
+ /* Define to 1 if you have the `geteuid' function. */
+ #undef HAVE_GETEUID
+ 
+Only in gperftools-2.0/src: config.h.in.svn-r190
+Only in gperftools-2.0/src: debugallocation.cc.svn-r190
+Only in gperftools-2.0/src: getpc.h.svn-r190
+Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190
+Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190
+Only in gperftools-2.0/src: heap-checker.cc.svn-r190
+Only in gperftools-2.0/src: heap-profiler.cc.svn-r190
+Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190
+Only in gperftools-2.0/src: malloc_extension.cc.svn-r190
+Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190
+Only in gperftools-2.0/src: memory_region_map.cc.svn-r190
+diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc
+--- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400
++++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400
+@@ -108,6 +108,8 @@
+ return AllocLarge(n); // May be NULL
+ }
+ 
++static const size_t kForcedCoalesceInterval = 128*1024*1024;
++
+ Span* PageHeap::New(Length n) {
+ ASSERT(Check());
+ ASSERT(n > 0);
+@@ -116,6 +118,38 @@
+ if (result != NULL)
+ return result;
+ 
++ if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
++ && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
++ && (stats_.system_bytes / kForcedCoalesceInterval
++ != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
++ // We're about to grow the heap, but there are lots of free pages.
++ // tcmalloc's design decision to keep unmapped and free spans
++ // separately and never coalesce them means that sometimes there
++ // can be a free span of sufficient size, but it consists of
++ // "segments" of different types so the page heap search cannot
++ // find it. In order to prevent growing the heap and wasting memory
++ // in such cases we're going to unmap all free pages, so that all
++ // free spans are maximally coalesced.
++ //
++ // We're also limiting the 'rate' of going into this path to be at
++ // most once per 128 megs of heap growth. Otherwise programs that
++ // grow the heap frequently (and that means by small amounts) could
++ // be penalized with a higher count of minor page faults.
++ //
++ // See also large_heap_fragmentation_unittest.cc and
++ // https://code.google.com/p/gperftools/issues/detail?id=368
++ ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
++
++ // then try again. If we are forced to grow the heap because of
++ // large-span fragmentation and not because of the problem described
++ // above, then at the very least we've just unmapped the free but
++ // insufficiently big large spans back to the OS. So in case of
++ // really unlucky memory fragmentation we'll be consuming virtual
++ // address space, but not real memory
++ result = SearchFreeAndLargeLists(n);
++ if (result != NULL) return result;
++ }
++
+ // Grow the heap and try again.
+ if (!GrowHeap(n)) {
+ ASSERT(Check());
+Only in gperftools-2.0/src: page_heap.cc.svn-r190
+Only in gperftools-2.0/src: page_heap.h.svn-r190
+Only in gperftools-2.0/src: pprof.svn-r190
+Only in gperftools-2.0/src: profiler.cc.svn-r190
+diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc
+--- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400
+@@ -39,6 +39,39 @@
+ 
+ namespace tcmalloc {
+ 
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++// The following two functions are registered via pthread_atfork to make
++// sure the central_cache locks remain in a consistent state in the forked
++// version of the thread.
++
++static
++void CentralCacheLockAll()
++{
++ Static::pageheap_lock()->Lock();
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Lock();
++}
++
++static
++void CentralCacheUnlockAll()
++{
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Unlock();
++ Static::pageheap_lock()->Unlock();
++}
++#endif
++
++static inline
++void SetupAtForkLocksHandler()
++{
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++ pthread_atfork(CentralCacheLockAll, // parent calls before fork
++ CentralCacheUnlockAll, // parent calls after fork
++ CentralCacheUnlockAll); // child calls after fork
++#endif
++}
++
++
+ SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
+ SizeMap Static::sizemap_;
+ CentralFreeListPadded Static::central_cache_[kNumClasses];
+@@ -49,6 +82,7 @@
+ StackTrace* Static::growth_stacks_ = NULL;
+ PageHeap* Static::pageheap_ = NULL;
+ 
++
+ void Static::InitStaticVars() {
+ sizemap_.Init();
+ span_allocator_.Init();
+@@ -61,6 +95,8 @@
+ for (int i = 0; i < kNumClasses; ++i) {
+ central_cache_[i].Init(i);
+ }
++ SetupAtForkLocksHandler();
++
+ // It's important to have PageHeap allocated, not in static storage,
+ // so that HeapLeakChecker does not consider all the byte patterns stored
+ // in is caches as pointers that are sources of heap object liveness,
+Only in gperftools-2.0/src: static_vars.h.svn-r190
+Only in gperftools-2.0/src: symbolize.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.h.svn-r190
+Only in gperftools-2.0/src: tcmalloc.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc
+--- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400
+@@ -38,13 +38,14 @@
+ #define GG_ULONGLONG(x) static_cast(x)
+ 
+ template <class AtomicType>
+-static void TestAtomicIncrement() {
++static void TestAtomicIncrement(AtomicType (*atomic_increment_func)
++ (volatile AtomicType*, AtomicType)) {
+ // For now, we just test single threaded execution
+ 
+- // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
++ // use a guard value to make sure the atomic_increment_func doesn't go
+ // outside the expected address bounds. This is in particular to
+ // test that some future change to the asm code doesn't cause the
+- // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit
++ // 32-bit atomic_increment_func doesn't do the wrong thing on 64-bit
+ // machines.
+ struct { + AtomicType prev_word; +@@ -60,47 +61,47 @@ + s.count = 0; + s.next_word = next_word_value; + +- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1)); ++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1)); + ASSERT_EQ(1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2)); ++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2)); + ASSERT_EQ(3, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3)); ++ ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3)); + ASSERT_EQ(6, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3)); ++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3)); + ASSERT_EQ(3, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2)); ++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2)); + ASSERT_EQ(1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); ++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1)); + ASSERT_EQ(0, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); ++ ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1)); + ASSERT_EQ(-1, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4)); ++ ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4)); + ASSERT_EQ(-5, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); + +- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5)); ++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5)); + ASSERT_EQ(0, s.count); + ASSERT_EQ(prev_word_value, s.prev_word); + ASSERT_EQ(next_word_value, s.next_word); +@@ -111,9 +112,10 @@ + + + template +-static void TestCompareAndSwap() { ++static void TestCompareAndSwap(AtomicType (*compare_and_swap_func) ++ (volatile AtomicType*, AtomicType, AtomicType)) { + AtomicType value = 0; +- AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1); ++ AtomicType prev = (*compare_and_swap_func)(&value, 0, 1); + ASSERT_EQ(1, value); + ASSERT_EQ(0, prev); + +@@ -122,21 +124,22 @@ + const AtomicType k_test_val = (GG_ULONGLONG(1) << + (NUM_BITS(AtomicType) - 2)) + 11; + value = k_test_val; +- prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5); ++ prev = (*compare_and_swap_func)(&value, 0, 5); + ASSERT_EQ(k_test_val, value); + ASSERT_EQ(k_test_val, prev); + + value = k_test_val; +- prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5); ++ prev = (*compare_and_swap_func)(&value, k_test_val, 5); + ASSERT_EQ(5, value); + ASSERT_EQ(k_test_val, prev); + } + + + template +-static void TestAtomicExchange() { ++static void TestAtomicExchange(AtomicType (*atomic_exchange_func) ++ (volatile AtomicType*, AtomicType)) { + AtomicType value = 0; +- AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1); ++ AtomicType new_value = (*atomic_exchange_func)(&value, 1); + ASSERT_EQ(1, value); 
+   ASSERT_EQ(0, new_value);
+ 
+@@ -145,28 +148,29 @@
+   const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+                                  (NUM_BITS(AtomicType) - 2)) + 11;
+   value = k_test_val;
+-  new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
++  new_value = (*atomic_exchange_func)(&value, k_test_val);
+   ASSERT_EQ(k_test_val, value);
+   ASSERT_EQ(k_test_val, new_value);
+ 
+   value = k_test_val;
+-  new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
++  new_value = (*atomic_exchange_func)(&value, 5);
+   ASSERT_EQ(5, value);
+   ASSERT_EQ(k_test_val, new_value);
+ }
+ 
+ 
+ template <class AtomicType>
+-static void TestAtomicIncrementBounds() {
++static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func)
++                                      (volatile AtomicType*, AtomicType)) {
+   // Test increment at the half-width boundary of the atomic type.
+   // It is primarily for testing at the 32-bit boundary for 64-bit atomic type.
+   AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
+   AtomicType value = test_val - 1;
+-  AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
++  AtomicType new_value = (*atomic_increment_func)(&value, 1);
+   ASSERT_EQ(test_val, value);
+   ASSERT_EQ(value, new_value);
+ 
+-  base::subtle::NoBarrier_AtomicIncrement(&value, -1);
++  (*atomic_increment_func)(&value, -1);
+   ASSERT_EQ(test_val - 1, value);
+ }
+ 
+@@ -222,16 +226,28 @@
+ 
+ template <class AtomicType>
+ static void TestAtomicOps() {
+-  TestCompareAndSwap<AtomicType>();
+-  TestAtomicExchange<AtomicType>();
+-  TestAtomicIncrementBounds<AtomicType>();
++  TestCompareAndSwap<AtomicType>(base::subtle::NoBarrier_CompareAndSwap);
++  TestCompareAndSwap<AtomicType>(base::subtle::Acquire_CompareAndSwap);
++  TestCompareAndSwap<AtomicType>(base::subtle::Release_CompareAndSwap);
++
++  TestAtomicExchange<AtomicType>(base::subtle::NoBarrier_AtomicExchange);
++  TestAtomicExchange<AtomicType>(base::subtle::Acquire_AtomicExchange);
++  TestAtomicExchange<AtomicType>(base::subtle::Release_AtomicExchange);
++
++  TestAtomicIncrementBounds<AtomicType>(
++    base::subtle::NoBarrier_AtomicIncrement);
++  TestAtomicIncrementBounds<AtomicType>(
++    base::subtle::Barrier_AtomicIncrement);
++
+   TestStore<AtomicType>();
+   TestLoad<AtomicType>();
+ }
+ 
+ int main(int argc, char** argv) {
+-  TestAtomicIncrement<AtomicWord>();
+-  TestAtomicIncrement<Atomic32>();
++  TestAtomicIncrement<AtomicWord>(base::subtle::NoBarrier_AtomicIncrement);
++  TestAtomicIncrement<AtomicWord>(base::subtle::Barrier_AtomicIncrement);
++  TestAtomicIncrement<Atomic32>(base::subtle::NoBarrier_AtomicIncrement);
++  TestAtomicIncrement<Atomic32>(base::subtle::Barrier_AtomicIncrement);
+ 
+   TestAtomicOps<AtomicWord>();
+   TestAtomicOps<Atomic32>();
+@@ -248,8 +264,10 @@
+   // If we ever *do* want to enable this, try adding -msse (or -mmmx?)
+   // to the CXXFLAGS in Makefile.am.
+ #if 0 and defined(BASE_HAS_ATOMIC64)
+-  TestAtomicIncrement<base::subtle::Atomic64>();
+-  TestAtomicOps<base::subtle::Atomic64>();
++  TestAtomicIncrement<base::subtle::Atomic64>(
++    base::subtle::NoBarrier_AtomicIncrement);
++  TestAtomicIncrement<base::subtle::Atomic64>(
++    base::subtle::Barrier_AtomicIncrement);
+ #endif
+ 
+   printf("PASS\n");
+Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190
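The atomicops_unittest hunks above all make the same move: instead of hard-coding
base::subtle::NoBarrier_* in each test body, the test takes the operation under
test as a function pointer, so one body now covers the NoBarrier, Acquire,
Release, and Barrier variants. A minimal standalone sketch of that pattern
(the Fake* names and assert are illustrative, not from the patch):

    // Sketch: parameterize one test body over the atomic op under test.
    #include <cassert>

    typedef int Atomic32;

    static Atomic32 FakeNoBarrierIncrement(volatile Atomic32* p, Atomic32 by) {
      return *p += by;  // single-threaded stand-in for the real atomic op
    }

    static Atomic32 FakeBarrierIncrement(volatile Atomic32* p, Atomic32 by) {
      return *p += by;  // a real barrier variant would add memory fences
    }

    template <class AtomicType>
    static void TestIncrement(AtomicType (*incr_fn)(volatile AtomicType*,
                                                    AtomicType)) {
      AtomicType value = 0;
      assert(incr_fn(&value, 5) == 5);  // op returns the new value
      assert(value == 5);               // and stores it
    }

    int main() {
      // One body, every variant: the same shape TestAtomicOps now has.
      TestIncrement<Atomic32>(FakeNoBarrierIncrement);
      TestIncrement<Atomic32>(FakeBarrierIncrement);
      return 0;
    }

Passing the overload set with an explicit template argument, as the patch does,
lets the compiler pick the Atomic32 or Atomic64 overload from the parameter type.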
+diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc
+--- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc	1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc	2013-06-04 10:16:58.073841689 -0400
+@@ -0,0 +1,62 @@
++// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
++// Redistribution and use in source and binary forms, with or without
++// modification, are permitted provided that the following conditions are
++// met:
++//
++//     * Redistributions of source code must retain the above copyright
++// notice, this list of conditions and the following disclaimer.
++//     * Redistributions in binary form must reproduce the above
++// copyright notice, this list of conditions and the following disclaimer
++// in the documentation and/or other materials provided with the
++// distribution.
++//     * Neither the name of Google Inc. nor the names of its
++// contributors may be used to endorse or promote products derived from
++// this software without specific prior written permission.
++//
++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++// This is a unit test for exercising fragmentation of large (over 1
++// meg) page spans. It makes sure that allocations/releases of
++// increasing memory chunks do not blow up memory
++// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368
++
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <stddef.h>
++
++#include "base/logging.h"
++#include "common.h"
++#include <gperftools/malloc_extension.h>
++
++
++int main (int argc, char** argv) {
++  for (int pass = 1; pass <= 3; pass++) {
++    size_t size = 100*1024*1024;
++    while (size < 500*1024*1024) {
++      void *ptr = malloc(size);
++      free(ptr);
++      size += 20000;
++
++      size_t heap_size = static_cast<size_t>(-1);
++      MallocExtension::instance()->GetNumericProperty("generic.heap_size",
++                                                      &heap_size);
++
++
++      CHECK_LT(heap_size, 1*1024*1024*1024);
++    }
++  }
++
++  printf("PASS\n");
++  return 0;
++}
+diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c
+--- gperftools-2.0/src/tests/malloc_extension_c_test.c	2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c	2013-06-04 10:16:58.077841689 -0400
+@@ -59,6 +59,16 @@
+   g_delete_hook_calls++;
+ }
+ 
++static
++void *forced_malloc(size_t size)
++{
++  void *rv = malloc(size);
++  if (!rv) {
++    FAIL("malloc is not supposed to fail here");
++  }
++  return rv;
++}
++
+ void TestMallocHook(void) {
+   /* TODO(csilvers): figure out why we get:
+    * E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken!
+@@ -78,8 +88,9 @@
+   if (!MallocHook_AddDeleteHook(&TestDeleteHook)) {
+     FAIL("Failed to add delete hook");
+   }
+-  free(malloc(10));
+-  free(malloc(20));
++
++  free(forced_malloc(10));
++  free(forced_malloc(20));
+   if (g_new_hook_calls != 2) {
+     FAIL("Wrong number of calls to the new hook");
+   }
+Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190
+Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190
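The fragmentation test above drives its check through
MallocExtension::instance()->GetNumericProperty(), which is the public
gperftools interface for reading allocator statistics; forced_malloc in the
C test exists for a related reason, to keep the compiler from eliding a
malloc/free pair it can prove unused. A trimmed usage sketch of the property
API (assumes the gperftools development headers are installed and the binary
is linked with -ltcmalloc):

    // Sketch: query tcmalloc's view of the heap around an alloc/free cycle.
    #include <stdio.h>
    #include <stdlib.h>
    #include <gperftools/malloc_extension.h>

    int main() {
      void* p = malloc(100 * 1024 * 1024);
      free(p);

      size_t heap_size = 0;
      // "generic.heap_size" is one of the documented property names;
      // GetNumericProperty returns false for unknown properties.
      if (MallocExtension::instance()->GetNumericProperty("generic.heap_size",
                                                          &heap_size)) {
        printf("heap_size after alloc/free cycle: %zu\n", heap_size);
      }
      return 0;
    }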
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc
+--- gperftools-2.0/src/tests/tcmalloc_unittest.cc	2013-06-04 10:20:21.147844736 -0400
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc	2013-06-04 10:16:58.073841689 -0400
+@@ -725,7 +725,7 @@
+ // Note the ... in the hook signature: we don't care what arguments
+ // the hook takes.
+ #define MAKE_HOOK_CALLBACK(hook_type)                                   \
+-  static int g_##hook_type##_calls = 0;                                 \
++  static volatile int g_##hook_type##_calls = 0;                        \
+   static void IncrementCallsTo##hook_type(...) {                        \
+     g_##hook_type##_calls++;                                            \
+   }                                                                     \
+@@ -760,7 +760,7 @@
+     CHECK((p % sizeof(void*)) == 0);
+     CHECK((p % sizeof(double)) == 0);
+ 
+-    // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES) 
++    // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
+     // alignment for large enough objects
+     if (size >= kMinAlign) {
+       CHECK((p % kMinAlign) == 0);
+Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh
+--- gperftools-2.0/src/tests/tcmalloc_unittest.sh	1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh	2013-06-04 10:16:58.075841689 -0400
+@@ -0,0 +1,68 @@
++#!/bin/sh
++
++# Copyright (c) 2013, Google Inc.
++# All rights reserved.
++#
++# Redistribution and use in source and binary forms, with or without
++# modification, are permitted provided that the following conditions are
++# met:
++#
++#     * Redistributions of source code must retain the above copyright
++# notice, this list of conditions and the following disclaimer.
++#     * Redistributions in binary form must reproduce the above
++# copyright notice, this list of conditions and the following disclaimer
++# in the documentation and/or other materials provided with the
++# distribution.
++#     * Neither the name of Google Inc. nor the names of its
++# contributors may be used to endorse or promote products derived from
++# this software without specific prior written permission.
++#
++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++# ---
++# Author: Adhemerval Zanella
++#
++# Runs the tcmalloc_unittest with various environment variables.
++# This is necessary because tuning some environment variables
++# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program
++# behavior, just performance.
++
++BINDIR="${BINDIR:-.}"
++TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest"
++
++TMPDIR=/tmp/tcmalloc_unittest
++rm -rf $TMPDIR || exit 2
++mkdir $TMPDIR || exit 3
++
++# $1: value of tcmalloc_unittest env. var.
++run_check_transfer_num_obj() {
++    [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1"
++
++    echo -n "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... "
++    if $TCMALLOC_UNITTEST > $TMPDIR/output 2>&1; then
++        echo "OK"
++    else
++        echo "FAILED"
++        echo "Output from the failed run:"
++        echo "----"
++        cat $TMPDIR/output
++        echo "----"
++        exit 4
++    fi
++}
++
++run_check_transfer_num_obj ""
++run_check_transfer_num_obj "40"
++run_check_transfer_num_obj "4096"
++
++echo "PASS"
+Only in gperftools-2.0/src: thread_cache.cc.svn-r190
+Only in gperftools-2.0/src: thread_cache.h.svn-r190
+diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h
+--- gperftools-2.0/src/windows/mingw.h	2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/mingw.h	2013-06-04 10:16:57.682841683 -0400
+@@ -60,6 +60,8 @@
+ // pretend the pthreads wrapper doesn't exist, even when it does.
+ #undef HAVE_PTHREAD
+ 
++#define HAVE_PID_T
++
+ #include "windows/port.h"
+ 
+ #endif /* __MINGW32__ */
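The HAVE_PID_T define added to mingw.h above is one half of a standard
feature-macro handshake: the MinGW header announces that the toolchain already
provides pid_t, and port.h (see the diff below) only supplies its fallback
typedef when nobody else has. A self-contained illustration of the pattern,
using a deliberately hypothetical PROVIDER_HAS_HANDLE / my_handle_t pair so
the example cannot collide with real system headers:

    // Sketch of the guard pattern used by mingw.h / port.h.
    #include <cstdio>

    // A "provider" header (MinGW's toolchain headers, in the real case)
    // supplies the type and announces it:
    #define PROVIDER_HAS_HANDLE
    typedef long my_handle_t;       // the provider's definition

    // The portability layer (windows/port.h, in the real case) only
    // fills the gap when the provider stayed silent:
    #ifndef PROVIDER_HAS_HANDLE
    typedef int my_handle_t;        // fallback for toolchains lacking it
    #endif

    int main() {
      my_handle_t h = 0;            // compiles with either definition
      std::printf("%ld\n", static_cast<long>(h));
      return 0;
    }

Without the guard, MinGW builds would hit a conflicting redefinition of pid_t,
since its <sys/types.h> already defines one.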
+diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc
+--- gperftools-2.0/src/windows/patch_functions.cc	2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/patch_functions.cc	2013-06-04 10:16:57.683841683 -0400
+@@ -85,7 +85,7 @@
+ #include
+ #include
+ #include <malloc.h>       // for _msize and _expand
+-#include <Psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
++#include <psapi.h>        // for EnumProcessModules, GetModuleInformation, etc.
+ #include
+ #include
+ #include
+Only in gperftools-2.0/src/windows: port.cc.svn-r190
+diff -urP gperftools-2.0/src/windows/port.h gperftools-2.0-svn218/src/windows/port.h
+--- gperftools-2.0/src/windows/port.h	2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/port.h	2013-06-04 10:16:57.683841683 -0400
+@@ -390,7 +390,10 @@
+ 
+ /* ----------------------------------- SYSTEM/PROCESS */
+ 
++#ifndef HAVE_PID_T
+ typedef int pid_t;
++#endif
++
+ #if __STDC__ && !defined(__MINGW32__)
+ inline pid_t getpid(void) { return _getpid(); }
+ #endif
+diff -urP gperftools-2.0/src/windows/preamble_patcher.cc gperftools-2.0-svn218/src/windows/preamble_patcher.cc
+--- gperftools-2.0/src/windows/preamble_patcher.cc	2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher.cc	2013-06-04 10:16:57.601841682 -0400
+@@ -103,6 +103,7 @@
+     new_target = target + 2 + relative_offset;
+   } else if (target[0] == ASM_JMP32ABS_0 &&
+              target[1] == ASM_JMP32ABS_1) {
++   jmp32rel:
+     // Visual studio seems to sometimes do it this way instead of the
+     // previous way.  Not sure what the rules are, but it was happening
+     // with operator new in some binaries.
+@@ -118,6 +119,18 @@
+       memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
+     }
+     new_target = reinterpret_cast<unsigned char*>(*new_target_v);
++  } else if (kIs64BitBinary && target[0] == ASM_REXW
++             && target[1] == ASM_JMP32ABS_0
++             && target[2] == ASM_JMP32ABS_1) {
++    // in Visual Studio 2012 we're seeing jump like that:
++    //   rex.W jmpq *0x11d019(%rip)
++    //
++    // according to docs I have, rex prefix is actually unneeded and
++    // can be ignored. I.e. docs say for jumps like that operand
++    // already defaults to 64-bit. But clearly it breaks abs. jump
++    // detection above and we just skip rex
++    target++;
++    goto jmp32rel;
+   } else {
+     break;
+   }
+@@ -535,6 +548,12 @@
+   return (*(target) & 0x70) == 0x70 && instruction_size == 2;
+ }
+ 
++bool PreamblePatcher::IsShortJump(
++    unsigned char* target,
++    unsigned int instruction_size) {
++  return target[0] == 0xeb && instruction_size == 2;
++}
++
+ bool PreamblePatcher::IsNearConditionalJump(
+     unsigned char* target,
+     unsigned int instruction_size) {
+@@ -575,7 +594,9 @@
+     unsigned char* target,
+     unsigned int* target_bytes,
+     unsigned int target_size) {
+-  unsigned char* original_jump_dest = (source + 2) + source[1];
++  // note: rel8 offset is signed. Thus we need to ask for signed char
++  // to get negative offsets right
++  unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
+   unsigned char* stub_jump_from = target + 6;
+   __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+   if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+@@ -597,6 +618,36 @@
+            reinterpret_cast<void*>(&fixup_jump_offset), 4);
+   }
+ 
++  return SIDESTEP_SUCCESS;
++}
++
++SideStepError PreamblePatcher::PatchShortJump(
++    unsigned char* source,
++    unsigned int instruction_size,
++    unsigned char* target,
++    unsigned int* target_bytes,
++    unsigned int target_size) {
++  // note: rel8 offset is _signed_. Thus we need signed char here.
++  unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
++  unsigned char* stub_jump_from = target + 5;
++  __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
++  if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
++    SIDESTEP_ASSERT(false &&
++                    "Unable to fix up short jump because target"
++                    " is too far away.");
++    return SIDESTEP_JUMP_INSTRUCTION;
++  }
++
++  *target_bytes = 5;
++  if (target_size > *target_bytes) {
++    // Convert the short jump to a near jump.
++    //
++    // e9 xx xx xx xx = jmp rel32off
++    target[0] = 0xe9;
++    memcpy(reinterpret_cast<void*>(target + 1),
++           reinterpret_cast<void*>(&fixup_jump_offset), 4);
++  }
++
+   return SIDESTEP_SUCCESS;
+ }
+ 
+diff -urP gperftools-2.0/src/windows/preamble_patcher.h gperftools-2.0-svn218/src/windows/preamble_patcher.h
+--- gperftools-2.0/src/windows/preamble_patcher.h	2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher.h	2013-06-04 10:16:57.601841682 -0400
+@@ -467,6 +467,8 @@
+   static bool IsShortConditionalJump(unsigned char* target,
+                                      unsigned int instruction_size);
+ 
++  static bool IsShortJump(unsigned char *target, unsigned int instruction_size);
++
+   // Helper routine that determines if a target instruction is a near
+   // conditional jump.
+   //
+@@ -547,6 +549,12 @@
+                                              unsigned int* target_bytes,
+                                              unsigned int target_size);
+ 
++  static SideStepError PatchShortJump(unsigned char* source,
++                                      unsigned int instruction_size,
++                                      unsigned char* target,
++                                      unsigned int* target_bytes,
++                                      unsigned int target_size);
++
+   // Helper routine that converts an instruction that will convert various
+   // jump-like instructions to corresponding instructions in the target buffer.
+   // What this routine does is fix up the relative offsets contained in jump
+diff -urP gperftools-2.0/src/windows/preamble_patcher_with_stub.cc gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc
+--- gperftools-2.0/src/windows/preamble_patcher_with_stub.cc	2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc	2013-06-04 10:16:57.682841683 -0400
+@@ -150,6 +150,11 @@
+                                             preamble_stub + stub_bytes,
+                                             &jump_bytes,
+                                             stub_size - stub_bytes);
++    } else if (IsShortJump(target + preamble_bytes, cur_bytes)) {
++      jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes,
++                                preamble_stub + stub_bytes,
++                                &jump_bytes,
++                                stub_size - stub_bytes);
+     } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
+                IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
+                IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
+Only in gperftools-2.0/src/windows: TODO.svn-r190
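PatchShortJump above rewrites a 2-byte "EB rel8" short jump as a 5-byte
"E9 rel32" near jump in the relocated stub: the destination is decoded
relative to the end of the original instruction (source + 2, with rel8
sign-extended), then re-encoded relative to the end of the new instruction
(target + 5). A freestanding sketch of just that displacement arithmetic
(illustrative helper; the real code also range-checks against INT_MAX/INT_MIN
and assumes a 4-byte int, as this sketch does):

    // Sketch: re-encode a short jmp (EB rel8) as a near jmp (E9 rel32).
    #include <cstring>
    #include <cstdio>

    static void ConvertShortJump(const unsigned char* source,
                                 unsigned char* target) {
      // Destination = end of the 2-byte instruction + sign-extended rel8.
      const unsigned char* dest =
          source + 2 + static_cast<signed char>(source[1]);
      // The new displacement is measured from the end of the 5-byte jmp.
      long long rel32 = dest - (target + 5);

      target[0] = 0xe9;                     // near jmp opcode
      int rel32_trunc = static_cast<int>(rel32);
      memcpy(target + 1, &rel32_trunc, 4);  // little-endian rel32
    }

    int main() {
      unsigned char src[2] = { 0xeb, 0xfe };  // jmp -2: jumps to itself
      unsigned char stub[5];
      ConvertShortJump(src, stub);
      printf("opcode=%02x\n", stub[0]);       // prints e9
      return 0;
    }

The sign extension is the whole point of the companion fix to
PatchNearConditionalJump: treating rel8 as unsigned silently corrupted
backward jumps.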
diff --git a/gperftools.spec b/gperftools.spec
index 7046ecb..3f5e0f9 100644
--- a/gperftools.spec
+++ b/gperftools.spec
@@ -2,7 +2,7 @@
 Name:           gperftools
 Version:        2.0
-Release:        10%{?dist}
+Release:        11%{?dist}
 License:        BSD
 Group:          Development/Tools
 Summary:        Very fast malloc and performance analysis tools
@@ -10,10 +10,14 @@ URL:            http://code.google.com/p/gperftools/
 Source0:        http://gperftools.googlecode.com/files/%{name}-%{version}.tar.gz
 # Update to latest svn, since google forgets how to make releases
 Patch0:         gperftools-svn-r190.patch
+Patch1:         gperftools-2.0-svn190-to-svn218.patch
 ExclusiveArch:  %{ix86} x86_64 ppc ppc64 %{arm}
 %ifnarch ppc ppc64
 BuildRequires:  libunwind-devel
 %endif
+BuildRequires:  autoconf, automake, libtool
+Requires:       gperftools-devel = %{version}-%{release}
+Requires:       pprof = %{version}-%{release}
 
 %description
 Perf Tools is a collection of performance analysis tools, including a
@@ -21,6 +25,9 @@ high-performance multi-threaded malloc() implementation that works
 particularly well with threads and STL, a thread-friendly heap-checker,
 a heap profiler, and a cpu-profiler.
 
+This is a metapackage which pulls in all of the gperftools (and pprof)
+binaries, libraries, and development headers, so that you can use them.
+
 %package devel
 Summary: Development libraries and headers for gperftools
 Group: Development/Libraries
@@ -52,6 +59,7 @@ Pprof is a heap and CPU profiler tool, part of the gperftools suite.
 %prep
 %setup -q
 %patch0 -p1 -b .svn-r190
+%patch1 -p1 -b .svn-r218
 
 # Fix end-of-line encoding
 sed -i 's/\r//' README_windows.txt
@@ -59,8 +67,10 @@ sed -i 's/\r//' README_windows.txt
 # No need to have exec permissions on source code
 chmod -x src/sampler.h src/sampler.cc
 
+autoreconf -i
+
 %build
-CXXFLAGS=`echo $RPM_OPT_FLAGS -DTCMALLOC_LARGE_PAGES| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
+CXXFLAGS=`echo $RPM_OPT_FLAGS -fno-strict-aliasing -Wno-unused-local-typedefs -DTCMALLOC_LARGE_PAGES| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
 %configure --disable-static
 
 # Bad rpath!
@@ -82,13 +92,15 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
 %check
 # http://code.google.com/p/google-perftools/issues/detail?id=153
 %ifnarch ppc
-# Their test suite is junk. Disabling.
+# Their test suite is almost always broken.
 # LD_LIBRARY_PATH=./.libs make check
 %endif
 
 %post libs -p /sbin/ldconfig
 %postun libs -p /sbin/ldconfig
 
+%files
+
 %files -n pprof
 %{_bindir}/pprof
 %{_mandir}/man1/*
@@ -104,6 +116,11 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
 %{_libdir}/*.so.*
 
 %changelog
+* Tue Jun 4 2013 Tom Callaway - 2.0-11
+- pass -fno-strict-aliasing
+- create "gperftools" metapackage.
+- update to svn r218 (cleanups, some ARM fixes)
+
 * Thu Mar 14 2013 Dan Horák - 2.0-10
 - build on ppc64 as well
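The spec now passes -fno-strict-aliasing because parts of this code base,
such as the preamble patcher's memcpy-through-cast tricks above, read one
object type through a pointer to another. A classic illustration of the
hazard that flag papers over (an illustrative example, not code from
gperftools; the aliasing violation here is deliberate):

    // Sketch: type punning that strict-aliasing optimization can break.
    // With -O2, the compiler may assume *f never aliases *i and return 1;
    // -fno-strict-aliasing forces the conservative reading.
    #include <cstdio>

    static int PunWrite(int* i, float* f) {
      *i = 1;
      *f = 2.0f;   // if f aliases i, this overwrites the 1
      return *i;   // strict aliasing lets the compiler skip the reload
    }

    int main() {
      int storage = 0;
      // Deliberately alias the same bytes as int and float (undefined
      // behavior under the C++ aliasing rules; that is the point).
      int result = PunWrite(&storage, reinterpret_cast<float*>(&storage));
      printf("%d\n", result);
      return 0;
    }

-Wno-unused-local-typedefs, added alongside it, just silences a gcc 4.8
warning flood from the compile-time assertion macros in this release.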