Only in gperftools-2.0: aclocal.m4 Only in gperftools-2.0: aclocal.m4.svn-r190 diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh --- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400 +++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400 @@ -1,54 +1,3 @@ #!/bin/sh -# Before using, you should figure out all the .m4 macros that your -# configure.m4 script needs and make sure they exist in the m4/ -# directory. -# -# These are the files that this script might edit: -# aclocal.m4 configure Makefile.in src/config.h.in \ -# depcomp config.guess config.sub install-sh missing mkinstalldirs \ -# ltmain.sh -# -# Here's a command you can run to see what files aclocal will import: -# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p' - -set -ex -rm -rf autom4te.cache - -trap 'rm -f aclocal.m4.tmp' EXIT - -# Returns the first binary in $* that exists, or the last arg, if none exists. -WhichOf() { - for candidate in "$@"; do - if "$candidate" --version >/dev/null 2>&1; then - echo "$candidate" - return - fi - done - echo "$candidate" # the last one in $@ -} - -# Use version 1.9 of aclocal and automake if available. -ACLOCAL=`WhichOf aclocal-1.9 aclocal` -AUTOMAKE=`WhichOf automake-1.9 automake` -LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize` - -# aclocal tries to overwrite aclocal.m4 even if the contents haven't -# changed, which is annoying when the file is not open for edit (in -# p4). We work around this by writing to a temp file and just -# updating the timestamp if the file hasn't change. 
-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp -if cmp aclocal.m4.tmp aclocal.m4; then - touch aclocal.m4 # pretend that we regenerated the file - rm -f aclocal.m4.tmp -else - mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails -fi - -grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f -autoconf -f -W all,no-obsolete -autoheader -f -W all -"$AUTOMAKE" -a -c -f -W all - -rm -rf autom4te.cache -exit 0 +autoreconf -i Only in gperftools-2.0: autogen.sh.svn-r190 Only in gperftools-2.0: compile Only in gperftools-2.0: config.guess Only in gperftools-2.0: config.sub Only in gperftools-2.0: configure diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac --- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400 +++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400 @@ -99,28 +99,7 @@ [gpt_cv_objcopy_weaken=no]) AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes) -case $host_os in - *mingw*) - # Disabling fast install keeps libtool from creating wrapper scripts - # around the executables it builds. Such scripts have caused failures on - # MinGW. Using this option means an extra link step is executed during - # "make install". 
- _LT_SET_OPTION([LT_INIT],[disable-fast-install]) -AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put -the `disable-fast-install' option into LT_INIT's first parameter.]) - - ;; - *) - _LT_SET_OPTION([LT_INIT],[fast-install]) -AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put -the `fast-install' option into LT_INIT's first parameter.]) - - ;; -esac - -LT_INIT -AC_SUBST(LIBTOOL_DEPS) -AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x") +LT_INIT([]) AC_C_INLINE AX_C___ATTRIBUTE__ @@ -134,6 +113,7 @@ AC_CHECK_TYPES([Elf32_Versym],,, [#include ]) # for vdso_support.h AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory AC_CHECK_FUNCS(geteuid) # for turning off services when run as root +AC_CHECK_FUNCS(fork) # for the pthread_atfork setup AC_CHECK_HEADERS(features.h) # for vdso_support.h AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h @@ -183,6 +163,11 @@ # This workaround comes from # http://cygwin.com/ml/cygwin/2004-11/msg00138.html case "$host" in + *-*-mingw*) + dnl mingw doesn't have mmap, not worth + dnl checking. 
Especially given that mingw can be a + dnl cross-compiler + ;; *-*-cygwin*) ac_cv_func_mmap_fixed_mapped=yes AC_DEFINE(HAVE_MMAP, 1, @@ -310,10 +295,18 @@ # Note, however, that our code tickles a bug in gcc < 4.1.2 # involving TLS and -fPIC (which our libraries will use) on x86: # http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html +# +# And mingw also does compile __thread but resultant code actually +# fails to work correctly at least in some not so ancient version: +# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html AC_MSG_CHECKING([for __thread]) AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2)) #error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html -#endif], [static __thread int p = 0])], +#endif +#if defined(__MINGW32__) +#error mingw doesn't really support tls +#endif +], [static __thread int p = 0])], [AC_DEFINE(HAVE_TLS, 1, Define to 1 if compiler supports __thread) AC_MSG_RESULT([yes])], Only in gperftools-2.0: configure.ac.svn-r190 Only in gperftools-2.0: configure.svn-r190 Only in gperftools-2.0: depcomp Only in gperftools-2.0/doc: cpuprofile.html.svn-r190 Only in gperftools-2.0/doc: heapprofile.html.svn-r190 Only in gperftools-2.0/doc: pprof.see_also.svn-r190 diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL --- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500 +++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400 @@ -8,6 +8,28 @@ Perftools-Specific Install Notes ================================ +*** Building from source repository + +As of 2.1 gperftools does not have configure and other autotools +products checked into its source repository. This is common practice +for projects using autotools. 
+ +NOTE: Source releases (.tar.gz that you download from +code.google.com/p/gperftools) still have all required files just as +before. Nothing has changed w.r.t. building from .tar.gz releases. + +But, in order to build gperftools checked out from subversion +repository you need to have autoconf, automake and libtool +installed. And before running ./configure you have to generate it (and +a bunch of other files) by running the ./autogen.sh script. That script +will take care of calling the correct autotools programs in the correct order. + +If you're a maintainer then it's business as usual too. Just run make +dist (or, preferably, make distcheck) and it'll produce .tar.gz or +.tar.bz2 with all autotools magic already included, so that users can +build our software without having autotools. + + *** NOTE FOR 64-BIT LINUX SYSTEMS The glibc built-in stack-unwinder on 64-bit systems has some problems Only in gperftools-2.0: install-sh Only in gperftools-2.0: libtool Only in gperftools-2.0: ltmain.sh Only in gperftools-2.0/m4: libtool.m4 Only in gperftools-2.0/m4: libtool.m4.svn-r190 Only in gperftools-2.0/m4: lt~obsolete.m4 Only in gperftools-2.0/m4: ltoptions.m4 Only in gperftools-2.0/m4: ltsugar.m4 Only in gperftools-2.0/m4: ltversion.m4 diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am --- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400 +++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400 @@ -221,7 +221,7 @@ src/windows/preamble_patcher.cc \ src/windows/preamble_patcher_with_stub.cc # patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us. 
-libwindows_la_LIBADD = -lPsapi +libwindows_la_LIBADD = -lpsapi SPINLOCK_INCLUDES = src/base/spinlock.h \ src/base/spinlock_internal.h \ @@ -238,6 +238,7 @@ noinst_LTLIBRARIES += libspinlock.la libspinlock_la_SOURCES = src/base/spinlock.cc \ src/base/spinlock_internal.cc \ + src/base/atomicops-internals-x86.cc \ $(SPINLOCK_INCLUDES) LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la @@ -355,7 +356,7 @@ $(STACKTRACE_INCLUDES) libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK) STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)' -libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) +libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS) ### Unittests TESTS += stacktrace_unittest @@ -468,7 +469,7 @@ -DNO_HEAP_CHECK \ $(PTHREAD_CFLAGS) -DNDEBUG \ $(AM_CXXFLAGS) $(NO_EXCEPTIONS) -libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) +libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS) libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK) lib_LTLIBRARIES += libtcmalloc_minimal.la @@ -477,7 +478,7 @@ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \ $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS) # -version-info gets passed to libtool -libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ +libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS) libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS) # For windows, we're playing around with trying to do some stacktrace @@ -539,6 +540,12 @@ tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) +TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest +tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = 
src/tests/large_heap_fragmentation_unittest.cc +tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) +tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) +tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS) + # This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc) # In theory this should work under mingw, but mingw has trouble running # shell scripts that end in .exe. And it doesn't seem to build shared @@ -898,8 +905,16 @@ ### Unittests -TESTS += tcmalloc_unittest -TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \ +TESTS += tcmalloc_unittest.sh$(EXEEXT) +tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh +noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES) +tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \ + tcmalloc_unittest + rm -f $@ + cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@ + +noinst_PROGRAMS += tcmalloc_unittest +tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \ src/gperftools/malloc_extension.h tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \ src/tcmalloc.h \ @@ -956,6 +971,12 @@ tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS) +TESTS += tcmalloc_large_heap_fragmentation_unittest +tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc +tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) +tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS) +tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS) + TESTS += raw_printer_test raw_printer_test_SOURCES = src/tests/raw_printer_test.cc raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS) Only in gperftools-2.0: Makefile.am.svn-r190 Only in gperftools-2.0: 
Makefile.in Only in gperftools-2.0: Makefile.in.svn-r190 Only in gperftools-2.0: missing Only in gperftools-2.0: mkinstalldirs Only in gperftools-2.0: NEWS.svn-r190 diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h --- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400 @@ -50,6 +50,16 @@ // implementations on other archtectures will cause your code to break. If you // do not know what you are doing, avoid these routines, and use a Mutex. // +// These following lower-level operations are typically useful only to people +// implementing higher-level synchronization operations like spinlocks, +// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or +// a store with appropriate memory-ordering instructions. "Acquire" operations +// ensure that no later memory access can be reordered ahead of the operation. +// "Release" operations ensure that no previous memory access can be reordered +// after the operation. "Barrier" operations have both "Acquire" and "Release" +// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory +// access. +// // It is incorrect to make direct assignments to/from an atomic variable. // You should use one of the Load or Store routines. 
The NoBarrier // versions are provided when no barriers are needed: @@ -95,10 +105,10 @@ #include "base/atomicops-internals-arm-v6plus.h" #elif defined(ARMV3) #include "base/atomicops-internals-arm-generic.h" -#elif defined(_WIN32) -#include "base/atomicops-internals-windows.h" #elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__)) #include "base/atomicops-internals-x86.h" +#elif defined(_WIN32) +#include "base/atomicops-internals-windows.h" #elif defined(__linux__) && defined(__PPC__) #include "base/atomicops-internals-linuxppc.h" #else @@ -149,6 +159,18 @@ reinterpret_cast(ptr), new_value); } +inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Acquire_AtomicExchange( + reinterpret_cast(ptr), new_value); +} + +inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr, + AtomicWord new_value) { + return Release_AtomicExchange( + reinterpret_cast(ptr), new_value); +} + // Atomically increment *ptr by "increment". Returns the new value of // *ptr with the increment applied. This routine implies no memory // barriers. @@ -164,17 +186,6 @@ reinterpret_cast(ptr), increment); } -// ------------------------------------------------------------------------ -// These following lower-level operations are typically useful only to people -// implementing higher-level synchronization operations like spinlocks, -// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or -// a store with appropriate memory-ordering instructions. "Acquire" operations -// ensure that no later memory access can be reordered ahead of the operation. -// "Release" operations ensure that no previous memory access can be reordered -// after the operation. "Barrier" operations have both "Acquire" and "Release" -// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory -// access. 
-// ------------------------------------------------------------------------ inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr, AtomicWord old_value, AtomicWord new_value) { @@ -250,6 +261,8 @@ Atomic32 old_value, Atomic32 new_value); Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); +Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value); Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment); Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment); @@ -271,6 +284,8 @@ Atomic64 old_value, Atomic64 new_value); Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); +Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value); Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment); diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h --- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400 @@ -89,6 +89,18 @@ return old_value; } +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { for (;;) { @@ -176,6 +188,18 @@ return 0; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // pLinuxKernelCmpxchg already has acquire and release barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { NotImplementedFatalError("NoBarrier_AtomicIncrement"); diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h --- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400 @@ -94,6 +94,28 @@ return old; } +inline void MemoryBarrier() { +#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__) + uint32_t dest = 0; + __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory"); +#else + __asm__ __volatile__("dmb" : : : "memory"); +#endif +} + +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { Atomic32 tmp, res; @@ -110,10 +132,6 @@ return res; } 
-inline void MemoryBarrier() { - __asm__ __volatile__("dmb" : : : "memory"); -} - inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { Atomic32 tmp, res; @@ -220,6 +238,19 @@ return old; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value); + MemoryBarrier(); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + MemoryBarrier(); + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { int store_failed; @@ -303,6 +334,18 @@ return 0; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Acquire_AtomicExchange"); + return 0; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + NotImplementedFatalError("Release_AtomicExchange"); + return 0; +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { NotImplementedFatalError("NoBarrier_AtomicIncrement"); diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h --- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400 +++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400 @@ -163,6 +163,26 @@ return old_value; } +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value, + const_cast(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Release(old_value, new_value, 
+ const_cast(ptr))); + return old_value; +} + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, Atomic32 increment) { return OSAtomicAdd32(increment, const_cast(ptr)); @@ -294,6 +314,26 @@ return old_value; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value, + const_cast(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap64Release(old_value, new_value, + const_cast(ptr))); + return old_value; +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, Atomic64 increment) { return OSAtomicAdd64(increment, const_cast(ptr)); Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190 diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h --- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500 +++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400 @@ -132,6 +132,21 @@ return old_value; } +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + Atomic32 old_value; + do { + old_value = *ptr; + } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value, + const_cast(ptr))); + return old_value; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr, + Atomic32 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr, Atomic32 increment) { return OSAtomicAdd32(increment, const_cast(ptr)); @@ -217,6 +232,21 @@ return old_value; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + Atomic64 old_value; + do { + old_value = *ptr; + 
} while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value, + const_cast(ptr))); + return old_value; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr, + Atomic64 new_value) { + return Acquire_AtomicExchange(ptr, new_value); +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr, Atomic64 increment) { return OSAtomicAdd64(increment, const_cast(ptr)); diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h --- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400 +++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400 @@ -137,6 +137,18 @@ return static_cast(result); } +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { return FastInterlockedExchangeAdd( @@ -188,8 +200,7 @@ } inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) { - NoBarrier_AtomicExchange(ptr, value); - // acts as a barrier in this implementation + Acquire_AtomicExchange(ptr, value); } inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) { @@ -478,6 +489,18 @@ #endif // defined(_WIN64) || defined(__MINGW64__) +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. 
+ return NoBarrier_AtomicExchange(ptr, new_value); +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // FastInterlockedExchange has both acquire and release memory barriers. + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr, Atomic64 old_value, Atomic64 new_value) { Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190 diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h --- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400 @@ -89,6 +89,21 @@ return new_value; // Now it's the previous value. } +inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return old_val; +} + +inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, + Atomic32 new_value) { + // xchgl already has release memory barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment) { Atomic32 temp = increment; @@ -152,7 +167,7 @@ __asm__ __volatile__("mfence" : : : "memory"); } else { // mfence is faster but not present on PIII Atomic32 x = 0; - NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII + Acquire_AtomicExchange(&x, 0); } } @@ -161,8 +176,7 @@ *ptr = value; __asm__ __volatile__("mfence" : : : "memory"); } else { - NoBarrier_AtomicExchange(ptr, value); - // acts as a barrier on PIII + Acquire_AtomicExchange(ptr, value); } } #endif @@ -213,6 +227,21 @@ return new_value; // Now it's the previous value. 
} +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return old_val; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_value) { + // xchgq already has release memory barrier semantics. + return NoBarrier_AtomicExchange(ptr, new_value); +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { Atomic64 temp = increment; @@ -334,6 +363,20 @@ return old_val; } +inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val); + if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) { + __asm__ __volatile__("lfence" : : : "memory"); + } + return old_val; +} + +inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, + Atomic64 new_val) { + return NoBarrier_AtomicExchange(ptr, new_val); +} + inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment) { Atomic64 old_val, new_val; diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h --- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400 +++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400 @@ -334,10 +334,13 @@ #if defined(HAVE___ATTRIBUTE__) # if (defined(__i386__) || defined(__x86_64__)) # define CACHELINE_ALIGNED __attribute__((aligned(64))) -# elif defined(__arm__) -# define CACHELINE_ALIGNED __attribute__((aligned(32))) # elif (defined(__PPC__) || defined(__PPC64__)) # define CACHELINE_ALIGNED __attribute__((aligned(16))) +# elif (defined(__arm__)) +# define CACHELINE_ALIGNED __attribute__((aligned(64))) + // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned 
+# else +# error Could not determine cache line length - unknown architecture # endif #else # define CACHELINE_ALIGNED Only in gperftools-2.0/src/base: basictypes.h.svn-r190 Only in gperftools-2.0/src/base: cycleclock.h.svn-r190 diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h --- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400 +++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400 @@ -148,6 +148,8 @@ #include #include #include +#include +#include #include #include #include @@ -404,24 +406,24 @@ }; #elif defined(__x86_64__) struct kernel_stat { - unsigned long st_dev; - unsigned long st_ino; - unsigned long st_nlink; + uint64_t st_dev; + uint64_t st_ino; + uint64_t st_nlink; unsigned st_mode; unsigned st_uid; unsigned st_gid; unsigned __pad0; - unsigned long st_rdev; - long st_size; - long st_blksize; - long st_blocks; - unsigned long st_atime_; - unsigned long st_atime_nsec_; - unsigned long st_mtime_; - unsigned long st_mtime_nsec_; - unsigned long st_ctime_; - unsigned long st_ctime_nsec_; - long __unused[3]; + uint64_t st_rdev; + int64_t st_size; + int64_t st_blksize; + int64_t st_blocks; + uint64_t st_atime_; + uint64_t st_atime_nsec_; + uint64_t st_mtime_; + uint64_t st_mtime_nsec_; + uint64_t st_ctime_; + uint64_t st_ctime_nsec_; + int64_t __unused[3]; }; #elif defined(__PPC__) struct kernel_stat { @@ -1013,74 +1015,141 @@ * location (e.g. when using the clone() system call with the CLONE_VM * option). */ + #undef LSS_ENTRYPOINT + #define LSS_ENTRYPOINT "syscall\n" + + /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit. + * We need to explicitly cast to an unsigned 64 bit type to avoid implicit + * sign extension. We can't cast pointers directly because those are + * 32 bits, and gcc will dump ugly warnings about casting from a pointer + * to an integer of a different size. 
+ */ + #undef LSS_SYSCALL_ARG + #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a)) + #undef _LSS_RETURN + #define _LSS_RETURN(type, res, cast) \ + do { \ + if ((uint64_t)(res) >= (uint64_t)(-4095)) { \ + LSS_ERRNO = -(res); \ + res = -1; \ + } \ + return (type)(cast)(res); \ + } while (0) + #undef LSS_RETURN + #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t) + + #undef _LSS_BODY + #define _LSS_BODY(nr, type, name, cast, ...) \ + long long __res; \ + __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \ + : "=a" (__res) \ + : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \ + : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \ + _LSS_RETURN(type, __res, cast) #undef LSS_BODY - #define LSS_BODY(type,name, ...) \ - long __res; \ - __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \ - ##__VA_ARGS__ : "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res) + #define LSS_BODY(nr, type, name, args...) \ + _LSS_BODY(nr, type, name, uintptr_t, ## args) + + #undef LSS_BODY_ASM0 + #undef LSS_BODY_ASM1 + #undef LSS_BODY_ASM2 + #undef LSS_BODY_ASM3 + #undef LSS_BODY_ASM4 + #undef LSS_BODY_ASM5 + #undef LSS_BODY_ASM6 + #define LSS_BODY_ASM0 + #define LSS_BODY_ASM1 LSS_BODY_ASM0 + #define LSS_BODY_ASM2 LSS_BODY_ASM1 + #define LSS_BODY_ASM3 LSS_BODY_ASM2 + #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;" + #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;" + #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;" + + #undef LSS_BODY_CLOBBER0 + #undef LSS_BODY_CLOBBER1 + #undef LSS_BODY_CLOBBER2 + #undef LSS_BODY_CLOBBER3 + #undef LSS_BODY_CLOBBER4 + #undef LSS_BODY_CLOBBER5 + #undef LSS_BODY_CLOBBER6 + #define LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0 + #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1 + #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2 + #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10", + #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8", + #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9", + + #undef LSS_BODY_ARG0 + 
#undef LSS_BODY_ARG1 + #undef LSS_BODY_ARG2 + #undef LSS_BODY_ARG3 + #undef LSS_BODY_ARG4 + #undef LSS_BODY_ARG5 + #undef LSS_BODY_ARG6 + #define LSS_BODY_ARG0() + #define LSS_BODY_ARG1(arg1) \ + LSS_BODY_ARG0(), "D" (arg1) + #define LSS_BODY_ARG2(arg1, arg2) \ + LSS_BODY_ARG1(arg1), "S" (arg2) + #define LSS_BODY_ARG3(arg1, arg2, arg3) \ + LSS_BODY_ARG2(arg1, arg2), "d" (arg3) + #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \ + LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4) + #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \ + LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5) + #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \ + LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6) + #undef _syscall0 #define _syscall0(type,name) \ type LSS_NAME(name)() { \ - LSS_BODY(type, name); \ + LSS_BODY(0, type, name); \ } #undef _syscall1 #define _syscall1(type,name,type1,arg1) \ type LSS_NAME(name)(type1 arg1) { \ - LSS_BODY(type, name, "D" ((long)(arg1))); \ + LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \ } #undef _syscall2 #define _syscall2(type,name,type1,arg1,type2,arg2) \ type LSS_NAME(name)(type1 arg1, type2 arg2) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \ + LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\ } #undef _syscall3 #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \ - LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \ - "d" ((long)(arg3))); \ + LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3)); \ } #undef _syscall4 #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)) : "r10", "r11", 
"rcx", "memory"); \ - LSS_RETURN(type, __res); \ + LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\ } #undef _syscall5 #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ type5,arg5) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ type5 arg5) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)) : \ - "r8", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ + LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5)); \ } #undef _syscall6 #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \ type5,arg5,type6,arg6) \ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \ type5 arg5, type6 arg6) { \ - long __res; \ - __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \ - "syscall" : \ - "=a" (__res) : "0" (__NR_##name), \ - "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \ - "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \ - "r8", "r9", "r10", "r11", "rcx", "memory"); \ - LSS_RETURN(type, __res); \ + LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \ + LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \ + LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\ } LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { - long __res; + long long __res; { __asm__ __volatile__(/* if (fn == NULL) * return -EINVAL; @@ -1145,8 +1214,13 @@ "1:\n" : "=a" (__res) : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit), - "r"(fn), "S"(child_stack), "D"(flags), "r"(arg), - "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr) + 
"r"(LSS_SYSCALL_ARG(fn)), + "S"(LSS_SYSCALL_ARG(child_stack)), + "D"(LSS_SYSCALL_ARG(flags)), + "r"(LSS_SYSCALL_ARG(arg)), + "d"(LSS_SYSCALL_ARG(parent_tidptr)), + "r"(LSS_SYSCALL_ARG(newtls)), + "r"(LSS_SYSCALL_ARG(child_tidptr)) : "rsp", "memory", "r8", "r10", "r11", "rcx"); } LSS_RETURN(int, __res); @@ -1159,7 +1233,7 @@ * Unfortunately, we cannot just reference the glibc version of this * function, as glibc goes out of its way to make it inaccessible. */ - void (*res)(void); + long long res; __asm__ __volatile__("call 2f\n" "0:.align 16\n" "1:movq %1,%%rax\n" @@ -1168,7 +1242,7 @@ "addq $(1b-0b),%0\n" : "=a" (res) : "i" (__NR_rt_sigreturn)); - return res; + return (void (*)(void))(uintptr_t)res; } #elif defined(__arm__) /* Most definitions of _syscallX() neglect to mark "memory" as being @@ -1797,8 +1871,16 @@ LSS_INLINE _syscall0(pid_t, _gettid) LSS_INLINE _syscall2(int, kill, pid_t, p, int, s) - LSS_INLINE _syscall3(off_t, lseek, int, f, - off_t, o, int, w) + #if defined(__x86_64__) + /* Need to make sure off_t isn't truncated to 32-bits under x32. */ + LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) { + _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o), + LSS_SYSCALL_ARG(w)); + } + #else + LSS_INLINE _syscall3(off_t, lseek, int, f, + off_t, o, int, w) + #endif LSS_INLINE _syscall2(int, munmap, void*, s, size_t, l) LSS_INLINE _syscall5(void*, _mremap, void*, o, @@ -1835,10 +1917,13 @@ int, t, int, p) #endif #if defined(__x86_64__) - LSS_INLINE _syscall6(void*, mmap, void*, s, - size_t, l, int, p, - int, f, int, d, - __off64_t, o) + /* Need to make sure __off64_t isn't truncated to 32-bits under x32. 
*/ + LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d, + __off64_t o) { + LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l), + LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f), + LSS_SYSCALL_ARG(d), (uint64_t)(o)); + } LSS_INLINE int LSS_NAME(sigaction)(int signum, const struct kernel_sigaction *act, Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190 Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190 diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h --- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400 @@ -31,11 +31,6 @@ * Author: Sanjay Ghemawat */ -// -// Fast spinlocks (at least on x86, a lock/unlock pair is approximately -// half the cost of a Mutex because the unlock just does a store instead -// of a compare-and-swap which is expensive). - // SpinLock is async signal safe. // If used within a signal handler, all lock holders // should block the signal even outside the signal handler. @@ -95,10 +90,9 @@ // TODO(csilvers): uncomment the annotation when we figure out how to // support this macro with 0 args (see thread_annotations.h) inline void Unlock() /*UNLOCK_FUNCTION()*/ { - uint64 wait_cycles = - static_cast(base::subtle::NoBarrier_Load(&lockword_)); ANNOTATE_RWLOCK_RELEASED(this, 1); - base::subtle::Release_Store(&lockword_, kSpinLockFree); + uint64 wait_cycles = static_cast( + base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree)); if (wait_cycles != kSpinLockHeld) { // Collect contentionz profile info, and speed the wakeup of any waiter. 
// The wait_cycles value indicates how long this thread spent waiting Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190 Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190 diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h --- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400 @@ -38,7 +38,7 @@ #include #if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__)) #include // for DWORD -#include // for CreateToolhelp32Snapshot +#include // for CreateToolhelp32Snapshot #endif #ifdef HAVE_UNISTD_H #include // for pid_t diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h --- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400 @@ -79,6 +79,16 @@ // page full of 5-byte objects would have 2 bytes memory overhead). size_t OverheadBytes(); + // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call + // to set the lock in a consistent state before the fork. + void Lock() { + lock_.Lock(); + } + + void Unlock() { + lock_.Unlock(); + } + private: // TransferCache is used to cache transfers of // sizemap.num_objects_to_move(size_class) back and forth between diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc --- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400 +++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400 @@ -30,12 +30,32 @@ // --- // Author: Sanjay Ghemawat +#include // for getenv and strtol #include "config.h" #include "common.h" #include "system-alloc.h" +#include "base/spinlock.h" namespace tcmalloc { +// Define the maximum number of objects per class type to transfer between +// thread and central caches.
+static int32 FLAGS_tcmalloc_transfer_num_objects; + +static const int32 kDefaultTransferNumObjecs = 32768; + +// The init function is provided to explicitly initialize the variable value +// from the env. var to avoid C++ global construction that might defer its +// initialization after a malloc/new call. +static inline void InitTCMallocTransferNumObjects() +{ + if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) { + const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ"); + FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs : + strtol(envval, NULL, 10); + } +} + // Note: the following only works for "n"s that fit in 32-bits, but // that is fine since we only use it for small sizes. static inline int LgFloor(size_t n) { @@ -90,13 +110,16 @@ // - We go to the central freelist too often and we have to acquire // its lock each time. // This value strikes a balance between the constraints above. - if (num > 32) num = 32; + if (num > FLAGS_tcmalloc_transfer_num_objects) + num = FLAGS_tcmalloc_transfer_num_objects; return num; } // Initialize the mapping arrays void SizeMap::Init() { + InitTCMallocTransferNumObjects(); + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] if (ClassIndex(0) < 0) { Log(kCrash, __FILE__, __LINE__, @@ -189,12 +212,56 @@ // Metadata allocator -- keeps stats about how many bytes allocated.
static uint64_t metadata_system_bytes_ = 0; +static const size_t kMetadataAllocChunkSize = 8*1024*1024; +static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8; +// usually malloc uses larger alignments, but because metadata cannot +// have any fancy SIMD types, aligning on pointer size seems fine +static const size_t kMetadataAllignment = sizeof(void *); + +static char *metadata_chunk_alloc_; +static size_t metadata_chunk_avail_; + +static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED); + void* MetaDataAlloc(size_t bytes) { - void* result = TCMalloc_SystemAlloc(bytes, NULL); - if (result != NULL) { - metadata_system_bytes_ += bytes; + if (bytes >= kMetadataAllocChunkSize) { + void *rv = TCMalloc_SystemAlloc(bytes, + NULL, kMetadataAllignment); + if (rv != NULL) { + metadata_system_bytes_ += bytes; + } + return rv; } - return result; + + SpinLockHolder h(&metadata_alloc_lock); + + // the following works by essentially turning address to integer of + // log_2 kMetadataAllignment size and negating it. I.e. negated + // value + original value gets 0 and that's what we want modulo + // kMetadataAllignment. Note, we negate before masking higher bits + // off, otherwise we'd have to mask them off after negation anyways.
+ intptr_t alignment = -reinterpret_cast(metadata_chunk_alloc_) & (kMetadataAllignment-1); + + if (metadata_chunk_avail_ < bytes + alignment) { + size_t real_size; + void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize, + &real_size, kMetadataAllignment); + if (ptr == NULL) { + return NULL; + } + + metadata_chunk_alloc_ = static_cast(ptr); + metadata_chunk_avail_ = real_size; + + alignment = 0; + } + + void *rv = static_cast(metadata_chunk_alloc_ + alignment); + bytes += alignment; + metadata_chunk_alloc_ += bytes; + metadata_chunk_avail_ -= bytes; + metadata_system_bytes_ += bytes; + return rv; } uint64_t metadata_system_bytes() { return metadata_system_bytes_; } Only in gperftools-2.0/src: common.cc.svn-r190 diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h --- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400 +++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400 @@ -80,7 +80,7 @@ static const size_t kMinAlign = 16; #elif defined(TCMALLOC_ALIGN_8BYTES) static const size_t kPageShift = 13; -static const size_t kNumClasses = 93; +static const size_t kNumClasses = 95; // Unless we force to use 8 bytes alignment we use an alignment of // at least 16 bytes to statisfy requirements for some SSE types. // Keep in mind when using the 16 bytes alignment you can have a space @@ -88,7 +88,7 @@ static const size_t kMinAlign = 8; #else static const size_t kPageShift = 13; -static const size_t kNumClasses = 86; +static const size_t kNumClasses = 88; static const size_t kMinAlign = 16; #endif static const size_t kMaxThreadCacheSize = 4 << 20; Only in gperftools-2.0/src: common.h.svn-r190 diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in --- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400 +++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400 @@ -56,6 +56,9 @@ /* Define to 1 if you have the header file. 
*/ #undef HAVE_FEATURES_H +/* Define to 1 if you have the `fork' function. */ +#undef HAVE_FORK + /* Define to 1 if you have the `geteuid' function. */ #undef HAVE_GETEUID Only in gperftools-2.0/src: config.h.in.svn-r190 Only in gperftools-2.0/src: debugallocation.cc.svn-r190 Only in gperftools-2.0/src: getpc.h.svn-r190 Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190 Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190 Only in gperftools-2.0/src: heap-checker.cc.svn-r190 Only in gperftools-2.0/src: heap-profiler.cc.svn-r190 Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190 Only in gperftools-2.0/src: malloc_extension.cc.svn-r190 Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190 Only in gperftools-2.0/src: memory_region_map.cc.svn-r190 diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc --- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400 +++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400 @@ -108,6 +108,8 @@ return AllocLarge(n); // May be NULL } +static const size_t kForcedCoalesceInterval = 128*1024*1024; + Span* PageHeap::New(Length n) { ASSERT(Check()); ASSERT(n > 0); @@ -116,6 +118,38 @@ if (result != NULL) return result; + if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0 + && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4 + && (stats_.system_bytes / kForcedCoalesceInterval + != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) { + // We're about to grow heap, but there are lots of free pages. + // tcmalloc's design decision to keep unmapped and free spans + // separately and never coalesce them means that sometimes there + // can be free pages span of sufficient size, but it consists of + // "segments" of different type so page heap search cannot find + // it. In order to prevent growing heap and wasting memory in such + // case we're going to unmap all free pages. 
So that all free + // spans are maximally coalesced. + // + // We're also limiting 'rate' of going into this path to be at + // most once per 128 megs of heap growth. Otherwise programs that + // grow heap frequently (and that means by small amount) could be + // penalized with higher count of minor page faults. + // + // See also large_heap_fragmentation_unittest.cc and + // https://code.google.com/p/gperftools/issues/detail?id=368 + ReleaseAtLeastNPages(static_cast(0x7fffffff)); + + // then try again. If we are forced to grow heap because of large + // spans fragmentation and not because of problem described above, + // then at the very least we've just unmapped free but + // insufficiently big large spans back to OS. So in case of really + // unlucky memory fragmentation we'll be consuming virtual address + // space, but not real memory + result = SearchFreeAndLargeLists(n); + if (result != NULL) return result; + } + // Grow the heap and try again. if (!GrowHeap(n)) { ASSERT(Check()); Only in gperftools-2.0/src: page_heap.cc.svn-r190 Only in gperftools-2.0/src: page_heap.h.svn-r190 Only in gperftools-2.0/src: pprof.svn-r190 Only in gperftools-2.0/src: profiler.cc.svn-r190 diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc --- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400 @@ -39,6 +39,39 @@ namespace tcmalloc { +#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) +// The following two functions are registered via pthread_atfork to make +// sure the central_cache locks remain in a consistent state in the forked +// version of the thread.
+ +static +void CentralCacheLockAll() +{ + Static::pageheap_lock()->Lock(); + for (int i = 0; i < kNumClasses; ++i) + Static::central_cache()[i].Lock(); +} + +static +void CentralCacheUnlockAll() +{ + for (int i = 0; i < kNumClasses; ++i) + Static::central_cache()[i].Unlock(); + Static::pageheap_lock()->Unlock(); +} +#endif + +static inline +void SetupAtForkLocksHandler() +{ +#if defined(HAVE_FORK) && defined(HAVE_PTHREAD) + pthread_atfork(CentralCacheLockAll, // parent calls before fork + CentralCacheUnlockAll, // parent calls after fork + CentralCacheUnlockAll); // child calls after fork +#endif +} + + SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED); SizeMap Static::sizemap_; CentralFreeListPadded Static::central_cache_[kNumClasses]; @@ -49,6 +82,7 @@ StackTrace* Static::growth_stacks_ = NULL; PageHeap* Static::pageheap_ = NULL; + void Static::InitStaticVars() { sizemap_.Init(); span_allocator_.Init(); @@ -61,6 +95,8 @@ for (int i = 0; i < kNumClasses; ++i) { central_cache_[i].Init(i); } + SetupAtForkLocksHandler(); + // It's important to have PageHeap allocated, not in static storage, // so that HeapLeakChecker does not consider all the byte patterns stored // in is caches as pointers that are sources of heap object liveness, Only in gperftools-2.0/src: static_vars.h.svn-r190 Only in gperftools-2.0/src: symbolize.cc.svn-r190 Only in gperftools-2.0/src: system-alloc.cc.svn-r190 Only in gperftools-2.0/src: system-alloc.h.svn-r190 Only in gperftools-2.0/src: tcmalloc.cc.svn-r190 diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc --- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400 @@ -38,13 +38,14 @@ #define GG_ULONGLONG(x) static_cast(x) template -static void TestAtomicIncrement() { +static void TestAtomicIncrement(AtomicType (*atomic_increment_func) + (volatile 
AtomicType*, AtomicType)) { // For now, we just test single threaded execution - // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go + // use a guard value to make sure the atomic_increment_func doesn't go // outside the expected address bounds. This is in particular to // test that some future change to the asm code doesn't cause the - // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit + // 32-bit atomic_increment_func doesn't do the wrong thing on 64-bit // machines. struct { AtomicType prev_word; @@ -60,47 +61,47 @@ s.count = 0; s.next_word = next_word_value; - ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1)); + ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1)); ASSERT_EQ(1, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2)); + ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2)); ASSERT_EQ(3, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3)); + ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3)); ASSERT_EQ(6, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3)); + ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3)); ASSERT_EQ(3, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2)); + ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2)); ASSERT_EQ(1, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); + ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1)); ASSERT_EQ(0, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); 
- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1)); + ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1)); ASSERT_EQ(-1, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4)); + ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4)); ASSERT_EQ(-5, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); - ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5)); + ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5)); ASSERT_EQ(0, s.count); ASSERT_EQ(prev_word_value, s.prev_word); ASSERT_EQ(next_word_value, s.next_word); @@ -111,9 +112,10 @@ template -static void TestCompareAndSwap() { +static void TestCompareAndSwap(AtomicType (*compare_and_swap_func) + (volatile AtomicType*, AtomicType, AtomicType)) { AtomicType value = 0; - AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1); + AtomicType prev = (*compare_and_swap_func)(&value, 0, 1); ASSERT_EQ(1, value); ASSERT_EQ(0, prev); @@ -122,21 +124,22 @@ const AtomicType k_test_val = (GG_ULONGLONG(1) << (NUM_BITS(AtomicType) - 2)) + 11; value = k_test_val; - prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5); + prev = (*compare_and_swap_func)(&value, 0, 5); ASSERT_EQ(k_test_val, value); ASSERT_EQ(k_test_val, prev); value = k_test_val; - prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5); + prev = (*compare_and_swap_func)(&value, k_test_val, 5); ASSERT_EQ(5, value); ASSERT_EQ(k_test_val, prev); } template -static void TestAtomicExchange() { +static void TestAtomicExchange(AtomicType (*atomic_exchange_func) + (volatile AtomicType*, AtomicType)) { AtomicType value = 0; - AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1); + AtomicType new_value = (*atomic_exchange_func)(&value, 1); ASSERT_EQ(1, value); ASSERT_EQ(0, new_value); @@ -145,28 +148,29 @@ const AtomicType k_test_val = 
(GG_ULONGLONG(1) << (NUM_BITS(AtomicType) - 2)) + 11; value = k_test_val; - new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val); + new_value = (*atomic_exchange_func)(&value, k_test_val); ASSERT_EQ(k_test_val, value); ASSERT_EQ(k_test_val, new_value); value = k_test_val; - new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5); + new_value = (*atomic_exchange_func)(&value, 5); ASSERT_EQ(5, value); ASSERT_EQ(k_test_val, new_value); } template -static void TestAtomicIncrementBounds() { +static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func) + (volatile AtomicType*, AtomicType)) { // Test increment at the half-width boundary of the atomic type. // It is primarily for testing at the 32-bit boundary for 64-bit atomic type. AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2); AtomicType value = test_val - 1; - AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1); + AtomicType new_value = (*atomic_increment_func)(&value, 1); ASSERT_EQ(test_val, value); ASSERT_EQ(value, new_value); - base::subtle::NoBarrier_AtomicIncrement(&value, -1); + (*atomic_increment_func)(&value, -1); ASSERT_EQ(test_val - 1, value); } @@ -222,16 +226,28 @@ template static void TestAtomicOps() { - TestCompareAndSwap(); - TestAtomicExchange(); - TestAtomicIncrementBounds(); + TestCompareAndSwap(base::subtle::NoBarrier_CompareAndSwap); + TestCompareAndSwap(base::subtle::Acquire_CompareAndSwap); + TestCompareAndSwap(base::subtle::Release_CompareAndSwap); + + TestAtomicExchange(base::subtle::NoBarrier_AtomicExchange); + TestAtomicExchange(base::subtle::Acquire_AtomicExchange); + TestAtomicExchange(base::subtle::Release_AtomicExchange); + + TestAtomicIncrementBounds( + base::subtle::NoBarrier_AtomicIncrement); + TestAtomicIncrementBounds( + base::subtle::Barrier_AtomicIncrement); + TestStore(); TestLoad(); } int main(int argc, char** argv) { - TestAtomicIncrement(); - TestAtomicIncrement(); + 
TestAtomicIncrement(base::subtle::NoBarrier_AtomicIncrement); + TestAtomicIncrement(base::subtle::Barrier_AtomicIncrement); + TestAtomicIncrement(base::subtle::NoBarrier_AtomicIncrement); + TestAtomicIncrement(base::subtle::Barrier_AtomicIncrement); TestAtomicOps(); TestAtomicOps(); @@ -248,8 +264,10 @@ // If we ever *do* want to enable this, try adding -msse (or -mmmx?) // to the CXXFLAGS in Makefile.am. #if 0 and defined(BASE_HAS_ATOMIC64) - TestAtomicIncrement(); - TestAtomicOps(); + TestAtomicIncrement( + base::subtle::NoBarrier_AtomicIncrement); + TestAtomicIncrement( + base::subtle::Barrier_AtomicIncrement); #endif printf("PASS\n"); Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190 diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc --- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc 1969-12-31 19:00:00.000000000 -0500 +++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc 2013-06-04 10:16:58.073841689 -0400 @@ -0,0 +1,62 @@ +// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This is a unit test for exercising fragmentation of large (over 1 +// meg) page spans. It makes sure that allocations/releases of +// increasing memory chunks do not blowup memory +// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368 + + +#include +#include +#include + +#include "base/logging.h" +#include "common.h" +#include + + +int main (int argc, char** argv) { + for (int pass = 1; pass <= 3; pass++) { + size_t size = 100*1024*1024; + while (size < 500*1024*1024) { + void *ptr = malloc(size); + free(ptr); + size += 20000; + + size_t heap_size = static_cast(-1); + MallocExtension::instance()->GetNumericProperty("generic.heap_size", + &heap_size); + + + CHECK_LT(heap_size, 1*1024*1024*1024); + } + } + + printf("PASS\n"); + return 0; +} diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c --- gperftools-2.0/src/tests/malloc_extension_c_test.c 2012-02-03 14:18:23.000000000 -0500 +++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c 2013-06-04 10:16:58.077841689 -0400 @@ -59,6 +59,16 @@ g_delete_hook_calls++; } +static +void *forced_malloc(size_t size) +{ + void *rv = 
malloc(size); + if (!rv) { + FAIL("malloc is not supposed to fail here"); + } + return rv; +} + void TestMallocHook(void) { /* TODO(csilvers): figure out why we get: * E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken! @@ -78,8 +88,9 @@ if (!MallocHook_AddDeleteHook(&TestDeleteHook)) { FAIL("Failed to add delete hook"); } - free(malloc(10)); - free(malloc(20)); + + free(forced_malloc(10)); + free(forced_malloc(20)); if (g_new_hook_calls != 2) { FAIL("Wrong number of calls to the new hook"); } Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190 Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190 Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190 Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190 diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc --- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2013-06-04 10:20:21.147844736 -0400 +++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc 2013-06-04 10:16:58.073841689 -0400 @@ -725,7 +725,7 @@ // Note the ... in the hook signature: we don't care what arguments // the hook takes. #define MAKE_HOOK_CALLBACK(hook_type) \ - static int g_##hook_type##_calls = 0; \ + static volatile int g_##hook_type##_calls = 0; \ static void IncrementCallsTo##hook_type(...) 
{ \ g_##hook_type##_calls++; \ } \ @@ -760,7 +760,7 @@ CHECK((p % sizeof(void*)) == 0); CHECK((p % sizeof(double)) == 0); - // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES) + // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES) // alignment for large enough objects if (size >= kMinAlign) { CHECK((p % kMinAlign) == 0); Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190 diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh --- gperftools-2.0/src/tests/tcmalloc_unittest.sh 1969-12-31 19:00:00.000000000 -0500 +++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh 2013-06-04 10:16:58.075841689 -0400 @@ -0,0 +1,68 @@ +#!/bin/sh + +# Copyright (c) 2013, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Author: Adhemerval Zanella +# +# Runs the tcmalloc_unittest with various environment variables. +# This is necessary because tuning some environment variables +# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program +# behavior, just performance. + +BINDIR="${BINDIR:-.}" +TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest" + +TMPDIR=/tmp/tcmalloc_unittest +rm -rf $TMPDIR || exit 2 +mkdir $TMPDIR || exit 3 + +# $1: value of tcmalloc_unittest env. var. +run_check_transfer_num_obj() { + [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1" + + echo -n "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... " + if $TCMALLOC_UNITTEST > $TMPDIR/output 2>&1; then + echo "OK" + else + echo "FAILED" + echo "Output from the failed run:" + echo "----" + cat $TMPDIR/output + echo "----" + exit 4 + fi +} + +run_check_transfer_num_obj "" +run_check_transfer_num_obj "40" +run_check_transfer_num_obj "4096" + +echo "PASS" Only in gperftools-2.0/src: thread_cache.cc.svn-r190 Only in gperftools-2.0/src: thread_cache.h.svn-r190 diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h --- gperftools-2.0/src/windows/mingw.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/mingw.h 2013-06-04 10:16:57.682841683 -0400 @@ -60,6 +60,8 @@ // pretend the pthreads wrapper doesn't exist, even when it does. 
#undef HAVE_PTHREAD +#define HAVE_PID_T + #include "windows/port.h" #endif /* __MINGW32__ */ diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc --- gperftools-2.0/src/windows/patch_functions.cc 2012-02-03 14:18:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/patch_functions.cc 2013-06-04 10:16:57.683841683 -0400 @@ -85,7 +85,7 @@ #include <windows.h> #include <stdio.h> #include <malloc.h> // for _msize and _expand -#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc. +#include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc. #include <set> #include <map> #include <vector> Only in gperftools-2.0/src/windows: port.cc.svn-r190 diff -urP gperftools-2.0/src/windows/port.h gperftools-2.0-svn218/src/windows/port.h --- gperftools-2.0/src/windows/port.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/port.h 2013-06-04 10:16:57.683841683 -0400 @@ -390,7 +390,10 @@ /* ----------------------------------- SYSTEM/PROCESS */ +#ifndef HAVE_PID_T typedef int pid_t; +#endif + #if __STDC__ && !defined(__MINGW32__) inline pid_t getpid(void) { return _getpid(); } #endif diff -urP gperftools-2.0/src/windows/preamble_patcher.cc gperftools-2.0-svn218/src/windows/preamble_patcher.cc --- gperftools-2.0/src/windows/preamble_patcher.cc 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/preamble_patcher.cc 2013-06-04 10:16:57.601841682 -0400 @@ -103,6 +103,7 @@ new_target = target + 2 + relative_offset; } else if (target[0] == ASM_JMP32ABS_0 && target[1] == ASM_JMP32ABS_1) { + jmp32rel: // Visual studio seems to sometimes do it this way instead of the // previous way. Not sure what the rules are, but it was happening // with operator new in some binaries.
@@ -118,6 +119,18 @@ memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4); } new_target = reinterpret_cast<unsigned char*>(*new_target_v); + } else if (kIs64BitBinary && target[0] == ASM_REXW + && target[1] == ASM_JMP32ABS_0 + && target[2] == ASM_JMP32ABS_1) { + // in Visual Studio 2012 we're seeing jump like that: + // rex.W jmpq *0x11d019(%rip) + // + // according to docs I have, rex prefix is actually unneeded and + // can be ignored. I.e. docs say for jumps like that operand + // already defaults to 64-bit. But clearly it breaks abs. jump + // detection above and we just skip rex + target++; + goto jmp32rel; } else { break; } @@ -535,6 +548,12 @@ return (*(target) & 0x70) == 0x70 && instruction_size == 2; } +bool PreamblePatcher::IsShortJump( + unsigned char* target, + unsigned int instruction_size) { + return target[0] == 0xeb && instruction_size == 2; +} + bool PreamblePatcher::IsNearConditionalJump( unsigned char* target, unsigned int instruction_size) { @@ -575,7 +594,9 @@ unsigned char* target, unsigned int* target_bytes, unsigned int target_size) { - unsigned char* original_jump_dest = (source + 2) + source[1]; + // note: rel8 offset is signed. Thus we need to ask for signed char + // to get negative offsets right + unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]); unsigned char* stub_jump_from = target + 6; __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { @@ -597,6 +618,36 @@ reinterpret_cast<void*>(&fixup_jump_offset), 4); } + return SIDESTEP_SUCCESS; +} + +SideStepError PreamblePatcher::PatchShortJump( + unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size) { + // note: rel8 offset is _signed_. Thus we need signed char here.
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]); + unsigned char* stub_jump_from = target + 5; + __int64 fixup_jump_offset = original_jump_dest - stub_jump_from; + if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) { + SIDESTEP_ASSERT(false && + "Unable to fix up short jump because target" + " is too far away."); + return SIDESTEP_JUMP_INSTRUCTION; + } + + *target_bytes = 5; + if (target_size > *target_bytes) { + // Convert the short jump to a near jump. + // + // e9 xx xx xx xx = jmp rel32off + target[0] = 0xe9; + memcpy(reinterpret_cast<void*>(target + 1), + reinterpret_cast<void*>(&fixup_jump_offset), 4); + } + return SIDESTEP_SUCCESS; } diff -urP gperftools-2.0/src/windows/preamble_patcher.h gperftools-2.0-svn218/src/windows/preamble_patcher.h --- gperftools-2.0/src/windows/preamble_patcher.h 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/preamble_patcher.h 2013-06-04 10:16:57.601841682 -0400 @@ -467,6 +467,8 @@ static bool IsShortConditionalJump(unsigned char* target, unsigned int instruction_size); + static bool IsShortJump(unsigned char *target, unsigned int instruction_size); + // Helper routine that determines if a target instruction is a near // conditional jump. // @@ -547,6 +549,12 @@ unsigned int* target_bytes, unsigned int target_size); + static SideStepError PatchShortJump(unsigned char* source, + unsigned int instruction_size, + unsigned char* target, + unsigned int* target_bytes, + unsigned int target_size); + // Helper routine that converts an instruction that will convert various // jump-like instructions to corresponding instructions in the target buffer.
// What this routine does is fix up the relative offsets contained in jump diff -urP gperftools-2.0/src/windows/preamble_patcher_with_stub.cc gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc --- gperftools-2.0/src/windows/preamble_patcher_with_stub.cc 2012-02-02 16:36:23.000000000 -0500 +++ gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc 2013-06-04 10:16:57.682841683 -0400 @@ -150,6 +150,11 @@ preamble_stub + stub_bytes, &jump_bytes, stub_size - stub_bytes); + } else if (IsShortJump(target + preamble_bytes, cur_bytes)) { + jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes, + preamble_stub + stub_bytes, + &jump_bytes, + stub_size - stub_bytes); } else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) || IsNearRelativeJump(target + preamble_bytes, cur_bytes) || IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) || Only in gperftools-2.0/src/windows: TODO.svn-r190