From 2f88ac8ddc39da827352466f896e6470acd35118 Mon Sep 17 00:00:00 2001
From: Tom Callaway
Date: Tue, 4 Jun 2013 10:43:26 -0400
Subject: [PATCH] update to svn218, add metapackage, fix compiler flags
---
gperftools-2.0-svn190-to-svn218.patch | 1972 ++++++
gperftools-svn-r190.patch | 8016 +++++++++++++++++++++++++
gperftools.spec | 54 +-
3 files changed, 10032 insertions(+), 10 deletions(-)
create mode 100644 gperftools-2.0-svn190-to-svn218.patch
create mode 100644 gperftools-svn-r190.patch
diff --git a/gperftools-2.0-svn190-to-svn218.patch b/gperftools-2.0-svn190-to-svn218.patch
new file mode 100644
index 0000000..771e61a
--- /dev/null
+++ b/gperftools-2.0-svn190-to-svn218.patch
@@ -0,0 +1,1972 @@
+Only in gperftools-2.0: aclocal.m4
+Only in gperftools-2.0: aclocal.m4.svn-r190
+diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh
+--- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400
++++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400
+@@ -1,54 +1,3 @@
+ #!/bin/sh
+
+-# Before using, you should figure out all the .m4 macros that your
+-# configure.m4 script needs and make sure they exist in the m4/
+-# directory.
+-#
+-# These are the files that this script might edit:
+-# aclocal.m4 configure Makefile.in src/config.h.in \
+-# depcomp config.guess config.sub install-sh missing mkinstalldirs \
+-# ltmain.sh
+-#
+-# Here's a command you can run to see what files aclocal will import:
+-# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p'
+-
+-set -ex
+-rm -rf autom4te.cache
+-
+-trap 'rm -f aclocal.m4.tmp' EXIT
+-
+-# Returns the first binary in $* that exists, or the last arg, if none exists.
+-WhichOf() {
+- for candidate in "$@"; do
+- if "$candidate" --version >/dev/null 2>&1; then
+- echo "$candidate"
+- return
+- fi
+- done
+- echo "$candidate" # the last one in $@
+-}
+-
+-# Use version 1.9 of aclocal and automake if available.
+-ACLOCAL=`WhichOf aclocal-1.9 aclocal`
+-AUTOMAKE=`WhichOf automake-1.9 automake`
+-LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize`
+-
+-# aclocal tries to overwrite aclocal.m4 even if the contents haven't
+-# changed, which is annoying when the file is not open for edit (in
+-# p4). We work around this by writing to a temp file and just
+-# updating the timestamp if the file hasn't change.
+-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp
+-if cmp aclocal.m4.tmp aclocal.m4; then
+- touch aclocal.m4 # pretend that we regenerated the file
+- rm -f aclocal.m4.tmp
+-else
+- mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails
+-fi
+-
+-grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f
+-autoconf -f -W all,no-obsolete
+-autoheader -f -W all
+-"$AUTOMAKE" -a -c -f -W all
+-
+-rm -rf autom4te.cache
+-exit 0
++autoreconf -i
+Only in gperftools-2.0: autogen.sh.svn-r190
+Only in gperftools-2.0: compile
+Only in gperftools-2.0: config.guess
+Only in gperftools-2.0: config.sub
+Only in gperftools-2.0: configure
+diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac
+--- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400
++++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400
+@@ -99,28 +99,7 @@
+ [gpt_cv_objcopy_weaken=no])
+ AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes)
+
+-case $host_os in
+- *mingw*)
+- # Disabling fast install keeps libtool from creating wrapper scripts
+- # around the executables it builds. Such scripts have caused failures on
+- # MinGW. Using this option means an extra link step is executed during
+- # "make install".
+- _LT_SET_OPTION([LT_INIT],[disable-fast-install])
+-AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `disable-fast-install' option into LT_INIT's first parameter.])
+-
+- ;;
+- *)
+- _LT_SET_OPTION([LT_INIT],[fast-install])
+-AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
+-the `fast-install' option into LT_INIT's first parameter.])
+-
+- ;;
+-esac
+-
+-LT_INIT
+-AC_SUBST(LIBTOOL_DEPS)
+-AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x")
++LT_INIT([])
+
+ AC_C_INLINE
+ AX_C___ATTRIBUTE__
+@@ -134,6 +113,7 @@
+ AC_CHECK_TYPES([Elf32_Versym],,, [#include ]) # for vdso_support.h
+ AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory
+ AC_CHECK_FUNCS(geteuid) # for turning off services when run as root
++AC_CHECK_FUNCS(fork) # for the pthread_atfork setup
+ AC_CHECK_HEADERS(features.h) # for vdso_support.h
+ AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not
+ AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h
+@@ -183,6 +163,11 @@
+ # This workaround comes from
+ # http://cygwin.com/ml/cygwin/2004-11/msg00138.html
+ case "$host" in
++ *-*-mingw*)
++ dnl mingw doesn't have mmap, not worth
++ dnl checking. Especially given that mingw can be a
++ dnl cross-compiler
++ ;;
+ *-*-cygwin*)
+ ac_cv_func_mmap_fixed_mapped=yes
+ AC_DEFINE(HAVE_MMAP, 1,
+@@ -310,10 +295,18 @@
+ # Note, however, that our code tickles a bug in gcc < 4.1.2
+ # involving TLS and -fPIC (which our libraries will use) on x86:
+ # http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
++#
++# And mingw also does compile __thread but resultant code actually
++# fails to work correctly at least in some not so ancient version:
++# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html
+ AC_MSG_CHECKING([for __thread])
+ AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2))
+ #error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
+-#endif], [static __thread int p = 0])],
++#endif
++#if defined(__MINGW32__)
++#error mingw doesn't really support tls
++#endif
++], [static __thread int p = 0])],
+ [AC_DEFINE(HAVE_TLS, 1,
+ Define to 1 if compiler supports __thread)
+ AC_MSG_RESULT([yes])],
+Only in gperftools-2.0: configure.ac.svn-r190
+Only in gperftools-2.0: configure.svn-r190
+Only in gperftools-2.0: depcomp
+Only in gperftools-2.0/doc: cpuprofile.html.svn-r190
+Only in gperftools-2.0/doc: heapprofile.html.svn-r190
+Only in gperftools-2.0/doc: pprof.see_also.svn-r190
+diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL
+--- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500
++++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400
+@@ -8,6 +8,28 @@
+ Perftools-Specific Install Notes
+ ================================
+
++*** Building from source repository
++
++As of 2.1 gperftools does not have configure and other autotools
++products checked into it's source repository. This is common practice
++for projects using autotools.
++
++NOTE: Source releases (.tar.gz that you download from
++code.google.com/p/gperftools) still have all required files just as
++before. Nothing has changed w.r.t. building from .tar.gz releases.
++
++But, in order to build gperftools checked out from subversion
++repository you need to have autoconf, automake and libtool
++installed. And before running ./configure you have to generate it (and
++a bunch of other files) by running ./autogen.sh script. That script
++will take care of calling correct autotools programs in correct order.
++
++If you're maintainer then it's business as usual too. Just run make
++dist (or, preferably, make distcheck) and it'll produce .tar.gz or
++.tar.bz2 with all autotools magic already included. So that users can
++build our software without having autotools.
++
++
+ *** NOTE FOR 64-BIT LINUX SYSTEMS
+
+ The glibc built-in stack-unwinder on 64-bit systems has some problems
+Only in gperftools-2.0: install-sh
+Only in gperftools-2.0: libtool
+Only in gperftools-2.0: ltmain.sh
+Only in gperftools-2.0/m4: libtool.m4
+Only in gperftools-2.0/m4: libtool.m4.svn-r190
+Only in gperftools-2.0/m4: lt~obsolete.m4
+Only in gperftools-2.0/m4: ltoptions.m4
+Only in gperftools-2.0/m4: ltsugar.m4
+Only in gperftools-2.0/m4: ltversion.m4
+diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am
+--- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400
++++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400
+@@ -221,7 +221,7 @@
+ src/windows/preamble_patcher.cc \
+ src/windows/preamble_patcher_with_stub.cc
+ # patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
+-libwindows_la_LIBADD = -lPsapi
++libwindows_la_LIBADD = -lpsapi
+
+ SPINLOCK_INCLUDES = src/base/spinlock.h \
+ src/base/spinlock_internal.h \
+@@ -238,6 +238,7 @@
+ noinst_LTLIBRARIES += libspinlock.la
+ libspinlock_la_SOURCES = src/base/spinlock.cc \
+ src/base/spinlock_internal.cc \
++ src/base/atomicops-internals-x86.cc \
+ $(SPINLOCK_INCLUDES)
+
+ LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la
+@@ -355,7 +356,7 @@
+ $(STACKTRACE_INCLUDES)
+ libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK)
+ STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)'
+-libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS)
++libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS)
+
+ ### Unittests
+ TESTS += stacktrace_unittest
+@@ -468,7 +469,7 @@
+ -DNO_HEAP_CHECK \
+ $(PTHREAD_CFLAGS) -DNDEBUG \
+ $(AM_CXXFLAGS) $(NO_EXCEPTIONS)
+-libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
++libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
+ libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
+
+ lib_LTLIBRARIES += libtcmalloc_minimal.la
+@@ -477,7 +478,7 @@
+ libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
+ $(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
+ # -version-info gets passed to libtool
+-libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
++libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS)
+ libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
+
+ # For windows, we're playing around with trying to do some stacktrace
+@@ -539,6 +540,12 @@
+ tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+ tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+
++TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest
++tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
++tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
++tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
++tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
++
+ # This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc)
+ # In theory this should work under mingw, but mingw has trouble running
+ # shell scripts that end in .exe. And it doesn't seem to build shared
+@@ -898,8 +905,16 @@
+
+ ### Unittests
+
+-TESTS += tcmalloc_unittest
+-TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \
++TESTS += tcmalloc_unittest.sh$(EXEEXT)
++tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh
++noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES)
++tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \
++ tcmalloc_unittest
++ rm -f $@
++ cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@
++
++noinst_PROGRAMS += tcmalloc_unittest
++tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \
+ src/gperftools/malloc_extension.h
+ tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
+ src/tcmalloc.h \
+@@ -956,6 +971,12 @@
+ tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+ tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
+
++TESTS += tcmalloc_large_heap_fragmentation_unittest
++tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
++tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
++tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
++tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
++
+ TESTS += raw_printer_test
+ raw_printer_test_SOURCES = src/tests/raw_printer_test.cc
+ raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
+Only in gperftools-2.0: Makefile.am.svn-r190
+Only in gperftools-2.0: Makefile.in
+Only in gperftools-2.0: Makefile.in.svn-r190
+Only in gperftools-2.0: missing
+Only in gperftools-2.0: mkinstalldirs
+Only in gperftools-2.0: NEWS.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h
+--- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400
+@@ -50,6 +50,16 @@
+ // implementations on other archtectures will cause your code to break. If you
+ // do not know what you are doing, avoid these routines, and use a Mutex.
+ //
++// These following lower-level operations are typically useful only to people
++// implementing higher-level synchronization operations like spinlocks,
++// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
++// a store with appropriate memory-ordering instructions. "Acquire" operations
++// ensure that no later memory access can be reordered ahead of the operation.
++// "Release" operations ensure that no previous memory access can be reordered
++// after the operation. "Barrier" operations have both "Acquire" and "Release"
++// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
++// access.
++//
+ // It is incorrect to make direct assignments to/from an atomic variable.
+ // You should use one of the Load or Store routines. The NoBarrier
+ // versions are provided when no barriers are needed:
+@@ -95,10 +105,10 @@
+ #include "base/atomicops-internals-arm-v6plus.h"
+ #elif defined(ARMV3)
+ #include "base/atomicops-internals-arm-generic.h"
+-#elif defined(_WIN32)
+-#include "base/atomicops-internals-windows.h"
+ #elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
+ #include "base/atomicops-internals-x86.h"
++#elif defined(_WIN32)
++#include "base/atomicops-internals-windows.h"
+ #elif defined(__linux__) && defined(__PPC__)
+ #include "base/atomicops-internals-linuxppc.h"
+ #else
+@@ -149,6 +159,18 @@
+ reinterpret_cast(ptr), new_value);
+ }
+
++AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
++ AtomicWord new_value) {
++ return Acquire_AtomicExchange(
++ reinterpret_cast(ptr), new_value);
++}
++
++AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
++ AtomicWord new_value) {
++ return Release_AtomicExchange(
++ reinterpret_cast(ptr), new_value);
++}
++
+ // Atomically increment *ptr by "increment". Returns the new value of
+ // *ptr with the increment applied. This routine implies no memory
+ // barriers.
+@@ -164,17 +186,6 @@
+ reinterpret_cast(ptr), increment);
+ }
+
+-// ------------------------------------------------------------------------
+-// These following lower-level operations are typically useful only to people
+-// implementing higher-level synchronization operations like spinlocks,
+-// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
+-// a store with appropriate memory-ordering instructions. "Acquire" operations
+-// ensure that no later memory access can be reordered ahead of the operation.
+-// "Release" operations ensure that no previous memory access can be reordered
+-// after the operation. "Barrier" operations have both "Acquire" and "Release"
+-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
+-// access.
+-// ------------------------------------------------------------------------
+ inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
+ AtomicWord old_value,
+ AtomicWord new_value) {
+@@ -250,6 +261,8 @@
+ Atomic32 old_value,
+ Atomic32 new_value);
+ Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
++Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
++Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+ Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
+ Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment);
+@@ -271,6 +284,8 @@
+ Atomic64 old_value,
+ Atomic64 new_value);
+ Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
++Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
++Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+ Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+ Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
+
+diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h
+--- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400
+@@ -89,6 +89,18 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ for (;;) {
+@@ -176,6 +188,18 @@
+ return 0;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h
+--- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400
+@@ -94,6 +94,28 @@
+ return old;
+ }
+
++inline void MemoryBarrier() {
++#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)
++ uint32_t dest = 0;
++ __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" :"=&r"(dest) : : "memory");
++#else
++ __asm__ __volatile__("dmb" : : : "memory");
++#endif
++}
++
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -110,10 +132,6 @@
+ return res;
+ }
+
+-inline void MemoryBarrier() {
+- __asm__ __volatile__("dmb" : : : "memory");
+-}
+-
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 tmp, res;
+@@ -220,6 +238,19 @@
+ return old;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
++ MemoryBarrier();
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ MemoryBarrier();
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ int store_failed;
+@@ -303,6 +334,18 @@
+ return 0;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Acquire_AtomicExchange");
++ return 0;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ NotImplementedFatalError("Release_AtomicExchange");
++ return 0;
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ NotImplementedFatalError("NoBarrier_AtomicIncrement");
+diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h
+--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400
++++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400
+@@ -163,6 +163,26 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
+ Atomic32 increment) {
+ return OSAtomicAdd32(increment, const_cast(ptr));
+@@ -294,6 +314,26 @@
+ return old_value;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
+ Atomic64 increment) {
+ return OSAtomicAdd64(increment, const_cast(ptr));
+Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h
+--- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400
+@@ -132,6 +132,21 @@
+ return old_value;
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ Atomic32 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
++ Atomic32 new_value) {
++ return Acquire_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
+ Atomic32 increment) {
+ return OSAtomicAdd32(increment, const_cast(ptr));
+@@ -217,6 +232,21 @@
+ return old_value;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ Atomic64 old_value;
++ do {
++ old_value = *ptr;
++ } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
++ const_cast(ptr)));
++ return old_value;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
++ Atomic64 new_value) {
++ return Acquire_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
+ Atomic64 increment) {
+ return OSAtomicAdd64(increment, const_cast(ptr));
+diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h
+--- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400
+@@ -137,6 +137,18 @@
+ return static_cast(result);
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ return FastInterlockedExchangeAdd(
+@@ -188,8 +200,7 @@
+ }
+
+ inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
+- NoBarrier_AtomicExchange(ptr, value);
+- // acts as a barrier in this implementation
++ Acquire_AtomicExchange(ptr, value);
+ }
+
+ inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
+@@ -478,6 +489,18 @@
+ #endif // defined(_WIN64) || defined(__MINGW64__)
+
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // FastInterlockedExchange has both acquire and release memory barriers.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
+ Atomic64 old_value,
+ Atomic64 new_value) {
+Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190
+diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h
+--- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400
+@@ -89,6 +89,21 @@
+ return new_value; // Now it's the previous value.
+ }
+
++inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
++ Atomic32 new_value) {
++ // xchgl already has release memory barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
+ Atomic32 increment) {
+ Atomic32 temp = increment;
+@@ -152,7 +167,7 @@
+ __asm__ __volatile__("mfence" : : : "memory");
+ } else { // mfence is faster but not present on PIII
+ Atomic32 x = 0;
+- NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII
++ Acquire_AtomicExchange(&x, 0);
+ }
+ }
+
+@@ -161,8 +176,7 @@
+ *ptr = value;
+ __asm__ __volatile__("mfence" : : : "memory");
+ } else {
+- NoBarrier_AtomicExchange(ptr, value);
+- // acts as a barrier on PIII
++ Acquire_AtomicExchange(ptr, value);
+ }
+ }
+ #endif
+@@ -213,6 +227,21 @@
+ return new_value; // Now it's the previous value.
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_value) {
++ // xchgq already has release memory barrier semantics.
++ return NoBarrier_AtomicExchange(ptr, new_value);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 temp = increment;
+@@ -334,6 +363,20 @@
+ return old_val;
+ }
+
++inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_val) {
++ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
++ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
++ __asm__ __volatile__("lfence" : : : "memory");
++ }
++ return old_val;
++}
++
++inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
++ Atomic64 new_val) {
++ return NoBarrier_AtomicExchange(ptr, new_val);
++}
++
+ inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
+ Atomic64 increment) {
+ Atomic64 old_val, new_val;
+diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h
+--- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400
+@@ -334,10 +334,13 @@
+ #if defined(HAVE___ATTRIBUTE__)
+ # if (defined(__i386__) || defined(__x86_64__))
+ # define CACHELINE_ALIGNED __attribute__((aligned(64)))
+-# elif defined(__arm__)
+-# define CACHELINE_ALIGNED __attribute__((aligned(32)))
+ # elif (defined(__PPC__) || defined(__PPC64__))
+ # define CACHELINE_ALIGNED __attribute__((aligned(16)))
++# elif (defined(__arm__))
++# define CACHELINE_ALIGNED __attribute__((aligned(64)))
++ // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
++# else
++# error Could not determine cache line length - unknown architecture
+ # endif
+ #else
+ # define CACHELINE_ALIGNED
+Only in gperftools-2.0/src/base: basictypes.h.svn-r190
+Only in gperftools-2.0/src/base: cycleclock.h.svn-r190
+diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h
+--- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400
++++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400
+@@ -148,6 +148,8 @@
+ #include
+ #include
+ #include
++#include
++#include
+ #include
+ #include
+ #include
+@@ -404,24 +406,24 @@
+ };
+ #elif defined(__x86_64__)
+ struct kernel_stat {
+- unsigned long st_dev;
+- unsigned long st_ino;
+- unsigned long st_nlink;
++ uint64_t st_dev;
++ uint64_t st_ino;
++ uint64_t st_nlink;
+ unsigned st_mode;
+ unsigned st_uid;
+ unsigned st_gid;
+ unsigned __pad0;
+- unsigned long st_rdev;
+- long st_size;
+- long st_blksize;
+- long st_blocks;
+- unsigned long st_atime_;
+- unsigned long st_atime_nsec_;
+- unsigned long st_mtime_;
+- unsigned long st_mtime_nsec_;
+- unsigned long st_ctime_;
+- unsigned long st_ctime_nsec_;
+- long __unused[3];
++ uint64_t st_rdev;
++ int64_t st_size;
++ int64_t st_blksize;
++ int64_t st_blocks;
++ uint64_t st_atime_;
++ uint64_t st_atime_nsec_;
++ uint64_t st_mtime_;
++ uint64_t st_mtime_nsec_;
++ uint64_t st_ctime_;
++ uint64_t st_ctime_nsec_;
++ int64_t __unused[3];
+ };
+ #elif defined(__PPC__)
+ struct kernel_stat {
+@@ -1013,74 +1015,141 @@
+ * location (e.g. when using the clone() system call with the CLONE_VM
+ * option).
+ */
++ #undef LSS_ENTRYPOINT
++ #define LSS_ENTRYPOINT "syscall\n"
++
++ /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
++ * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
++ * sign extension. We can't cast pointers directly because those are
++ * 32 bits, and gcc will dump ugly warnings about casting from a pointer
++ * to an integer of a different size.
++ */
++ #undef LSS_SYSCALL_ARG
++ #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
++ #undef _LSS_RETURN
++ #define _LSS_RETURN(type, res, cast) \
++ do { \
++ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \
++ LSS_ERRNO = -(res); \
++ res = -1; \
++ } \
++ return (type)(cast)(res); \
++ } while (0)
++ #undef LSS_RETURN
++ #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
++
++ #undef _LSS_BODY
++ #define _LSS_BODY(nr, type, name, cast, ...) \
++ long long __res; \
++ __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \
++ : "=a" (__res) \
++ : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \
++ : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \
++ _LSS_RETURN(type, __res, cast)
+ #undef LSS_BODY
+- #define LSS_BODY(type,name, ...) \
+- long __res; \
+- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
+- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res)
++ #define LSS_BODY(nr, type, name, args...) \
++ _LSS_BODY(nr, type, name, uintptr_t, ## args)
++
++ #undef LSS_BODY_ASM0
++ #undef LSS_BODY_ASM1
++ #undef LSS_BODY_ASM2
++ #undef LSS_BODY_ASM3
++ #undef LSS_BODY_ASM4
++ #undef LSS_BODY_ASM5
++ #undef LSS_BODY_ASM6
++ #define LSS_BODY_ASM0
++ #define LSS_BODY_ASM1 LSS_BODY_ASM0
++ #define LSS_BODY_ASM2 LSS_BODY_ASM1
++ #define LSS_BODY_ASM3 LSS_BODY_ASM2
++ #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
++ #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
++ #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
++
++ #undef LSS_BODY_CLOBBER0
++ #undef LSS_BODY_CLOBBER1
++ #undef LSS_BODY_CLOBBER2
++ #undef LSS_BODY_CLOBBER3
++ #undef LSS_BODY_CLOBBER4
++ #undef LSS_BODY_CLOBBER5
++ #undef LSS_BODY_CLOBBER6
++ #define LSS_BODY_CLOBBER0
++ #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
++ #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
++ #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
++ #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
++ #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
++ #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
++
++ #undef LSS_BODY_ARG0
++ #undef LSS_BODY_ARG1
++ #undef LSS_BODY_ARG2
++ #undef LSS_BODY_ARG3
++ #undef LSS_BODY_ARG4
++ #undef LSS_BODY_ARG5
++ #undef LSS_BODY_ARG6
++ #define LSS_BODY_ARG0()
++ #define LSS_BODY_ARG1(arg1) \
++ LSS_BODY_ARG0(), "D" (arg1)
++ #define LSS_BODY_ARG2(arg1, arg2) \
++ LSS_BODY_ARG1(arg1), "S" (arg2)
++ #define LSS_BODY_ARG3(arg1, arg2, arg3) \
++ LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
++ #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
++ LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
++ #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
++ LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
++ #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
++ LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
++
+ #undef _syscall0
+ #define _syscall0(type,name) \
+ type LSS_NAME(name)() { \
+- LSS_BODY(type, name); \
++ LSS_BODY(0, type, name); \
+ }
+ #undef _syscall1
+ #define _syscall1(type,name,type1,arg1) \
+ type LSS_NAME(name)(type1 arg1) { \
+- LSS_BODY(type, name, "D" ((long)(arg1))); \
++ LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \
+ }
+ #undef _syscall2
+ #define _syscall2(type,name,type1,arg1,type2,arg2) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2) { \
+- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
++ LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
+ }
+ #undef _syscall3
+ #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
+- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
+- "d" ((long)(arg3))); \
++ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3)); \
+ }
+ #undef _syscall4
+ #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
+ }
+ #undef _syscall5
+ #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
+- "r8", "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
++ LSS_SYSCALL_ARG(arg5)); \
+ }
+ #undef _syscall6
+ #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
+ type5,arg5,type6,arg6) \
+ type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
+ type5 arg5, type6 arg6) { \
+- long __res; \
+- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
+- "syscall" : \
+- "=a" (__res) : "0" (__NR_##name), \
+- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
+- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
+- "r8", "r9", "r10", "r11", "rcx", "memory"); \
+- LSS_RETURN(type, __res); \
++ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
++ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
++ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
+ }
+ LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
+ int flags, void *arg, int *parent_tidptr,
+ void *newtls, int *child_tidptr) {
+- long __res;
++ long long __res;
+ {
+ __asm__ __volatile__(/* if (fn == NULL)
+ * return -EINVAL;
+@@ -1145,8 +1214,13 @@
+ "1:\n"
+ : "=a" (__res)
+ : "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
+- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
+- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
++ "r"(LSS_SYSCALL_ARG(fn)),
++ "S"(LSS_SYSCALL_ARG(child_stack)),
++ "D"(LSS_SYSCALL_ARG(flags)),
++ "r"(LSS_SYSCALL_ARG(arg)),
++ "d"(LSS_SYSCALL_ARG(parent_tidptr)),
++ "r"(LSS_SYSCALL_ARG(newtls)),
++ "r"(LSS_SYSCALL_ARG(child_tidptr))
+ : "rsp", "memory", "r8", "r10", "r11", "rcx");
+ }
+ LSS_RETURN(int, __res);
+@@ -1159,7 +1233,7 @@
+ * Unfortunately, we cannot just reference the glibc version of this
+ * function, as glibc goes out of its way to make it inaccessible.
+ */
+- void (*res)(void);
++ long long res;
+ __asm__ __volatile__("call 2f\n"
+ "0:.align 16\n"
+ "1:movq %1,%%rax\n"
+@@ -1168,7 +1242,7 @@
+ "addq $(1b-0b),%0\n"
+ : "=a" (res)
+ : "i" (__NR_rt_sigreturn));
+- return res;
++ return (void (*)(void))(uintptr_t)res;
+ }
+ #elif defined(__arm__)
+ /* Most definitions of _syscallX() neglect to mark "memory" as being
+@@ -1797,8 +1871,16 @@
+ LSS_INLINE _syscall0(pid_t, _gettid)
+ LSS_INLINE _syscall2(int, kill, pid_t, p,
+ int, s)
+- LSS_INLINE _syscall3(off_t, lseek, int, f,
+- off_t, o, int, w)
++ #if defined(__x86_64__)
++ /* Need to make sure off_t isn't truncated to 32-bits under x32. */
++ LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
++ _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
++ LSS_SYSCALL_ARG(w));
++ }
++ #else
++ LSS_INLINE _syscall3(off_t, lseek, int, f,
++ off_t, o, int, w)
++ #endif
+ LSS_INLINE _syscall2(int, munmap, void*, s,
+ size_t, l)
+ LSS_INLINE _syscall5(void*, _mremap, void*, o,
+@@ -1835,10 +1917,13 @@
+ int, t, int, p)
+ #endif
+ #if defined(__x86_64__)
+- LSS_INLINE _syscall6(void*, mmap, void*, s,
+- size_t, l, int, p,
+- int, f, int, d,
+- __off64_t, o)
++ /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */
++ LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
++ __off64_t o) {
++ LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
++ LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
++ LSS_SYSCALL_ARG(d), (uint64_t)(o));
++ }
+
+ LSS_INLINE int LSS_NAME(sigaction)(int signum,
+ const struct kernel_sigaction *act,
+Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190
+Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190
+diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h
+--- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400
+@@ -31,11 +31,6 @@
+ * Author: Sanjay Ghemawat
+ */
+
+-//
+-// Fast spinlocks (at least on x86, a lock/unlock pair is approximately
+-// half the cost of a Mutex because the unlock just does a store instead
+-// of a compare-and-swap which is expensive).
+-
+ // SpinLock is async signal safe.
+ // If used within a signal handler, all lock holders
+ // should block the signal even outside the signal handler.
+@@ -95,10 +90,9 @@
+ // TODO(csilvers): uncomment the annotation when we figure out how to
+ // support this macro with 0 args (see thread_annotations.h)
+ inline void Unlock() /*UNLOCK_FUNCTION()*/ {
+- uint64 wait_cycles =
+- static_cast(base::subtle::NoBarrier_Load(&lockword_));
+ ANNOTATE_RWLOCK_RELEASED(this, 1);
+- base::subtle::Release_Store(&lockword_, kSpinLockFree);
++ uint64 wait_cycles = static_cast(
++ base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
+ if (wait_cycles != kSpinLockHeld) {
+ // Collect contentionz profile info, and speed the wakeup of any waiter.
+ // The wait_cycles value indicates how long this thread spent waiting
+Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190
+Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190
+diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h
+--- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400
+@@ -38,7 +38,7 @@
+ #include
+ #if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
+ #include // for DWORD
+-#include // for CreateToolhelp32Snapshot
++#include // for CreateToolhelp32Snapshot
+ #endif
+ #ifdef HAVE_UNISTD_H
+ #include // for pid_t
+diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h
+--- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400
+@@ -79,6 +79,16 @@
+ // page full of 5-byte objects would have 2 bytes memory overhead).
+ size_t OverheadBytes();
+
++ // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
++ // to set the lock in a consistent state before the fork.
++ void Lock() {
++ lock_.Lock();
++ }
++
++ void Unlock() {
++ lock_.Unlock();
++ }
++
+ private:
+ // TransferCache is used to cache transfers of
+ // sizemap.num_objects_to_move(size_class) back and forth between
+diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc
+--- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400
+@@ -30,12 +30,32 @@
+ // ---
+ // Author: Sanjay Ghemawat
+
++#include // for getenv and strtol
+ #include "config.h"
+ #include "common.h"
+ #include "system-alloc.h"
++#include "base/spinlock.h"
+
+ namespace tcmalloc {
+
++// Define the maximum number of objects per class type to transfer between
++// thread and central caches.
++static int32 FLAGS_tcmalloc_transfer_num_objects;
++
++static const int32 kDefaultTransferNumObjecs = 32768;
++
++// The init function is provided to explicitly initialize the variable value
++// from the env. var to avoid C++ global construction that might defer its
++// initialization after a malloc/new call.
++static inline void InitTCMallocTransferNumObjects()
++{
++ if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
++ const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ");
++ FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
++ strtol(envval, NULL, 10);
++ }
++}
++
+ // Note: the following only works for "n"s that fit in 32-bits, but
+ // that is fine since we only use it for small sizes.
+ static inline int LgFloor(size_t n) {
+@@ -90,13 +110,16 @@
+ // - We go to the central freelist too often and we have to acquire
+ // its lock each time.
+ // This value strikes a balance between the constraints above.
+- if (num > 32) num = 32;
++ if (num > FLAGS_tcmalloc_transfer_num_objects)
++ num = FLAGS_tcmalloc_transfer_num_objects;
+
+ return num;
+ }
+
+ // Initialize the mapping arrays
+ void SizeMap::Init() {
++ InitTCMallocTransferNumObjects();
++
+ // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
+ if (ClassIndex(0) < 0) {
+ Log(kCrash, __FILE__, __LINE__,
+@@ -189,12 +212,56 @@
+
+ // Metadata allocator -- keeps stats about how many bytes allocated.
+ static uint64_t metadata_system_bytes_ = 0;
++static const size_t kMetadataAllocChunkSize = 8*1024*1024;
++static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
++// usually malloc uses larger alignments, but because metadata cannot
++// have any fancy SIMD types, aligning on pointer size seems fine
++static const size_t kMetadataAllignment = sizeof(void *);
++
++static char *metadata_chunk_alloc_;
++static size_t metadata_chunk_avail_;
++
++static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
++
+ void* MetaDataAlloc(size_t bytes) {
+- void* result = TCMalloc_SystemAlloc(bytes, NULL);
+- if (result != NULL) {
+- metadata_system_bytes_ += bytes;
++ if (bytes >= kMetadataAllocChunkSize) {
++ void *rv = TCMalloc_SystemAlloc(bytes,
++ NULL, kMetadataAllignment);
++ if (rv != NULL) {
++ metadata_system_bytes_ += bytes;
++ }
++ return rv;
+ }
+- return result;
++
++ SpinLockHolder h(&metadata_alloc_lock);
++
++ // the following works by essentially turning address to integer of
++ // log_2 kMetadataAllignment size and negating it. I.e. negated
++ // value + original value gets 0 and that's what we want modulo
++ // kMetadataAllignment. Note, we negate before masking higher bits
++ // off, otherwise we'd have to mask them off after negation anyways.
++ intptr_t alignment = -reinterpret_cast(metadata_chunk_alloc_) & (kMetadataAllignment-1);
++
++ if (metadata_chunk_avail_ < bytes + alignment) {
++ size_t real_size;
++ void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
++ &real_size, kMetadataAllignment);
++ if (ptr == NULL) {
++ return NULL;
++ }
++
++ metadata_chunk_alloc_ = static_cast(ptr);
++ metadata_chunk_avail_ = real_size;
++
++ alignment = 0;
++ }
++
++ void *rv = static_cast(metadata_chunk_alloc_ + alignment);
++ bytes += alignment;
++ metadata_chunk_alloc_ += bytes;
++ metadata_chunk_avail_ -= bytes;
++ metadata_system_bytes_ += bytes;
++ return rv;
+ }
+
+ uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
+Only in gperftools-2.0/src: common.cc.svn-r190
+diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h
+--- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400
+@@ -80,7 +80,7 @@
+ static const size_t kMinAlign = 16;
+ #elif defined(TCMALLOC_ALIGN_8BYTES)
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 93;
++static const size_t kNumClasses = 95;
+ // Unless we force to use 8 bytes alignment we use an alignment of
+ // at least 16 bytes to statisfy requirements for some SSE types.
+ // Keep in mind when using the 16 bytes alignment you can have a space
+@@ -88,7 +88,7 @@
+ static const size_t kMinAlign = 8;
+ #else
+ static const size_t kPageShift = 13;
+-static const size_t kNumClasses = 86;
++static const size_t kNumClasses = 88;
+ static const size_t kMinAlign = 16;
+ #endif
+ static const size_t kMaxThreadCacheSize = 4 << 20;
+Only in gperftools-2.0/src: common.h.svn-r190
+diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in
+--- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400
++++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400
+@@ -56,6 +56,9 @@
+ /* Define to 1 if you have the header file. */
+ #undef HAVE_FEATURES_H
+
++/* Define to 1 if you have the `fork' function. */
++#undef HAVE_FORK
++
+ /* Define to 1 if you have the `geteuid' function. */
+ #undef HAVE_GETEUID
+
+Only in gperftools-2.0/src: config.h.in.svn-r190
+Only in gperftools-2.0/src: debugallocation.cc.svn-r190
+Only in gperftools-2.0/src: getpc.h.svn-r190
+Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190
+Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190
+Only in gperftools-2.0/src: heap-checker.cc.svn-r190
+Only in gperftools-2.0/src: heap-profiler.cc.svn-r190
+Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190
+Only in gperftools-2.0/src: malloc_extension.cc.svn-r190
+Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190
+Only in gperftools-2.0/src: memory_region_map.cc.svn-r190
+diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc
+--- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400
++++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400
+@@ -108,6 +108,8 @@
+ return AllocLarge(n); // May be NULL
+ }
+
++static const size_t kForcedCoalesceInterval = 128*1024*1024;
++
+ Span* PageHeap::New(Length n) {
+ ASSERT(Check());
+ ASSERT(n > 0);
+@@ -116,6 +118,38 @@
+ if (result != NULL)
+ return result;
+
++ if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
++ && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
++ && (stats_.system_bytes / kForcedCoalesceInterval
++ != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
++ // We're about to grow heap, but there are lots of free pages.
++ // tcmalloc's design decision to keep unmapped and free spans
++ // separately and never coalesce them means that sometimes there
++ // can be a free span of pages of sufficient size, but it consists of
++ // "segments" of different type so page heap search cannot find
++ // it. In order to prevent growing heap and wasting memory in such
++ // case we're going to unmap all free pages. So that all free
++ // spans are maximally coalesced.
++ //
++ // We're also limiting 'rate' of going into this path to be at
++ // most once per 128 megs of heap growth. Otherwise programs that
++ // grow heap frequently (and that means by small amount) could be
++ // penalized with higher count of minor page faults.
++ //
++ // See also large_heap_fragmentation_unittest.cc and
++ // https://code.google.com/p/gperftools/issues/detail?id=368
++ ReleaseAtLeastNPages(static_cast(0x7fffffff));
++
++ // then try again. If we are forced to grow heap because of large
++ // spans fragmentation and not because of problem described above,
++ // then at the very least we've just unmapped free but
++ // insufficiently big large spans back to OS. So in case of really
++ // unlucky memory fragmentation we'll be consuming virtual address
++ // space, but not real memory
++ result = SearchFreeAndLargeLists(n);
++ if (result != NULL) return result;
++ }
++
+ // Grow the heap and try again.
+ if (!GrowHeap(n)) {
+ ASSERT(Check());
+Only in gperftools-2.0/src: page_heap.cc.svn-r190
+Only in gperftools-2.0/src: page_heap.h.svn-r190
+Only in gperftools-2.0/src: pprof.svn-r190
+Only in gperftools-2.0/src: profiler.cc.svn-r190
+diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc
+--- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400
+@@ -39,6 +39,39 @@
+
+ namespace tcmalloc {
+
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++// These following two functions are registered via pthread_atfork to make
++// sure the central_cache locks remain in a consistent state in the forked
++// version of the thread.
++
++static
++void CentralCacheLockAll()
++{
++ Static::pageheap_lock()->Lock();
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Lock();
++}
++
++static
++void CentralCacheUnlockAll()
++{
++ for (int i = 0; i < kNumClasses; ++i)
++ Static::central_cache()[i].Unlock();
++ Static::pageheap_lock()->Unlock();
++}
++#endif
++
++static inline
++void SetupAtForkLocksHandler()
++{
++#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
++ pthread_atfork(CentralCacheLockAll, // parent calls before fork
++ CentralCacheUnlockAll, // parent calls after fork
++ CentralCacheUnlockAll); // child calls after fork
++#endif
++}
++
++
+ SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
+ SizeMap Static::sizemap_;
+ CentralFreeListPadded Static::central_cache_[kNumClasses];
+@@ -49,6 +82,7 @@
+ StackTrace* Static::growth_stacks_ = NULL;
+ PageHeap* Static::pageheap_ = NULL;
+
++
+ void Static::InitStaticVars() {
+ sizemap_.Init();
+ span_allocator_.Init();
+@@ -61,6 +95,8 @@
+ for (int i = 0; i < kNumClasses; ++i) {
+ central_cache_[i].Init(i);
+ }
++ SetupAtForkLocksHandler();
++
+ // It's important to have PageHeap allocated, not in static storage,
+ // so that HeapLeakChecker does not consider all the byte patterns stored
+ // in is caches as pointers that are sources of heap object liveness,
+Only in gperftools-2.0/src: static_vars.h.svn-r190
+Only in gperftools-2.0/src: symbolize.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.cc.svn-r190
+Only in gperftools-2.0/src: system-alloc.h.svn-r190
+Only in gperftools-2.0/src: tcmalloc.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc
+--- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400
+@@ -38,13 +38,14 @@
+ #define GG_ULONGLONG(x) static_cast(x)
+
+ template
+-static void TestAtomicIncrement() {
++static void TestAtomicIncrement(AtomicType (*atomic_increment_func)
++ (volatile AtomicType*, AtomicType)) {
+ // For now, we just test single threaded execution
+
+- // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
++ // use a guard value to make sure the atomic_increment_func doesn't go
+ // outside the expected address bounds. This is in particular to
+ // test that some future change to the asm code doesn't cause the
+- // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit
++ // 32-bit atomic_increment_func doesn't do the wrong thing on 64-bit
+ // machines.
+ struct {
+ AtomicType prev_word;
+@@ -60,47 +61,47 @@
+ s.count = 0;
+ s.next_word = next_word_value;
+
+- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1));
++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1));
+ ASSERT_EQ(1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2));
++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2));
+ ASSERT_EQ(3, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3));
++ ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3));
+ ASSERT_EQ(6, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3));
++ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3));
+ ASSERT_EQ(3, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2));
++ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2));
+ ASSERT_EQ(1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1));
+ ASSERT_EQ(0, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
++ ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1));
+ ASSERT_EQ(-1, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4));
++ ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4));
+ ASSERT_EQ(-5, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+
+- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5));
++ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5));
+ ASSERT_EQ(0, s.count);
+ ASSERT_EQ(prev_word_value, s.prev_word);
+ ASSERT_EQ(next_word_value, s.next_word);
+@@ -111,9 +112,10 @@
+
+
+ template
+-static void TestCompareAndSwap() {
++static void TestCompareAndSwap(AtomicType (*compare_and_swap_func)
++ (volatile AtomicType*, AtomicType, AtomicType)) {
+ AtomicType value = 0;
+- AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1);
++ AtomicType prev = (*compare_and_swap_func)(&value, 0, 1);
+ ASSERT_EQ(1, value);
+ ASSERT_EQ(0, prev);
+
+@@ -122,21 +124,22 @@
+ const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+ (NUM_BITS(AtomicType) - 2)) + 11;
+ value = k_test_val;
+- prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5);
++ prev = (*compare_and_swap_func)(&value, 0, 5);
+ ASSERT_EQ(k_test_val, value);
+ ASSERT_EQ(k_test_val, prev);
+
+ value = k_test_val;
+- prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5);
++ prev = (*compare_and_swap_func)(&value, k_test_val, 5);
+ ASSERT_EQ(5, value);
+ ASSERT_EQ(k_test_val, prev);
+ }
+
+
+ template
+-static void TestAtomicExchange() {
++static void TestAtomicExchange(AtomicType (*atomic_exchange_func)
++ (volatile AtomicType*, AtomicType)) {
+ AtomicType value = 0;
+- AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1);
++ AtomicType new_value = (*atomic_exchange_func)(&value, 1);
+ ASSERT_EQ(1, value);
+ ASSERT_EQ(0, new_value);
+
+@@ -145,28 +148,29 @@
+ const AtomicType k_test_val = (GG_ULONGLONG(1) <<
+ (NUM_BITS(AtomicType) - 2)) + 11;
+ value = k_test_val;
+- new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
++ new_value = (*atomic_exchange_func)(&value, k_test_val);
+ ASSERT_EQ(k_test_val, value);
+ ASSERT_EQ(k_test_val, new_value);
+
+ value = k_test_val;
+- new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
++ new_value = (*atomic_exchange_func)(&value, 5);
+ ASSERT_EQ(5, value);
+ ASSERT_EQ(k_test_val, new_value);
+ }
+
+
+ template
+-static void TestAtomicIncrementBounds() {
++static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func)
++ (volatile AtomicType*, AtomicType)) {
+ // Test increment at the half-width boundary of the atomic type.
+ // It is primarily for testing at the 32-bit boundary for 64-bit atomic type.
+ AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
+ AtomicType value = test_val - 1;
+- AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
++ AtomicType new_value = (*atomic_increment_func)(&value, 1);
+ ASSERT_EQ(test_val, value);
+ ASSERT_EQ(value, new_value);
+
+- base::subtle::NoBarrier_AtomicIncrement(&value, -1);
++ (*atomic_increment_func)(&value, -1);
+ ASSERT_EQ(test_val - 1, value);
+ }
+
+@@ -222,16 +226,28 @@
+
+ template
+ static void TestAtomicOps() {
+- TestCompareAndSwap();
+- TestAtomicExchange();
+- TestAtomicIncrementBounds();
++ TestCompareAndSwap(base::subtle::NoBarrier_CompareAndSwap);
++ TestCompareAndSwap(base::subtle::Acquire_CompareAndSwap);
++ TestCompareAndSwap(base::subtle::Release_CompareAndSwap);
++
++ TestAtomicExchange(base::subtle::NoBarrier_AtomicExchange);
++ TestAtomicExchange(base::subtle::Acquire_AtomicExchange);
++ TestAtomicExchange(base::subtle::Release_AtomicExchange);
++
++ TestAtomicIncrementBounds(
++ base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrementBounds(
++ base::subtle::Barrier_AtomicIncrement);
++
+ TestStore();
+ TestLoad();
+ }
+
+ int main(int argc, char** argv) {
+- TestAtomicIncrement();
+- TestAtomicIncrement();
++ TestAtomicIncrement(base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement(base::subtle::Barrier_AtomicIncrement);
++ TestAtomicIncrement(base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement(base::subtle::Barrier_AtomicIncrement);
+
+ TestAtomicOps();
+ TestAtomicOps();
+@@ -248,8 +264,10 @@
+ // If we ever *do* want to enable this, try adding -msse (or -mmmx?)
+ // to the CXXFLAGS in Makefile.am.
+ #if 0 and defined(BASE_HAS_ATOMIC64)
+- TestAtomicIncrement();
+- TestAtomicOps();
++ TestAtomicIncrement(
++ base::subtle::NoBarrier_AtomicIncrement);
++ TestAtomicIncrement(
++ base::subtle::Barrier_AtomicIncrement);
+ #endif
+
+ printf("PASS\n");
+Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc
+--- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc 1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc 2013-06-04 10:16:58.073841689 -0400
+@@ -0,0 +1,62 @@
++// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
++// Redistribution and use in source and binary forms, with or without
++// modification, are permitted provided that the following conditions are
++// met:
++//
++// * Redistributions of source code must retain the above copyright
++// notice, this list of conditions and the following disclaimer.
++// * Redistributions in binary form must reproduce the above
++// copyright notice, this list of conditions and the following disclaimer
++// in the documentation and/or other materials provided with the
++// distribution.
++// * Neither the name of Google Inc. nor the names of its
++// contributors may be used to endorse or promote products derived from
++// this software without specific prior written permission.
++//
++// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++// This is a unit test for exercising fragmentation of large (over 1
++// meg) page spans. It makes sure that allocations/releases of
++// increasing memory chunks do not blowup memory
++// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368
++
++
++#include
++#include
++#include
++
++#include "base/logging.h"
++#include "common.h"
++#include
++
++
++int main (int argc, char** argv) {
++ for (int pass = 1; pass <= 3; pass++) {
++ size_t size = 100*1024*1024;
++ while (size < 500*1024*1024) {
++ void *ptr = malloc(size);
++ free(ptr);
++ size += 20000;
++
++ size_t heap_size = static_cast(-1);
++ MallocExtension::instance()->GetNumericProperty("generic.heap_size",
++ &heap_size);
++
++
++ CHECK_LT(heap_size, 1*1024*1024*1024);
++ }
++ }
++
++ printf("PASS\n");
++ return 0;
++}
+diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c
+--- gperftools-2.0/src/tests/malloc_extension_c_test.c 2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c 2013-06-04 10:16:58.077841689 -0400
+@@ -59,6 +59,16 @@
+ g_delete_hook_calls++;
+ }
+
++static
++void *forced_malloc(size_t size)
++{
++ void *rv = malloc(size);
++ if (!rv) {
++ FAIL("malloc is not supposed to fail here");
++ }
++ return rv;
++}
++
+ void TestMallocHook(void) {
+ /* TODO(csilvers): figure out why we get:
+ * E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken!
+@@ -78,8 +88,9 @@
+ if (!MallocHook_AddDeleteHook(&TestDeleteHook)) {
+ FAIL("Failed to add delete hook");
+ }
+- free(malloc(10));
+- free(malloc(20));
++
++ free(forced_malloc(10));
++ free(forced_malloc(20));
+ if (g_new_hook_calls != 2) {
+ FAIL("Wrong number of calls to the new hook");
+ }
+Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190
+Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190
+Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc
+--- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2013-06-04 10:20:21.147844736 -0400
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc 2013-06-04 10:16:58.073841689 -0400
+@@ -725,7 +725,7 @@
+ // Note the ... in the hook signature: we don't care what arguments
+ // the hook takes.
+ #define MAKE_HOOK_CALLBACK(hook_type) \
+- static int g_##hook_type##_calls = 0; \
++ static volatile int g_##hook_type##_calls = 0; \
+ static void IncrementCallsTo##hook_type(...) { \
+ g_##hook_type##_calls++; \
+ } \
+@@ -760,7 +760,7 @@
+ CHECK((p % sizeof(void*)) == 0);
+ CHECK((p % sizeof(double)) == 0);
+
+- // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
++ // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
+ // alignment for large enough objects
+ if (size >= kMinAlign) {
+ CHECK((p % kMinAlign) == 0);
+Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh
+--- gperftools-2.0/src/tests/tcmalloc_unittest.sh 1969-12-31 19:00:00.000000000 -0500
++++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh 2013-06-04 10:16:58.075841689 -0400
+@@ -0,0 +1,68 @@
++#!/bin/sh
++
++# Copyright (c) 2013, Google Inc.
++# All rights reserved.
++#
++# Redistribution and use in source and binary forms, with or without
++# modification, are permitted provided that the following conditions are
++# met:
++#
++# * Redistributions of source code must retain the above copyright
++# notice, this list of conditions and the following disclaimer.
++# * Redistributions in binary form must reproduce the above
++# copyright notice, this list of conditions and the following disclaimer
++# in the documentation and/or other materials provided with the
++# distribution.
++# * Neither the name of Google Inc. nor the names of its
++# contributors may be used to endorse or promote products derived from
++# this software without specific prior written permission.
++#
++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
++# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
++# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
++# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
++# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
++# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
++# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
++# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
++# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
++# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++# ---
++# Author: Adhemerval Zanella
++#
++# Runs the tcmalloc_unittest with various environment variables.
++# This is necessary because tuning some environment variables
++# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program
++# behavior, just performance.
++
++BINDIR="${BINDIR:-.}"
++TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest"
++
++TMPDIR=/tmp/tcmalloc_unittest
++rm -rf $TMPDIR || exit 2
++mkdir $TMPDIR || exit 3
++
++# $1: value of tcmalloc_unittest env. var.
++run_check_transfer_num_obj() {
++ [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1"
++
++ echo -n "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... "
++ if $TCMALLOC_UNITTEST > $TMPDIR/output 2>&1; then
++ echo "OK"
++ else
++ echo "FAILED"
++ echo "Output from the failed run:"
++ echo "----"
++ cat $TMPDIR/output
++ echo "----"
++ exit 4
++ fi
++}
++
++run_check_transfer_num_obj ""
++run_check_transfer_num_obj "40"
++run_check_transfer_num_obj "4096"
++
++echo "PASS"
+Only in gperftools-2.0/src: thread_cache.cc.svn-r190
+Only in gperftools-2.0/src: thread_cache.h.svn-r190
+diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h
+--- gperftools-2.0/src/windows/mingw.h 2012-02-02 16:36:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/mingw.h 2013-06-04 10:16:57.682841683 -0400
+@@ -60,6 +60,8 @@
+ // pretend the pthreads wrapper doesn't exist, even when it does.
+ #undef HAVE_PTHREAD
+
++#define HAVE_PID_T
++
+ #include "windows/port.h"
+
+ #endif /* __MINGW32__ */
+diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc
+--- gperftools-2.0/src/windows/patch_functions.cc 2012-02-03 14:18:23.000000000 -0500
++++ gperftools-2.0-svn218/src/windows/patch_functions.cc 2013-06-04 10:16:57.683841683 -0400
+@@ -85,7 +85,7 @@
+ #include
+ #include
+ #include // for _msize and _expand
+-#include // for EnumProcessModules, GetModuleInformation, etc.
++#include // for EnumProcessModules, GetModuleInformation, etc.
+ #include
+ #include
+ % env CPUPROFILE=ls.prof /bin/ls
+-
++
++ In addition to defining the environment variable CPUPROFILE
++ you can also define CPUPROFILESIGNAL. This allows profiling to be
++ controlled via the signal number that you specify. The signal number
++ must be unused by the program under normal operation. Internally it
++ acts as a switch, triggered by the signal, which is off by default.
++ For instance, if you had a copy of /bin/chrome
that had been
++ been linked against libprofiler, you could run:
++ % env CPUPROFILE=chrome.prof CPUPROFILESIGNAL=12 /bin/chrome &
++ You can then trigger profiling to start:
++ % killall -12 chrome
++ Then after a period of time you can tell it to stop which will
++ generate the profile:
++ % killall -12 chrome
++
+ In your code, bracket the code you want profiled in calls to
+ ProfilerStart()
and ProfilerStop()
.
+ (These functions are declared in <gperftools/profiler.h>
.)
+ ProfilerStart()
will take
+ the profile-filename as an argument.
++
+
+
+ In Linux 2.6 and above, profiling works correctly with threads,
+diff -urP gperftools-2.0/doc/heapprofile.html /home/spot/gperftools/doc/heapprofile.html
+--- gperftools-2.0/doc/heapprofile.html 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/doc/heapprofile.html 2013-03-01 14:25:39.086366811 -0500
+@@ -91,7 +91,7 @@
+
HEAP_PROFILE_ALLOCATION_INTERVAL |
+ default: 1073741824 (1 Gb) |
+
+- Dump heap profiling information once every specified number of
++ Dump heap profiling information each time the specified number of
+ bytes has been allocated by the program.
+ |
+
+@@ -106,6 +106,15 @@
+
+
+
++ HEAP_PROFILE_TIME_INTERVAL |
++ default: 104857600 (100 Mb) |
++
++ Dump heap profiling information each time the specified
++ number of seconds has elapsed.
++ |
++
++
++
+ HEAP_PROFILE_MMAP |
+ default: false |
+
+@@ -122,7 +131,7 @@
+ |
+
+
+- HEAP_PROFILE_MMAP_ONLY |
++ HEAP_PROFILE_ONLY_MMAP |
+ default: false |
+
+ Only profile mmap , mremap , and sbrk
+diff -urP gperftools-2.0/doc/pprof.see_also /home/spot/gperftools/doc/pprof.see_also
+--- gperftools-2.0/doc/pprof.see_also 1969-12-31 19:00:00.000000000 -0500
++++ /home/spot/gperftools/doc/pprof.see_also 2013-03-01 14:25:39.089366811 -0500
+@@ -0,0 +1,11 @@
++[see also]
++Further documentation for
++.B pprof
++is maintained as a web page called
++.B cpu_profiler.html
++and is likely installed at one of the following locations:
++.IP
++.B /usr/share/gperftools/cpu_profiler.html
++.br
++.B /usr/local/share/gperftools/cpu_profiler.html
++.PP
+Only in gperftools-2.0: libtool
+diff -urP gperftools-2.0/m4/libtool.m4 /home/spot/gperftools/m4/libtool.m4
+--- gperftools-2.0/m4/libtool.m4 2010-11-05 18:21:23.000000000 -0400
++++ /home/spot/gperftools/m4/libtool.m4 2013-03-01 14:25:37.807366857 -0500
+@@ -1021,7 +1021,7 @@
+ # to the aix ld manual.
+ m4_defun([_LT_SYS_MODULE_PATH_AIX],
+ [m4_require([_LT_DECL_SED])dnl
+-AC_LINK_IFELSE(AC_LANG_PROGRAM,[
++AC_LINK_IFELSE([AC_LANG_PROGRAM],[
+ lt_aix_libpath_sed='
+ /Import File Strings/,/^$/ {
+ /^0/ {
+@@ -4846,7 +4846,7 @@
+ # implicitly export all symbols.
+ save_LDFLAGS="$LDFLAGS"
+ LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo ${wl}-update_registry ${wl}/dev/null"
+- AC_LINK_IFELSE(int foo(void) {},
++ AC_LINK_IFELSE([AC_LANG_PROGRAM([int foo(void) {}])],
+ _LT_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && $ECHO "X${wl}-set_version ${wl}$verstring" | $Xsed` ${wl}-update_registry ${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o $lib'
+ )
+ LDFLAGS="$save_LDFLAGS"
+diff -urP gperftools-2.0/Makefile.am /home/spot/gperftools/Makefile.am
+--- gperftools-2.0/Makefile.am 2012-02-03 14:32:08.000000000 -0500
++++ /home/spot/gperftools/Makefile.am 2013-03-01 14:25:39.163366809 -0500
+@@ -29,10 +29,15 @@
+ -fno-builtin-calloc -fno-builtin-cfree \
+ -fno-builtin-memalign -fno-builtin-posix_memalign \
+ -fno-builtin-valloc -fno-builtin-pvalloc
++
+ # On i386, -mmmx is needed for the mmx-based instructions in
+-# atomicops-internal-x86.h.
++# atomicops-internal-x86.h. Also as of gcc 4.6, -fomit-frame-pointer
++# is the default. Since we must always have frame pointers for I386
++# in order to generate backtraces we now specify -fno-omit-frame-pointer
++# by default.
+ if I386
+ AM_CXXFLAGS += -mmmx
++AM_CXXFLAGS += -fno-omit-frame-pointer
+ endif I386
+ endif GCC
+ if HAVE_W_NO_UNUSED_RESULT
+@@ -121,8 +126,7 @@
+ src/google/malloc_hook_c.h \
+ src/google/profiler.h \
+ src/google/stacktrace.h \
+- src/google/tcmalloc.h \
+- src/windows/google/tcmalloc.h
++ src/google/tcmalloc.h
+
+ docdir = $(prefix)/share/doc/$(PACKAGE)-$(VERSION)
+ # This is for HTML and other documentation you want to install.
+@@ -432,9 +436,8 @@
+ SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \
+ src/gperftools/malloc_hook_c.h \
+ src/gperftools/malloc_extension.h \
+- src/gperftools/malloc_extension_c.h \
+- src/gperftools/stacktrace.h
+-TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES)
++ src/gperftools/malloc_extension_c.h
++TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SG_STACKTRACE_INCLUDES)
+ perftoolsinclude_HEADERS += $(SG_TCMALLOC_MINIMAL_INCLUDES)
+
+ ### Making the library
+@@ -842,10 +845,10 @@
+ src/base/sysinfo.h \
+ src/base/thread_lister.h \
+ src/heap-profile-table.h
+-SG_TCMALLOC_INCLUDES = $(SG_TCMALLOC_MINIMAL_INCLUDES) \
+- src/gperftools/heap-profiler.h \
++SG_TCMALLOC_INCLUDES = src/gperftools/heap-profiler.h \
+ src/gperftools/heap-checker.h
+-TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_INCLUDES)
++TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) \
++ $(SG_TCMALLOC_INCLUDES) $(SG_STACKTRACE_INCLUDES)
+ perftoolsinclude_HEADERS += $(SG_TCMALLOC_INCLUDES)
+
+ ### Making the library
+@@ -1186,9 +1189,9 @@
+ src/base/sysinfo.h \
+ $(SPINLOCK_INCLUDES) \
+ $(LOGGING_INCLUDES)
+-SG_CPU_PROFILER_INCLUDES = src/gperftools/profiler.h \
+- src/gperftools/stacktrace.h
+-CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES)
++SG_CPU_PROFILER_INCLUDES = src/gperftools/profiler.h
++CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES) \
++ $(SG_STACKTRACE_INCLUDES)
+ perftoolsinclude_HEADERS += $(SG_CPU_PROFILER_INCLUDES)
+
+ ### Making the library
+diff -urP gperftools-2.0/Makefile.in /home/spot/gperftools/Makefile.in
+--- gperftools-2.0/Makefile.in 2012-02-03 14:39:18.000000000 -0500
++++ /home/spot/gperftools/Makefile.in 2013-03-01 14:25:37.963366851 -0500
+@@ -1,8 +1,9 @@
+-# Makefile.in generated by automake 1.10.1 from Makefile.am.
++# Makefile.in generated by automake 1.10.3 from Makefile.am.
+ # @configure_input@
+
+ # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+-# 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
++# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
++# Inc.
+ # This Makefile.in is free software; the Free Software Foundation
+ # gives unlimited permission to copy and/or distribute it,
+ # with or without modifications, as long as this notice is preserved.
+@@ -54,9 +55,13 @@
+ @GCC_TRUE@ -fno-builtin-memalign -fno-builtin-posix_memalign \
+ @GCC_TRUE@ -fno-builtin-valloc -fno-builtin-pvalloc
+
++
+ # On i386, -mmmx is needed for the mmx-based instructions in
+-# atomicops-internal-x86.h.
+-@GCC_TRUE@@I386_TRUE@am__append_3 = -mmmx
++# atomicops-internal-x86.h. Also as of gcc 4.6, -fomit-frame-pointer
++# is the default. Since we must always have frame pointers for I386
++# in order to generate backtraces we now specify -fno-omit-frame-pointer
++# by default.
++@GCC_TRUE@@I386_TRUE@am__append_3 = -mmmx -fno-omit-frame-pointer
+ @HAVE_W_NO_UNUSED_RESULT_TRUE@am__append_4 = -Wno-unused-result
+
+ # These are x86-specific, having to do with frame-pointers. In
+@@ -344,7 +349,7 @@
+ @WITH_CPU_PROFILER_TRUE@am__objects_2 = $(am__objects_1) \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am__objects_3 = $(am__objects_2) \
+-@WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
++@WITH_CPU_PROFILER_TRUE@ $(am__objects_1) $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am_libprofiler_la_OBJECTS = profiler.lo \
+ @WITH_CPU_PROFILER_TRUE@ profile-handler.lo profiledata.lo \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_3)
+@@ -434,26 +439,27 @@
+ src/gperftools/malloc_hook.h src/gperftools/malloc_hook_c.h \
+ src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h \
+- src/gperftools/stacktrace.h src/gperftools/heap-profiler.h \
+- src/gperftools/heap-checker.h src/base/thread_lister.c \
++ src/gperftools/heap-profiler.h src/gperftools/heap-checker.h \
++ src/gperftools/stacktrace.h src/base/thread_lister.c \
+ src/base/linuxthreads.cc src/heap-checker.cc \
+ src/heap-checker-bcad.cc
+ @MINGW_FALSE@am__objects_5 = libtcmalloc_la-tcmalloc.lo
+ am__objects_6 = $(am__objects_1)
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_7 = $(am__objects_6) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_8 = $(am__objects_1)
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_9 = $(am__objects_7) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_8)
+-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_10 = thread_lister.lo \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_8 = $(am__objects_7) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
++@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_9 = thread_lister.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_la-linuxthreads.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_la-heap-checker.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_la-heap-checker-bcad.lo
+-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_11 = $(am__objects_10)
++@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_10 = $(am__objects_9)
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_la_OBJECTS = \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_5) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_9) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_11)
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_8) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_10)
+ libtcmalloc_la_OBJECTS = $(am_libtcmalloc_la_OBJECTS)
+ libtcmalloc_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+@@ -497,29 +503,29 @@
+ src/gperftools/malloc_hook.h src/gperftools/malloc_hook_c.h \
+ src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h \
+- src/gperftools/stacktrace.h src/gperftools/heap-profiler.h \
+- src/gperftools/heap-checker.h src/base/thread_lister.c \
++ src/gperftools/heap-profiler.h src/gperftools/heap-checker.h \
++ src/gperftools/stacktrace.h src/base/thread_lister.c \
+ src/base/linuxthreads.cc src/heap-checker.cc \
+ src/heap-checker-bcad.cc src/profiler.cc \
+ src/profile-handler.cc src/profiledata.cc src/profiledata.h \
+ src/profile-handler.h src/getpc.h src/base/simple_mutex.h \
+ src/gperftools/profiler.h
+-@MINGW_FALSE@am__objects_12 = libtcmalloc_and_profiler_la-tcmalloc.lo
+-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_13 = thread_lister.lo \
++@MINGW_FALSE@am__objects_11 = libtcmalloc_and_profiler_la-tcmalloc.lo
++@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_12 = thread_lister.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_and_profiler_la-linuxthreads.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_and_profiler_la-heap-checker.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_and_profiler_la-heap-checker-bcad.lo
+-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_14 = $(am__objects_13)
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_15 = \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_12) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_9) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_14)
+-@WITH_CPU_PROFILER_TRUE@am__objects_16 = libtcmalloc_and_profiler_la-profiler.lo \
++@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_13 = $(am__objects_12)
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_14 = \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_11) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_8) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_13)
++@WITH_CPU_PROFILER_TRUE@am__objects_15 = libtcmalloc_and_profiler_la-profiler.lo \
+ @WITH_CPU_PROFILER_TRUE@ libtcmalloc_and_profiler_la-profile-handler.lo \
+ @WITH_CPU_PROFILER_TRUE@ libtcmalloc_and_profiler_la-profiledata.lo \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_3)
+-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_and_profiler_la_OBJECTS = $(am__objects_15) \
+-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_16)
++@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_and_profiler_la_OBJECTS = $(am__objects_14) \
++@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_15)
+ libtcmalloc_and_profiler_la_OBJECTS = \
+ $(am_libtcmalloc_and_profiler_la_OBJECTS)
+ libtcmalloc_and_profiler_la_LINK = $(LIBTOOL) --tag=CXX \
+@@ -563,15 +569,15 @@
+ src/gperftools/malloc_hook.h src/gperftools/malloc_hook_c.h \
+ src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h \
+- src/gperftools/stacktrace.h src/gperftools/heap-profiler.h \
+- src/gperftools/heap-checker.h
+-@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_17 = thread_lister.lo \
++ src/gperftools/heap-profiler.h src/gperftools/heap-checker.h \
++ src/gperftools/stacktrace.h
++@WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_16 = thread_lister.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_debug_la-linuxthreads.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_debug_la-heap-checker.lo \
+ @WITH_HEAP_CHECKER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_debug_la-heap-checker-bcad.lo
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_debug_la_OBJECTS = libtcmalloc_debug_la-debugallocation.lo \
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_17) \
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_9)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_16) \
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_8)
+ libtcmalloc_debug_la_OBJECTS = $(am_libtcmalloc_debug_la_OBJECTS)
+ libtcmalloc_debug_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+@@ -623,12 +629,12 @@
+ src/gperftools/heap-checker.h src/base/low_level_alloc.cc \
+ src/heap-profile-table.cc src/heap-profiler.cc \
+ src/raw_printer.cc src/memory_region_map.cc
+-@MINGW_FALSE@am__objects_18 = libtcmalloc_internal_la-system-alloc.lo
+-@MINGW_FALSE@am__objects_19 = \
++@MINGW_FALSE@am__objects_17 = libtcmalloc_internal_la-system-alloc.lo
++@MINGW_FALSE@am__objects_18 = \
+ @MINGW_FALSE@ libtcmalloc_internal_la-maybe_threads.lo
+-am__objects_20 = $(am__objects_6) $(am__objects_1)
+-am__objects_21 = libtcmalloc_internal_la-common.lo \
+- libtcmalloc_internal_la-internal_logging.lo $(am__objects_18) \
++am__objects_19 = $(am__objects_6) $(am__objects_1) $(am__objects_1)
++am__objects_20 = libtcmalloc_internal_la-common.lo \
++ libtcmalloc_internal_la-internal_logging.lo $(am__objects_17) \
+ libtcmalloc_internal_la-memfs_malloc.lo \
+ libtcmalloc_internal_la-central_freelist.lo \
+ libtcmalloc_internal_la-page_heap.lo \
+@@ -639,11 +645,11 @@
+ libtcmalloc_internal_la-symbolize.lo \
+ libtcmalloc_internal_la-thread_cache.lo \
+ libtcmalloc_internal_la-malloc_hook.lo \
+- libtcmalloc_internal_la-malloc_extension.lo $(am__objects_19) \
+- $(am__objects_20)
++ libtcmalloc_internal_la-malloc_extension.lo $(am__objects_18) \
++ $(am__objects_19)
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_libtcmalloc_internal_la_OBJECTS = \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_21) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_9) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_20) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_8) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal_la-low_level_alloc.lo \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal_la-heap-profile-table.lo \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ libtcmalloc_internal_la-heap-profiler.lo \
+@@ -685,9 +691,9 @@
+ src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h \
+ src/gperftools/stacktrace.h
+-@MINGW_FALSE@am__objects_22 = libtcmalloc_minimal_la-tcmalloc.lo
+-am_libtcmalloc_minimal_la_OBJECTS = $(am__objects_22) \
+- $(am__objects_20)
++@MINGW_FALSE@am__objects_21 = libtcmalloc_minimal_la-tcmalloc.lo
++am_libtcmalloc_minimal_la_OBJECTS = $(am__objects_21) \
++ $(am__objects_19)
+ libtcmalloc_minimal_la_OBJECTS = $(am_libtcmalloc_minimal_la_OBJECTS)
+ libtcmalloc_minimal_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+@@ -725,7 +731,7 @@
+ src/gperftools/malloc_extension_c.h \
+ src/gperftools/stacktrace.h
+ @WITH_DEBUGALLOC_TRUE@am_libtcmalloc_minimal_debug_la_OBJECTS = libtcmalloc_minimal_debug_la-debugallocation.lo \
+-@WITH_DEBUGALLOC_TRUE@ $(am__objects_20)
++@WITH_DEBUGALLOC_TRUE@ $(am__objects_19)
+ libtcmalloc_minimal_debug_la_OBJECTS = \
+ $(am_libtcmalloc_minimal_debug_la_OBJECTS)
+ libtcmalloc_minimal_debug_la_LINK = $(LIBTOOL) --tag=CXX \
+@@ -768,14 +774,14 @@
+ src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h \
+ src/gperftools/stacktrace.h
+-@MINGW_FALSE@am__objects_23 = \
++@MINGW_FALSE@am__objects_22 = \
+ @MINGW_FALSE@ libtcmalloc_minimal_internal_la-system-alloc.lo
+-@MINGW_FALSE@am__objects_24 = \
++@MINGW_FALSE@am__objects_23 = \
+ @MINGW_FALSE@ libtcmalloc_minimal_internal_la-maybe_threads.lo
+ am_libtcmalloc_minimal_internal_la_OBJECTS = \
+ libtcmalloc_minimal_internal_la-common.lo \
+ libtcmalloc_minimal_internal_la-internal_logging.lo \
+- $(am__objects_23) \
++ $(am__objects_22) \
+ libtcmalloc_minimal_internal_la-memfs_malloc.lo \
+ libtcmalloc_minimal_internal_la-central_freelist.lo \
+ libtcmalloc_minimal_internal_la-page_heap.lo \
+@@ -787,7 +793,7 @@
+ libtcmalloc_minimal_internal_la-thread_cache.lo \
+ libtcmalloc_minimal_internal_la-malloc_hook.lo \
+ libtcmalloc_minimal_internal_la-malloc_extension.lo \
+- $(am__objects_24) $(am__objects_20)
++ $(am__objects_23) $(am__objects_19)
+ libtcmalloc_minimal_internal_la_OBJECTS = \
+ $(am_libtcmalloc_minimal_internal_la_OBJECTS)
+ libtcmalloc_minimal_internal_la_LINK = $(LIBTOOL) --tag=CXX \
+@@ -873,10 +879,10 @@
+ src/base/basictypes.h src/base/dynamic_annotations.h \
+ src/third_party/valgrind.h src/windows/port.h \
+ src/windows/port.cc
+-@MINGW_TRUE@am__objects_25 = addressmap_unittest-port.$(OBJEXT)
++@MINGW_TRUE@am__objects_24 = addressmap_unittest-port.$(OBJEXT)
+ am_addressmap_unittest_OBJECTS = \
+ addressmap_unittest-addressmap_unittest.$(OBJEXT) \
+- $(am__objects_6) $(am__objects_25)
++ $(am__objects_6) $(am__objects_24)
+ addressmap_unittest_OBJECTS = $(am_addressmap_unittest_OBJECTS)
+ addressmap_unittest_DEPENDENCIES = liblogging.la
+ addressmap_unittest_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+@@ -935,10 +941,10 @@
+ src/base/googleinit.h src/gperftools/heap-checker.h \
+ src/base/logging.h src/base/basictypes.h \
+ src/base/dynamic_annotations.h src/third_party/valgrind.h
+-@WITH_HEAP_CHECKER_TRUE@am__objects_26 = $(am__objects_1)
+-@WITH_HEAP_CHECKER_TRUE@am__objects_27 = heap_checker_debug_unittest-heap-checker_unittest.$(OBJEXT) \
+-@WITH_HEAP_CHECKER_TRUE@ $(am__objects_26)
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am_heap_checker_debug_unittest_OBJECTS = $(am__objects_27)
++@WITH_HEAP_CHECKER_TRUE@am__objects_25 = $(am__objects_1)
++@WITH_HEAP_CHECKER_TRUE@am__objects_26 = heap_checker_debug_unittest-heap-checker_unittest.$(OBJEXT) \
++@WITH_HEAP_CHECKER_TRUE@ $(am__objects_25)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@am_heap_checker_debug_unittest_OBJECTS = $(am__objects_26)
+ heap_checker_debug_unittest_OBJECTS = \
+ $(am_heap_checker_debug_unittest_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_CHECKER_TRUE@heap_checker_debug_unittest_DEPENDENCIES = libtcmalloc_debug.la \
+@@ -961,7 +967,7 @@
+ src/base/logging.h src/base/basictypes.h \
+ src/base/dynamic_annotations.h src/third_party/valgrind.h
+ @WITH_HEAP_CHECKER_TRUE@am_heap_checker_unittest_OBJECTS = heap_checker_unittest-heap-checker_unittest.$(OBJEXT) \
+-@WITH_HEAP_CHECKER_TRUE@ $(am__objects_26)
++@WITH_HEAP_CHECKER_TRUE@ $(am__objects_25)
+ heap_checker_unittest_OBJECTS = $(am_heap_checker_unittest_OBJECTS)
+ @WITH_HEAP_CHECKER_TRUE@heap_checker_unittest_DEPENDENCIES = \
+ @WITH_HEAP_CHECKER_TRUE@ $(LIBTCMALLOC) liblogging.la \
+@@ -979,9 +985,9 @@
+ am__heap_profiler_debug_unittest_SOURCES_DIST = \
+ src/tests/heap-profiler_unittest.cc src/config_for_unittests.h \
+ src/gperftools/heap-profiler.h
+-@WITH_HEAP_PROFILER_TRUE@am__objects_28 = heap_profiler_debug_unittest-heap-profiler_unittest.$(OBJEXT) \
++@WITH_HEAP_PROFILER_TRUE@am__objects_27 = heap_profiler_debug_unittest-heap-profiler_unittest.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_TRUE@ $(am__objects_1)
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am_heap_profiler_debug_unittest_OBJECTS = $(am__objects_28)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@am_heap_profiler_debug_unittest_OBJECTS = $(am__objects_27)
+ heap_profiler_debug_unittest_OBJECTS = \
+ $(am_heap_profiler_debug_unittest_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_TRUE@heap_profiler_debug_unittest_DEPENDENCIES = libtcmalloc_debug.la \
+@@ -1033,15 +1039,15 @@
+ src/base/atomicops-internals-arm-v6plus.h src/base/logging.h \
+ src/base/commandlineflags.h src/base/dynamic_annotations.h \
+ src/third_party/valgrind.h
+-@MINGW_FALSE@am__objects_29 = \
++@MINGW_FALSE@am__objects_28 = \
+ @MINGW_FALSE@ low_level_alloc_unittest-maybe_threads.$(OBJEXT)
+-am__objects_30 = $(am__objects_1) $(am__objects_1)
++am__objects_29 = $(am__objects_1) $(am__objects_1)
+ am_low_level_alloc_unittest_OBJECTS = \
+ low_level_alloc_unittest-low_level_alloc.$(OBJEXT) \
+ low_level_alloc_unittest-malloc_hook.$(OBJEXT) \
+- $(am__objects_29) \
++ $(am__objects_28) \
+ low_level_alloc_unittest-low_level_alloc_unittest.$(OBJEXT) \
+- $(am__objects_30)
++ $(am__objects_29)
+ low_level_alloc_unittest_OBJECTS = \
+ $(am_low_level_alloc_unittest_OBJECTS)
+ low_level_alloc_unittest_DEPENDENCIES = $(LIBSPINLOCK)
+@@ -1067,10 +1073,10 @@
+ src/tests/malloc_extension_test.cc src/config_for_unittests.h \
+ src/base/logging.h src/gperftools/malloc_extension.h \
+ src/gperftools/malloc_extension_c.h
+-am__objects_31 = \
++am__objects_30 = \
+ malloc_extension_debug_test-malloc_extension_test.$(OBJEXT)
+ @WITH_DEBUGALLOC_TRUE@am_malloc_extension_debug_test_OBJECTS = \
+-@WITH_DEBUGALLOC_TRUE@ $(am__objects_31)
++@WITH_DEBUGALLOC_TRUE@ $(am__objects_30)
+ malloc_extension_debug_test_OBJECTS = \
+ $(am_malloc_extension_debug_test_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@malloc_extension_debug_test_DEPENDENCIES = \
+@@ -1119,9 +1125,9 @@
+ src/tests/memalign_unittest.cc src/tcmalloc.h \
+ src/config_for_unittests.h src/tests/testutil.h \
+ src/tests/testutil.cc
+-@MINGW_FALSE@@OSX_FALSE@am__objects_32 = memalign_debug_unittest-memalign_unittest.$(OBJEXT) \
++@MINGW_FALSE@@OSX_FALSE@am__objects_31 = memalign_debug_unittest-memalign_unittest.$(OBJEXT) \
+ @MINGW_FALSE@@OSX_FALSE@ memalign_debug_unittest-testutil.$(OBJEXT)
+-@MINGW_FALSE@@OSX_FALSE@@WITH_DEBUGALLOC_TRUE@am_memalign_debug_unittest_OBJECTS = $(am__objects_32)
++@MINGW_FALSE@@OSX_FALSE@@WITH_DEBUGALLOC_TRUE@am_memalign_debug_unittest_OBJECTS = $(am__objects_31)
+ memalign_debug_unittest_OBJECTS = \
+ $(am_memalign_debug_unittest_OBJECTS)
+ @MINGW_FALSE@@OSX_FALSE@@WITH_DEBUGALLOC_TRUE@memalign_debug_unittest_DEPENDENCIES = libtcmalloc_minimal_debug.la \
+@@ -1192,11 +1198,11 @@
+ am__profiler1_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
+ src/tests/testutil.h src/tests/testutil.cc \
+ src/config_for_unittests.h src/gperftools/profiler.h
+-@WITH_CPU_PROFILER_TRUE@am__objects_33 = profiler1_unittest-profiler_unittest.$(OBJEXT) \
++@WITH_CPU_PROFILER_TRUE@am__objects_32 = profiler1_unittest-profiler_unittest.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ profiler1_unittest-testutil.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am_profiler1_unittest_OBJECTS = \
+-@WITH_CPU_PROFILER_TRUE@ $(am__objects_33)
++@WITH_CPU_PROFILER_TRUE@ $(am__objects_32)
+ profiler1_unittest_OBJECTS = $(am_profiler1_unittest_OBJECTS)
+ @WITH_CPU_PROFILER_TRUE@profiler1_unittest_DEPENDENCIES = \
+ @WITH_CPU_PROFILER_TRUE@ $(LIBPROFILER)
+@@ -1207,11 +1213,11 @@
+ am__profiler2_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
+ src/tests/testutil.h src/tests/testutil.cc \
+ src/config_for_unittests.h src/gperftools/profiler.h
+-@WITH_CPU_PROFILER_TRUE@am__objects_34 = profiler2_unittest-profiler_unittest.$(OBJEXT) \
++@WITH_CPU_PROFILER_TRUE@am__objects_33 = profiler2_unittest-profiler_unittest.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ profiler2_unittest-testutil.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am_profiler2_unittest_OBJECTS = \
+-@WITH_CPU_PROFILER_TRUE@ $(am__objects_34)
++@WITH_CPU_PROFILER_TRUE@ $(am__objects_33)
+ profiler2_unittest_OBJECTS = $(am_profiler2_unittest_OBJECTS)
+ profiler2_unittest_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+@@ -1220,11 +1226,11 @@
+ am__profiler3_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
+ src/tests/testutil.h src/tests/testutil.cc \
+ src/config_for_unittests.h src/gperftools/profiler.h
+-@WITH_CPU_PROFILER_TRUE@am__objects_35 = profiler3_unittest-profiler_unittest.$(OBJEXT) \
++@WITH_CPU_PROFILER_TRUE@am__objects_34 = profiler3_unittest-profiler_unittest.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ profiler3_unittest-testutil.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am_profiler3_unittest_OBJECTS = \
+-@WITH_CPU_PROFILER_TRUE@ $(am__objects_35)
++@WITH_CPU_PROFILER_TRUE@ $(am__objects_34)
+ profiler3_unittest_OBJECTS = $(am_profiler3_unittest_OBJECTS)
+ @WITH_CPU_PROFILER_TRUE@profiler3_unittest_DEPENDENCIES = \
+ @WITH_CPU_PROFILER_TRUE@ $(LIBPROFILER) $(am__DEPENDENCIES_1)
+@@ -1235,11 +1241,11 @@
+ am__profiler4_unittest_SOURCES_DIST = src/tests/profiler_unittest.cc \
+ src/tests/testutil.h src/tests/testutil.cc \
+ src/config_for_unittests.h src/gperftools/profiler.h
+-@WITH_CPU_PROFILER_TRUE@am__objects_36 = profiler4_unittest-profiler_unittest.$(OBJEXT) \
++@WITH_CPU_PROFILER_TRUE@am__objects_35 = profiler4_unittest-profiler_unittest.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ profiler4_unittest-testutil.$(OBJEXT) \
+ @WITH_CPU_PROFILER_TRUE@ $(am__objects_1)
+ @WITH_CPU_PROFILER_TRUE@am_profiler4_unittest_OBJECTS = \
+-@WITH_CPU_PROFILER_TRUE@ $(am__objects_36)
++@WITH_CPU_PROFILER_TRUE@ $(am__objects_35)
+ profiler4_unittest_OBJECTS = $(am_profiler4_unittest_OBJECTS)
+ profiler4_unittest_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CXXLD) \
+@@ -1263,9 +1269,9 @@
+ am__realloc_debug_unittest_SOURCES_DIST = \
+ src/tests/realloc_unittest.cc src/config_for_unittests.h \
+ src/base/logging.h
+-am__objects_37 = realloc_debug_unittest-realloc_unittest.$(OBJEXT)
++am__objects_36 = realloc_debug_unittest-realloc_unittest.$(OBJEXT)
+ @WITH_DEBUGALLOC_TRUE@am_realloc_debug_unittest_OBJECTS = \
+-@WITH_DEBUGALLOC_TRUE@ $(am__objects_37)
++@WITH_DEBUGALLOC_TRUE@ $(am__objects_36)
+ realloc_debug_unittest_OBJECTS = $(am_realloc_debug_unittest_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@realloc_debug_unittest_DEPENDENCIES = \
+ @WITH_DEBUGALLOC_TRUE@ libtcmalloc_minimal_debug.la \
+@@ -1285,8 +1291,8 @@
+ $(realloc_unittest_LDFLAGS) $(LDFLAGS) -o $@
+ am__sampler_debug_test_SOURCES_DIST = src/tests/sampler_test.cc \
+ src/config_for_unittests.h
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_38 = sampler_debug_test-sampler_test.$(OBJEXT)
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_sampler_debug_test_OBJECTS = $(am__objects_38)
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_37 = sampler_debug_test-sampler_test.$(OBJEXT)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_sampler_debug_test_OBJECTS = $(am__objects_37)
+ sampler_debug_test_OBJECTS = $(am_sampler_debug_test_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@sampler_debug_test_DEPENDENCIES = libtcmalloc_debug.la \
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
+@@ -1307,9 +1313,9 @@
+ am__sampling_debug_test_SOURCES_DIST = src/tests/sampling_test.cc \
+ src/config_for_unittests.h src/base/logging.h \
+ src/gperftools/malloc_extension.h
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_39 = sampling_debug_test-sampling_test.$(OBJEXT) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_38 = sampling_debug_test-sampling_test.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_sampling_debug_test_OBJECTS = $(am__objects_39)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_sampling_debug_test_OBJECTS = $(am__objects_38)
+ sampling_debug_test_OBJECTS = $(am_sampling_debug_test_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@sampling_debug_test_DEPENDENCIES = libtcmalloc_debug.la \
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__DEPENDENCIES_1)
+@@ -1342,7 +1348,7 @@
+ src/google/malloc_extension.h src/google/malloc_extension_c.h \
+ src/google/malloc_hook.h src/google/malloc_hook_c.h \
+ src/google/profiler.h src/google/stacktrace.h \
+- src/google/tcmalloc.h src/windows/google/tcmalloc.h
++ src/google/tcmalloc.h
+ @WITH_HEAP_PROFILER_TRUE@am_simple_compat_test_OBJECTS = \
+ @WITH_HEAP_PROFILER_TRUE@ simple_compat_test.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_TRUE@ $(am__objects_1)
+@@ -1371,11 +1377,11 @@
+ src/gperftools/stacktrace.h src/base/logging.h \
+ src/base/basictypes.h src/base/dynamic_annotations.h \
+ src/third_party/valgrind.h
+-@WITH_STACK_TRACE_TRUE@am__objects_40 = $(am__objects_4) \
++@WITH_STACK_TRACE_TRUE@am__objects_39 = $(am__objects_4) \
+ @WITH_STACK_TRACE_TRUE@ $(am__objects_1)
+ @WITH_STACK_TRACE_TRUE@am_stacktrace_unittest_OBJECTS = \
+ @WITH_STACK_TRACE_TRUE@ stacktrace_unittest.$(OBJEXT) \
+-@WITH_STACK_TRACE_TRUE@ $(am__objects_40)
++@WITH_STACK_TRACE_TRUE@ $(am__objects_39)
+ stacktrace_unittest_OBJECTS = $(am_stacktrace_unittest_OBJECTS)
+ @WITH_STACK_TRACE_TRUE@stacktrace_unittest_DEPENDENCIES = \
+ @WITH_STACK_TRACE_TRUE@ libstacktrace.la liblogging.la
+@@ -1393,10 +1399,10 @@
+ src/tests/tcmalloc_unittest.cc src/tests/testutil.h \
+ src/tests/testutil.cc src/config_for_unittests.h \
+ src/gperftools/malloc_extension.h
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_41 = tcmalloc_and_profiler_unittest-tcmalloc_unittest.$(OBJEXT) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_40 = tcmalloc_and_profiler_unittest-tcmalloc_unittest.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_and_profiler_unittest-testutil.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
+-@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_and_profiler_unittest_OBJECTS = $(am__objects_41)
++@WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_and_profiler_unittest_OBJECTS = $(am__objects_40)
+ tcmalloc_and_profiler_unittest_OBJECTS = \
+ $(am_tcmalloc_and_profiler_unittest_OBJECTS)
+ @WITH_CPU_PROFILER_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_and_profiler_unittest_DEPENDENCIES = libtcmalloc_and_profiler.la
+@@ -1408,10 +1414,10 @@
+ src/tests/tcmalloc_unittest.cc src/tests/testutil.h \
+ src/tests/testutil.cc src/config_for_unittests.h \
+ src/gperftools/malloc_extension.h
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_42 = tcmalloc_both_unittest-tcmalloc_unittest.$(OBJEXT) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_41 = tcmalloc_both_unittest-tcmalloc_unittest.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_both_unittest-testutil.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
+-@OSX_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_both_unittest_OBJECTS = $(am__objects_42)
++@OSX_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_both_unittest_OBJECTS = $(am__objects_41)
+ tcmalloc_both_unittest_OBJECTS = $(am_tcmalloc_both_unittest_OBJECTS)
+ @WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__DEPENDENCIES_4 = $(LIBTCMALLOC) \
+ @WITH_CPU_PROFILER_FALSE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(LIBTCMALLOC_MINIMAL) \
+@@ -1431,10 +1437,10 @@
+ src/tests/tcmalloc_unittest.cc src/tcmalloc.h \
+ src/tests/testutil.h src/tests/testutil.cc \
+ src/config_for_unittests.h src/gperftools/malloc_extension.h
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_43 = tcmalloc_debug_unittest-tcmalloc_unittest.$(OBJEXT) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am__objects_42 = tcmalloc_debug_unittest-tcmalloc_unittest.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ tcmalloc_debug_unittest-testutil.$(OBJEXT) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(am__objects_1)
+-@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_debug_unittest_OBJECTS = $(am__objects_43)
++@WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@am_tcmalloc_debug_unittest_OBJECTS = $(am__objects_42)
+ tcmalloc_debug_unittest_OBJECTS = \
+ $(am_tcmalloc_debug_unittest_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@tcmalloc_debug_unittest_DEPENDENCIES = libtcmalloc_debug.la \
+@@ -1459,12 +1465,12 @@
+ src/tests/tcmalloc_unittest.cc src/tests/testutil.h \
+ src/tests/testutil.cc src/config_for_unittests.h \
+ src/gperftools/malloc_extension.h
+-am__objects_44 = \
++am__objects_43 = \
+ tcmalloc_minimal_debug_unittest-tcmalloc_unittest.$(OBJEXT) \
+ tcmalloc_minimal_debug_unittest-testutil.$(OBJEXT) \
+ $(am__objects_1)
+ @WITH_DEBUGALLOC_TRUE@am_tcmalloc_minimal_debug_unittest_OBJECTS = \
+-@WITH_DEBUGALLOC_TRUE@ $(am__objects_44)
++@WITH_DEBUGALLOC_TRUE@ $(am__objects_43)
+ tcmalloc_minimal_debug_unittest_OBJECTS = \
+ $(am_tcmalloc_minimal_debug_unittest_OBJECTS)
+ @WITH_DEBUGALLOC_TRUE@tcmalloc_minimal_debug_unittest_DEPENDENCIES = \
+@@ -1932,8 +1938,7 @@
+ src/google/malloc_hook_c.h \
+ src/google/profiler.h \
+ src/google/stacktrace.h \
+- src/google/tcmalloc.h \
+- src/windows/google/tcmalloc.h
++ src/google/tcmalloc.h
+
+ # This is for HTML and other documentation you want to install.
+ # Add your documentation files (in doc/) in addition to these
+@@ -2236,10 +2241,9 @@
+ SG_TCMALLOC_MINIMAL_INCLUDES = src/gperftools/malloc_hook.h \
+ src/gperftools/malloc_hook_c.h \
+ src/gperftools/malloc_extension.h \
+- src/gperftools/malloc_extension_c.h \
+- src/gperftools/stacktrace.h
++ src/gperftools/malloc_extension_c.h
+
+-TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES)
++TCMALLOC_MINIMAL_INCLUDES = $(S_TCMALLOC_MINIMAL_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) $(SG_STACKTRACE_INCLUDES)
+ libtcmalloc_minimal_internal_la_SOURCES = src/common.cc \
+ src/internal_logging.cc \
+ $(SYSTEM_ALLOC_CC) \
+@@ -2459,11 +2463,12 @@
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/base/thread_lister.h \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/heap-profile-table.h
+
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@SG_TCMALLOC_INCLUDES = $(SG_TCMALLOC_MINIMAL_INCLUDES) \
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/gperftools/heap-profiler.h \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@SG_TCMALLOC_INCLUDES = src/gperftools/heap-profiler.h \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/gperftools/heap-checker.h
+
+-@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_INCLUDES)
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@TCMALLOC_INCLUDES = $(S_TCMALLOC_INCLUDES) $(SG_TCMALLOC_MINIMAL_INCLUDES) \
++@WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(SG_TCMALLOC_INCLUDES) $(SG_STACKTRACE_INCLUDES)
++
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@libtcmalloc_internal_la_SOURCES = $(libtcmalloc_minimal_internal_la_SOURCES) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ $(TCMALLOC_INCLUDES) \
+ @WITH_HEAP_PROFILER_OR_CHECKER_TRUE@ src/base/low_level_alloc.cc \
+@@ -2669,10 +2674,10 @@
+ @WITH_CPU_PROFILER_TRUE@ $(SPINLOCK_INCLUDES) \
+ @WITH_CPU_PROFILER_TRUE@ $(LOGGING_INCLUDES)
+
+-@WITH_CPU_PROFILER_TRUE@SG_CPU_PROFILER_INCLUDES = src/gperftools/profiler.h \
+-@WITH_CPU_PROFILER_TRUE@ src/gperftools/stacktrace.h
++@WITH_CPU_PROFILER_TRUE@SG_CPU_PROFILER_INCLUDES = src/gperftools/profiler.h
++@WITH_CPU_PROFILER_TRUE@CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES) \
++@WITH_CPU_PROFILER_TRUE@ $(SG_STACKTRACE_INCLUDES)
+
+-@WITH_CPU_PROFILER_TRUE@CPU_PROFILER_INCLUDES = $(S_CPU_PROFILER_INCLUDES) $(SG_CPU_PROFILER_INCLUDES)
+ @WITH_CPU_PROFILER_TRUE@libprofiler_la_SOURCES = src/profiler.cc \
+ @WITH_CPU_PROFILER_TRUE@ src/profile-handler.cc \
+ @WITH_CPU_PROFILER_TRUE@ src/profiledata.cc \
+@@ -4744,7 +4749,7 @@
+ -rm -rf .libs _libs
+
+ distclean-libtool:
+- -rm -f libtool
++ -rm -f libtool config.lt
+ install-man1: $(man1_MANS) $(man_MANS)
+ @$(NORMAL_INSTALL)
+ test -z "$(man1dir)" || $(MKDIR_P) "$(DESTDIR)$(man1dir)"
+@@ -4756,8 +4761,8 @@
+ esac; \
+ done; \
+ for i in $$list; do \
+- if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \
+- else file=$$i; fi; \
++ if test -f $$i; then file=$$i; \
++ else file=$(srcdir)/$$i; fi; \
+ ext=`echo $$i | sed -e 's/^.*\\.//'`; \
+ case "$$ext" in \
+ 1*) ;; \
+@@ -4881,7 +4886,7 @@
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+- $(AWK) '{ files[$$0] = 1; nonemtpy = 1; } \
++ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+ tags: TAGS
+@@ -4924,7 +4929,7 @@
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+ check-TESTS: $(TESTS)
+- @failed=0; all=0; xfail=0; xpass=0; skip=0; ws='[ ]'; \
++ @failed=0; all=0; xfail=0; xpass=0; skip=0; \
+ srcdir=$(srcdir); export srcdir; \
+ list=' $(TESTS) '; \
+ if test -n "$$list"; then \
+@@ -4935,7 +4940,7 @@
+ if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+- *$$ws$$tst$$ws*) \
++ *[\ \ ]$$tst[\ \ ]*) \
+ xpass=`expr $$xpass + 1`; \
+ failed=`expr $$failed + 1`; \
+ echo "XPASS: $$tst"; \
+@@ -4947,7 +4952,7 @@
+ elif test $$? -ne 77; then \
+ all=`expr $$all + 1`; \
+ case " $(XFAIL_TESTS) " in \
+- *$$ws$$tst$$ws*) \
++ *[\ \ ]$$tst[\ \ ]*) \
+ xfail=`expr $$xfail + 1`; \
+ echo "XFAIL: $$tst"; \
+ ;; \
+@@ -4961,23 +4966,36 @@
+ echo "SKIP: $$tst"; \
+ fi; \
+ done; \
++ if test "$$all" -eq 1; then \
++ tests="test"; \
++ All=""; \
++ else \
++ tests="tests"; \
++ All="All "; \
++ fi; \
+ if test "$$failed" -eq 0; then \
+ if test "$$xfail" -eq 0; then \
+- banner="All $$all tests passed"; \
++ banner="$$All$$all $$tests passed"; \
+ else \
+- banner="All $$all tests behaved as expected ($$xfail expected failures)"; \
++ if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
++ banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
+ fi; \
+ else \
+ if test "$$xpass" -eq 0; then \
+- banner="$$failed of $$all tests failed"; \
++ banner="$$failed of $$all $$tests failed"; \
+ else \
+- banner="$$failed of $$all tests did not behave as expected ($$xpass unexpected passes)"; \
++ if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
++ banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
+ fi; \
+ fi; \
+ dashes="$$banner"; \
+ skipped=""; \
+ if test "$$skip" -ne 0; then \
+- skipped="($$skip tests were not run)"; \
++ if test "$$skip" -eq 1; then \
++ skipped="($$skip test was not run)"; \
++ else \
++ skipped="($$skip tests were not run)"; \
++ fi; \
+ test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
+ dashes="$$skipped"; \
+ fi; \
+@@ -4997,6 +5015,19 @@
+ else :; fi
+
+ distdir: $(DISTFILES)
++ @list='$(MANS)'; if test -n "$$list"; then \
++ list=`for p in $$list; do \
++ if test -f $$p; then d=; else d="$(srcdir)/"; fi; \
++ if test -f "$$d$$p"; then echo "$$d$$p"; else :; fi; done`; \
++ if test -n "$$list" && \
++ grep 'ab help2man is required to generate this page' $$list >/dev/null; then \
++ echo "error: found man pages containing the \`missing help2man' replacement text:" >&2; \
++ grep -l 'ab help2man is required to generate this page' $$list | sed 's/^/ /' >&2; \
++ echo " to fix them, install help2man, remove and regenerate the man pages;" >&2; \
++ echo " typically \`make maintainer-clean' will remove them" >&2; \
++ exit 1; \
++ else :; fi; \
++ else :; fi
+ $(am__remove_distdir)
+ test -d $(distdir) || mkdir $(distdir)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+@@ -5027,7 +5058,8 @@
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$(top_distdir)" distdir="$(distdir)" \
+ dist-hook
+- -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \
++ -find "$(distdir)" -type d ! -perm -755 \
++ -exec chmod u+rwx,go+rx {} \; -o \
+ ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -400 -exec chmod a+r {} \; -o \
+ ! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
+@@ -5187,6 +5219,8 @@
+
+ html: html-am
+
++html-am:
++
+ info: info-am
+
+ info-am:
+@@ -5197,19 +5231,29 @@
+
+ install-dvi: install-dvi-am
+
++install-dvi-am:
++
+ install-exec-am: install-binPROGRAMS install-binSCRIPTS \
+ install-exec-local install-libLTLIBRARIES
+
+ install-html: install-html-am
+
++install-html-am:
++
+ install-info: install-info-am
+
++install-info-am:
++
+ install-man: install-man1
+
+ install-pdf: install-pdf-am
+
++install-pdf-am:
++
+ install-ps: install-ps-am
+
++install-ps-am:
++
+ installcheck-am:
+
+ maintainer-clean: maintainer-clean-am
+diff -urP gperftools-2.0/NEWS /home/spot/gperftools/NEWS
+--- gperftools-2.0/NEWS 2012-02-03 18:40:46.000000000 -0500
++++ /home/spot/gperftools/NEWS 2013-03-01 14:25:39.165366808 -0500
+@@ -12,7 +12,7 @@
+ I have both renamed the project (the Google Code site renamed a few
+ weeks ago), and bumped the major version number up to 2, to reflect
+ the new community ownership of the project. Almost all the
+-[http://sparsehash.googlecode.com/svn/tags/gperftools-2.0/ChangeLog changes]
++[http://gperftools.googlecode.com/svn/tags/gperftools-2.0/ChangeLog changes]
+ are related to the renaming.
+
+ The main functional change from google-perftools 1.10 is that
+diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h /home/spot/gperftools/src/base/atomicops-internals-linuxppc.h
+--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/atomicops-internals-linuxppc.h 2013-03-01 14:25:38.722366824 -0500
+@@ -237,7 +237,7 @@
+ Atomic64 prev;
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%2\n\
+- cmpw 0,%0,%3\n\
++ cmpd 0,%0,%3\n\
+ bne- 2f\n\
+ stdcx. %4,0,%2\n\
+ bne- 1b\n\
+diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h /home/spot/gperftools/src/base/atomicops-internals-windows.h
+--- gperftools-2.0/src/base/atomicops-internals-windows.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/atomicops-internals-windows.h 2013-03-01 14:25:38.731366824 -0500
+@@ -434,16 +434,14 @@
+ #endif
+ }
+
+-inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
+-#if 0 // Not implemented
+- __asm {
+- mov mm0, value; // Use mmx reg for 64-bit atomic moves
+- mov ptr, mm0;
+- emms; // Empty mmx state to enable FP registers
+- }
+-#else
+- NotImplementedFatalError("NoBarrier_Store");
+-#endif
++inline void NoBarrier_Store(volatile Atomic64* ptrValue, Atomic64 value)
++{
++ __asm {
++ movq mm0, value; // Use mmx reg for 64-bit atomic moves
++ mov eax, ptrValue;
++ movq [eax], mm0;
++ emms; // Empty mmx state to enable FP registers
++ }
+ }
+
+ inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
+@@ -455,19 +453,16 @@
+ NoBarrier_Store(ptr, value);
+ }
+
+-inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
+-#if 0 // Not implemented
+- Atomic64 value;
+- __asm {
+- mov mm0, ptr; // Use mmx reg for 64-bit atomic moves
+- mov value, mm0;
+- emms; // Empty mmx state to enable FP registers
++inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptrValue)
++{
++ Atomic64 value;
++ __asm {
++ mov eax, ptrValue;
++ movq mm0, [eax]; // Use mmx reg for 64-bit atomic moves
++ movq value, mm0;
++ emms; // Empty mmx state to enable FP registers
+ }
+ return value;
+-#else
+- NotImplementedFatalError("NoBarrier_Store");
+- return 0;
+-#endif
+ }
+
+ inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
+diff -urP gperftools-2.0/src/base/basictypes.h /home/spot/gperftools/src/base/basictypes.h
+--- gperftools-2.0/src/base/basictypes.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/basictypes.h 2013-03-01 14:25:38.724366824 -0500
+@@ -1,10 +1,10 @@
+ // Copyright (c) 2005, Google Inc.
+ // All rights reserved.
+-//
++//
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+-//
++//
+ // * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions and the following disclaimer.
+ // * Redistributions in binary form must reproduce the above
+@@ -14,7 +14,7 @@
+ // * Neither the name of Google Inc. nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+-//
++//
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -331,8 +331,14 @@
+
+ #endif // HAVE___ATTRIBUTE__ and __ELF__ or __MACH__
+
+-#if defined(HAVE___ATTRIBUTE__) && (defined(__i386__) || defined(__x86_64__))
+-# define CACHELINE_ALIGNED __attribute__((aligned(64)))
++#if defined(HAVE___ATTRIBUTE__)
++# if (defined(__i386__) || defined(__x86_64__))
++# define CACHELINE_ALIGNED __attribute__((aligned(64)))
++# elif defined(__arm__)
++# define CACHELINE_ALIGNED __attribute__((aligned(32)))
++# elif (defined(__PPC__) || defined(__PPC64__))
++# define CACHELINE_ALIGNED __attribute__((aligned(16)))
++# endif
+ #else
+ # define CACHELINE_ALIGNED
+ #endif // defined(HAVE___ATTRIBUTE__) && (__i386__ || __x86_64__)
+diff -urP gperftools-2.0/src/base/cycleclock.h /home/spot/gperftools/src/base/cycleclock.h
+--- gperftools-2.0/src/base/cycleclock.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/cycleclock.h 2013-03-01 14:25:38.729366824 -0500
+@@ -97,15 +97,24 @@
+ uint64 low, high;
+ __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high));
+ return (high << 32) | low;
++#elif defined(__powerpc64__) || defined(__ppc64__)
++ uint64 tb;
++ __asm__ volatile (\
++ "mfspr %0, 268"
++ : "=r" (tb));
++ return tb;
+ #elif defined(__powerpc__) || defined(__ppc__)
+ // This returns a time-base, which is not always precisely a cycle-count.
+- int64 tbl, tbu0, tbu1;
+- asm("mftbu %0" : "=r" (tbu0));
+- asm("mftb %0" : "=r" (tbl));
+- asm("mftbu %0" : "=r" (tbu1));
+- tbl &= -static_cast(tbu0 == tbu1);
+- // high 32 bits in tbu1; low 32 bits in tbl (tbu0 is garbage)
+- return (tbu1 << 32) | tbl;
++ uint32 tbu, tbl, tmp;
++ __asm__ volatile (\
++ "0:\n"
++ "mftbu %0\n"
++ "mftbl %1\n"
++ "mftbu %2\n"
++ "cmpw %0, %2\n"
++ "bne- 0b"
++ : "=r" (tbu), "=r" (tbl), "=r" (tmp));
++ return (((uint64) tbu << 32) | tbl);
+ #elif defined(__sparc__)
+ int64 tick;
+ asm(".byte 0x83, 0x41, 0x00, 0x00");
+diff -urP gperftools-2.0/src/base/linux_syscall_support.h /home/spot/gperftools/src/base/linux_syscall_support.h
+--- gperftools-2.0/src/base/linux_syscall_support.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/linux_syscall_support.h 2013-03-01 14:25:38.732366824 -0500
+@@ -243,14 +243,13 @@
+ long ru_nivcsw;
+ };
+
+-struct siginfo;
+ #if defined(__i386__) || defined(__arm__) || defined(__PPC__)
+
+ /* include/asm-{arm,i386,mips,ppc}/signal.h */
+ struct kernel_old_sigaction {
+ union {
+ void (*sa_handler_)(int);
+- void (*sa_sigaction_)(int, struct siginfo *, void *);
++ void (*sa_sigaction_)(int, siginfo_t *, void *);
+ };
+ unsigned long sa_mask;
+ unsigned long sa_flags;
+@@ -287,13 +286,13 @@
+ unsigned long sa_flags;
+ union {
+ void (*sa_handler_)(int);
+- void (*sa_sigaction_)(int, struct siginfo *, void *);
++ void (*sa_sigaction_)(int, siginfo_t *, void *);
+ };
+ struct kernel_sigset_t sa_mask;
+ #else
+ union {
+ void (*sa_handler_)(int);
+- void (*sa_sigaction_)(int, struct siginfo *, void *);
++ void (*sa_sigaction_)(int, siginfo_t *, void *);
+ };
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+diff -urP gperftools-2.0/src/base/linuxthreads.cc /home/spot/gperftools/src/base/linuxthreads.cc
+--- gperftools-2.0/src/base/linuxthreads.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/linuxthreads.cc 2013-03-01 14:25:38.729366824 -0500
+@@ -45,6 +45,8 @@
+ #include
+ #include
+ #include
++#include
++#include
+
+ #include "base/linux_syscall_support.h"
+ #include "base/thread_lister.h"
+@@ -240,6 +242,7 @@
+ ListAllProcessThreadsCallBack callback;
+ void *parameter;
+ va_list ap;
++ sem_t *lock;
+ };
+
+
+@@ -254,6 +257,13 @@
+ struct kernel_stat marker_sb, proc_sb;
+ stack_t altstack;
+
++ /* Wait for parent thread to set appropriate permissions
++ * to allow ptrace activity
++ */
++ if (sem_wait(args->lock) < 0) {
++ goto failure;
++ }
++
+ /* Create "marker" that we can use to detect threads sharing the same
+ * address space and the same file handles. By setting the FD_CLOEXEC flag
+ * we minimize the risk of misidentifying child processes as threads;
+@@ -398,7 +408,7 @@
+ /* Check if the marker is identical to the one we created */
+ if (sys_stat(fname, &tmp_sb) >= 0 &&
+ marker_sb.st_ino == tmp_sb.st_ino) {
+- long i, j;
++ long i;
+
+ /* Found one of our threads, make sure it is no duplicate */
+ for (i = 0; i < num_threads; i++) {
+@@ -434,28 +444,28 @@
+ sig_num_threads = num_threads;
+ goto next_entry;
+ }
+- while (sys_waitpid(pid, (int *)0, __WALL) < 0) {
++ /* Attaching to a process doesn't guarantee it'll stop before
++ * ptrace returns; you have to wait on it. Specifying __WCLONE
++ * means it will only wait for clone children (i.e. threads,
++ * not processes).
++ */
++ while (sys_waitpid(pid, (int *)0, __WCLONE) < 0) {
+ if (errno != EINTR) {
+- sys_ptrace_detach(pid);
+- num_threads--;
+- sig_num_threads = num_threads;
+- goto next_entry;
++ /* Assumes ECHILD */
++ if (pid == ppid) {
++ /* The parent is not a clone */
++ found_parent = true;
++ break;
++ } else {
++ sys_ptrace_detach(pid);
++ num_threads--;
++ sig_num_threads = num_threads;
++ goto next_entry;
++ }
+ }
+ }
+-
+- if (sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i++ != j ||
+- sys_ptrace(PTRACE_PEEKDATA, pid, &i, &j) || i != j) {
+- /* Address spaces are distinct, even though both
+- * processes show the "marker". This is probably
+- * a forked child process rather than a thread.
+- */
+- sys_ptrace_detach(pid);
+- num_threads--;
+- sig_num_threads = num_threads;
+- } else {
+- found_parent |= pid == ppid;
+- added_entries++;
+- }
++
++ added_entries++;
+ }
+ }
+ }
+@@ -536,6 +546,7 @@
+ pid_t clone_pid;
+ int dumpable = 1, sig;
+ struct kernel_sigset_t sig_blocked, sig_old;
++ sem_t lock;
+
+ va_start(args.ap, callback);
+
+@@ -565,6 +576,7 @@
+ args.altstack_mem = altstack_mem;
+ args.parameter = parameter;
+ args.callback = callback;
++ args.lock = &lock;
+
+ /* Before cloning the thread lister, block all asynchronous signals, as we */
+ /* are not prepared to handle them. */
+@@ -596,42 +608,63 @@
+ #undef SYS_LINUX_SYSCALL_SUPPORT_H
+ #include "linux_syscall_support.h"
+ #endif
+-
+- int clone_errno;
+- clone_pid = local_clone((int (*)(void *))ListerThread, &args);
+- clone_errno = errno;
+-
+- sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
+-
+- if (clone_pid >= 0) {
+- int status, rc;
+- while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
+- ERRNO == EINTR) {
+- /* Keep waiting */
+- }
+- if (rc < 0) {
+- args.err = ERRNO;
+- args.result = -1;
+- } else if (WIFEXITED(status)) {
+- switch (WEXITSTATUS(status)) {
+- case 0: break; /* Normal process termination */
+- case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */
+- args.result = -1;
+- break;
+- case 3: args.err = EPERM; /* Process is already being traced */
+- args.result = -1;
+- break;
+- default:args.err = ECHILD; /* Child died unexpectedly */
+- args.result = -1;
+- break;
++
++ /* Lock before clone so that parent can set
++ * ptrace permissions (if necessary) prior
++ * to ListerThread actually executing
++ */
++ if (sem_init(&lock, 0, 0) == 0) {
++
++ int clone_errno;
++ clone_pid = local_clone((int (*)(void *))ListerThread, &args);
++ clone_errno = errno;
++
++ sys_sigprocmask(SIG_SETMASK, &sig_old, &sig_old);
++
++ if (clone_pid >= 0) {
++#ifdef PR_SET_PTRACER
++ /* In newer versions of glibc permission must explicitly
++ * be given to allow for ptrace.
++ */
++ prctl(PR_SET_PTRACER, clone_pid, 0, 0, 0);
++#endif
++ /* Releasing the lock here allows the
++ * ListerThread to execute and ptrace us.
++ */
++ sem_post(&lock);
++ int status, rc;
++ while ((rc = sys0_waitpid(clone_pid, &status, __WALL)) < 0 &&
++ ERRNO == EINTR) {
++ /* Keep waiting */
++ }
++ if (rc < 0) {
++ args.err = ERRNO;
++ args.result = -1;
++ } else if (WIFEXITED(status)) {
++ switch (WEXITSTATUS(status)) {
++ case 0: break; /* Normal process termination */
++ case 2: args.err = EFAULT; /* Some fault (e.g. SIGSEGV) detected */
++ args.result = -1;
++ break;
++ case 3: args.err = EPERM; /* Process is already being traced */
++ args.result = -1;
++ break;
++ default:args.err = ECHILD; /* Child died unexpectedly */
++ args.result = -1;
++ break;
++ }
++ } else if (!WIFEXITED(status)) {
++ args.err = EFAULT; /* Terminated due to an unhandled signal*/
++ args.result = -1;
+ }
+- } else if (!WIFEXITED(status)) {
+- args.err = EFAULT; /* Terminated due to an unhandled signal*/
++ sem_destroy(&lock);
++ } else {
+ args.result = -1;
++ args.err = clone_errno;
+ }
+ } else {
+ args.result = -1;
+- args.err = clone_errno;
++ args.err = errno;
+ }
+ }
+
+diff -urP gperftools-2.0/src/base/spinlock_internal.cc /home/spot/gperftools/src/base/spinlock_internal.cc
+--- gperftools-2.0/src/base/spinlock_internal.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/spinlock_internal.cc 2013-03-01 14:25:38.724366824 -0500
+@@ -80,6 +80,7 @@
+ static int SuggestedDelayNS(int loop) {
+ // Weak pseudo-random number generator to get some spread between threads
+ // when many are spinning.
++#ifdef BASE_HAS_ATOMIC64
+ static base::subtle::Atomic64 rand;
+ uint64 r = base::subtle::NoBarrier_Load(&rand);
+ r = 0x5deece66dLL * r + 0xb; // numbers from nrand48()
+@@ -96,6 +97,24 @@
+ // The futex path multiplies this by 16, since we expect explicit wakeups
+ // almost always on that path.
+ return r >> (44 - (loop >> 3));
++#else
++ static Atomic32 rand;
++ uint32 r = base::subtle::NoBarrier_Load(&rand);
++ r = 0x343fd * r + 0x269ec3; // numbers from MSVC++
++ base::subtle::NoBarrier_Store(&rand, r);
++
++ r <<= 1; // 31-bit random number now in top 31-bits.
++ if (loop < 0 || loop > 32) { // limit loop to 0..32
++ loop = 32;
++ }
++ // loop>>3 cannot exceed 4 because loop cannot exceed 32.
++ // Select top 20..24 bits of lower 31 bits,
++ // giving approximately 0ms to 16ms.
++ // Mean is exponential in loop for first 32 iterations, then 8ms.
++ // The futex path multiplies this by 16, since we expect explicit wakeups
++ // almost always on that path.
++ return r >> (12 - (loop >> 3));
++#endif
+ }
+
+ } // namespace internal
+diff -urP gperftools-2.0/src/base/sysinfo.cc /home/spot/gperftools/src/base/sysinfo.cc
+--- gperftools-2.0/src/base/sysinfo.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/base/sysinfo.cc 2013-03-01 14:25:38.732366824 -0500
+@@ -32,6 +32,7 @@
+ # define PLATFORM_WINDOWS 1
+ #endif
+
++#include // for isspace()
+ #include // for getenv()
+ #include // for snprintf(), sscanf()
+ #include // for memmove(), memchr(), etc.
+@@ -350,6 +351,22 @@
+ if (newline != NULL)
+ *newline = '\0';
+
++#if defined(__powerpc__) || defined(__ppc__)
++ // PowerPC cpus report the frequency in "clock" line
++ if (strncasecmp(line, "clock", sizeof("clock")-1) == 0) {
++ const char* freqstr = strchr(line, ':');
++ if (freqstr) {
++ // PowerPC frequencies are only reported as MHz (check 'show_cpuinfo'
++ // function at arch/powerpc/kernel/setup-common.c)
++ char *endp = strstr(line, "MHz");
++ if (endp) {
++ *endp = 0;
++ cpuinfo_cycles_per_second = strtod(freqstr+1, &err) * 1000000.0;
++ if (freqstr[1] != '\0' && *err == '\0' && cpuinfo_cycles_per_second > 0)
++ saw_mhz = true;
++ }
++ }
++#else
+ // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
+ // accept postive values. Some environments (virtual machines) report zero,
+ // which would cause infinite looping in WallTime_Init.
+@@ -367,6 +384,7 @@
+ if (freqstr[1] != '\0' && *err == '\0' && bogo_clock > 0)
+ saw_bogo = true;
+ }
++#endif
+ } else if (strncasecmp(line, "processor", sizeof("processor")-1) == 0) {
+ num_cpus++; // count up every time we see an "processor :" entry
+ }
+@@ -558,6 +576,145 @@
+ }
+ #endif
+
++// Finds |c| in |text|, and assigns '\0' at the found position.
++// The original character at the modified position should be |c|.
++// A pointer to the modified position is stored in |endptr|.
++// |endptr| should not be NULL.
++static bool ExtractUntilChar(char *text, int c, char **endptr) {
++ CHECK_NE(text, NULL);
++ CHECK_NE(endptr, NULL);
++ char *found;
++ found = strchr(text, c);
++ if (found == NULL) {
++ *endptr = NULL;
++ return false;
++ }
++
++ *endptr = found;
++ *found = '\0';
++ return true;
++}
++
++// Increments |*text_pointer| while it points to a whitespace character.
++// It is to follow sscanf's whitespace handling.
++static void SkipWhileWhitespace(char **text_pointer, int c) {
++ if (isspace(c)) {
++ while (isspace(**text_pointer) && isspace(*((*text_pointer) + 1))) {
++ ++(*text_pointer);
++ }
++ }
++}
++
++template
++static T StringToInteger(char *text, char **endptr, int base) {
++ assert(false);
++ return T();
++}
++
++template<>
++int StringToInteger(char *text, char **endptr, int base) {
++ return strtol(text, endptr, base);
++}
++
++template<>
++int64 StringToInteger(char *text, char **endptr, int base) {
++ return strtoll(text, endptr, base);
++}
++
++template<>
++uint64 StringToInteger(char *text, char **endptr, int base) {
++ return strtoull(text, endptr, base);
++}
++
++template
++static T StringToIntegerUntilChar(
++ char *text, int base, int c, char **endptr_result) {
++ CHECK_NE(endptr_result, NULL);
++ *endptr_result = NULL;
++
++ char *endptr_extract;
++ if (!ExtractUntilChar(text, c, &endptr_extract))
++ return 0;
++
++ T result;
++ char *endptr_strto;
++ result = StringToInteger(text, &endptr_strto, base);
++ *endptr_extract = c;
++
++ if (endptr_extract != endptr_strto)
++ return 0;
++
++ *endptr_result = endptr_extract;
++ SkipWhileWhitespace(endptr_result, c);
++
++ return result;
++}
++
++static char *CopyStringUntilChar(
++ char *text, unsigned out_len, int c, char *out) {
++ char *endptr;
++ if (!ExtractUntilChar(text, c, &endptr))
++ return NULL;
++
++ strncpy(out, text, out_len);
++ out[out_len-1] = '\0';
++ *endptr = c;
++
++ SkipWhileWhitespace(&endptr, c);
++ return endptr;
++}
++
++template
++static bool StringToIntegerUntilCharWithCheck(
++ T *outptr, char *text, int base, int c, char **endptr) {
++ *outptr = StringToIntegerUntilChar(*endptr, base, c, endptr);
++ if (*endptr == NULL || **endptr == '\0') return false;
++ ++(*endptr);
++ return true;
++}
++
++static bool ParseProcMapsLine(char *text, uint64 *start, uint64 *end,
++ char *flags, uint64 *offset,
++ int *major, int *minor, int64 *inode,
++ unsigned *filename_offset) {
++#if defined(__linux__)
++ /*
++ * It's similar to:
++ * sscanf(text, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
++ * start, end, flags, offset, major, minor, inode, filename_offset)
++ */
++ char *endptr = text;
++ if (endptr == NULL || *endptr == '\0') return false;
++
++ if (!StringToIntegerUntilCharWithCheck(start, endptr, 16, '-', &endptr))
++ return false;
++
++ if (!StringToIntegerUntilCharWithCheck(end, endptr, 16, ' ', &endptr))
++ return false;
++
++ endptr = CopyStringUntilChar(endptr, 5, ' ', flags);
++ if (endptr == NULL || *endptr == '\0') return false;
++ ++endptr;
++
++ if (!StringToIntegerUntilCharWithCheck(offset, endptr, 16, ' ', &endptr))
++ return false;
++
++ if (!StringToIntegerUntilCharWithCheck(major, endptr, 16, ':', &endptr))
++ return false;
++
++ if (!StringToIntegerUntilCharWithCheck(minor, endptr, 16, ' ', &endptr))
++ return false;
++
++ if (!StringToIntegerUntilCharWithCheck(inode, endptr, 10, ' ', &endptr))
++ return false;
++
++ *filename_offset = (endptr - text);
++ return true;
++#else
++ return false;
++#endif
++}
++
+ ProcMapsIterator::ProcMapsIterator(pid_t pid) {
+ Init(pid, NULL, false);
+ }
+@@ -712,13 +869,14 @@
+ unsigned filename_offset = 0;
+ #if defined(__linux__)
+ // for now, assume all linuxes have the same format
+- if (sscanf(stext_, "%"SCNx64"-%"SCNx64" %4s %"SCNx64" %x:%x %"SCNd64" %n",
+- start ? start : &tmpstart,
+- end ? end : &tmpend,
+- flags_,
+- offset ? offset : &tmpoffset,
+- &major, &minor,
+- inode ? inode : &tmpinode, &filename_offset) != 7) continue;
++ if (!ParseProcMapsLine(
++ stext_,
++ start ? start : &tmpstart,
++ end ? end : &tmpend,
++ flags_,
++ offset ? offset : &tmpoffset,
++ &major, &minor,
++ inode ? inode : &tmpinode, &filename_offset)) continue;
+ #elif defined(__CYGWIN__) || defined(__CYGWIN32__)
+ // cygwin is like linux, except the third field is the "entry point"
+ // rather than the offset (see format_process_maps at
+@@ -749,7 +907,7 @@
+ // start end resident privateresident obj(?) prot refcnt shadowcnt
+ // flags copy_on_write needs_copy type filename:
+ // 0x8048000 0x804a000 2 0 0xc104ce70 r-x 1 0 0x0 COW NC vnode /bin/cat
+- if (sscanf(stext_, "0x%"SCNx64" 0x%"SCNx64" %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
++ if (sscanf(stext_, "0x%" SCNx64 " 0x%" SCNx64 " %*d %*d %*p %3s %*d %*d 0x%*x %*s %*s %*s %n",
+ start ? start : &tmpstart,
+ end ? end : &tmpend,
+ flags_,
+@@ -786,7 +944,7 @@
+ uint64 tmp_anon_mapping;
+ uint64 tmp_anon_pages;
+
+- sscanf(backing_ptr+1, "F %"SCNx64" %"SCNd64") (A %"SCNx64" %"SCNd64")",
++ sscanf(backing_ptr+1, "F %" SCNx64 " %" SCNd64 ") (A %" SCNx64 " %" SCNd64 ")",
+ file_mapping ? file_mapping : &tmp_file_mapping,
+ file_pages ? file_pages : &tmp_file_pages,
+ anon_mapping ? anon_mapping : &tmp_anon_mapping,
+@@ -926,7 +1084,7 @@
+ ? '-' : 'p';
+
+ const int rc = snprintf(buffer, bufsize,
+- "%08"PRIx64"-%08"PRIx64" %c%c%c%c %08"PRIx64" %02x:%02x %-11"PRId64" %s\n",
++ "%08" PRIx64 "-%08" PRIx64 " %c%c%c%c %08" PRIx64 " %02x:%02x %-11" PRId64 " %s\n",
+ start, end, r,w,x,p, offset,
+ static_cast(dev/256), static_cast(dev%256),
+ inode, filename);
+diff -urP gperftools-2.0/src/common.cc /home/spot/gperftools/src/common.cc
+--- gperftools-2.0/src/common.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/common.cc 2013-03-01 14:25:38.074366847 -0500
+@@ -60,16 +60,16 @@
+ } else if (size >= 128) {
+ // Space wasted due to alignment is at most 1/8, i.e., 12.5%.
+ alignment = (1 << LgFloor(size)) / 8;
+- } else if (size >= 16) {
++ } else if (size >= kMinAlign) {
+ // We need an alignment of at least 16 bytes to satisfy
+ // requirements for some SSE types.
+- alignment = 16;
++ alignment = kMinAlign;
+ }
+ // Maximum alignment allowed is page size alignment.
+ if (alignment > kPageSize) {
+ alignment = kPageSize;
+ }
+- CHECK_CONDITION(size < 16 || alignment >= 16);
++ CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign);
+ CHECK_CONDITION((alignment & (alignment - 1)) == 0);
+ return alignment;
+ }
+@@ -110,7 +110,7 @@
+ // Compute the size classes we want to use
+ int sc = 1; // Next size class to assign
+ int alignment = kAlignment;
+- CHECK_CONDITION(kAlignment <= 16);
++ CHECK_CONDITION(kAlignment <= kMinAlign);
+ for (size_t size = kAlignment; size <= kMaxSize; size += alignment) {
+ alignment = AlignmentForSize(size);
+ CHECK_CONDITION((size % alignment) == 0);
+diff -urP gperftools-2.0/src/common.h /home/spot/gperftools/src/common.h
+--- gperftools-2.0/src/common.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/common.h 2013-03-01 14:25:38.737366824 -0500
+@@ -1,10 +1,10 @@
+ // Copyright (c) 2008, Google Inc.
+ // All rights reserved.
+-//
++//
+ // Redistribution and use in source and binary forms, with or without
+ // modification, are permitted provided that the following conditions are
+ // met:
+-//
++//
+ // * Redistributions of source code must retain the above copyright
+ // notice, this list of conditions and the following disclaimer.
+ // * Redistributions in binary form must reproduce the above
+@@ -14,7 +14,7 @@
+ // * Neither the name of Google Inc. nor the names of its
+ // contributors may be used to endorse or promote products derived from
+ // this software without specific prior written permission.
+-//
++//
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+@@ -41,6 +41,15 @@
+ #include // for uintptr_t, uint64_t
+ #endif
+ #include "internal_logging.h" // for ASSERT, etc
++#include "base/basictypes.h" // for LIKELY, etc
++
++#ifdef HAVE_BUILTIN_EXPECT
++#define LIKELY(x) __builtin_expect(!!(x), 1)
++#define UNLIKELY(x) __builtin_expect(!!(x), 0)
++#else
++#define LIKELY(x) (x)
++#define UNLIKELY(x) (x)
++#endif
+
+ // Type that can hold a page number
+ typedef uintptr_t PageID;
+@@ -64,9 +73,23 @@
+ #if defined(TCMALLOC_LARGE_PAGES)
+ static const size_t kPageShift = 15;
+ static const size_t kNumClasses = 78;
++static const size_t kMinAlign = 16;
++#elif defined(TCMALLOC_LARGE_PAGES64K)
++static const size_t kPageShift = 16;
++static const size_t kNumClasses = 82;
++static const size_t kMinAlign = 16;
++#elif defined(TCMALLOC_ALIGN_8BYTES)
++static const size_t kPageShift = 13;
++static const size_t kNumClasses = 93;
++// Unless we force to use 8 bytes alignment we use an alignment of
++// at least 16 bytes to satisfy requirements for some SSE types.
++// Keep in mind when using the 16 bytes alignment you can have a space
++// waste due to alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
++static const size_t kMinAlign = 8;
+ #else
+ static const size_t kPageShift = 13;
+ static const size_t kNumClasses = 86;
++static const size_t kMinAlign = 16;
+ #endif
+ static const size_t kMaxThreadCacheSize = 4 << 20;
+
+@@ -169,13 +192,15 @@
+ unsigned char class_array_[kClassArraySize];
+
+ // Compute index of the class_array[] entry for a given size
+- static inline int ClassIndex(int s) {
++ static inline size_t ClassIndex(int s) {
++ // Use unsigned arithmetic to avoid unnecessary sign extensions.
+ ASSERT(0 <= s);
+ ASSERT(s <= kMaxSize);
+- const bool big = (s > kMaxSmallSize);
+- const int add_amount = big ? (127 + (120<<7)) : 7;
+- const int shift_amount = big ? 7 : 3;
+- return (s + add_amount) >> shift_amount;
++ if (LIKELY(s <= kMaxSmallSize)) {
++ return (static_cast(s) + 7) >> 3;
++ } else {
++ return (static_cast(s) + 127 + (120 << 7)) >> 7;
++ }
+ }
+
+ int NumMoveSize(size_t size);
+diff -urP gperftools-2.0/src/config.h.in /home/spot/gperftools/src/config.h.in
+--- gperftools-2.0/src/config.h.in 2012-02-03 14:47:15.000000000 -0500
++++ /home/spot/gperftools/src/config.h.in 2013-03-01 14:25:38.205366843 -0500
+@@ -5,6 +5,9 @@
+ #define GPERFTOOLS_CONFIG_H_
+
+
++/* Define to 1 if compiler supports __builtin_expect */
++#undef HAVE_BUILTIN_EXPECT
++
+ /* Define to 1 if compiler supports __builtin_stack_pointer */
+ #undef HAVE_BUILTIN_STACK_POINTER
+
+diff -urP gperftools-2.0/src/debugallocation.cc /home/spot/gperftools/src/debugallocation.cc
+--- gperftools-2.0/src/debugallocation.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/debugallocation.cc 2013-03-01 14:25:37.966366851 -0500
+@@ -488,7 +488,7 @@
+ // the address space could take more.
+ static size_t max_size_t = ~0;
+ if (size > max_size_t - sizeof(MallocBlock)) {
+- RAW_LOG(ERROR, "Massive size passed to malloc: %"PRIuS"", size);
++ RAW_LOG(ERROR, "Massive size passed to malloc: %" PRIuS "", size);
+ return NULL;
+ }
+ MallocBlock* b = NULL;
+@@ -958,7 +958,7 @@
+ do { \
+ if (FLAGS_malloctrace) { \
+ SpinLockHolder l(&malloc_trace_lock); \
+- TracePrintf(TraceFd(), "%s\t%"PRIuS"\t%p\t%"GPRIuPTHREAD, \
++ TracePrintf(TraceFd(), "%s\t%" PRIuS "\t%p\t%" GPRIuPTHREAD, \
+ name, size, addr, PRINTABLE_PTHREAD(pthread_self())); \
+ TraceStack(); \
+ TracePrintf(TraceFd(), "\n"); \
+@@ -1215,7 +1215,7 @@
+ void* ptr = debug_cpp_alloc(size, MallocBlock::kNewType, false);
+ MallocHook::InvokeNewHook(ptr, size);
+ if (ptr == NULL) {
+- RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new failed.", size);
++ RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new failed.", size);
+ }
+ return ptr;
+ }
+@@ -1242,7 +1242,7 @@
+ void* ptr = debug_cpp_alloc(size, MallocBlock::kArrayNewType, false);
+ MallocHook::InvokeNewHook(ptr, size);
+ if (ptr == NULL) {
+- RAW_LOG(FATAL, "Unable to allocate %"PRIuS" bytes: new[] failed.", size);
++ RAW_LOG(FATAL, "Unable to allocate %" PRIuS " bytes: new[] failed.", size);
+ }
+ return ptr;
+ }
+diff -urP gperftools-2.0/src/getpc.h /home/spot/gperftools/src/getpc.h
+--- gperftools-2.0/src/getpc.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/getpc.h 2013-03-01 14:25:37.973366851 -0500
+@@ -94,7 +94,7 @@
+ int pc_offset;
+ // The actual instruction bytes. Feel free to make it larger if you
+ // need a longer sequence.
+- char ins[16];
++ unsigned char ins[16];
+ // How many bytes to match from ins array?
+ int ins_size;
+ // The offset from the stack pointer (e)sp where to look for the
+diff -urP gperftools-2.0/src/gperftools/malloc_extension.h /home/spot/gperftools/src/gperftools/malloc_extension.h
+--- gperftools-2.0/src/gperftools/malloc_extension.h 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/gperftools/malloc_extension.h 2013-03-01 14:25:38.126366846 -0500
+@@ -169,6 +169,26 @@
+ // Number of bytes used across all thread caches.
+ // This property is not writable.
+ //
++ // "tcmalloc.central_cache_free_bytes"
++ // Number of free bytes in the central cache that have been
++ // assigned to size classes. They always count towards virtual
++ // memory usage, and unless the underlying memory is swapped out
++ // by the OS, they also count towards physical memory usage.
++ // This property is not writable.
++ //
++ // "tcmalloc.transfer_cache_free_bytes"
++ // Number of free bytes that are waiting to be transferred between
++ // the central cache and a thread cache. They always count
++ // towards virtual memory usage, and unless the underlying memory
++ // is swapped out by the OS, they also count towards physical
++ // memory usage. This property is not writable.
++ //
++ // "tcmalloc.thread_cache_free_bytes"
++ // Number of free bytes in thread caches. They always count
++ // towards virtual memory usage, and unless the underlying memory
++ // is swapped out by the OS, they also count towards physical
++ // memory usage. This property is not writable.
++ //
+ // "tcmalloc.pageheap_free_bytes"
+ // Number of bytes in free, mapped pages in page heap. These
+ // bytes can be used to fulfill allocation requests. They
+@@ -380,7 +400,7 @@
+ INUSE, // Application is using this range
+ FREE, // Range is currently free
+ UNMAPPED, // Backing physical memory has been returned to the OS
+- UNKNOWN,
++ UNKNOWN
+ // More enum values may be added in the future
+ };
+
+diff -urP gperftools-2.0/src/gperftools/tcmalloc.h.in /home/spot/gperftools/src/gperftools/tcmalloc.h.in
+--- gperftools-2.0/src/gperftools/tcmalloc.h.in 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/gperftools/tcmalloc.h.in 2013-03-01 14:25:38.126366846 -0500
+@@ -53,7 +53,17 @@
+ #define TC_VERSION_PATCH "@TC_VERSION_PATCH@"
+ #define TC_VERSION_STRING "gperftools @TC_VERSION_MAJOR@.@TC_VERSION_MINOR@@TC_VERSION_PATCH@"
+
+-#include // for struct mallinfo, if it's defined
++// For struct mallinfo, if it's defined.
++#ifdef HAVE_STRUCT_MALLINFO
++// Malloc can be in several places on older versions of OS X.
++# if defined(HAVE_MALLOC_H)
++# include
++# elif defined(HAVE_SYS_MALLOC_H)
++# include
++# elif defined(HAVE_MALLOC_MALLOC_H)
++# include
++# endif
++#endif
+
+ // Annoying stuff for windows -- makes sure clients can import these functions
+ #ifndef PERFTOOLS_DLL_DECL
+diff -urP gperftools-2.0/src/heap-checker.cc /home/spot/gperftools/src/heap-checker.cc
+--- gperftools-2.0/src/heap-checker.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/heap-checker.cc 2013-03-01 14:25:38.722366824 -0500
+@@ -567,7 +567,7 @@
+ if (ptr != NULL) {
+ const int counter = get_thread_disable_counter();
+ const bool ignore = (counter > 0);
+- RAW_VLOG(16, "Recording Alloc: %p of %"PRIuS "; %d", ptr, size,
++ RAW_VLOG(16, "Recording Alloc: %p of %" PRIuS "; %d", ptr, size,
+ int(counter));
+
+ // Fetch the caller's stack trace before acquiring heap_checker_lock.
+@@ -587,7 +587,7 @@
+ }
+ }
+ }
+- RAW_VLOG(17, "Alloc Recorded: %p of %"PRIuS"", ptr, size);
++ RAW_VLOG(17, "Alloc Recorded: %p of %" PRIuS "", ptr, size);
+ }
+ }
+
+@@ -645,12 +645,12 @@
+ if (MemoryRegionMap::FindAndMarkStackRegion(top, ®ion)) {
+ // Make the proper portion of the stack live:
+ if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
+- RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
++ RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
+ top_ptr, region.end_addr - top);
+ live_objects->push_back(AllocObject(top_ptr, region.end_addr - top,
+ THREAD_DATA));
+ } else { // GROWS_TOWARDS_HIGH_ADDRESSES
+- RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
++ RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
+ AsPtr(region.start_addr),
+ top - region.start_addr);
+ live_objects->push_back(AllocObject(AsPtr(region.start_addr),
+@@ -692,12 +692,12 @@
+ }
+ // Make the proper portion of the stack live:
+ if (stack_direction == GROWS_TOWARDS_LOW_ADDRESSES) {
+- RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
++ RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
+ top_ptr, stack_end - top);
+ live_objects->push_back(
+ AllocObject(top_ptr, stack_end - top, THREAD_DATA));
+ } else { // GROWS_TOWARDS_HIGH_ADDRESSES
+- RAW_VLOG(11, "Live stack at %p of %"PRIuPTR" bytes",
++ RAW_VLOG(11, "Live stack at %p of %" PRIuPTR " bytes",
+ AsPtr(stack_start), top - stack_start);
+ live_objects->push_back(
+ AllocObject(AsPtr(stack_start), top - stack_start, THREAD_DATA));
+@@ -770,14 +770,14 @@
+ // and the rest of the region where the stack lives can well
+ // contain outdated stack variables which are not live anymore,
+ // hence should not be treated as such.
+- RAW_VLOG(11, "Not %s-disabling %"PRIuS" bytes at %p"
++ RAW_VLOG(11, "Not %s-disabling %" PRIuS " bytes at %p"
+ ": have stack inside: %p",
+ (stack_disable ? "stack" : "range"),
+ info.object_size, ptr, AsPtr(*iter));
+ return;
+ }
+ }
+- RAW_VLOG(11, "%s-disabling %"PRIuS" bytes at %p",
++ RAW_VLOG(11, "%s-disabling %" PRIuS " bytes at %p",
+ (stack_disable ? "Stack" : "Range"), info.object_size, ptr);
+ live_objects->push_back(AllocObject(ptr, info.object_size,
+ MUST_BE_ON_HEAP));
+@@ -1061,7 +1061,7 @@
+ if (thread_registers.size()) {
+ // Make thread registers be live heap data sources.
+ // we rely here on the fact that vector is in one memory chunk:
+- RAW_VLOG(11, "Live registers at %p of %"PRIuS" bytes",
++ RAW_VLOG(11, "Live registers at %p of %" PRIuS " bytes",
+ &thread_registers[0], thread_registers.size() * sizeof(void*));
+ live_objects->push_back(AllocObject(&thread_registers[0],
+ thread_registers.size() * sizeof(void*),
+@@ -1098,7 +1098,7 @@
+ for (IgnoredObjectsMap::const_iterator object = ignored_objects->begin();
+ object != ignored_objects->end(); ++object) {
+ const void* ptr = AsPtr(object->first);
+- RAW_VLOG(11, "Ignored live object at %p of %"PRIuS" bytes",
++ RAW_VLOG(11, "Ignored live object at %p of %" PRIuS " bytes",
+ ptr, object->second);
+ live_objects->
+ push_back(AllocObject(ptr, object->second, MUST_BE_ON_HEAP));
+@@ -1107,7 +1107,7 @@
+ size_t object_size;
+ if (!(heap_profile->FindAlloc(ptr, &object_size) &&
+ object->second == object_size)) {
+- RAW_LOG(FATAL, "Object at %p of %"PRIuS" bytes from an"
++ RAW_LOG(FATAL, "Object at %p of %" PRIuS " bytes from an"
+ " IgnoreObject() has disappeared", ptr, object->second);
+ }
+ }
+@@ -1214,7 +1214,7 @@
+ if (VLOG_IS_ON(11)) {
+ for (LiveObjectsStack::const_iterator i = l->second.begin();
+ i != l->second.end(); ++i) {
+- RAW_VLOG(11, "Library live region at %p of %"PRIuPTR" bytes",
++ RAW_VLOG(11, "Library live region at %p of %" PRIuPTR " bytes",
+ i->ptr, i->size);
+ }
+ }
+@@ -1335,7 +1335,7 @@
+ IgnoreNonThreadLiveObjectsLocked();
+ }
+ if (live_objects_total) {
+- RAW_VLOG(10, "Ignoring %"PRId64" reachable objects of %"PRId64" bytes",
++ RAW_VLOG(10, "Ignoring %" PRId64 " reachable objects of %" PRId64 " bytes",
+ live_objects_total, live_bytes_total);
+ }
+ // Free these: we made them here and heap_profile never saw them
+@@ -1394,7 +1394,7 @@
+ live_object_count += 1;
+ live_byte_count += size;
+ }
+- RAW_VLOG(13, "Looking for heap pointers in %p of %"PRIuS" bytes",
++ RAW_VLOG(13, "Looking for heap pointers in %p of %" PRIuS " bytes",
+ object, size);
+ const char* const whole_object = object;
+ size_t const whole_size = size;
+@@ -1465,8 +1465,8 @@
+ // a heap object which is in fact leaked.
+ // I.e. in very rare and probably not repeatable/lasting cases
+ // we might miss some real heap memory leaks.
+- RAW_VLOG(14, "Found pointer to %p of %"PRIuS" bytes at %p "
+- "inside %p of size %"PRIuS"",
++ RAW_VLOG(14, "Found pointer to %p of %" PRIuS " bytes at %p "
++ "inside %p of size %" PRIuS "",
+ ptr, object_size, object, whole_object, whole_size);
+ if (VLOG_IS_ON(15)) {
+ // log call stacks to help debug how come something is not a leak
+@@ -1491,7 +1491,7 @@
+ live_objects_total += live_object_count;
+ live_bytes_total += live_byte_count;
+ if (live_object_count) {
+- RAW_VLOG(10, "Removed %"PRId64" live heap objects of %"PRId64" bytes: %s%s",
++ RAW_VLOG(10, "Removed %" PRId64 " live heap objects of %" PRId64 " bytes: %s%s",
+ live_object_count, live_byte_count, name, name2);
+ }
+ }
+@@ -1513,7 +1513,7 @@
+ if (!HaveOnHeapLocked(&ptr, &object_size)) {
+ RAW_LOG(ERROR, "No live heap object at %p to ignore", ptr);
+ } else {
+- RAW_VLOG(10, "Going to ignore live object at %p of %"PRIuS" bytes",
++ RAW_VLOG(10, "Going to ignore live object at %p of %" PRIuS " bytes",
+ ptr, object_size);
+ if (ignored_objects == NULL) {
+ ignored_objects = new(Allocator::Allocate(sizeof(IgnoredObjectsMap)))
+@@ -1540,7 +1540,7 @@
+ ignored_objects->erase(object);
+ found = true;
+ RAW_VLOG(10, "Now not going to ignore live object "
+- "at %p of %"PRIuS" bytes", ptr, object_size);
++ "at %p of %" PRIuS " bytes", ptr, object_size);
+ }
+ }
+ if (!found) RAW_LOG(FATAL, "Object at %p has not been ignored", ptr);
+@@ -1588,8 +1588,8 @@
+ const HeapProfileTable::Stats& t = heap_profile->total();
+ const size_t start_inuse_bytes = t.alloc_size - t.free_size;
+ const size_t start_inuse_allocs = t.allocs - t.frees;
+- RAW_VLOG(10, "Start check \"%s\" profile: %"PRIuS" bytes "
+- "in %"PRIuS" objects",
++ RAW_VLOG(10, "Start check \"%s\" profile: %" PRIuS " bytes "
++ "in %" PRIuS " objects",
+ name_, start_inuse_bytes, start_inuse_allocs);
+ } else {
+ RAW_LOG(WARNING, "Heap checker is not active, "
+@@ -1813,7 +1813,7 @@
+ RAW_VLOG(heap_checker_info_level,
+ "No leaks found for check \"%s\" "
+ "(but no 100%% guarantee that there aren't any): "
+- "found %"PRId64" reachable heap objects of %"PRId64" bytes",
++ "found %" PRId64 " reachable heap objects of %" PRId64 " bytes",
+ name_,
+ int64(stats.allocs - stats.frees),
+ int64(stats.alloc_size - stats.free_size));
+@@ -2021,9 +2021,9 @@
+ // at the right time, on FreeBSD we always check after, even in the
+ // less strict modes. This just means FreeBSD is always a bit
+ // stricter in its checking than other OSes.
+-#ifdef __FreeBSD__
++ // This now appears to be the case in other OSes as well;
++ // so always check afterwards.
+ FLAGS_heap_check_after_destructors = true;
+-#endif
+
+ { SpinLockHolder l(&heap_checker_lock);
+ RAW_DCHECK(heap_checker_pid == getpid(), "");
+@@ -2353,7 +2353,7 @@
+ const uintptr_t addr = AsInt(*ptr);
+ if (heap_profile->FindInsideAlloc(
+ *ptr, max_heap_object_size, ptr, object_size)) {
+- RAW_VLOG(16, "Got pointer into %p at +%"PRIuPTR" offset",
++ RAW_VLOG(16, "Got pointer into %p at +%" PRIuPTR " offset",
+ *ptr, addr - AsInt(*ptr));
+ return true;
+ }
+diff -urP gperftools-2.0/src/heap-profiler.cc /home/spot/gperftools/src/heap-profiler.cc
+--- gperftools-2.0/src/heap-profiler.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/heap-profiler.cc 2013-03-01 14:25:38.470366833 -0500
+@@ -107,6 +107,10 @@
+ "If non-zero, dump heap profiling information whenever "
+ "the high-water memory usage mark increases by the specified "
+ "number of bytes.");
++DEFINE_int64(heap_profile_time_interval,
++ EnvToInt64("HEAP_PROFILE_TIME_INTERVAL", 0),
++ "If non-zero, dump heap profiling information once every "
++ "specified number of seconds since the last dump.");
+ DEFINE_bool(mmap_log,
+ EnvToBool("HEAP_PROFILE_MMAP_LOG", false),
+ "Should mmap/munmap calls be logged?");
+@@ -168,6 +172,7 @@
+ static int64 last_dump_alloc = 0; // alloc_size when did we last dump
+ static int64 last_dump_free = 0; // free_size when did we last dump
+ static int64 high_water_mark = 0; // In-use-bytes at last high-water dump
++static int64 last_dump_time = 0; // The time of the last dump
+
+ static HeapProfileTable* heap_profile = NULL; // the heap profile table
+
+@@ -264,26 +269,34 @@
+ const int64 inuse_bytes = total.alloc_size - total.free_size;
+ bool need_to_dump = false;
+ char buf[128];
++ int64 current_time = time(NULL);
+ if (FLAGS_heap_profile_allocation_interval > 0 &&
+ total.alloc_size >=
+ last_dump_alloc + FLAGS_heap_profile_allocation_interval) {
+- snprintf(buf, sizeof(buf), ("%"PRId64" MB allocated cumulatively, "
+- "%"PRId64" MB currently in use"),
++ snprintf(buf, sizeof(buf), ("%" PRId64 " MB allocated cumulatively, "
++ "%" PRId64 " MB currently in use"),
+ total.alloc_size >> 20, inuse_bytes >> 20);
+ need_to_dump = true;
+ } else if (FLAGS_heap_profile_deallocation_interval > 0 &&
+ total.free_size >=
+ last_dump_free + FLAGS_heap_profile_deallocation_interval) {
+- snprintf(buf, sizeof(buf), ("%"PRId64" MB freed cumulatively, "
+- "%"PRId64" MB currently in use"),
++ snprintf(buf, sizeof(buf), ("%" PRId64 " MB freed cumulatively, "
++ "%" PRId64 " MB currently in use"),
+ total.free_size >> 20, inuse_bytes >> 20);
+ need_to_dump = true;
+ } else if (FLAGS_heap_profile_inuse_interval > 0 &&
+ inuse_bytes >
+ high_water_mark + FLAGS_heap_profile_inuse_interval) {
+- snprintf(buf, sizeof(buf), "%"PRId64" MB currently in use",
++ snprintf(buf, sizeof(buf), "%" PRId64 " MB currently in use",
+ inuse_bytes >> 20);
+ need_to_dump = true;
++ } else if (FLAGS_heap_profile_time_interval > 0 &&
++ current_time - last_dump_time >=
++ FLAGS_heap_profile_time_interval) {
++ snprintf(buf, sizeof(buf), "%d sec since the last dump",
++ current_time - last_dump_time);
++ need_to_dump = true;
++ last_dump_time = current_time;
+ }
+ if (need_to_dump) {
+ DumpProfileLocked(buf);
+@@ -346,8 +359,8 @@
+ // in pretty-printing of NULL as "nil".
+ // TODO(maxim): instead should use a safe snprintf reimplementation
+ RAW_LOG(INFO,
+- "mmap(start=0x%"PRIxPTR", len=%"PRIuS", prot=0x%x, flags=0x%x, "
+- "fd=%d, offset=0x%x) = 0x%"PRIxPTR"",
++ "mmap(start=0x%" PRIxPTR ", len=%" PRIuS ", prot=0x%x, flags=0x%x, "
++ "fd=%d, offset=0x%x) = 0x%" PRIxPTR "",
+ (uintptr_t) start, size, prot, flags, fd, (unsigned int) offset,
+ (uintptr_t) result);
+ #ifdef TODO_REENABLE_STACK_TRACING
+@@ -364,9 +377,9 @@
+ // in pretty-printing of NULL as "nil".
+ // TODO(maxim): instead should use a safe snprintf reimplementation
+ RAW_LOG(INFO,
+- "mremap(old_addr=0x%"PRIxPTR", old_size=%"PRIuS", "
+- "new_size=%"PRIuS", flags=0x%x, new_addr=0x%"PRIxPTR") = "
+- "0x%"PRIxPTR"",
++ "mremap(old_addr=0x%" PRIxPTR ", old_size=%" PRIuS ", "
++ "new_size=%" PRIuS ", flags=0x%x, new_addr=0x%" PRIxPTR ") = "
++ "0x%" PRIxPTR "",
+ (uintptr_t) old_addr, old_size, new_size, flags,
+ (uintptr_t) new_addr, (uintptr_t) result);
+ #ifdef TODO_REENABLE_STACK_TRACING
+@@ -380,7 +393,7 @@
+ // We use PRIxS not just '%p' to avoid deadlocks
+ // in pretty-printing of NULL as "nil".
+ // TODO(maxim): instead should use a safe snprintf reimplementation
+- RAW_LOG(INFO, "munmap(start=0x%"PRIxPTR", len=%"PRIuS")",
++ RAW_LOG(INFO, "munmap(start=0x%" PRIxPTR ", len=%" PRIuS ")",
+ (uintptr_t) ptr, size);
+ #ifdef TODO_REENABLE_STACK_TRACING
+ DumpStackTrace(1, RawInfoStackDumper, NULL);
+@@ -390,7 +403,7 @@
+
+ static void SbrkHook(const void* result, ptrdiff_t increment) {
+ if (FLAGS_mmap_log) { // log it
+- RAW_LOG(INFO, "sbrk(inc=%"PRIdS") = 0x%"PRIxPTR"",
++ RAW_LOG(INFO, "sbrk(inc=%" PRIdS ") = 0x%" PRIxPTR "",
+ increment, (uintptr_t) result);
+ #ifdef TODO_REENABLE_STACK_TRACING
+ DumpStackTrace(1, RawInfoStackDumper, NULL);
+@@ -447,6 +460,7 @@
+ last_dump_alloc = 0;
+ last_dump_free = 0;
+ high_water_mark = 0;
++ last_dump_time = 0;
+
+ // We do not reset dump_count so if the user does a sequence of
+ // HeapProfilerStart/HeapProfileStop, we will get a continuous
+diff -urP gperftools-2.0/src/heap-profile-table.cc /home/spot/gperftools/src/heap-profile-table.cc
+--- gperftools-2.0/src/heap-profile-table.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/heap-profile-table.cc 2013-03-01 14:25:38.125366846 -0500
+@@ -303,7 +303,7 @@
+ profile_stats->free_size += b.free_size;
+ }
+ int printed =
+- snprintf(buf + buflen, bufsize - buflen, "%6d: %8"PRId64" [%6d: %8"PRId64"] @%s",
++ snprintf(buf + buflen, bufsize - buflen, "%6d: %8" PRId64 " [%6d: %8" PRId64 "] @%s",
+ b.allocs - b.frees,
+ b.alloc_size - b.free_size,
+ b.allocs,
+@@ -616,8 +616,8 @@
+ // This is only used by the heap leak checker, but is intimately
+ // tied to the allocation map that belongs in this module and is
+ // therefore placed here.
+- RAW_LOG(ERROR, "Leak check %s detected leaks of %"PRIuS" bytes "
+- "in %"PRIuS" objects",
++ RAW_LOG(ERROR, "Leak check %s detected leaks of %" PRIuS " bytes "
++ "in %" PRIuS " objects",
+ checker_name,
+ size_t(total_.alloc_size),
+ size_t(total_.allocs));
+@@ -663,7 +663,7 @@
+ e.bytes, e.count);
+ for (int j = 0; j < e.bucket->depth; j++) {
+ const void* pc = e.bucket->stack[j];
+- printer.Printf("\t@ %"PRIxPTR" %s\n",
++ printer.Printf("\t@ %" PRIxPTR " %s\n",
+ reinterpret_cast(pc), symbolization_table.GetSymbol(pc));
+ }
+ RAW_LOG(ERROR, "%s", buffer);
+@@ -687,7 +687,7 @@
+ char* unused) {
+ // Perhaps also log the allocation stack trace (unsymbolized)
+ // on this line in case somebody finds it useful.
+- RAW_LOG(ERROR, "leaked %"PRIuS" byte object %p", v->bytes, ptr);
++ RAW_LOG(ERROR, "leaked %" PRIuS " byte object %p", v->bytes, ptr);
+ }
+
+ void HeapProfileTable::Snapshot::ReportIndividualObjects() {
+diff -urP gperftools-2.0/src/malloc_extension.cc /home/spot/gperftools/src/malloc_extension.cc
+--- gperftools-2.0/src/malloc_extension.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/malloc_extension.cc 2013-03-01 14:25:38.471366833 -0500
+@@ -244,7 +244,7 @@
+ uintptr_t count, uintptr_t size) {
+ char buf[100];
+ snprintf(buf, sizeof(buf),
+- "%6"PRIu64": %8"PRIu64" [%6"PRIu64": %8"PRIu64"] @",
++ "%6" PRIu64 ": %8" PRIu64 " [%6" PRIu64 ": %8" PRIu64 "] @",
+ static_cast(count),
+ static_cast(size),
+ static_cast(count),
+diff -urP gperftools-2.0/src/malloc_hook-inl.h /home/spot/gperftools/src/malloc_hook-inl.h
+--- gperftools-2.0/src/malloc_hook-inl.h 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/malloc_hook-inl.h 2013-03-01 14:25:38.474366833 -0500
+@@ -66,7 +66,7 @@
+ // This prevents MSVC 2005, at least, from complaining (it has to
+ // do with __wp64; AtomicWord is __wp64, but Atomic32/64 aren't).
+ return reinterpret_cast(static_cast(
+- base::subtle::Acquire_Load(&data_)));
++ base::subtle::NoBarrier_Load(&data_)));
+ }
+
+ // Sets the contained value to new_val and returns the old value,
+@@ -124,7 +124,7 @@
+
+ // Fast inline implementation for fast path of Invoke*Hook.
+ bool empty() const {
+- return base::subtle::Acquire_Load(&priv_end) == 0;
++ return base::subtle::NoBarrier_Load(&priv_end) == 0;
+ }
+
+ // This internal data is not private so that the class is an aggregate and can
+diff -urP gperftools-2.0/src/memory_region_map.cc /home/spot/gperftools/src/memory_region_map.cc
+--- gperftools-2.0/src/memory_region_map.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/memory_region_map.cc 2013-03-01 14:25:38.720366824 -0500
+@@ -512,7 +512,7 @@
+ uintptr_t start_addr = reinterpret_cast(start);
+ uintptr_t end_addr = start_addr + size;
+ // subtract start_addr, end_addr from all the regions
+- RAW_VLOG(10, "Removing global region %p..%p; have %"PRIuS" regions",
++ RAW_VLOG(10, "Removing global region %p..%p; have %" PRIuS " regions",
+ reinterpret_cast(start_addr),
+ reinterpret_cast(end_addr),
+ regions_->size());
+@@ -571,7 +571,7 @@
+ }
+ ++region;
+ }
+- RAW_VLOG(12, "Removed region %p..%p; have %"PRIuS" regions",
++ RAW_VLOG(12, "Removed region %p..%p; have %" PRIuS " regions",
+ reinterpret_cast(start_addr),
+ reinterpret_cast(end_addr),
+ regions_->size());
+@@ -584,10 +584,10 @@
+ const void* start, size_t size,
+ int prot, int flags,
+ int fd, off_t offset) {
+- // TODO(maxim): replace all 0x%"PRIxS" by %p when RAW_VLOG uses a safe
++ // TODO(maxim): replace all 0x%" PRIxS " by %p when RAW_VLOG uses a safe
+ // snprintf reimplementation that does not malloc to pretty-print NULL
+- RAW_VLOG(10, "MMap = 0x%"PRIxPTR" of %"PRIuS" at %"PRIu64" "
+- "prot %d flags %d fd %d offs %"PRId64,
++ RAW_VLOG(10, "MMap = 0x%" PRIxPTR " of %" PRIuS " at %" PRIu64 " "
++ "prot %d flags %d fd %d offs %" PRId64,
+ reinterpret_cast(result), size,
+ reinterpret_cast(start), prot, flags, fd,
+ static_cast(offset));
+@@ -597,7 +597,7 @@
+ }
+
+ void MemoryRegionMap::MunmapHook(const void* ptr, size_t size) {
+- RAW_VLOG(10, "MUnmap of %p %"PRIuS"", ptr, size);
++ RAW_VLOG(10, "MUnmap of %p %" PRIuS "", ptr, size);
+ if (size != 0) {
+ RecordRegionRemoval(ptr, size);
+ }
+@@ -607,8 +607,8 @@
+ const void* old_addr, size_t old_size,
+ size_t new_size, int flags,
+ const void* new_addr) {
+- RAW_VLOG(10, "MRemap = 0x%"PRIxPTR" of 0x%"PRIxPTR" %"PRIuS" "
+- "to %"PRIuS" flags %d new_addr=0x%"PRIxPTR,
++ RAW_VLOG(10, "MRemap = 0x%" PRIxPTR " of 0x%" PRIxPTR " %" PRIuS " "
++ "to %" PRIuS " flags %d new_addr=0x%" PRIxPTR,
+ (uintptr_t)result, (uintptr_t)old_addr,
+ old_size, new_size, flags,
+ flags & MREMAP_FIXED ? (uintptr_t)new_addr : 0);
+@@ -621,7 +621,7 @@
+ extern "C" void* __sbrk(ptrdiff_t increment); // defined in libc
+
+ void MemoryRegionMap::SbrkHook(const void* result, ptrdiff_t increment) {
+- RAW_VLOG(10, "Sbrk = 0x%"PRIxPTR" of %"PRIdS"", (uintptr_t)result, increment);
++ RAW_VLOG(10, "Sbrk = 0x%" PRIxPTR " of %" PRIdS "", (uintptr_t)result, increment);
+ if (result != reinterpret_cast(-1)) {
+ if (increment > 0) {
+ void* new_end = sbrk(0);
+@@ -641,8 +641,8 @@
+ uintptr_t previous = 0;
+ for (RegionSet::const_iterator r = regions_->begin();
+ r != regions_->end(); ++r) {
+- RAW_LOG(INFO, "Memory region 0x%"PRIxPTR"..0x%"PRIxPTR" "
+- "from 0x%"PRIxPTR" stack=%d",
++ RAW_LOG(INFO, "Memory region 0x%" PRIxPTR "..0x%" PRIxPTR " "
++ "from 0x%" PRIxPTR " stack=%d",
+ r->start_addr, r->end_addr, r->caller(), r->is_stack);
+ RAW_CHECK(previous < r->end_addr, "wow, we messed up the set order");
+ // this must be caused by uncontrolled recursive operations on regions_
+diff -urP gperftools-2.0/src/page_heap.cc /home/spot/gperftools/src/page_heap.cc
+--- gperftools-2.0/src/page_heap.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/page_heap.cc 2013-03-01 14:25:38.473366833 -0500
+@@ -50,6 +50,14 @@
+ "to return memory slower. Reasonable rates are in the "
+ "range [0,10]");
+
++DEFINE_int64(tcmalloc_heap_limit_mb,
++ EnvToInt("TCMALLOC_HEAP_LIMIT_MB", 0),
++ "Limit total size of the process heap to the "
++ "specified number of MiB. "
++ "When we approach the limit the memory is released "
++ "to the system more aggressively (more minor page faults). "
++ "Zero means to allocate as long as system allows.");
++
+ namespace tcmalloc {
+
+ PageHeap::PageHeap()
+@@ -82,8 +90,18 @@
+ // Alternatively, maybe there's a usable returned span.
+ ll = &free_[s].returned;
+ if (!DLL_IsEmpty(ll)) {
+- ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
+- return Carve(ll->next, n);
++ // We did not call EnsureLimit before, to avoid releasing the span
++ // that will be taken immediately back.
++ // Calling EnsureLimit here is not very expensive, as it fails only if
++ // there is no more normal spans (and it fails efficiently)
++ // or SystemRelease does not work (there is probably no returned spans).
++ if (EnsureLimit(n)) {
++ // ll may have became empty due to coalescing
++ if (!DLL_IsEmpty(ll)) {
++ ASSERT(ll->next->location == Span::ON_RETURNED_FREELIST);
++ return Carve(ll->next, n);
++ }
++ }
+ }
+ }
+ // No luck in free lists, our last chance is in a larger class.
+@@ -125,6 +143,8 @@
+ }
+ }
+
++ Span *bestNormal = best;
++
+ // Search through released list in case it has a better fit
+ for (Span* span = large_.returned.next;
+ span != &large_.returned;
+@@ -139,7 +159,27 @@
+ }
+ }
+
+- return best == NULL ? NULL : Carve(best, n);
++ if (best == bestNormal) {
++ return best == NULL ? NULL : Carve(best, n);
++ }
++
++ // best comes from returned list.
++
++ if (EnsureLimit(n, false)) {
++ return Carve(best, n);
++ }
++
++ if (EnsureLimit(n, true)) {
++ // best could have been destroyed by coalescing.
++ // bestNormal is not a best-fit, and it could be destroyed as well.
++ // We retry, the limit is already ensured:
++ return AllocLarge(n);
++ }
++
++ // If bestNormal existed, EnsureLimit would succeeded:
++ ASSERT(bestNormal == NULL);
++ // We are not allowed to take best from returned list.
++ return NULL;
+ }
+
+ Span* PageHeap::Split(Span* span, Length n) {
+@@ -294,28 +334,26 @@
+ Length PageHeap::ReleaseLastNormalSpan(SpanList* slist) {
+ Span* s = slist->normal.prev;
+ ASSERT(s->location == Span::ON_NORMAL_FREELIST);
+- RemoveFromFreeList(s);
+- const Length n = s->length;
+- TCMalloc_SystemRelease(reinterpret_cast(s->start << kPageShift),
+- static_cast(s->length << kPageShift));
+- s->location = Span::ON_RETURNED_FREELIST;
+- MergeIntoFreeList(s); // Coalesces if possible.
+- return n;
++
++ if (TCMalloc_SystemRelease(reinterpret_cast(s->start << kPageShift),
++ static_cast(s->length << kPageShift))) {
++ RemoveFromFreeList(s);
++ const Length n = s->length;
++ s->location = Span::ON_RETURNED_FREELIST;
++ MergeIntoFreeList(s); // Coalesces if possible.
++ return n;
++ }
++
++ return 0;
+ }
+
+ Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
+ Length released_pages = 0;
+- Length prev_released_pages = -1;
+
+ // Round robin through the lists of free spans, releasing the last
+- // span in each list. Stop after releasing at least num_pages.
+- while (released_pages < num_pages) {
+- if (released_pages == prev_released_pages) {
+- // Last iteration of while loop made no progress.
+- break;
+- }
+- prev_released_pages = released_pages;
+-
++ // span in each list. Stop after releasing at least num_pages
++ // or when there is nothing more to release.
++ while (released_pages < num_pages && stats_.free_bytes > 0) {
+ for (int i = 0; i < kMaxPages+1 && released_pages < num_pages;
+ i++, release_index_++) {
+ if (release_index_ > kMaxPages) release_index_ = 0;
+@@ -323,6 +361,8 @@
+ &large_ : &free_[release_index_];
+ if (!DLL_IsEmpty(&slist->normal)) {
+ Length released_len = ReleaseLastNormalSpan(slist);
++ // Some systems do not support release
++ if (released_len == 0) return released_pages;
+ released_pages += released_len;
+ }
+ }
+@@ -330,6 +370,30 @@
+ return released_pages;
+ }
+
++bool PageHeap::EnsureLimit(Length n, bool withRelease)
++{
++ Length limit = (FLAGS_tcmalloc_heap_limit_mb*1024*1024) >> kPageShift;
++ if (limit == 0) return true; //there is no limit
++
++ // We do not use stats_.system_bytes because it does not take
++ // MetaDataAllocs into account.
++ Length takenPages = TCMalloc_SystemTaken >> kPageShift;
++ //XXX takenPages may be slightly bigger than limit for two reasons:
++ //* MetaDataAllocs ignore the limit (it is not easy to handle
++ // out of memory there)
++ //* sys_alloc may round allocation up to huge page size,
++ // although smaller limit was ensured
++
++ ASSERT(takenPages >= stats_.unmapped_bytes >> kPageShift);
++ takenPages -= stats_.unmapped_bytes >> kPageShift;
++
++ if (takenPages + n > limit && withRelease) {
++ takenPages -= ReleaseAtLeastNPages(takenPages + n - limit);
++ }
++
++ return takenPages + n <= limit;
++}
++
+ void PageHeap::RegisterSizeClass(Span* span, size_t sc) {
+ // Associate span object with all interior pages as well
+ ASSERT(span->location == Span::IN_USE);
+@@ -407,12 +471,17 @@
+ if (n > kMaxValidPages) return false;
+ Length ask = (n>kMinSystemAlloc) ? n : static_cast(kMinSystemAlloc);
+ size_t actual_size;
+- void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
++ void* ptr = NULL;
++ if (EnsureLimit(ask)) {
++ ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
++ }
+ if (ptr == NULL) {
+ if (n < ask) {
+ // Try growing just "n" pages
+ ask = n;
+- ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
++ if (EnsureLimit(ask)) {
++ ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
++ }
+ }
+ if (ptr == NULL) return false;
+ }
+diff -urP gperftools-2.0/src/page_heap.h /home/spot/gperftools/src/page_heap.h
+--- gperftools-2.0/src/page_heap.h 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/page_heap.h 2013-03-01 14:25:38.072366848 -0500
+@@ -274,9 +274,13 @@
+ void IncrementalScavenge(Length n);
+
+ // Release the last span on the normal portion of this list.
+- // Return the length of that span.
++ // Return the length of that span or zero if release failed.
+ Length ReleaseLastNormalSpan(SpanList* slist);
+
++ // Checks if we are allowed to take more memory from the system.
++ // If limit is reached and allowRelease is true, tries to release
++ // some unused spans.
++ bool EnsureLimit(Length n, bool allowRelease = true);
+
+ // Number of pages to deallocate before doing more scavenging
+ int64_t scavenge_counter_;
+diff -urP gperftools-2.0/src/pprof /home/spot/gperftools/src/pprof
+--- gperftools-2.0/src/pprof 2012-02-03 18:39:48.000000000 -0500
++++ /home/spot/gperftools/src/pprof 2013-03-01 14:25:37.971366851 -0500
+@@ -285,7 +285,6 @@
+ my $msg = shift;
+ print STDERR "$msg\n\n";
+ print STDERR usage_string();
+- print STDERR "\nFATAL ERROR: $msg\n"; # just as a reminder
+ exit(1);
+ }
+
+diff -urP gperftools-2.0/src/profiler.cc /home/spot/gperftools/src/profiler.cc
+--- gperftools-2.0/src/profiler.cc 2012-02-03 14:18:22.000000000 -0500
++++ /home/spot/gperftools/src/profiler.cc 2013-03-01 14:25:38.206366843 -0500
+@@ -70,8 +70,21 @@
+
+ using std::string;
+
+-// Collects up all profile data. This is a singleton, which is
+-// initialized by a constructor at startup.
++DEFINE_bool(cpu_profiler_unittest,
++ EnvToBool("PERFTOOLS_UNITTEST", true),
++ "Determines whether or not we are running under the \
++ control of a unit test. This allows us to include or \
++ exclude certain behaviours.");
++
++// Collects up all profile data. This is a singleton, which is
++// initialized by a constructor at startup. If no cpu profiler
++// signal is specified then the profiler lifecycle is either
++// manaully controlled via the API or attached to the scope of
++// the singleton (program scope). Otherwise the cpu toggle is
++// used to allow for user selectable control via signal generation.
++// This is very useful for profiling a daemon process without
++// having to start and stop the daemon or having to modify the
++// source code to use the cpu profiler API.
+ class CpuProfiler {
+ public:
+ CpuProfiler();
+@@ -126,6 +139,40 @@
+ void* cpu_profiler);
+ };
+
++// Signal handler that is registered when a user selectable signal
++// number is defined in the environment variable CPUPROFILESIGNAL.
++static void CpuProfilerSwitch(int signal_number)
++{
++ bool static started = false;
++ static unsigned profile_count = 0;
++ static char base_profile_name[1024] = "\0";
++
++ if (base_profile_name[0] == '\0') {
++ if (!GetUniquePathFromEnv("CPUPROFILE", base_profile_name)) {
++ RAW_LOG(FATAL,"Cpu profiler switch is registered but no CPUPROFILE is defined");
++ return;
++ }
++ }
++ if (!started)
++ {
++ char full_profile_name[1024];
++
++ snprintf(full_profile_name, sizeof(full_profile_name), "%s.%u",
++ base_profile_name, profile_count++);
++
++ if(!ProfilerStart(full_profile_name))
++ {
++ RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n",
++ full_profile_name, strerror(errno));
++ }
++ }
++ else
++ {
++ ProfilerStop();
++ }
++ started = !started;
++}
++
+ // Profile data structure singleton: Constructor will check to see if
+ // profiling should be enabled. Destructor will write profile data
+ // out to disk.
+@@ -137,19 +184,60 @@
+ // TODO(cgd) Move this code *out* of the CpuProfile constructor into a
+ // separate object responsible for initialization. With ProfileHandler there
+ // is no need to limit the number of profilers.
+- char fname[PATH_MAX];
+- if (!GetUniquePathFromEnv("CPUPROFILE", fname)) {
++ if (getenv("CPUPROFILE") == NULL) {
++ if (!FLAGS_cpu_profiler_unittest) {
++ RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n");
++ }
+ return;
+ }
++
+ // We don't enable profiling if setuid -- it's a security risk
+ #ifdef HAVE_GETEUID
+- if (getuid() != geteuid())
++ if (getuid() != geteuid()) {
++ if (!FLAGS_cpu_profiler_unittest) {
++ RAW_LOG(WARNING, "Cannot perform CPU profiling when running with setuid\n");
++ }
+ return;
++ }
+ #endif
+
+- if (!Start(fname, NULL)) {
+- RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n",
+- fname, strerror(errno));
++ char *signal_number_str = getenv("CPUPROFILESIGNAL");
++ if (signal_number_str != NULL)
++ {
++ long int signal_number = strtol(signal_number_str, NULL, 10);
++ printf(" signal_number=%d\n", signal_number);
++ if (signal_number >=1 && signal_number <=64)
++ {
++ sighandler_t old_signal_handler = signal(signal_number, CpuProfilerSwitch);
++ if (old_signal_handler == NULL)
++ {
++ RAW_LOG(INFO,"Using signal %d as cpu profiling switch", signal_number);
++
++ }
++ else
++ {
++ RAW_LOG(FATAL, "Signal %d already in use\n", signal_number);
++ }
++ }
++ else
++ {
++ RAW_LOG(FATAL, "Signal number %s is invalid\n", signal_number_str);
++ }
++ }
++ else
++ {
++ char fname[PATH_MAX];
++ if (!GetUniquePathFromEnv("CPUPROFILE", fname)) {
++ if (!FLAGS_cpu_profiler_unittest) {
++ RAW_LOG(WARNING, "CPU profiler linked but no valid CPUPROFILE environment variable found\n");
++ }
++ return;
++ }
++
++ if (!Start(fname, NULL)) {
++ RAW_LOG(FATAL, "Can't turn on cpu profiling for '%s': %s\n",
++ fname, strerror(errno));
++ }
+ }
+ }
+
+diff -urP gperftools-2.0/src/static_vars.h /home/spot/gperftools/src/static_vars.h
+--- gperftools-2.0/src/static_vars.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/static_vars.h 2013-03-01 14:25:38.720366824 -0500
+@@ -82,6 +82,9 @@
+ return &bucket_allocator_;
+ }
+
++ // Check if InitStaticVars() has been run.
++ static bool IsInited() { return pageheap() != NULL; }
++
+ private:
+ static SpinLock pageheap_lock_;
+
+diff -urP gperftools-2.0/src/symbolize.cc /home/spot/gperftools/src/symbolize.cc
+--- gperftools-2.0/src/symbolize.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/symbolize.cc 2013-03-01 14:25:38.474366833 -0500
+@@ -229,7 +229,7 @@
+ iter != symbolization_table_.end(); ++iter) {
+ written += snprintf(pprof_buffer + written, kOutBufSize - written,
+ // pprof expects format to be 0xXXXXXX
+- "0x%"PRIxPTR"\n", reinterpret_cast(iter->first));
++ "0x%" PRIxPTR "\n", reinterpret_cast(iter->first));
+ }
+ write(child_in[1], pprof_buffer, strlen(pprof_buffer));
+ close(child_in[1]); // that's all we need to write
+diff -urP gperftools-2.0/src/system-alloc.cc /home/spot/gperftools/src/system-alloc.cc
+--- gperftools-2.0/src/system-alloc.cc 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/system-alloc.cc 2013-03-01 14:25:38.721366824 -0500
+@@ -122,6 +122,9 @@
+ // The current system allocator
+ SysAllocator* sys_alloc = NULL;
+
++// Number of bytes taken from system.
++size_t TCMalloc_SystemTaken = 0;
++
+ // Configuration parameters.
+ DEFINE_int32(malloc_devmem_start,
+ EnvToInt("TCMALLOC_DEVMEM_START", 0),
+@@ -137,6 +140,10 @@
+ DEFINE_bool(malloc_skip_mmap,
+ EnvToBool("TCMALLOC_SKIP_MMAP", false),
+ "Whether mmap can be used to obtain memory.");
++DEFINE_bool(malloc_disable_memory_release,
++ EnvToBool("TCMALLOC_DISABLE_MEMORY_RELEASE", false),
++ "Whether MADV_FREE/MADV_DONTNEED should be used"
++ " to return unused memory to the system.");
+
+ // static allocators
+ class SbrkSysAllocator : public SysAllocator {
+@@ -442,6 +449,12 @@
+ return NULL;
+ }
+
++ATTRIBUTE_WEAK ATTRIBUTE_NOINLINE
++SysAllocator *tc_get_sysalloc_override(SysAllocator *def)
++{
++ return def;
++}
++
+ static bool system_alloc_inited = false;
+ void InitSystemAllocators(void) {
+ MmapSysAllocator *mmap = new (mmap_space) MmapSysAllocator();
+@@ -462,7 +475,8 @@
+ sdef->SetChildAllocator(sbrk, 0, sbrk_name);
+ sdef->SetChildAllocator(mmap, 1, mmap_name);
+ }
+- sys_alloc = sdef;
++
++ sys_alloc = tc_get_sysalloc_override(sdef);
+ }
+
+ void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
+@@ -485,21 +499,24 @@
+ if (actual_size) {
+ CheckAddressBits(
+ reinterpret_cast(result) + *actual_size - 1);
++ TCMalloc_SystemTaken += *actual_size;
+ } else {
+ CheckAddressBits(
+ reinterpret_cast(result) + size - 1);
++ TCMalloc_SystemTaken += size;
+ }
+ }
+ return result;
+ }
+
+-void TCMalloc_SystemRelease(void* start, size_t length) {
++bool TCMalloc_SystemRelease(void* start, size_t length) {
+ #ifdef MADV_FREE
+ if (FLAGS_malloc_devmem_start) {
+ // It's not safe to use MADV_FREE/MADV_DONTNEED if we've been
+ // mapping /dev/mem for heap memory.
+- return;
++ return false;
+ }
++ if (FLAGS_malloc_disable_memory_release) return false;
+ if (pagesize == 0) pagesize = getpagesize();
+ const size_t pagemask = pagesize - 1;
+
+@@ -518,13 +535,14 @@
+ ASSERT(new_end <= end);
+
+ if (new_end > new_start) {
+- // Note -- ignoring most return codes, because if this fails it
+- // doesn't matter...
+- while (madvise(reinterpret_cast(new_start), new_end - new_start,
+- MADV_FREE) == -1 &&
+- errno == EAGAIN) {
+- // NOP
+- }
++ int result;
++ do {
++ result = madvise(reinterpret_cast(new_start),
++ new_end - new_start, MADV_FREE);
++ } while (result == -1 && errno == EAGAIN);
++
++ return result != -1;
+ }
+ #endif
++ return false;
+ }
+diff -urP gperftools-2.0/src/system-alloc.h /home/spot/gperftools/src/system-alloc.h
+--- gperftools-2.0/src/system-alloc.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/system-alloc.h 2013-03-01 14:25:38.474366833 -0500
+@@ -69,9 +69,14 @@
+ // the address space next time they are touched, which can impact
+ // performance. (Only pages fully covered by the memory region will
+ // be released, partial pages will not.)
+-extern void TCMalloc_SystemRelease(void* start, size_t length);
++//
++// Returns false if release failed or not supported.
++extern bool TCMalloc_SystemRelease(void* start, size_t length);
+
+ // The current system allocator.
+ extern PERFTOOLS_DLL_DECL SysAllocator* sys_alloc;
+
++// Number of bytes taken from system.
++extern PERFTOOLS_DLL_DECL size_t TCMalloc_SystemTaken;
++
+ #endif /* TCMALLOC_SYSTEM_ALLOC_H_ */
+diff -urP gperftools-2.0/src/tcmalloc.cc /home/spot/gperftools/src/tcmalloc.cc
+--- gperftools-2.0/src/tcmalloc.cc 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/tcmalloc.cc 2013-03-01 14:25:37.968366851 -0500
+@@ -131,18 +131,6 @@
+ #include "tcmalloc_guard.h" // for TCMallocGuard
+ #include "thread_cache.h" // for ThreadCache
+
+-// We only need malloc.h for struct mallinfo.
+-#ifdef HAVE_STRUCT_MALLINFO
+-// Malloc can be in several places on older versions of OS X.
+-# if defined(HAVE_MALLOC_H)
+-# include
+-# elif defined(HAVE_SYS_MALLOC_H)
+-# include
+-# elif defined(HAVE_MALLOC_MALLOC_H)
+-# include
+-# endif
+-#endif
+-
+ #if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
+ # define WIN32_DO_PATCHING 1
+ #endif
+@@ -311,7 +299,10 @@
+ PageHeap::Stats pageheap; // Stats from page heap
+ };
+
+-// Get stats into "r". Also get per-size-class counts if class_count != NULL
++// Get stats into "r". Also, if class_count != NULL, class_count[k]
++// will be set to the total number of objects of size class k in the
++// central cache, transfer cache, and per-thread caches. If small_spans
++// is non-NULL, it is filled. Same for large_spans.
+ static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
+ PageHeap::SmallSpanStats* small_spans,
+ PageHeap::LargeSpanStats* large_spans) {
+@@ -325,7 +316,12 @@
+ Static::sizemap()->ByteSizeForClass(cl));
+ r->central_bytes += (size * length) + cache_overhead;
+ r->transfer_bytes += (size * tc_length);
+- if (class_count) class_count[cl] = length + tc_length;
++ if (class_count) {
++ // Sum the lengths of all per-class freelists, except the per-thread
++ // freelists, which get counted when we call GetThreadStats(), below.
++ class_count[cl] = length + tc_length;
++ }
++
+ }
+
+ // Add stats from per-thread heaps
+@@ -414,7 +410,8 @@
+
+ if (level >= 2) {
+ out->printf("------------------------------------------------\n");
+- out->printf("Size class breakdown\n");
++ out->printf("Total size of freelists for per-thread caches,\n");
++ out->printf("transfer cache, and central cache, by size class\n");
+ out->printf("------------------------------------------------\n");
+ uint64_t cumulative = 0;
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+@@ -656,6 +653,27 @@
+ return true;
+ }
+
++ if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) {
++ TCMallocStats stats;
++ ExtractStats(&stats, NULL, NULL, NULL);
++ *value = stats.central_bytes;
++ return true;
++ }
++
++ if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) {
++ TCMallocStats stats;
++ ExtractStats(&stats, NULL, NULL, NULL);
++ *value = stats.transfer_bytes;
++ return true;
++ }
++
++ if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) {
++ TCMallocStats stats;
++ ExtractStats(&stats, NULL, NULL, NULL);
++ *value = stats.thread_bytes;
++ return true;
++ }
++
+ if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
+ SpinLockHolder l(Static::pageheap_lock());
+ *value = Static::pageheap()->stats().free_bytes;
+@@ -951,13 +969,13 @@
+ SpinLockHolder h(Static::pageheap_lock());
+ // Allocate span
+ Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
+- if (span == NULL) {
++ if (UNLIKELY(span == NULL)) {
+ return NULL;
+ }
+
+ // Allocate stack trace
+ StackTrace *stack = Static::stacktrace_allocator()->New();
+- if (stack == NULL) {
++ if (UNLIKELY(stack == NULL)) {
+ // Sampling failed because of lack of memory
+ return span;
+ }
+@@ -984,7 +1002,7 @@
+ static const int N = 1000;
+ char buffer[N];
+ TCMalloc_Printer printer(buffer, N);
+- printer.printf("tcmalloc: large alloc %"PRIu64" bytes == %p @ ",
++ printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ",
+ static_cast(num_pages) << kPageShift,
+ result);
+ for (int i = 0; i < stack.depth; i++) {
+@@ -996,6 +1014,7 @@
+
+ inline void* cpp_alloc(size_t size, bool nothrow);
+ inline void* do_malloc(size_t size);
++inline void* do_malloc_no_errno(size_t size);
+
+ // TODO(willchan): Investigate whether or not lining this much is harmful to
+ // performance.
+@@ -1005,6 +1024,10 @@
+ return tc_new_mode ? cpp_alloc(size, true) : do_malloc(size);
+ }
+
++inline void* do_malloc_no_errno_or_cpp_alloc(size_t size) {
++ return tc_new_mode ? cpp_alloc(size, true) : do_malloc_no_errno(size);
++}
++
+ void* cpp_memalign(size_t align, size_t size);
+ void* do_memalign(size_t align, size_t size);
+
+@@ -1041,7 +1064,7 @@
+ } else {
+ SpinLockHolder h(Static::pageheap_lock());
+ Span* span = Static::pageheap()->New(num_pages);
+- result = (span == NULL ? NULL : SpanToMallocResult(span));
++ result = (UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span));
+ report_large = should_report_large(num_pages);
+ }
+
+@@ -1051,26 +1074,35 @@
+ return result;
+ }
+
+-inline void* do_malloc(size_t size) {
+- void* ret = NULL;
++inline void* do_malloc_small(ThreadCache* heap, size_t size) {
++ ASSERT(Static::IsInited());
++ ASSERT(heap != NULL);
++ size_t cl = Static::sizemap()->SizeClass(size);
++ size = Static::sizemap()->class_to_size(cl);
+
+- // The following call forces module initialization
+- ThreadCache* heap = ThreadCache::GetCache();
+- if (size <= kMaxSize) {
+- size_t cl = Static::sizemap()->SizeClass(size);
+- size = Static::sizemap()->class_to_size(cl);
++ if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
++ return DoSampledAllocation(size);
++ } else {
++ // The common case, and also the simplest. This just pops the
++ // size-appropriate freelist, after replenishing it if it's empty.
++ return CheckedMallocResult(heap->Allocate(size, cl));
++ }
++}
+
+- if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
+- ret = DoSampledAllocation(size);
+- } else {
+- // The common case, and also the simplest. This just pops the
+- // size-appropriate freelist, after replenishing it if it's empty.
+- ret = CheckedMallocResult(heap->Allocate(size, cl));
+- }
++inline void* do_malloc_no_errno(size_t size) {
++ if (ThreadCache::have_tls &&
++ LIKELY(size < ThreadCache::MinSizeForSlowPath())) {
++ return do_malloc_small(ThreadCache::GetCacheWhichMustBePresent(), size);
++ } else if (size <= kMaxSize) {
++ return do_malloc_small(ThreadCache::GetCache(), size);
+ } else {
+- ret = do_malloc_pages(heap, size);
++ return do_malloc_pages(ThreadCache::GetCache(), size);
+ }
+- if (ret == NULL) errno = ENOMEM;
++}
++
++inline void* do_malloc(size_t size) {
++ void* ret = do_malloc_no_errno(size);
++ if (UNLIKELY(ret == NULL)) errno = ENOMEM;
+ return ret;
+ }
+
+@@ -1079,55 +1111,72 @@
+ const size_t size = n * elem_size;
+ if (elem_size != 0 && size / elem_size != n) return NULL;
+
+- void* result = do_malloc_or_cpp_alloc(size);
+- if (result != NULL) {
++ void* result = do_malloc_no_errno_or_cpp_alloc(size);
++ if (result == NULL) {
++ errno = ENOMEM;
++ } else {
+ memset(result, 0, size);
+ }
+ return result;
+ }
+
+-static inline ThreadCache* GetCacheIfPresent() {
+- void* const p = ThreadCache::GetCacheIfPresent();
+- return reinterpret_cast(p);
++// If ptr is NULL, do nothing. Otherwise invoke the given function.
++inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) {
++ if (ptr != NULL) {
++ (*invalid_free_fn)(ptr);
++ }
+ }
+
+-// This lets you call back to a given function pointer if ptr is invalid.
+-// It is used primarily by windows code which wants a specialized callback.
+-inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
+- if (ptr == NULL) return;
+- if (Static::pageheap() == NULL) {
++// Helper for do_free_with_callback(), below. Inputs:
++// ptr is object to be freed
++// invalid_free_fn is a function that gets invoked on certain "bad frees"
++// heap is the ThreadCache for this thread, or NULL if it isn't known
++// heap_must_be_valid is whether heap is known to be non-NULL
++//
++// This function may only be used after Static::IsInited() is true.
++//
++// We can usually detect the case where ptr is not pointing to a page that
++// tcmalloc is using, and in those cases we invoke invalid_free_fn.
++//
++// To maximize speed in the common case, we usually get here with
++// heap_must_be_valid being a manifest constant equal to true.
++inline void do_free_helper(void* ptr,
++ void (*invalid_free_fn)(void*),
++ ThreadCache* heap,
++ bool heap_must_be_valid) {
++ ASSERT((Static::IsInited() && heap != NULL) || !heap_must_be_valid);
++ if (!heap_must_be_valid && !Static::IsInited()) {
+ // We called free() before malloc(). This can occur if the
+ // (system) malloc() is called before tcmalloc is loaded, and then
+ // free() is called after tcmalloc is loaded (and tc_free has
+ // replaced free), but before the global constructor has run that
+ // sets up the tcmalloc data structures.
+- (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
++ free_null_or_invalid(ptr, invalid_free_fn);
+ return;
+ }
+- const PageID p = reinterpret_cast(ptr) >> kPageShift;
+ Span* span = NULL;
++ const PageID p = reinterpret_cast(ptr) >> kPageShift;
+ size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
+-
+- if (cl == 0) {
++ if (UNLIKELY(cl == 0)) {
+ span = Static::pageheap()->GetDescriptor(p);
+- if (!span) {
+- // span can be NULL because the pointer passed in is invalid
++ if (UNLIKELY(!span)) {
++ // span can be NULL because the pointer passed in is NULL or invalid
+ // (not something returned by malloc or friends), or because the
+ // pointer was allocated with some other allocator besides
+ // tcmalloc. The latter can happen if tcmalloc is linked in via
+ // a dynamic library, but is not listed last on the link line.
+ // In that case, libraries after it on the link line will
+ // allocate with libc malloc, but free with tcmalloc's free.
+- (*invalid_free_fn)(ptr); // Decide how to handle the bad free request
++ free_null_or_invalid(ptr, invalid_free_fn);
+ return;
+ }
+ cl = span->sizeclass;
+ Static::pageheap()->CacheSizeClass(p, cl);
+ }
+- if (cl != 0) {
++ ASSERT(ptr != NULL);
++ if (LIKELY(cl != 0)) {
+ ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
+- ThreadCache* heap = GetCacheIfPresent();
+- if (heap != NULL) {
++ if (heap_must_be_valid || heap != NULL) {
+ heap->Deallocate(ptr, cl);
+ } else {
+ // Delete directly into central cache
+@@ -1148,6 +1197,23 @@
+ }
+ }
+
++// Helper for the object deletion (free, delete, etc.). Inputs:
++// ptr is object to be freed
++// invalid_free_fn is a function that gets invoked on certain "bad frees"
++//
++// We can usually detect the case where ptr is not pointing to a page that
++// tcmalloc is using, and in those cases we invoke invalid_free_fn.
++inline void do_free_with_callback(void* ptr, void (*invalid_free_fn)(void*)) {
++ ThreadCache* heap = NULL;
++ if (LIKELY(ThreadCache::IsFastPathAllowed())) {
++ heap = ThreadCache::GetCacheWhichMustBePresent();
++ do_free_helper(ptr, invalid_free_fn, heap, true);
++ } else {
++ heap = ThreadCache::GetCacheIfPresent();
++ do_free_helper(ptr, invalid_free_fn, heap, false);
++ }
++}
++
+ // The default "do_free" that uses the default callback.
+ inline void do_free(void* ptr) {
+ return do_free_with_callback(ptr, &InvalidFree);
+@@ -1165,7 +1231,7 @@
+ return Static::sizemap()->ByteSizeForClass(cl);
+ } else {
+ const Span *span = Static::pageheap()->GetDescriptor(p);
+- if (span == NULL) { // means we do not own this memory
++ if (UNLIKELY(span == NULL)) { // means we do not own this memory
+ return (*invalid_getsize_fn)(ptr);
+ } else if (span->sizeclass != 0) {
+ Static::pageheap()->CacheSizeClass(p, span->sizeclass);
+@@ -1191,20 +1257,20 @@
+ // . If we need to grow, grow to max(new_size, old_size * 1.X)
+ // . Don't shrink unless new_size < old_size * 0.Y
+ // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5.
+- const int lower_bound_to_grow = old_size + old_size / 4;
+- const int upper_bound_to_shrink = old_size / 2;
++ const size_t lower_bound_to_grow = old_size + old_size / 4ul;
++ const size_t upper_bound_to_shrink = old_size / 2ul;
+ if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) {
+ // Need to reallocate.
+ void* new_ptr = NULL;
+
+ if (new_size > old_size && new_size < lower_bound_to_grow) {
+- new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
++ new_ptr = do_malloc_no_errno_or_cpp_alloc(lower_bound_to_grow);
+ }
+ if (new_ptr == NULL) {
+ // Either new_size is not a tiny increment, or last do_malloc failed.
+ new_ptr = do_malloc_or_cpp_alloc(new_size);
+ }
+- if (new_ptr == NULL) {
++ if (UNLIKELY(new_ptr == NULL)) {
+ return NULL;
+ }
+ MallocHook::InvokeNewHook(new_ptr, new_size);
+@@ -1247,7 +1313,7 @@
+ return p;
+ }
+
+- if (Static::pageheap() == NULL) ThreadCache::InitModule();
++ if (UNLIKELY(Static::pageheap() == NULL)) ThreadCache::InitModule();
+
+ // Allocate at least one byte to avoid boundary conditions below
+ if (size == 0) size = 1;
+@@ -1279,13 +1345,13 @@
+ // TODO: We could put the rest of this page in the appropriate
+ // TODO: cache but it does not seem worth it.
+ Span* span = Static::pageheap()->New(tcmalloc::pages(size));
+- return span == NULL ? NULL : SpanToMallocResult(span);
++ return UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span);
+ }
+
+ // Allocate extra pages and carve off an aligned portion
+ const Length alloc = tcmalloc::pages(size + align);
+ Span* span = Static::pageheap()->New(alloc);
+- if (span == NULL) return NULL;
++ if (UNLIKELY(span == NULL)) return NULL;
+
+ // Skip starting portion so that we end up aligned
+ Length skip = 0;
+@@ -1350,12 +1416,12 @@
+ static SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
+
+ inline void* cpp_alloc(size_t size, bool nothrow) {
+- for (;;) {
+- void* p = do_malloc(size);
+ #ifdef PREANSINEW
+- return p;
++ return do_malloc(size);
+ #else
+- if (p == NULL) { // allocation failed
++ for (;;) {
++ void* p = do_malloc_no_errno(size);
++ if (UNLIKELY(p == NULL)) { // allocation failed
+ // Get the current new handler. NB: this function is not
+ // thread-safe. We make a feeble stab at making it so here, but
+ // this lock only protects against tcmalloc interfering with
+@@ -1373,11 +1439,11 @@
+ (*nh)();
+ continue;
+ }
+- return 0;
++ goto fail;
+ #else
+ // If no new_handler is established, the allocation failed.
+ if (!nh) {
+- if (nothrow) return 0;
++ if (nothrow) goto fail;
+ throw std::bad_alloc();
+ }
+ // Otherwise, try the new_handler. If it returns, retry the
+@@ -1387,7 +1453,7 @@
+ (*nh)();
+ } catch (const std::bad_alloc&) {
+ if (!nothrow) throw;
+- return p;
++ goto fail;
+ }
+ #endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
+ } else { // allocation success
+@@ -1395,6 +1461,9 @@
+ }
+ #endif // PREANSINEW
+ }
++fail:
++ errno = ENOMEM;
++ return 0;
+ }
+
+ void* cpp_memalign(size_t align, size_t size) {
+@@ -1403,7 +1472,7 @@
+ #ifdef PREANSINEW
+ return p;
+ #else
+- if (p == NULL) { // allocation failed
++ if (UNLIKELY(p == NULL)) { // allocation failed
+ // Get the current new handler. NB: this function is not
+ // thread-safe. We make a feeble stab at making it so here, but
+ // this lock only protects against tcmalloc interfering with
+@@ -1447,6 +1516,8 @@
+
+ // As promised, the definition of this function, declared above.
+ size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
++ if (ptr == NULL)
++ return 0;
+ ASSERT(TCMallocImplementation::GetOwnership(ptr)
+ != TCMallocImplementation::kNotOwned);
+ return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
+@@ -1601,7 +1672,7 @@
+
+ void* result = do_memalign_or_cpp_memalign(align, size);
+ MallocHook::InvokeNewHook(result, size);
+- if (result == NULL) {
++ if (UNLIKELY(result == NULL)) {
+ return ENOMEM;
+ } else {
+ *result_ptr = result;
+diff -urP gperftools-2.0/src/tests/getpc_test.cc /home/spot/gperftools/src/tests/getpc_test.cc
+--- gperftools-2.0/src/tests/getpc_test.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/getpc_test.cc 2013-03-01 14:25:38.480366833 -0500
+@@ -106,7 +106,7 @@
+ // ElfW(Addr) gp; /* global pointer */
+ // };
+ // We want the code entry point.
+-#if defined(__ia64) || defined(__ppc64) // NOTE: ppc64 is UNTESTED
++#if defined(__ia64) || defined(__powerpc64__) // NOTE: ppc64 is UNTESTED
+ expected = ((char**)expected)[0]; // this is "ip"
+ #endif
+
+diff -urP gperftools-2.0/src/tests/malloc_hook_test.cc /home/spot/gperftools/src/tests/malloc_hook_test.cc
+--- gperftools-2.0/src/tests/malloc_hook_test.cc 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/malloc_hook_test.cc 2013-03-01 14:25:38.478366833 -0500
+@@ -97,11 +97,11 @@
+ // values as integers for testing.
+ typedef base::internal::HookList TestHookList;
+
+-int TestHookList_Traverse(const TestHookList& list, int* output_array, int n) {
++int TestHookList_Traverse(const TestHookList& list, uintptr_t* output_array, int n) {
+ MallocHook::NewHook values_as_hooks[kHookListMaxValues];
+ int result = list.Traverse(values_as_hooks, min(n, kHookListMaxValues));
+ for (int i = 0; i < result; ++i) {
+- output_array[i] = reinterpret_cast(values_as_hooks[i]);
++ output_array[i] = reinterpret_cast(*values_as_hooks[i]);
+ }
+ return result;
+ }
+@@ -120,7 +120,7 @@
+
+ TEST(HookListTest, InitialValueExists) {
+ TestHookList list = INIT_HOOK_LIST(69);
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(1, TestHookList_Traverse(list, values, 2));
+ EXPECT_EQ(69, values[0]);
+ EXPECT_EQ(1, list.priv_end);
+@@ -131,7 +131,7 @@
+ ASSERT_TRUE(TestHookList_Remove(&list, 69));
+ EXPECT_EQ(0, list.priv_end);
+
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(0, TestHookList_Traverse(list, values, 2));
+ }
+
+@@ -140,7 +140,7 @@
+ ASSERT_TRUE(TestHookList_Add(&list, 42));
+ EXPECT_EQ(2, list.priv_end);
+
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(2, TestHookList_Traverse(list, values, 2));
+ EXPECT_EQ(69, values[0]);
+ EXPECT_EQ(42, values[1]);
+@@ -153,7 +153,7 @@
+ ASSERT_TRUE(TestHookList_Remove(&list, 69));
+ EXPECT_EQ(2, list.priv_end);
+
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(1, TestHookList_Traverse(list, values, 2));
+ EXPECT_EQ(42, values[0]);
+
+@@ -172,7 +172,7 @@
+ ASSERT_TRUE(TestHookList_Add(&list, 7));
+ EXPECT_EQ(2, list.priv_end);
+
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(2, TestHookList_Traverse(list, values, 2));
+ EXPECT_EQ(7, values[0]);
+ EXPECT_EQ(42, values[1]);
+@@ -182,7 +182,7 @@
+ TestHookList list = INIT_HOOK_LIST(69);
+ EXPECT_FALSE(TestHookList_Add(&list, 0));
+
+- int values[2] = { 0, 0 };
++ uintptr_t values[2] = { 0, 0 };
+ EXPECT_EQ(1, TestHookList_Traverse(list, values, 2));
+ EXPECT_EQ(69, values[0]);
+ EXPECT_EQ(1, list.priv_end);
+@@ -196,7 +196,7 @@
+ EXPECT_EQ(kHookListMaxValues, num_inserts);
+ EXPECT_EQ(kHookListMaxValues, list.priv_end);
+
+- int values[kHookListMaxValues + 1];
++ uintptr_t values[kHookListMaxValues + 1];
+ EXPECT_EQ(kHookListMaxValues, TestHookList_Traverse(list, values,
+ kHookListMaxValues));
+ EXPECT_EQ(69, values[0]);
+@@ -218,7 +218,7 @@
+ int value = (i << shift) + thread_num;
+ EXPECT_TRUE(TestHookList_Add(list, value));
+ sched_yield(); // Ensure some more interleaving.
+- int values[kHookListMaxValues + 1];
++ uintptr_t values[kHookListMaxValues + 1];
+ int num_values = TestHookList_Traverse(*list, values, kHookListMaxValues);
+ EXPECT_LT(0, num_values);
+ int value_index;
+@@ -284,7 +284,7 @@
+ RunManyThreadsWithId(&MultithreadedTestThreadRunner, num_threads_remaining,
+ 1 << 15);
+
+- int values[kHookListMaxValues + 1];
++ uintptr_t values[kHookListMaxValues + 1];
+ EXPECT_EQ(0, TestHookList_Traverse(list, values, kHookListMaxValues));
+ EXPECT_EQ(0, list.priv_end);
+ }
+diff -urP gperftools-2.0/src/tests/markidle_unittest.cc /home/spot/gperftools/src/tests/markidle_unittest.cc
+--- gperftools-2.0/src/tests/markidle_unittest.cc 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/markidle_unittest.cc 2013-03-01 14:25:38.485366833 -0500
+@@ -92,9 +92,9 @@
+ CHECK_LE(post_idle, original);
+
+ // Log after testing because logging can allocate heap memory.
+- VLOG(0, "Original usage: %"PRIuS"\n", original);
+- VLOG(0, "Post allocation: %"PRIuS"\n", post_allocation);
+- VLOG(0, "Post idle: %"PRIuS"\n", post_idle);
++ VLOG(0, "Original usage: %" PRIuS "\n", original);
++ VLOG(0, "Post allocation: %" PRIuS "\n", post_allocation);
++ VLOG(0, "Post idle: %" PRIuS "\n", post_idle);
+ }
+
+ int main(int argc, char** argv) {
+diff -urP gperftools-2.0/src/tests/page_heap_test.cc /home/spot/gperftools/src/tests/page_heap_test.cc
+--- gperftools-2.0/src/tests/page_heap_test.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/page_heap_test.cc 2013-03-01 14:25:38.483366833 -0500
+@@ -3,17 +3,29 @@
+
+ #include "config_for_unittests.h"
+ #include "page_heap.h"
++#include "system-alloc.h"
+ #include
+ #include "base/logging.h"
+ #include "common.h"
+
++DECLARE_int64(tcmalloc_heap_limit_mb);
++
+ namespace {
+
++static bool HaveSystemRelease =
++ TCMalloc_SystemRelease(TCMalloc_SystemAlloc(kPageSize, NULL, 0), kPageSize);
++
+ static void CheckStats(const tcmalloc::PageHeap* ph,
+ uint64_t system_pages,
+ uint64_t free_pages,
+ uint64_t unmapped_pages) {
+ tcmalloc::PageHeap::Stats stats = ph->stats();
++
++ if (!HaveSystemRelease) {
++ free_pages += unmapped_pages;
++ unmapped_pages = 0;
++ }
++
+ EXPECT_EQ(system_pages, stats.system_bytes >> kPageShift);
+ EXPECT_EQ(free_pages, stats.free_bytes >> kPageShift);
+ EXPECT_EQ(unmapped_pages, stats.unmapped_bytes >> kPageShift);
+@@ -36,7 +48,7 @@
+ CheckStats(ph, 256, 128, 0);
+
+ // Unmap deleted span 's2'
+- EXPECT_EQ(s2_len, ph->ReleaseAtLeastNPages(1));
++ ph->ReleaseAtLeastNPages(1);
+ CheckStats(ph, 256, 0, 128);
+
+ // Delete span 's1'
+@@ -46,10 +58,99 @@
+ delete ph;
+ }
+
++static void TestPageHeap_Limit() {
++ tcmalloc::PageHeap* ph = new tcmalloc::PageHeap();
++
++ CHECK_EQ(kMaxPages, 1 << (20 - kPageShift));
++
++ // We do not know how much is taken from the system for other purposes,
++ // so we detect the proper limit:
++ {
++ FLAGS_tcmalloc_heap_limit_mb = 1;
++ tcmalloc::Span* s = NULL;
++ while((s = ph->New(kMaxPages)) == NULL) {
++ FLAGS_tcmalloc_heap_limit_mb++;
++ }
++ FLAGS_tcmalloc_heap_limit_mb += 9;
++ ph->Delete(s);
++ // We are [10, 11) mb from the limit now.
++ }
++
++ // Test AllocLarge and GrowHeap first:
++ {
++ tcmalloc::Span * spans[10];
++ for (int i=0; i<10; ++i) {
++ spans[i] = ph->New(kMaxPages);
++ EXPECT_NE(spans[i], NULL);
++ }
++ EXPECT_EQ(ph->New(kMaxPages), NULL);
++
++ for (int i=0; i<10; i += 2) {
++ ph->Delete(spans[i]);
++ }
++
++ tcmalloc::Span *defragmented = ph->New(5 * kMaxPages);
++
++ if (HaveSystemRelease) {
++ // EnsureLimit should release deleted normal spans
++ EXPECT_NE(defragmented, NULL);
++ EXPECT_TRUE(ph->CheckExpensive());
++ ph->Delete(defragmented);
++ }
++ else
++ {
++ EXPECT_EQ(defragmented, NULL);
++ EXPECT_TRUE(ph->CheckExpensive());
++ }
++
++ for (int i=1; i<10; i += 2) {
++ ph->Delete(spans[i]);
++ }
++ }
++
++ // Once again, testing small lists this time (twice smaller spans):
++ {
++ tcmalloc::Span * spans[20];
++ for (int i=0; i<20; ++i) {
++ spans[i] = ph->New(kMaxPages >> 1);
++ EXPECT_NE(spans[i], NULL);
++ }
++ // one more half size allocation may be possible:
++ tcmalloc::Span * lastHalf = ph->New(kMaxPages >> 1);
++ EXPECT_EQ(ph->New(kMaxPages >> 1), NULL);
++
++ for (int i=0; i<20; i += 2) {
++ ph->Delete(spans[i]);
++ }
++
++ for(Length len = kMaxPages >> 2; len < 5 * kMaxPages; len = len << 1)
++ {
++ if(len <= kMaxPages >> 1 || HaveSystemRelease) {
++ tcmalloc::Span *s = ph->New(len);
++ EXPECT_NE(s, NULL);
++ ph->Delete(s);
++ }
++ }
++
++ EXPECT_TRUE(ph->CheckExpensive());
++
++ for (int i=1; i<20; i += 2) {
++ ph->Delete(spans[i]);
++ }
++
++ if (lastHalf != NULL) {
++ ph->Delete(lastHalf);
++ }
++ }
++
++ delete ph;
++}
++
+ } // namespace
+
+ int main(int argc, char **argv) {
+ TestPageHeap_Stats();
++ TestPageHeap_Limit();
+ printf("PASS\n");
+ return 0;
+ }
+diff -urP gperftools-2.0/src/tests/profiler_unittest.sh /home/spot/gperftools/src/tests/profiler_unittest.sh
+--- gperftools-2.0/src/tests/profiler_unittest.sh 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/profiler_unittest.sh 2013-03-01 14:25:38.476366833 -0500
+@@ -85,6 +85,14 @@
+ # It's meaningful to the profiler, so make sure we know its state
+ unset CPUPROFILE
+
++# Some output/logging in the profiler can cause issues when running the unit
++# tests. For example, logging a warning when the profiler is detected as being
++# present but no CPUPROFILE is specified in the environment. Especially when
++# we are checking for a silent run or specific timing constraints are being
++# checked. So set the env variable signifying that we are running in a unit
++# test environment.
++PERFTOOLS_UNITTEST=1
++
+ rm -rf "$TMPDIR"
+ mkdir "$TMPDIR" || exit 2
+
+@@ -95,11 +103,11 @@
+ }
+
+ # Takes two filenames representing profiles, with their executable scripts,
+-# and a multiplier, and verifies that the 'contentful' functions in
+-# each profile take the same time (possibly scaled by the given
+-# multiplier). It used to be "same" meant within 50%, after adding an
+-# noise-reducing X units to each value. But even that would often
+-# spuriously fail, so now it's "both non-zero". We're pretty forgiving.
++# and a multiplier, and verifies that the 'contentful' functions in each
++# profile take the same time (possibly scaled by the given multiplier). It
++# used to be "same" meant within 50%, after adding a noise-reducing X units
++# to each value. But even that would often spuriously fail, so now it's
++# "both non-zero". We're pretty forgiving.
+ VerifySimilar() {
+ prof1="$TMPDIR/$1"
+ exec1="$2"
+diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc /home/spot/gperftools/src/tests/tcmalloc_unittest.cc
+--- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2012-02-03 14:18:23.000000000 -0500
++++ /home/spot/gperftools/src/tests/tcmalloc_unittest.cc 2013-03-01 14:25:38.476366833 -0500
+@@ -92,6 +92,7 @@
+ #include "gperftools/malloc_extension.h"
+ #include "gperftools/tcmalloc.h"
+ #include "thread_cache.h"
++#include "system-alloc.h"
+ #include "tests/testutil.h"
+
+ // Windows doesn't define pvalloc and a few other obsolete unix
+@@ -579,7 +580,7 @@
+ static void TestCalloc(size_t n, size_t s, bool ok) {
+ char* p = reinterpret_cast(calloc(n, s));
+ if (FLAGS_verbose)
+- fprintf(LOGSTREAM, "calloc(%"PRIxS", %"PRIxS"): %p\n", n, s, p);
++ fprintf(LOGSTREAM, "calloc(%" PRIxS ", %" PRIxS "): %p\n", n, s, p);
+ if (!ok) {
+ CHECK(p == NULL); // calloc(n, s) should not succeed
+ } else {
+@@ -759,9 +760,10 @@
+ CHECK((p % sizeof(void*)) == 0);
+ CHECK((p % sizeof(double)) == 0);
+
+- // Must have 16-byte alignment for large enough objects
+- if (size >= 16) {
+- CHECK((p % 16) == 0);
++ // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
++ // alignment for large enough objects
++ if (size >= kMinAlign) {
++ CHECK((p % kMinAlign) == 0);
+ }
+ }
+ for (int i = 0; i < kNum; i++) {
+@@ -834,20 +836,26 @@
+
+ }
+
++static bool HaveSystemRelease =
++ TCMalloc_SystemRelease(TCMalloc_SystemAlloc(kPageSize, NULL, 0), kPageSize);
++
+ static void TestRanges() {
+ static const int MB = 1048576;
+ void* a = malloc(MB);
+ void* b = malloc(MB);
++ base::MallocRange::Type releasedType =
++ HaveSystemRelease ? base::MallocRange::UNMAPPED : base::MallocRange::FREE;
++
+ CheckRangeCallback(a, base::MallocRange::INUSE, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ free(a);
+ CheckRangeCallback(a, base::MallocRange::FREE, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ MallocExtension::instance()->ReleaseFreeMemory();
+- CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
++ CheckRangeCallback(a, releasedType, MB);
+ CheckRangeCallback(b, base::MallocRange::INUSE, MB);
+ free(b);
+- CheckRangeCallback(a, base::MallocRange::UNMAPPED, MB);
++ CheckRangeCallback(a, releasedType, MB);
+ CheckRangeCallback(b, base::MallocRange::FREE, MB);
+ }
+
+@@ -865,6 +873,9 @@
+ // messes up all the equality tests here. I just disable the
+ // teset in this mode. TODO(csilvers): get it to work for debugalloc?
+ #ifndef DEBUGALLOCATION
++
++ if(!HaveSystemRelease) return;
++
+ const double old_tcmalloc_release_rate = FLAGS_tcmalloc_release_rate;
+ FLAGS_tcmalloc_release_rate = 0;
+
+diff -urP gperftools-2.0/src/thread_cache.cc /home/spot/gperftools/src/thread_cache.cc
+--- gperftools-2.0/src/thread_cache.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/thread_cache.cc 2013-03-01 14:25:38.720366824 -0500
+@@ -63,11 +63,9 @@
+ int ThreadCache::thread_heap_count_ = 0;
+ ThreadCache* ThreadCache::next_memory_steal_ = NULL;
+ #ifdef HAVE_TLS
+-__thread ThreadCache* ThreadCache::threadlocal_heap_
+-# ifdef HAVE___ATTRIBUTE__
+- __attribute__ ((tls_model ("initial-exec")))
+-# endif
+- ;
++__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
++ ATTR_INITIAL_EXEC
++ = {0, 0};
+ #endif
+ bool ThreadCache::tsd_inited_ = false;
+ pthread_key_t ThreadCache::heap_key_;
+@@ -379,7 +377,8 @@
+ perftools_pthread_setspecific(heap_key_, heap);
+ #ifdef HAVE_TLS
+ // Also keep a copy in __thread for faster retrieval
+- threadlocal_heap_ = heap;
++ threadlocal_data_.heap = heap;
++ SetMinSizeForSlowPath(kMaxSize + 1);
+ #endif
+ heap->in_setspecific_ = false;
+ }
+@@ -414,7 +413,8 @@
+ perftools_pthread_setspecific(heap_key_, NULL);
+ #ifdef HAVE_TLS
+ // Also update the copy in __thread
+- threadlocal_heap_ = NULL;
++ threadlocal_data_.heap = NULL;
++ SetMinSizeForSlowPath(0);
+ #endif
+ heap->in_setspecific_ = false;
+ if (GetThreadHeap() == heap) {
+@@ -434,7 +434,8 @@
+ if (ptr == NULL) return;
+ #ifdef HAVE_TLS
+ // Prevent fast path of GetThreadHeap() from returning heap.
+- threadlocal_heap_ = NULL;
++ threadlocal_data_.heap = NULL;
++ SetMinSizeForSlowPath(0);
+ #endif
+ DeleteCache(reinterpret_cast(ptr));
+ }
+diff -urP gperftools-2.0/src/thread_cache.h /home/spot/gperftools/src/thread_cache.h
+--- gperftools-2.0/src/thread_cache.h 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/thread_cache.h 2013-03-01 14:25:37.965366851 -0500
+@@ -75,6 +75,12 @@
+
+ class ThreadCache {
+ public:
++#ifdef HAVE_TLS
++ enum { have_tls = true };
++#else
++ enum { have_tls = false };
++#endif
++
+ // All ThreadCache objects are kept in a linked list (for stats collection)
+ ThreadCache* next_;
+ ThreadCache* prev_;
+@@ -106,16 +112,21 @@
+ static ThreadCache* GetThreadHeap();
+ static ThreadCache* GetCache();
+ static ThreadCache* GetCacheIfPresent();
++ static ThreadCache* GetCacheWhichMustBePresent();
+ static ThreadCache* CreateCacheIfNecessary();
+ static void BecomeIdle();
++ static size_t MinSizeForSlowPath();
++ static void SetMinSizeForSlowPath(size_t size);
++
++ static bool IsFastPathAllowed() { return MinSizeForSlowPath() != 0; }
+
+ // Return the number of thread heaps in use.
+ static inline int HeapsInUse();
+
+- // Writes to total_bytes the total number of bytes used by all thread heaps.
+- // class_count must be an array of size kNumClasses. Writes the number of
+- // items on the corresponding freelist. class_count may be NULL.
+- // The storage of both parameters must be zero intialized.
++ // Adds to *total_bytes the total number of bytes used by all thread heaps.
++ // Also, if class_count is not NULL, it must be an array of size kNumClasses,
++ // and this function will increment each element of class_count by the number
++ // of items in all thread-local freelists of the corresponding size class.
+ // REQUIRES: Static::pageheap_lock is held.
+ static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);
+
+@@ -251,12 +262,24 @@
+ // Since we don't really use dlopen in google code -- and using dlopen
+ // on a malloc replacement is asking for trouble in any case -- that's
+ // a good tradeoff for us.
++#ifdef HAVE___ATTRIBUTE__
++#define ATTR_INITIAL_EXEC __attribute__ ((tls_model ("initial-exec")))
++#else
++#define ATTR_INITIAL_EXEC
++#endif
++
+ #ifdef HAVE_TLS
+- static __thread ThreadCache* threadlocal_heap_
+-# ifdef HAVE___ATTRIBUTE__
+- __attribute__ ((tls_model ("initial-exec")))
+-# endif
+- ;
++ struct ThreadLocalData {
++ ThreadCache* heap;
++ // min_size_for_slow_path is 0 if heap is NULL or kMaxSize + 1 otherwise.
++ // The latter is the common case and allows allocation to be faster
++ // than it would be otherwise: typically a single branch will
++ // determine that the requested allocation is no more than kMaxSize
++ // and we can then proceed, knowing that global and thread-local tcmalloc
++ // state is initialized.
++ size_t min_size_for_slow_path;
++ };
++ static __thread ThreadLocalData threadlocal_data_ ATTR_INITIAL_EXEC;
+ #endif
+
+ // Thread-specific key. Initialization here is somewhat tricky
+@@ -373,12 +396,23 @@
+ #ifdef HAVE_TLS
+ // __thread is faster, but only when the kernel supports it
+ if (KernelSupportsTLS())
+- return threadlocal_heap_;
++ return threadlocal_data_.heap;
+ #endif
+ return reinterpret_cast(
+ perftools_pthread_getspecific(heap_key_));
+ }
+
++inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() {
++#ifdef HAVE_TLS
++ ASSERT(threadlocal_data_.heap);
++ return threadlocal_data_.heap;
++#else
++ ASSERT(perftools_pthread_getspecific(heap_key_));
++ return reinterpret_cast(
++ perftools_pthread_getspecific(heap_key_));
++#endif
++}
++
+ inline ThreadCache* ThreadCache::GetCache() {
+ ThreadCache* ptr = NULL;
+ if (!tsd_inited_) {
+@@ -398,6 +432,20 @@
+ return GetThreadHeap();
+ }
+
++inline size_t ThreadCache::MinSizeForSlowPath() {
++#ifdef HAVE_TLS
++ return threadlocal_data_.min_size_for_slow_path;
++#else
++ return 0;
++#endif
++}
++
++inline void ThreadCache::SetMinSizeForSlowPath(size_t size) {
++#ifdef HAVE_TLS
++ threadlocal_data_.min_size_for_slow_path = size;
++#endif
++}
++
+ } // namespace tcmalloc
+
+ #endif // TCMALLOC_THREAD_CACHE_H_
+diff -urP gperftools-2.0/src/windows/port.cc /home/spot/gperftools/src/windows/port.cc
+--- gperftools-2.0/src/windows/port.cc 2012-02-02 16:36:23.000000000 -0500
++++ /home/spot/gperftools/src/windows/port.cc 2013-03-01 14:25:38.029366849 -0500
+@@ -218,6 +218,11 @@
+ // -----------------------------------------------------------------------
+ // These functions replace system-alloc.cc
+
++// The current system allocator declaration (unused here)
++SysAllocator* sys_alloc = NULL;
++// Number of bytes taken from system.
++size_t TCMalloc_SystemTaken = 0;
++
+ // This is mostly like MmapSysAllocator::Alloc, except it does these weird
+ // munmap's in the middle of the page, which is forbidden in windows.
+ extern void* TCMalloc_SystemAlloc(size_t size, size_t *actual_size,
+@@ -243,6 +248,8 @@
+ if (result == NULL)
+ return NULL;
+
++ TCMalloc_SystemTaken += size + extra;
++
+ // Adjust the return memory so it is aligned
+ uintptr_t ptr = reinterpret_cast(result);
+ size_t adjust = 0;
+@@ -254,8 +261,9 @@
+ return reinterpret_cast(ptr);
+ }
+
+-void TCMalloc_SystemRelease(void* start, size_t length) {
++bool TCMalloc_SystemRelease(void* start, size_t length) {
+ // TODO(csilvers): should I be calling VirtualFree here?
++ return false;
+ }
+
+ bool RegisterSystemAllocator(SysAllocator *allocator, int priority) {
+@@ -266,9 +274,6 @@
+ // We don't dump stats on windows, right now
+ }
+
+-// The current system allocator
+-SysAllocator* sys_alloc = NULL;
+-
+
+ // -----------------------------------------------------------------------
+ // These functions rework existing functions of the same name in the
+diff -urP gperftools-2.0/src/windows/TODO /home/spot/gperftools/src/windows/TODO
+--- gperftools-2.0/src/windows/TODO 1969-12-31 19:00:00.000000000 -0500
++++ /home/spot/gperftools/src/windows/TODO 2013-03-01 14:25:38.027366849 -0500
+@@ -0,0 +1,86 @@
++* Get heap-profile-table.cc using DeleteMatchingFiles
++* Get heap-profile-table.cc using FillProcSelfMaps, DumpProcSelfMaps
++* Play around with ExperimentalGetStackTrace
++* Support the windows-level memory-allocation functions? See
++ /home/build/googleclient/earth/client/tools/memorytracking/client/memorytrace/src/memorytrace.cpp
++ /home/build/googleclient/total_recall/common/sitestep/*
++ http://www.internals.com/articles/apispy/apispy.htm
++ http://www.wheaty.net/APISPY32.zip
++* Verify /proc/xxx/maps:
++ http://www.geocities.com/wah_java_dotnet/procmap/index.html
++* Figure out how to edit the executable IAT so tcmalloc.dll is loaded first
++* Use QueryPerformanceCounter instead of GetTickCount() (also for sparsehash)
++
++----
++More info on windows-level memory-allocation functions:
++ C runtime malloc
++ LocalAlloc
++ GlobalAlloc
++ HeapAlloc
++ VirtualAlloc
++ mmap stuff
++
++malloc, LocalAlloc and GlobalAlloc call HeapAlloc, which calls
++VirtualAlloc when needed, which calls VirtualAllocEx (the __sbrk equiv?)
++
++siggi sez: If you want to do a generic job, you probably need to
++preserve the semantics of all of these Win32 calls:
++ Heap32First
++ Heap32ListFirst
++ Heap32ListNext
++ Heap32Next
++ HeapAlloc
++ HeapCompact
++ HeapCreate
++ HeapCreateTagsW
++ HeapDestroy
++ HeapExtend
++ HeapFree
++ HeapLock
++ HeapQueryInformation
++ HeapQueryTagW
++ HeapReAlloc
++ HeapSetInformation
++ HeapSize
++ HeapSummary
++ HeapUnlock
++ HeapUsage
++ HeapValidate
++ HeapWalk
++
++kernel32.dll export functions and nt.dll export functions:
++ http://www.shorthike.com/svn/trunk/tools_win32/dm/lib/kernel32.def
++ http://undocumented.ntinternals.net/
++
++You can edit the executable IAT to have the patching DLL be the
++first one loaded.
++
++Most complete way to intercept system calls is patch the functions
++(not the IAT).
++
++Microsoft has some built-in routines for heap-checking:
++ http://support.microsoft.com/kb/268343
++
++----
++Itimer replacement:
++ http://msdn2.microsoft.com/en-us/library/ms712713.aspx
++
++----
++Changes I've had to make to the project file:
++
++0) When creating the project file, click on "no autogenerated files"
++
++--- For each project:
++1) Alt-F7 -> General -> [pulldown "all configurations" ] -> Output Directory -> $(SolutionDir)$(ConfigurationName)
++2) Alt-F7 -> General -> [pulldown "all configurations" ] -> Intermediate Directory -> $(ConfigurationName)
++
++--- For each .cc file:
++1) Alt-F7 -> C/C++ -> General -> [pulldown "all configurations"] -> Additional Include Directives --> src/windows + src/
++2) Alt-F7 -> C/C++ -> Code Generation -> Runtime Library -> Multi-threaded, debug/release, DLL or not
++
++--- For DLL:
++3) Alt-F7 -> Linker -> Input -> [pulldown "all configurations" ] -> Module Definition File -> src\windows\vc7and8.def
++--- For binaries depending on a DLL:
++3) Right-click on project -> Project Dependencies -> [add dll]
++--- For static binaries (not depending on a DLL)
++3) Alt-F7 -> C/C++ -> Command Line -> [pulldown "all configurations"] -> /D PERFTOOLS_DLL_DECL=
diff --git a/gperftools.spec b/gperftools.spec
index 8e09bc5..ed75371 100644
--- a/gperftools.spec
+++ b/gperftools.spec
@@ -2,17 +2,22 @@
Name: gperftools
Version: 2.0
-Release: 3%{?dist}.2
+Release: 11%{?dist}.1
License: BSD
Group: Development/Tools
Summary: Very fast malloc and performance analysis tools
URL: http://code.google.com/p/gperftools/
Source0: http://gperftools.googlecode.com/files/%{name}-%{version}.tar.gz
-# ppc64 still broken, bz 238390
-ExclusiveArch: %{ix86} x86_64 ppc
+# Update to latest svn, since google forgets how to make releases
+Patch0: gperftools-svn-r190.patch
+Patch1: gperftools-2.0-svn190-to-svn218.patch
+ExclusiveArch: %{ix86} x86_64 ppc ppc64 %{arm}
%ifnarch ppc ppc64
# BuildRequires: libunwind-devel
%endif
+BuildRequires: autoconf, automake, libtool
+Requires: gperftools-devel = %{version}-%{release}
+Requires: pprof = %{version}-%{release}
%description
Perf Tools is a collection of performance analysis tools, including a
@@ -20,6 +25,9 @@ high-performance multi-threaded malloc() implementation that works
particularly well with threads and STL, a thread-friendly heap-checker,
a heap profiler, and a cpu-profiler.
+This is a metapackage which pulls in all of the gperftools (and pprof)
+binaries, libraries, and development headers, so that you can use them.
+
%package devel
Summary: Development libraries and headers for gperftools
Group: Development/Libraries
@@ -50,6 +58,8 @@ Pprof is a heap and CPU profiler tool, part of the gperftools suite.
%prep
%setup -q
+%patch0 -p1 -b .svn-r190
+%patch1 -p1 -b .svn-r218
# Fix end-of-line encoding
sed -i 's/\r//' README_windows.txt
@@ -57,9 +67,11 @@ sed -i 's/\r//' README_windows.txt
# No need to have exec permissions on source code
chmod -x src/sampler.h src/sampler.cc
+autoreconf -i
+
%build
-CXXFLAGS=`echo $RPM_OPT_FLAGS -DTCMALLOC_LARGE_PAGES -fno-omit-frame-pointer| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
-%configure --disable-static --enable-frame-pointers
+CXXFLAGS=`echo $RPM_OPT_FLAGS -fno-strict-aliasing -Wno-unused-local-typedefs -DTCMALLOC_LARGE_PAGES| sed -e 's/-Wp,-D_FORTIFY_SOURCE=2//g'`
+%configure --disable-static
# Bad rpath!
sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
@@ -80,13 +92,15 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
%check
# http://code.google.com/p/google-perftools/issues/detail?id=153
%ifnarch ppc
-# Their test suite is junk. Disabling.
+# Their test suite is almost always broken.
# LD_LIBRARY_PATH=./.libs make check
%endif
%post libs -p /sbin/ldconfig
%postun libs -p /sbin/ldconfig
+%files
+
%files -n pprof
%{_bindir}/pprof
%{_mandir}/man1/*
@@ -102,11 +116,31 @@ rm -rf %{buildroot}%{_docdir}/%{name}-%{version}/INSTALL
%{_libdir}/*.so.*
%changelog
-* Thu Feb 16 2012 Tom Callaway - 2.0-3.2
-- fix broken Requires on -devel package
+* Tue Jun 4 2013 Tom Callaway - 2.0-11.1
+- pass -fno-strict-aliasing
+- create "gperftools" metapackage.
+- update to svn r218 (cleanups, some ARM fixes)
-* Tue Feb 14 2012 Tom Callaway - 2.0-3.1
-- disable libunwind-devel BR on EPEL
+* Thu Mar 14 2013 Dan HorĂ¡k - 2.0-10
+- build on ppc64 as well
+
+* Fri Mar 1 2013 Tom Callaway - 2.0-9
+- update to svn r190 (because google can't make releases)
+
+* Thu Feb 14 2013 Fedora Release Engineering - 2.0-8
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_19_Mass_Rebuild
+
+* Fri Aug 3 2012 Tom Callaway - 2.0-7
+- fix compile with glibc 2.16
+
+* Thu Jul 19 2012 Fedora Release Engineering - 2.0-6
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_18_Mass_Rebuild
+
+* Mon Feb 20 2012 Peter Robinson - 2.0-5
+- Enable ARM as a supported arch
+
+* Thu Feb 16 2012 Tom Callaway - 2.0-4
+- fix bug in -devel Requires
* Tue Feb 14 2012 Tom Callaway - 2.0-3
- pprof doesn't actually need gperftools-libs
|