1973 lines
83 KiB
Diff
1973 lines
83 KiB
Diff
Only in gperftools-2.0: aclocal.m4
|
|
Only in gperftools-2.0: aclocal.m4.svn-r190
|
|
diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh
|
|
--- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400
|
|
+++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400
|
|
@@ -1,54 +1,3 @@
|
|
#!/bin/sh
|
|
|
|
-# Before using, you should figure out all the .m4 macros that your
|
|
-# configure.m4 script needs and make sure they exist in the m4/
|
|
-# directory.
|
|
-#
|
|
-# These are the files that this script might edit:
|
|
-# aclocal.m4 configure Makefile.in src/config.h.in \
|
|
-# depcomp config.guess config.sub install-sh missing mkinstalldirs \
|
|
-# ltmain.sh
|
|
-#
|
|
-# Here's a command you can run to see what files aclocal will import:
|
|
-# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p'
|
|
-
|
|
-set -ex
|
|
-rm -rf autom4te.cache
|
|
-
|
|
-trap 'rm -f aclocal.m4.tmp' EXIT
|
|
-
|
|
-# Returns the first binary in $* that exists, or the last arg, if none exists.
|
|
-WhichOf() {
|
|
- for candidate in "$@"; do
|
|
- if "$candidate" --version >/dev/null 2>&1; then
|
|
- echo "$candidate"
|
|
- return
|
|
- fi
|
|
- done
|
|
- echo "$candidate" # the last one in $@
|
|
-}
|
|
-
|
|
-# Use version 1.9 of aclocal and automake if available.
|
|
-ACLOCAL=`WhichOf aclocal-1.9 aclocal`
|
|
-AUTOMAKE=`WhichOf automake-1.9 automake`
|
|
-LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize`
|
|
-
|
|
-# aclocal tries to overwrite aclocal.m4 even if the contents haven't
|
|
-# changed, which is annoying when the file is not open for edit (in
|
|
-# p4). We work around this by writing to a temp file and just
|
|
-# updating the timestamp if the file hasn't change.
|
|
-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp
|
|
-if cmp aclocal.m4.tmp aclocal.m4; then
|
|
- touch aclocal.m4 # pretend that we regenerated the file
|
|
- rm -f aclocal.m4.tmp
|
|
-else
|
|
- mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails
|
|
-fi
|
|
-
|
|
-grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f
|
|
-autoconf -f -W all,no-obsolete
|
|
-autoheader -f -W all
|
|
-"$AUTOMAKE" -a -c -f -W all
|
|
-
|
|
-rm -rf autom4te.cache
|
|
-exit 0
|
|
+autoreconf -i
|
|
Only in gperftools-2.0: autogen.sh.svn-r190
|
|
Only in gperftools-2.0: compile
|
|
Only in gperftools-2.0: config.guess
|
|
Only in gperftools-2.0: config.sub
|
|
Only in gperftools-2.0: configure
|
|
diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac
|
|
--- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400
|
|
+++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400
|
|
@@ -99,28 +99,7 @@
|
|
[gpt_cv_objcopy_weaken=no])
|
|
AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes)
|
|
|
|
-case $host_os in
|
|
- *mingw*)
|
|
- # Disabling fast install keeps libtool from creating wrapper scripts
|
|
- # around the executables it builds. Such scripts have caused failures on
|
|
- # MinGW. Using this option means an extra link step is executed during
|
|
- # "make install".
|
|
- _LT_SET_OPTION([LT_INIT],[disable-fast-install])
|
|
-AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
|
|
-the `disable-fast-install' option into LT_INIT's first parameter.])
|
|
-
|
|
- ;;
|
|
- *)
|
|
- _LT_SET_OPTION([LT_INIT],[fast-install])
|
|
-AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
|
|
-the `fast-install' option into LT_INIT's first parameter.])
|
|
-
|
|
- ;;
|
|
-esac
|
|
-
|
|
-LT_INIT
|
|
-AC_SUBST(LIBTOOL_DEPS)
|
|
-AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x")
|
|
+LT_INIT([])
|
|
|
|
AC_C_INLINE
|
|
AX_C___ATTRIBUTE__
|
|
@@ -134,6 +113,7 @@
|
|
AC_CHECK_TYPES([Elf32_Versym],,, [#include <elf.h>]) # for vdso_support.h
|
|
AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory
|
|
AC_CHECK_FUNCS(geteuid) # for turning off services when run as root
|
|
+AC_CHECK_FUNCS(fork) # for the pthread_atfork setup
|
|
AC_CHECK_HEADERS(features.h) # for vdso_support.h
|
|
AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not
|
|
AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h
|
|
@@ -183,6 +163,11 @@
|
|
# This workaround comes from
|
|
# http://cygwin.com/ml/cygwin/2004-11/msg00138.html
|
|
case "$host" in
|
|
+ *-*-mingw*)
|
|
+ dnl mingw doesn't have mmap, not worth
|
|
+ dnl checking. Especially given that mingw can be a
|
|
+ dnl cross-compiler
|
|
+ ;;
|
|
*-*-cygwin*)
|
|
ac_cv_func_mmap_fixed_mapped=yes
|
|
AC_DEFINE(HAVE_MMAP, 1,
|
|
@@ -310,10 +295,18 @@
|
|
# Note, however, that our code tickles a bug in gcc < 4.1.2
|
|
# involving TLS and -fPIC (which our libraries will use) on x86:
|
|
# http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
|
|
+#
|
|
+# mingw also compiles __thread, but the resulting code actually
|
|
+# fails to work correctly in at least some not-so-ancient versions:
|
|
+# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html
|
|
AC_MSG_CHECKING([for __thread])
|
|
AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2))
|
|
#error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
|
|
-#endif], [static __thread int p = 0])],
|
|
+#endif
|
|
+#if defined(__MINGW32__)
|
|
+#error mingw doesn't really support tls
|
|
+#endif
|
|
+], [static __thread int p = 0])],
|
|
[AC_DEFINE(HAVE_TLS, 1,
|
|
Define to 1 if compiler supports __thread)
|
|
AC_MSG_RESULT([yes])],
|
|
Only in gperftools-2.0: configure.ac.svn-r190
|
|
Only in gperftools-2.0: configure.svn-r190
|
|
Only in gperftools-2.0: depcomp
|
|
Only in gperftools-2.0/doc: cpuprofile.html.svn-r190
|
|
Only in gperftools-2.0/doc: heapprofile.html.svn-r190
|
|
Only in gperftools-2.0/doc: pprof.see_also.svn-r190
|
|
diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL
|
|
--- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500
|
|
+++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400
|
|
@@ -8,6 +8,28 @@
|
|
Perftools-Specific Install Notes
|
|
================================
|
|
|
|
+*** Building from source repository
|
|
+
|
|
+As of 2.1 gperftools does not have configure and other autotools
|
|
+products checked into its source repository. This is common practice
|
|
+for projects using autotools.
|
|
+
|
|
+NOTE: Source releases (.tar.gz that you download from
|
|
+code.google.com/p/gperftools) still have all required files just as
|
|
+before. Nothing has changed w.r.t. building from .tar.gz releases.
|
|
+
|
|
+But, in order to build gperftools checked out from the subversion
|
|
+repository you need to have autoconf, automake and libtool
|
|
+installed. And before running ./configure you have to generate it (and
|
|
+a bunch of other files) by running the ./autogen.sh script. That script
|
|
+will take care of calling the correct autotools programs in the correct order.
|
|
+
|
|
+If you're a maintainer then it's business as usual too. Just run make
|
|
+dist (or, preferably, make distcheck) and it'll produce .tar.gz or
|
|
+.tar.bz2 with all autotools magic already included, so that users can
|
|
+build our software without having autotools.
|
|
+
|
|
+
|
|
*** NOTE FOR 64-BIT LINUX SYSTEMS
|
|
|
|
The glibc built-in stack-unwinder on 64-bit systems has some problems
|
|
Only in gperftools-2.0: install-sh
|
|
Only in gperftools-2.0: libtool
|
|
Only in gperftools-2.0: ltmain.sh
|
|
Only in gperftools-2.0/m4: libtool.m4
|
|
Only in gperftools-2.0/m4: libtool.m4.svn-r190
|
|
Only in gperftools-2.0/m4: lt~obsolete.m4
|
|
Only in gperftools-2.0/m4: ltoptions.m4
|
|
Only in gperftools-2.0/m4: ltsugar.m4
|
|
Only in gperftools-2.0/m4: ltversion.m4
|
|
diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am
|
|
--- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400
|
|
+++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400
|
|
@@ -221,7 +221,7 @@
|
|
src/windows/preamble_patcher.cc \
|
|
src/windows/preamble_patcher_with_stub.cc
|
|
# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
|
|
-libwindows_la_LIBADD = -lPsapi
|
|
+libwindows_la_LIBADD = -lpsapi
|
|
|
|
SPINLOCK_INCLUDES = src/base/spinlock.h \
|
|
src/base/spinlock_internal.h \
|
|
@@ -238,6 +238,7 @@
|
|
noinst_LTLIBRARIES += libspinlock.la
|
|
libspinlock_la_SOURCES = src/base/spinlock.cc \
|
|
src/base/spinlock_internal.cc \
|
|
+ src/base/atomicops-internals-x86.cc \
|
|
$(SPINLOCK_INCLUDES)
|
|
|
|
LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la
|
|
@@ -355,7 +356,7 @@
|
|
$(STACKTRACE_INCLUDES)
|
|
libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK)
|
|
STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)'
|
|
-libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS)
|
|
+libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS)
|
|
|
|
### Unittests
|
|
TESTS += stacktrace_unittest
|
|
@@ -468,7 +469,7 @@
|
|
-DNO_HEAP_CHECK \
|
|
$(PTHREAD_CFLAGS) -DNDEBUG \
|
|
$(AM_CXXFLAGS) $(NO_EXCEPTIONS)
|
|
-libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
|
|
+libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
|
|
libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
|
|
|
|
lib_LTLIBRARIES += libtcmalloc_minimal.la
|
|
@@ -477,7 +478,7 @@
|
|
libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
|
|
$(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
|
|
# -version-info gets passed to libtool
|
|
-libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
|
|
+libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS)
|
|
libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
|
|
|
|
# For windows, we're playing around with trying to do some stacktrace
|
|
@@ -539,6 +540,12 @@
|
|
tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
|
|
tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
|
|
|
|
+TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest
|
|
+tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
|
|
+tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
|
|
+tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
|
|
+tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
|
|
+
|
|
# This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc)
|
|
# In theory this should work under mingw, but mingw has trouble running
|
|
# shell scripts that end in .exe. And it doesn't seem to build shared
|
|
@@ -898,8 +905,16 @@
|
|
|
|
### Unittests
|
|
|
|
-TESTS += tcmalloc_unittest
|
|
-TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \
|
|
+TESTS += tcmalloc_unittest.sh$(EXEEXT)
|
|
+tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh
|
|
+noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES)
|
|
+tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \
|
|
+ tcmalloc_unittest
|
|
+ rm -f $@
|
|
+ cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@
|
|
+
|
|
+noinst_PROGRAMS += tcmalloc_unittest
|
|
+tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \
|
|
src/gperftools/malloc_extension.h
|
|
tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
|
|
src/tcmalloc.h \
|
|
@@ -956,6 +971,12 @@
|
|
tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
|
|
tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
|
|
|
|
+TESTS += tcmalloc_large_heap_fragmentation_unittest
|
|
+tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
|
|
+tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
|
|
+tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
|
|
+tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
|
|
+
|
|
TESTS += raw_printer_test
|
|
raw_printer_test_SOURCES = src/tests/raw_printer_test.cc
|
|
raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
|
|
Only in gperftools-2.0: Makefile.am.svn-r190
|
|
Only in gperftools-2.0: Makefile.in
|
|
Only in gperftools-2.0: Makefile.in.svn-r190
|
|
Only in gperftools-2.0: missing
|
|
Only in gperftools-2.0: mkinstalldirs
|
|
Only in gperftools-2.0: NEWS.svn-r190
|
|
diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h
|
|
--- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400
|
|
@@ -50,6 +50,16 @@
|
|
// implementations on other archtectures will cause your code to break. If you
|
|
// do not know what you are doing, avoid these routines, and use a Mutex.
|
|
//
|
|
+// These following lower-level operations are typically useful only to people
|
|
+// implementing higher-level synchronization operations like spinlocks,
|
|
+// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
|
|
+// a store with appropriate memory-ordering instructions. "Acquire" operations
|
|
+// ensure that no later memory access can be reordered ahead of the operation.
|
|
+// "Release" operations ensure that no previous memory access can be reordered
|
|
+// after the operation. "Barrier" operations have both "Acquire" and "Release"
|
|
+// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
|
|
+// access.
|
|
+//
|
|
// It is incorrect to make direct assignments to/from an atomic variable.
|
|
// You should use one of the Load or Store routines. The NoBarrier
|
|
// versions are provided when no barriers are needed:
|
|
@@ -95,10 +105,10 @@
|
|
#include "base/atomicops-internals-arm-v6plus.h"
|
|
#elif defined(ARMV3)
|
|
#include "base/atomicops-internals-arm-generic.h"
|
|
-#elif defined(_WIN32)
|
|
-#include "base/atomicops-internals-windows.h"
|
|
#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
|
|
#include "base/atomicops-internals-x86.h"
|
|
+#elif defined(_WIN32)
|
|
+#include "base/atomicops-internals-windows.h"
|
|
#elif defined(__linux__) && defined(__PPC__)
|
|
#include "base/atomicops-internals-linuxppc.h"
|
|
#else
|
|
@@ -149,6 +159,18 @@
|
|
reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
|
|
}
|
|
|
|
+inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
|
|
+ AtomicWord new_value) {
|
|
+ return Acquire_AtomicExchange(
|
|
+ reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);  // forwards to the AtomicWordCastType overload
|
|
+}
|
|
+
|
|
+inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
|
|
+ AtomicWord new_value) {
|
|
+ return Release_AtomicExchange(
|
|
+ reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);  // forwards to the AtomicWordCastType overload
|
|
+}
|
|
+
|
|
// Atomically increment *ptr by "increment". Returns the new value of
|
|
// *ptr with the increment applied. This routine implies no memory
|
|
// barriers.
|
|
@@ -164,17 +186,6 @@
|
|
reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment);
|
|
}
|
|
|
|
-// ------------------------------------------------------------------------
|
|
-// These following lower-level operations are typically useful only to people
|
|
-// implementing higher-level synchronization operations like spinlocks,
|
|
-// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
|
|
-// a store with appropriate memory-ordering instructions. "Acquire" operations
|
|
-// ensure that no later memory access can be reordered ahead of the operation.
|
|
-// "Release" operations ensure that no previous memory access can be reordered
|
|
-// after the operation. "Barrier" operations have both "Acquire" and "Release"
|
|
-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
|
|
-// access.
|
|
-// ------------------------------------------------------------------------
|
|
inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
|
|
AtomicWord old_value,
|
|
AtomicWord new_value) {
|
|
@@ -250,6 +261,8 @@
|
|
Atomic32 old_value,
|
|
Atomic32 new_value);
|
|
Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
|
|
+Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
|
|
+Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
|
|
Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
|
|
Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment);
|
|
@@ -271,6 +284,8 @@
|
|
Atomic64 old_value,
|
|
Atomic64 new_value);
|
|
Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
|
|
+Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
|
|
+Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
|
|
Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
|
|
Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
|
|
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400
|
|
@@ -89,6 +89,18 @@
|
|
return old_value;
|
|
}
|
|
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment) {
|
|
for (;;) {
|
|
@@ -176,6 +188,18 @@
|
|
return 0;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
|
|
Atomic64 increment) {
|
|
NotImplementedFatalError("NoBarrier_AtomicIncrement");
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400
|
|
@@ -94,6 +94,28 @@
|
|
return old;
|
|
}
|
|
|
|
+inline void MemoryBarrier() {
|
|
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)
|
|
+ uint32_t dest = 0;  // mcr reads this register, so it is passed as an asm input below
|
|
+ __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" : : "r"(dest) : "memory");
|
|
+#else
|
|
+ __asm__ __volatile__("dmb" : : : "memory");
|
|
+#endif
|
|
+}
|
|
+
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
|
|
+ MemoryBarrier();
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ MemoryBarrier();  // barrier is issued before the exchange (release ordering)
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment) {
|
|
Atomic32 tmp, res;
|
|
@@ -110,10 +132,6 @@
|
|
return res;
|
|
}
|
|
|
|
-inline void MemoryBarrier() {
|
|
- __asm__ __volatile__("dmb" : : : "memory");
|
|
-}
|
|
-
|
|
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment) {
|
|
Atomic32 tmp, res;
|
|
@@ -220,6 +238,19 @@
|
|
return old;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
|
|
+ MemoryBarrier();
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ MemoryBarrier();
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
|
|
Atomic64 increment) {
|
|
int store_failed;
|
|
@@ -303,6 +334,18 @@
|
|
return 0;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ NotImplementedFatalError("Acquire_AtomicExchange");
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ NotImplementedFatalError("Release_AtomicExchange");
|
|
+ return 0;
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
|
|
Atomic64 increment) {
|
|
NotImplementedFatalError("NoBarrier_AtomicIncrement");
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400
|
|
@@ -163,6 +163,26 @@
|
|
return old_value;
|
|
}
|
|
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
|
|
+ Atomic32 new_value) {
|
|
+ Atomic32 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
|
|
+ const_cast<Atomic32*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
|
|
+ Atomic32 new_value) {
|
|
+ Atomic32 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
|
|
+ const_cast<Atomic32*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
|
|
Atomic32 increment) {
|
|
return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
|
|
@@ -294,6 +314,26 @@
|
|
return old_value;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
|
|
+ Atomic64 new_value) {
|
|
+ Atomic64 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
|
|
+ const_cast<Atomic64*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
|
|
+ Atomic64 new_value) {
|
|
+ Atomic64 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
|
|
+ const_cast<Atomic64*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
|
|
Atomic64 increment) {
|
|
return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
|
|
Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400
|
|
@@ -132,6 +132,21 @@
|
|
return old_value;
|
|
}
|
|
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
|
|
+ Atomic32 new_value) {
|
|
+ Atomic32 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
|
|
+ const_cast<Atomic32*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
|
|
+ Atomic32 new_value) {
|
|
+ return Acquire_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
|
|
Atomic32 increment) {
|
|
return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
|
|
@@ -217,6 +232,21 @@
|
|
return old_value;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
|
|
+ Atomic64 new_value) {
|
|
+ Atomic64 old_value;
|
|
+ do {
|
|
+ old_value = *ptr;
|
|
+ } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
|
|
+ const_cast<Atomic64*>(ptr)));
|
|
+ return old_value;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
|
|
+ Atomic64 new_value) {
|
|
+ return Acquire_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
|
|
Atomic64 increment) {
|
|
return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400
|
|
@@ -137,6 +137,18 @@
|
|
return static_cast<Atomic32>(result);
|
|
}
|
|
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ // FastInterlockedExchange has both acquire and release memory barriers.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ // FastInterlockedExchange has both acquire and release memory barriers.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment) {
|
|
return FastInterlockedExchangeAdd(
|
|
@@ -188,8 +200,7 @@
|
|
}
|
|
|
|
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
|
|
- NoBarrier_AtomicExchange(ptr, value);
|
|
- // acts as a barrier in this implementation
|
|
+ Acquire_AtomicExchange(ptr, value);
|
|
}
|
|
|
|
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
|
|
@@ -478,6 +489,18 @@
|
|
#endif // defined(_WIN64) || defined(__MINGW64__)
|
|
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ // FastInterlockedExchange has both acquire and release memory barriers.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ // FastInterlockedExchange has both acquire and release memory barriers.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
|
|
Atomic64 old_value,
|
|
Atomic64 new_value) {
|
|
Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190
|
|
diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h
|
|
--- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400
|
|
@@ -89,6 +89,21 @@
|
|
return new_value; // Now it's the previous value.
|
|
}
|
|
|
|
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
|
|
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
|
|
+ __asm__ __volatile__("lfence" : : : "memory");
|
|
+ }
|
|
+ return old_val;
|
|
+}
|
|
+
|
|
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
|
|
+ Atomic32 new_value) {
|
|
+ // xchgl already has release memory barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
|
|
Atomic32 increment) {
|
|
Atomic32 temp = increment;
|
|
@@ -152,7 +167,7 @@
|
|
__asm__ __volatile__("mfence" : : : "memory");
|
|
} else { // mfence is faster but not present on PIII
|
|
Atomic32 x = 0;
|
|
- NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII
|
|
+ Acquire_AtomicExchange(&x, 0);
|
|
}
|
|
}
|
|
|
|
@@ -161,8 +176,7 @@
|
|
*ptr = value;
|
|
__asm__ __volatile__("mfence" : : : "memory");
|
|
} else {
|
|
- NoBarrier_AtomicExchange(ptr, value);
|
|
- // acts as a barrier on PIII
|
|
+ Acquire_AtomicExchange(ptr, value);
|
|
}
|
|
}
|
|
#endif
|
|
@@ -213,6 +227,21 @@
|
|
return new_value; // Now it's the previous value.
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
|
|
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
|
|
+ __asm__ __volatile__("lfence" : : : "memory");
|
|
+ }
|
|
+ return old_val;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_value) {
|
|
+ // xchgq already has release memory barrier semantics.
|
|
+ return NoBarrier_AtomicExchange(ptr, new_value);
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
|
|
Atomic64 increment) {
|
|
Atomic64 temp = increment;
|
|
@@ -334,6 +363,20 @@
|
|
return old_val;
|
|
}
|
|
|
|
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_val) {
|
|
+ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
|
|
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
|
|
+ __asm__ __volatile__("lfence" : : : "memory");
|
|
+ }
|
|
+ return old_val;
|
|
+}
|
|
+
|
|
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
|
|
+ Atomic64 new_val) {
|
|
+ return NoBarrier_AtomicExchange(ptr, new_val);
|
|
+}
|
|
+
|
|
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
|
|
Atomic64 increment) {
|
|
Atomic64 old_val, new_val;
|
|
diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h
|
|
--- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400
|
|
+++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400
|
|
@@ -334,10 +334,13 @@
|
|
#if defined(HAVE___ATTRIBUTE__)
|
|
# if (defined(__i386__) || defined(__x86_64__))
|
|
# define CACHELINE_ALIGNED __attribute__((aligned(64)))
|
|
-# elif defined(__arm__)
|
|
-# define CACHELINE_ALIGNED __attribute__((aligned(32)))
|
|
# elif (defined(__PPC__) || defined(__PPC64__))
|
|
# define CACHELINE_ALIGNED __attribute__((aligned(16)))
|
|
+# elif (defined(__arm__))
|
|
+# define CACHELINE_ALIGNED __attribute__((aligned(64)))
|
|
+ // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
|
|
+# else
|
|
+# error Could not determine cache line length - unknown architecture
|
|
# endif
|
|
#else
|
|
# define CACHELINE_ALIGNED
|
|
Only in gperftools-2.0/src/base: basictypes.h.svn-r190
|
|
Only in gperftools-2.0/src/base: cycleclock.h.svn-r190
|
|
diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h
|
|
--- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400
|
|
+++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400
|
|
@@ -148,6 +148,8 @@
|
|
#include <errno.h>
|
|
#include <signal.h>
|
|
#include <stdarg.h>
|
|
+#include <stddef.h>
|
|
+#include <stdint.h>
|
|
#include <string.h>
|
|
#include <sys/ptrace.h>
|
|
#include <sys/resource.h>
|
|
@@ -404,24 +406,24 @@
|
|
};
|
|
#elif defined(__x86_64__)
|
|
struct kernel_stat {
|
|
- unsigned long st_dev;
|
|
- unsigned long st_ino;
|
|
- unsigned long st_nlink;
|
|
+ uint64_t st_dev;
|
|
+ uint64_t st_ino;
|
|
+ uint64_t st_nlink;
|
|
unsigned st_mode;
|
|
unsigned st_uid;
|
|
unsigned st_gid;
|
|
unsigned __pad0;
|
|
- unsigned long st_rdev;
|
|
- long st_size;
|
|
- long st_blksize;
|
|
- long st_blocks;
|
|
- unsigned long st_atime_;
|
|
- unsigned long st_atime_nsec_;
|
|
- unsigned long st_mtime_;
|
|
- unsigned long st_mtime_nsec_;
|
|
- unsigned long st_ctime_;
|
|
- unsigned long st_ctime_nsec_;
|
|
- long __unused[3];
|
|
+ uint64_t st_rdev;
|
|
+ int64_t st_size;
|
|
+ int64_t st_blksize;
|
|
+ int64_t st_blocks;
|
|
+ uint64_t st_atime_;
|
|
+ uint64_t st_atime_nsec_;
|
|
+ uint64_t st_mtime_;
|
|
+ uint64_t st_mtime_nsec_;
|
|
+ uint64_t st_ctime_;
|
|
+ uint64_t st_ctime_nsec_;
|
|
+ int64_t __unused[3];
|
|
};
|
|
#elif defined(__PPC__)
|
|
struct kernel_stat {
|
|
@@ -1013,74 +1015,141 @@
|
|
* location (e.g. when using the clone() system call with the CLONE_VM
|
|
* option).
|
|
*/
|
|
+ #undef LSS_ENTRYPOINT
|
|
+ #define LSS_ENTRYPOINT "syscall\n"
|
|
+
|
|
+ /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
|
|
+ * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
|
|
+ * sign extension. We can't cast pointers directly because those are
|
|
+ * 32 bits, and gcc will dump ugly warnings about casting from a pointer
|
|
+ * to an integer of a different size.
|
|
+ */
|
|
+ #undef LSS_SYSCALL_ARG
|
|
+ #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
|
|
+ #undef _LSS_RETURN
|
|
+ #define _LSS_RETURN(type, res, cast) \
|
|
+ do { \
|
|
+ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \
|
|
+ LSS_ERRNO = -(res); \
|
|
+ res = -1; \
|
|
+ } \
|
|
+ return (type)(cast)(res); \
|
|
+ } while (0)
|
|
+ #undef LSS_RETURN
|
|
+ #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
|
|
+
|
|
+ #undef _LSS_BODY
|
|
+ #define _LSS_BODY(nr, type, name, cast, ...) \
|
|
+ long long __res; \
|
|
+ __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \
|
|
+ : "=a" (__res) \
|
|
+ : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \
|
|
+ : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \
|
|
+ _LSS_RETURN(type, __res, cast)
|
|
#undef LSS_BODY
|
|
- #define LSS_BODY(type,name, ...) \
|
|
- long __res; \
|
|
- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
|
|
- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
|
|
- LSS_RETURN(type, __res)
|
|
+ #define LSS_BODY(nr, type, name, args...) \
|
|
+ _LSS_BODY(nr, type, name, uintptr_t, ## args)
|
|
+
|
|
+ #undef LSS_BODY_ASM0
|
|
+ #undef LSS_BODY_ASM1
|
|
+ #undef LSS_BODY_ASM2
|
|
+ #undef LSS_BODY_ASM3
|
|
+ #undef LSS_BODY_ASM4
|
|
+ #undef LSS_BODY_ASM5
|
|
+ #undef LSS_BODY_ASM6
|
|
+ #define LSS_BODY_ASM0
|
|
+ #define LSS_BODY_ASM1 LSS_BODY_ASM0
|
|
+ #define LSS_BODY_ASM2 LSS_BODY_ASM1
|
|
+ #define LSS_BODY_ASM3 LSS_BODY_ASM2
|
|
+ #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
|
|
+ #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
|
|
+ #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
|
|
+
|
|
+ #undef LSS_BODY_CLOBBER0
|
|
+ #undef LSS_BODY_CLOBBER1
|
|
+ #undef LSS_BODY_CLOBBER2
|
|
+ #undef LSS_BODY_CLOBBER3
|
|
+ #undef LSS_BODY_CLOBBER4
|
|
+ #undef LSS_BODY_CLOBBER5
|
|
+ #undef LSS_BODY_CLOBBER6
|
|
+ #define LSS_BODY_CLOBBER0
|
|
+ #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
|
|
+ #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
|
|
+ #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
|
|
+ #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
|
|
+ #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
|
|
+ #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
|
|
+
|
|
+ #undef LSS_BODY_ARG0
|
|
+ #undef LSS_BODY_ARG1
|
|
+ #undef LSS_BODY_ARG2
|
|
+ #undef LSS_BODY_ARG3
|
|
+ #undef LSS_BODY_ARG4
|
|
+ #undef LSS_BODY_ARG5
|
|
+ #undef LSS_BODY_ARG6
|
|
+ #define LSS_BODY_ARG0()
|
|
+ #define LSS_BODY_ARG1(arg1) \
|
|
+ LSS_BODY_ARG0(), "D" (arg1)
|
|
+ #define LSS_BODY_ARG2(arg1, arg2) \
|
|
+ LSS_BODY_ARG1(arg1), "S" (arg2)
|
|
+ #define LSS_BODY_ARG3(arg1, arg2, arg3) \
|
|
+ LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
|
|
+ #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
|
|
+ LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
|
|
+ #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
|
|
+ LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
|
|
+ #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
|
|
+ LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
|
|
+
|
|
#undef _syscall0
|
|
#define _syscall0(type,name) \
|
|
type LSS_NAME(name)() { \
|
|
- LSS_BODY(type, name); \
|
|
+ LSS_BODY(0, type, name); \
|
|
}
|
|
#undef _syscall1
|
|
#define _syscall1(type,name,type1,arg1) \
|
|
type LSS_NAME(name)(type1 arg1) { \
|
|
- LSS_BODY(type, name, "D" ((long)(arg1))); \
|
|
+ LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \
|
|
}
|
|
#undef _syscall2
|
|
#define _syscall2(type,name,type1,arg1,type2,arg2) \
|
|
type LSS_NAME(name)(type1 arg1, type2 arg2) { \
|
|
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
|
|
+ LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
|
|
}
|
|
#undef _syscall3
|
|
#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
|
|
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
|
|
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
|
|
- "d" ((long)(arg3))); \
|
|
+ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
|
|
+ LSS_SYSCALL_ARG(arg3)); \
|
|
}
|
|
#undef _syscall4
|
|
#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
|
|
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
|
|
- long __res; \
|
|
- __asm__ __volatile__("movq %5,%%r10; syscall" : \
|
|
- "=a" (__res) : "0" (__NR_##name), \
|
|
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
|
|
- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
|
|
- LSS_RETURN(type, __res); \
|
|
+ LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
|
|
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
|
|
}
|
|
#undef _syscall5
|
|
#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
|
|
type5,arg5) \
|
|
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
|
|
type5 arg5) { \
|
|
- long __res; \
|
|
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
|
|
- "=a" (__res) : "0" (__NR_##name), \
|
|
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
|
|
- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
|
|
- "r8", "r10", "r11", "rcx", "memory"); \
|
|
- LSS_RETURN(type, __res); \
|
|
+ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
|
|
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
|
|
+ LSS_SYSCALL_ARG(arg5)); \
|
|
}
|
|
#undef _syscall6
|
|
#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
|
|
type5,arg5,type6,arg6) \
|
|
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
|
|
type5 arg5, type6 arg6) { \
|
|
- long __res; \
|
|
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
|
|
- "syscall" : \
|
|
- "=a" (__res) : "0" (__NR_##name), \
|
|
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
|
|
- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
|
|
- "r8", "r9", "r10", "r11", "rcx", "memory"); \
|
|
- LSS_RETURN(type, __res); \
|
|
+ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
|
|
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
|
|
+ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
|
|
}
|
|
LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
|
|
int flags, void *arg, int *parent_tidptr,
|
|
void *newtls, int *child_tidptr) {
|
|
- long __res;
|
|
+ long long __res;
|
|
{
|
|
__asm__ __volatile__(/* if (fn == NULL)
|
|
* return -EINVAL;
|
|
@@ -1145,8 +1214,13 @@
|
|
"1:\n"
|
|
: "=a" (__res)
|
|
: "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
|
|
- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
|
|
- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
|
|
+ "r"(LSS_SYSCALL_ARG(fn)),
|
|
+ "S"(LSS_SYSCALL_ARG(child_stack)),
|
|
+ "D"(LSS_SYSCALL_ARG(flags)),
|
|
+ "r"(LSS_SYSCALL_ARG(arg)),
|
|
+ "d"(LSS_SYSCALL_ARG(parent_tidptr)),
|
|
+ "r"(LSS_SYSCALL_ARG(newtls)),
|
|
+ "r"(LSS_SYSCALL_ARG(child_tidptr))
|
|
: "rsp", "memory", "r8", "r10", "r11", "rcx");
|
|
}
|
|
LSS_RETURN(int, __res);
|
|
@@ -1159,7 +1233,7 @@
|
|
* Unfortunately, we cannot just reference the glibc version of this
|
|
* function, as glibc goes out of its way to make it inaccessible.
|
|
*/
|
|
- void (*res)(void);
|
|
+ long long res;
|
|
__asm__ __volatile__("call 2f\n"
|
|
"0:.align 16\n"
|
|
"1:movq %1,%%rax\n"
|
|
@@ -1168,7 +1242,7 @@
|
|
"addq $(1b-0b),%0\n"
|
|
: "=a" (res)
|
|
: "i" (__NR_rt_sigreturn));
|
|
- return res;
|
|
+ return (void (*)(void))(uintptr_t)res;
|
|
}
|
|
#elif defined(__arm__)
|
|
/* Most definitions of _syscallX() neglect to mark "memory" as being
|
|
@@ -1797,8 +1871,16 @@
|
|
LSS_INLINE _syscall0(pid_t, _gettid)
|
|
LSS_INLINE _syscall2(int, kill, pid_t, p,
|
|
int, s)
|
|
- LSS_INLINE _syscall3(off_t, lseek, int, f,
|
|
- off_t, o, int, w)
|
|
+ #if defined(__x86_64__)
|
|
+ /* Need to make sure off_t isn't truncated to 32-bits under x32. */
|
|
+ LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
|
|
+ _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
|
|
+ LSS_SYSCALL_ARG(w));
|
|
+ }
|
|
+ #else
|
|
+ LSS_INLINE _syscall3(off_t, lseek, int, f,
|
|
+ off_t, o, int, w)
|
|
+ #endif
|
|
LSS_INLINE _syscall2(int, munmap, void*, s,
|
|
size_t, l)
|
|
LSS_INLINE _syscall5(void*, _mremap, void*, o,
|
|
@@ -1835,10 +1917,13 @@
|
|
int, t, int, p)
|
|
#endif
|
|
#if defined(__x86_64__)
|
|
- LSS_INLINE _syscall6(void*, mmap, void*, s,
|
|
- size_t, l, int, p,
|
|
- int, f, int, d,
|
|
- __off64_t, o)
|
|
+ /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */
|
|
+ LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
|
|
+ __off64_t o) {
|
|
+ LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
|
|
+ LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
|
|
+ LSS_SYSCALL_ARG(d), (uint64_t)(o));
|
|
+ }
|
|
|
|
LSS_INLINE int LSS_NAME(sigaction)(int signum,
|
|
const struct kernel_sigaction *act,
|
|
Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190
|
|
Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h
|
|
--- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400
|
|
@@ -31,11 +31,6 @@
|
|
* Author: Sanjay Ghemawat
|
|
*/
|
|
|
|
-//
|
|
-// Fast spinlocks (at least on x86, a lock/unlock pair is approximately
|
|
-// half the cost of a Mutex because the unlock just does a store instead
|
|
-// of a compare-and-swap which is expensive).
|
|
-
|
|
// SpinLock is async signal safe.
|
|
// If used within a signal handler, all lock holders
|
|
// should block the signal even outside the signal handler.
|
|
@@ -95,10 +90,9 @@
|
|
// TODO(csilvers): uncomment the annotation when we figure out how to
|
|
// support this macro with 0 args (see thread_annotations.h)
|
|
inline void Unlock() /*UNLOCK_FUNCTION()*/ {
|
|
- uint64 wait_cycles =
|
|
- static_cast<uint64>(base::subtle::NoBarrier_Load(&lockword_));
|
|
ANNOTATE_RWLOCK_RELEASED(this, 1);
|
|
- base::subtle::Release_Store(&lockword_, kSpinLockFree);
|
|
+ uint64 wait_cycles = static_cast<uint64>(
|
|
+ base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
|
|
if (wait_cycles != kSpinLockHeld) {
|
|
// Collect contentionz profile info, and speed the wakeup of any waiter.
|
|
// The wait_cycles value indicates how long this thread spent waiting
|
|
Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190
|
|
Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h
|
|
--- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400
|
|
@@ -38,7 +38,7 @@
|
|
#include <time.h>
|
|
#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
|
|
#include <windows.h> // for DWORD
|
|
-#include <TlHelp32.h> // for CreateToolhelp32Snapshot
|
|
+#include <tlhelp32.h> // for CreateToolhelp32Snapshot
|
|
#endif
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h> // for pid_t
|
|
diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h
|
|
--- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400
|
|
@@ -79,6 +79,16 @@
|
|
// page full of 5-byte objects would have 2 bytes memory overhead).
|
|
size_t OverheadBytes();
|
|
|
|
+ // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
|
|
+ // to set the lock in a consistent state before the fork.
|
|
+ void Lock() {
|
|
+ lock_.Lock();
|
|
+ }
|
|
+
|
|
+ void Unlock() {
|
|
+ lock_.Unlock();
|
|
+ }
|
|
+
|
|
private:
|
|
// TransferCache is used to cache transfers of
|
|
// sizemap.num_objects_to_move(size_class) back and forth between
|
|
diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc
|
|
--- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400
|
|
+++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400
|
|
@@ -30,12 +30,32 @@
|
|
// ---
|
|
// Author: Sanjay Ghemawat <opensource@google.com>
|
|
|
|
+#include <stdlib.h> // for getenv and strtol
|
|
#include "config.h"
|
|
#include "common.h"
|
|
#include "system-alloc.h"
|
|
+#include "base/spinlock.h"
|
|
|
|
namespace tcmalloc {
|
|
|
|
+// Define the maximum number of objects per class type to transfer between
|
|
+// thread and central caches.
|
|
+static int32 FLAGS_tcmalloc_transfer_num_objects;
|
|
+
|
|
+static const int32 kDefaultTransferNumObjecs = 32768;
|
|
+
|
|
+// The init function is provided to explicitly initialize the variable value
|
|
+// from the env. var to avoid C++ global construction that might defer its
|
|
+// initialization after a malloc/new call.
|
|
+static inline void InitTCMallocTransferNumObjects()
|
|
+{
|
|
+ if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
|
|
+ const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ");
|
|
+ FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
|
|
+ strtol(envval, NULL, 10);
|
|
+ }
|
|
+}
|
|
+
|
|
// Note: the following only works for "n"s that fit in 32-bits, but
|
|
// that is fine since we only use it for small sizes.
|
|
static inline int LgFloor(size_t n) {
|
|
@@ -90,13 +110,16 @@
|
|
// - We go to the central freelist too often and we have to acquire
|
|
// its lock each time.
|
|
// This value strikes a balance between the constraints above.
|
|
- if (num > 32) num = 32;
|
|
+ if (num > FLAGS_tcmalloc_transfer_num_objects)
|
|
+ num = FLAGS_tcmalloc_transfer_num_objects;
|
|
|
|
return num;
|
|
}
|
|
|
|
// Initialize the mapping arrays
|
|
void SizeMap::Init() {
|
|
+ InitTCMallocTransferNumObjects();
|
|
+
|
|
// Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
|
|
if (ClassIndex(0) < 0) {
|
|
Log(kCrash, __FILE__, __LINE__,
|
|
@@ -189,12 +212,56 @@
|
|
|
|
// Metadata allocator -- keeps stats about how many bytes allocated.
|
|
static uint64_t metadata_system_bytes_ = 0;
|
|
+static const size_t kMetadataAllocChunkSize = 8*1024*1024;
|
|
+static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
|
|
+// usually malloc uses larger alignments, but because metadata cannot
|
|
+// have any fancy simd types, aligning on pointer size seems fine
|
|
+static const size_t kMetadataAllignment = sizeof(void *);
|
|
+
|
|
+static char *metadata_chunk_alloc_;
|
|
+static size_t metadata_chunk_avail_;
|
|
+
|
|
+static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
|
|
+
|
|
void* MetaDataAlloc(size_t bytes) {
|
|
- void* result = TCMalloc_SystemAlloc(bytes, NULL);
|
|
- if (result != NULL) {
|
|
- metadata_system_bytes_ += bytes;
|
|
+ if (bytes >= kMetadataAllocChunkSize) {
|
|
+ void *rv = TCMalloc_SystemAlloc(bytes,
|
|
+ NULL, kMetadataAllignment);
|
|
+ if (rv != NULL) {
|
|
+ metadata_system_bytes_ += bytes;
|
|
+ }
|
|
+ return rv;
|
|
}
|
|
- return result;
|
|
+
|
|
+ SpinLockHolder h(&metadata_alloc_lock);
|
|
+
|
|
+ // the following works by essentially turning address to integer of
|
|
+ // log_2 kMetadataAllignment size and negating it. I.e. negated
|
|
+ // value + original value gets 0 and that's what we want modulo
|
|
+ // kMetadataAllignment. Note, we negate before masking higher bits
|
|
+ // off, otherwise we'd have to mask them off after negation anyways.
|
|
+ intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
|
|
+
|
|
+ if (metadata_chunk_avail_ < bytes + alignment) {
|
|
+ size_t real_size;
|
|
+ void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
|
|
+ &real_size, kMetadataAllignment);
|
|
+ if (ptr == NULL) {
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ metadata_chunk_alloc_ = static_cast<char *>(ptr);
|
|
+ metadata_chunk_avail_ = real_size;
|
|
+
|
|
+ alignment = 0;
|
|
+ }
|
|
+
|
|
+ void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
|
|
+ bytes += alignment;
|
|
+ metadata_chunk_alloc_ += bytes;
|
|
+ metadata_chunk_avail_ -= bytes;
|
|
+ metadata_system_bytes_ += bytes;
|
|
+ return rv;
|
|
}
|
|
|
|
uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
|
|
Only in gperftools-2.0/src: common.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h
|
|
--- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400
|
|
+++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400
|
|
@@ -80,7 +80,7 @@
|
|
static const size_t kMinAlign = 16;
|
|
#elif defined(TCMALLOC_ALIGN_8BYTES)
|
|
static const size_t kPageShift = 13;
|
|
-static const size_t kNumClasses = 93;
|
|
+static const size_t kNumClasses = 95;
|
|
// Unless we force to use 8 bytes alignment we use an alignment of
|
|
// at least 16 bytes to statisfy requirements for some SSE types.
|
|
// Keep in mind when using the 16 bytes alignment you can have a space
|
|
@@ -88,7 +88,7 @@
|
|
static const size_t kMinAlign = 8;
|
|
#else
|
|
static const size_t kPageShift = 13;
|
|
-static const size_t kNumClasses = 86;
|
|
+static const size_t kNumClasses = 88;
|
|
static const size_t kMinAlign = 16;
|
|
#endif
|
|
static const size_t kMaxThreadCacheSize = 4 << 20;
|
|
Only in gperftools-2.0/src: common.h.svn-r190
|
|
diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in
|
|
--- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400
|
|
+++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400
|
|
@@ -56,6 +56,9 @@
|
|
/* Define to 1 if you have the <features.h> header file. */
|
|
#undef HAVE_FEATURES_H
|
|
|
|
+/* Define to 1 if you have the `fork' function. */
|
|
+#undef HAVE_FORK
|
|
+
|
|
/* Define to 1 if you have the `geteuid' function. */
|
|
#undef HAVE_GETEUID
|
|
|
|
Only in gperftools-2.0/src: config.h.in.svn-r190
|
|
Only in gperftools-2.0/src: debugallocation.cc.svn-r190
|
|
Only in gperftools-2.0/src: getpc.h.svn-r190
|
|
Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190
|
|
Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190
|
|
Only in gperftools-2.0/src: heap-checker.cc.svn-r190
|
|
Only in gperftools-2.0/src: heap-profiler.cc.svn-r190
|
|
Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190
|
|
Only in gperftools-2.0/src: malloc_extension.cc.svn-r190
|
|
Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190
|
|
Only in gperftools-2.0/src: memory_region_map.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc
|
|
--- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400
|
|
+++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400
|
|
@@ -108,6 +108,8 @@
|
|
return AllocLarge(n); // May be NULL
|
|
}
|
|
|
|
+static const size_t kForcedCoalesceInterval = 128*1024*1024;
|
|
+
|
|
Span* PageHeap::New(Length n) {
|
|
ASSERT(Check());
|
|
ASSERT(n > 0);
|
|
@@ -116,6 +118,38 @@
|
|
if (result != NULL)
|
|
return result;
|
|
|
|
+ if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
|
|
+ && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
|
|
+ && (stats_.system_bytes / kForcedCoalesceInterval
|
|
+ != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
|
|
+ // We're about to grow heap, but there are lots of free pages.
|
|
+ // tcmalloc's design decision to keep unmapped and free spans
|
|
+ // separately and never coalesce them means that sometimes there
|
|
+ // can be free pages span of sufficient size, but it consists of
|
|
+ // "segments" of different type so page heap search cannot find
|
|
+ // it. In order to prevent growing heap and wasting memory in such
|
|
+ // case we're going to unmap all free pages. So that all free
|
|
+ // spans are maximally coalesced.
|
|
+ //
|
|
+ // We're also limiting 'rate' of going into this path to be at
|
|
+ // most once per 128 megs of heap growth. Otherwise programs that
|
|
+ // grow heap frequently (and that means by small amount) could be
|
|
+ // penalized with higher count of minor page faults.
|
|
+ //
|
|
+ // See also large_heap_fragmentation_unittest.cc and
|
|
+ // https://code.google.com/p/gperftools/issues/detail?id=368
|
|
+ ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
|
|
+
|
|
+ // then try again. If we are forced to grow heap because of large
|
|
+ // spans fragmentation and not because of problem described above,
|
|
+ // then at the very least we've just unmapped free but
|
|
+ // insufficiently big large spans back to OS. So in case of really
|
|
+ // unlucky memory fragmentation we'll be consuming virtual address
|
|
+ // space, but not real memory
|
|
+ result = SearchFreeAndLargeLists(n);
|
|
+ if (result != NULL) return result;
|
|
+ }
|
|
+
|
|
// Grow the heap and try again.
|
|
if (!GrowHeap(n)) {
|
|
ASSERT(Check());
|
|
Only in gperftools-2.0/src: page_heap.cc.svn-r190
|
|
Only in gperftools-2.0/src: page_heap.h.svn-r190
|
|
Only in gperftools-2.0/src: pprof.svn-r190
|
|
Only in gperftools-2.0/src: profiler.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc
|
|
--- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400
|
|
@@ -39,6 +39,39 @@
|
|
|
|
namespace tcmalloc {
|
|
|
|
+#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
|
|
+// These following two functions are registered via pthread_atfork to make
|
|
+// sure the central_cache locks remain in a consistent state in the forked
|
|
+// version of the thread.
|
|
+
|
|
+static
|
|
+void CentralCacheLockAll()
|
|
+{
|
|
+ Static::pageheap_lock()->Lock();
|
|
+ for (int i = 0; i < kNumClasses; ++i)
|
|
+ Static::central_cache()[i].Lock();
|
|
+}
|
|
+
|
|
+static
|
|
+void CentralCacheUnlockAll()
|
|
+{
|
|
+ for (int i = 0; i < kNumClasses; ++i)
|
|
+ Static::central_cache()[i].Unlock();
|
|
+ Static::pageheap_lock()->Unlock();
|
|
+}
|
|
+#endif
|
|
+
|
|
+static inline
|
|
+void SetupAtForkLocksHandler()
|
|
+{
|
|
+#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
|
|
+ pthread_atfork(CentralCacheLockAll, // parent calls before fork
|
|
+ CentralCacheUnlockAll, // parent calls after fork
|
|
+ CentralCacheUnlockAll); // child calls after fork
|
|
+#endif
|
|
+}
|
|
+
|
|
+
|
|
SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
|
|
SizeMap Static::sizemap_;
|
|
CentralFreeListPadded Static::central_cache_[kNumClasses];
|
|
@@ -49,6 +82,7 @@
|
|
StackTrace* Static::growth_stacks_ = NULL;
|
|
PageHeap* Static::pageheap_ = NULL;
|
|
|
|
+
|
|
void Static::InitStaticVars() {
|
|
sizemap_.Init();
|
|
span_allocator_.Init();
|
|
@@ -61,6 +95,8 @@
|
|
for (int i = 0; i < kNumClasses; ++i) {
|
|
central_cache_[i].Init(i);
|
|
}
|
|
+ SetupAtForkLocksHandler();
|
|
+
|
|
// It's important to have PageHeap allocated, not in static storage,
|
|
// so that HeapLeakChecker does not consider all the byte patterns stored
|
|
// in is caches as pointers that are sources of heap object liveness,
|
|
Only in gperftools-2.0/src: static_vars.h.svn-r190
|
|
Only in gperftools-2.0/src: symbolize.cc.svn-r190
|
|
Only in gperftools-2.0/src: system-alloc.cc.svn-r190
|
|
Only in gperftools-2.0/src: system-alloc.h.svn-r190
|
|
Only in gperftools-2.0/src: tcmalloc.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc
|
|
--- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400
|
|
@@ -38,13 +38,14 @@
|
|
#define GG_ULONGLONG(x) static_cast<uint64>(x)
|
|
|
|
template <class AtomicType>
|
|
-static void TestAtomicIncrement() {
|
|
+static void TestAtomicIncrement(AtomicType (*atomic_increment_func)
|
|
+ (volatile AtomicType*, AtomicType)) {
|
|
// For now, we just test single threaded execution
|
|
|
|
- // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
|
|
+ // use a guard value to make sure the atomic_increment_func doesn't go
|
|
// outside the expected address bounds. This is in particular to
|
|
// test that some future change to the asm code doesn't cause the
|
|
- // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit
|
|
+ // 32-bit atomic_increment_func to do the wrong thing on 64-bit
|
|
// machines.
|
|
struct {
|
|
AtomicType prev_word;
|
|
@@ -60,47 +61,47 @@
|
|
s.count = 0;
|
|
s.next_word = next_word_value;
|
|
|
|
- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1));
|
|
+ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1));
|
|
ASSERT_EQ(1, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2));
|
|
+ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2));
|
|
ASSERT_EQ(3, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3));
|
|
+ ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3));
|
|
ASSERT_EQ(6, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3));
|
|
+ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3));
|
|
ASSERT_EQ(3, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2));
|
|
+ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2));
|
|
ASSERT_EQ(1, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
|
|
+ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1));
|
|
ASSERT_EQ(0, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
|
|
+ ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1));
|
|
ASSERT_EQ(-1, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4));
|
|
+ ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4));
|
|
ASSERT_EQ(-5, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
|
|
- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5));
|
|
+ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5));
|
|
ASSERT_EQ(0, s.count);
|
|
ASSERT_EQ(prev_word_value, s.prev_word);
|
|
ASSERT_EQ(next_word_value, s.next_word);
|
|
@@ -111,9 +112,10 @@
|
|
|
|
|
|
template <class AtomicType>
|
|
-static void TestCompareAndSwap() {
|
|
+static void TestCompareAndSwap(AtomicType (*compare_and_swap_func)
|
|
+ (volatile AtomicType*, AtomicType, AtomicType)) {
|
|
AtomicType value = 0;
|
|
- AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1);
|
|
+ AtomicType prev = (*compare_and_swap_func)(&value, 0, 1);
|
|
ASSERT_EQ(1, value);
|
|
ASSERT_EQ(0, prev);
|
|
|
|
@@ -122,21 +124,22 @@
|
|
const AtomicType k_test_val = (GG_ULONGLONG(1) <<
|
|
(NUM_BITS(AtomicType) - 2)) + 11;
|
|
value = k_test_val;
|
|
- prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5);
|
|
+ prev = (*compare_and_swap_func)(&value, 0, 5);
|
|
ASSERT_EQ(k_test_val, value);
|
|
ASSERT_EQ(k_test_val, prev);
|
|
|
|
value = k_test_val;
|
|
- prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5);
|
|
+ prev = (*compare_and_swap_func)(&value, k_test_val, 5);
|
|
ASSERT_EQ(5, value);
|
|
ASSERT_EQ(k_test_val, prev);
|
|
}
|
|
|
|
|
|
template <class AtomicType>
|
|
-static void TestAtomicExchange() {
|
|
+static void TestAtomicExchange(AtomicType (*atomic_exchange_func)
|
|
+ (volatile AtomicType*, AtomicType)) {
|
|
AtomicType value = 0;
|
|
- AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1);
|
|
+ AtomicType new_value = (*atomic_exchange_func)(&value, 1);
|
|
ASSERT_EQ(1, value);
|
|
ASSERT_EQ(0, new_value);
|
|
|
|
@@ -145,28 +148,29 @@
|
|
const AtomicType k_test_val = (GG_ULONGLONG(1) <<
|
|
(NUM_BITS(AtomicType) - 2)) + 11;
|
|
value = k_test_val;
|
|
- new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
|
|
+ new_value = (*atomic_exchange_func)(&value, k_test_val);
|
|
ASSERT_EQ(k_test_val, value);
|
|
ASSERT_EQ(k_test_val, new_value);
|
|
|
|
value = k_test_val;
|
|
- new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
|
|
+ new_value = (*atomic_exchange_func)(&value, 5);
|
|
ASSERT_EQ(5, value);
|
|
ASSERT_EQ(k_test_val, new_value);
|
|
}
|
|
|
|
|
|
template <class AtomicType>
|
|
-static void TestAtomicIncrementBounds() {
|
|
+static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func)
|
|
+ (volatile AtomicType*, AtomicType)) {
|
|
// Test increment at the half-width boundary of the atomic type.
|
|
// It is primarily for testing at the 32-bit boundary for 64-bit atomic type.
|
|
AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
|
|
AtomicType value = test_val - 1;
|
|
- AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
|
|
+ AtomicType new_value = (*atomic_increment_func)(&value, 1);
|
|
ASSERT_EQ(test_val, value);
|
|
ASSERT_EQ(value, new_value);
|
|
|
|
- base::subtle::NoBarrier_AtomicIncrement(&value, -1);
|
|
+ (*atomic_increment_func)(&value, -1);
|
|
ASSERT_EQ(test_val - 1, value);
|
|
}
|
|
|
|
@@ -222,16 +226,28 @@
|
|
|
|
template <class AtomicType>
|
|
static void TestAtomicOps() {
|
|
- TestCompareAndSwap<AtomicType>();
|
|
- TestAtomicExchange<AtomicType>();
|
|
- TestAtomicIncrementBounds<AtomicType>();
|
|
+ TestCompareAndSwap<AtomicType>(base::subtle::NoBarrier_CompareAndSwap);
|
|
+ TestCompareAndSwap<AtomicType>(base::subtle::Acquire_CompareAndSwap);
|
|
+ TestCompareAndSwap<AtomicType>(base::subtle::Release_CompareAndSwap);
|
|
+
|
|
+ TestAtomicExchange<AtomicType>(base::subtle::NoBarrier_AtomicExchange);
|
|
+ TestAtomicExchange<AtomicType>(base::subtle::Acquire_AtomicExchange);
|
|
+ TestAtomicExchange<AtomicType>(base::subtle::Release_AtomicExchange);
|
|
+
|
|
+ TestAtomicIncrementBounds<AtomicType>(
|
|
+ base::subtle::NoBarrier_AtomicIncrement);
|
|
+ TestAtomicIncrementBounds<AtomicType>(
|
|
+ base::subtle::Barrier_AtomicIncrement);
|
|
+
|
|
TestStore<AtomicType>();
|
|
TestLoad<AtomicType>();
|
|
}
|
|
|
|
int main(int argc, char** argv) {
|
|
- TestAtomicIncrement<AtomicWord>();
|
|
- TestAtomicIncrement<Atomic32>();
|
|
+ TestAtomicIncrement<AtomicWord>(base::subtle::NoBarrier_AtomicIncrement);
|
|
+ TestAtomicIncrement<AtomicWord>(base::subtle::Barrier_AtomicIncrement);
|
|
+ TestAtomicIncrement<Atomic32>(base::subtle::NoBarrier_AtomicIncrement);
|
|
+ TestAtomicIncrement<Atomic32>(base::subtle::Barrier_AtomicIncrement);
|
|
|
|
TestAtomicOps<AtomicWord>();
|
|
TestAtomicOps<Atomic32>();
|
|
@@ -248,8 +264,10 @@
|
|
// If we ever *do* want to enable this, try adding -msse (or -mmmx?)
|
|
// to the CXXFLAGS in Makefile.am.
|
|
#if 0 and defined(BASE_HAS_ATOMIC64)
|
|
- TestAtomicIncrement<base::subtle::Atomic64>();
|
|
- TestAtomicOps<base::subtle::Atomic64>();
|
|
+ TestAtomicIncrement<base::subtle::Atomic64>(
|
|
+ base::subtle::NoBarrier_AtomicIncrement);
|
|
+ TestAtomicIncrement<base::subtle::Atomic64>(
|
|
+ base::subtle::Barrier_AtomicIncrement);
|
|
#endif
|
|
|
|
printf("PASS\n");
|
|
Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc
|
|
--- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc 1969-12-31 19:00:00.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc 2013-06-04 10:16:58.073841689 -0400
|
|
@@ -0,0 +1,62 @@
|
|
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
|
|
+// Redistribution and use in source and binary forms, with or without
|
|
+// modification, are permitted provided that the following conditions are
|
|
+// met:
|
|
+//
|
|
+// * Redistributions of source code must retain the above copyright
|
|
+// notice, this list of conditions and the following disclaimer.
|
|
+// * Redistributions in binary form must reproduce the above
|
|
+// copyright notice, this list of conditions and the following disclaimer
|
|
+// in the documentation and/or other materials provided with the
|
|
+// distribution.
|
|
+// * Neither the name of Google Inc. nor the names of its
|
|
+// contributors may be used to endorse or promote products derived from
|
|
+// this software without specific prior written permission.
|
|
+//
|
|
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+
|
|
+// This is a unit test for exercising fragmentation of large (over 1
|
|
+// meg) page spans. It makes sure that allocations/releases of
|
|
+// increasing memory chunks do not blow up memory
|
|
+// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368
|
|
+
|
|
+
|
|
+#include <stddef.h>
|
|
+#include <stdlib.h>
|
|
+#include <stdio.h>
|
|
+
|
|
+#include "base/logging.h"
|
|
+#include "common.h"
|
|
+#include <gperftools/malloc_extension.h>
|
|
+
|
|
+
|
|
+int main (int argc, char** argv) {
|
|
+ for (int pass = 1; pass <= 3; pass++) {
|
|
+ size_t size = 100*1024*1024;
|
|
+ while (size < 500*1024*1024) {
|
|
+ void *ptr = malloc(size);
|
|
+ free(ptr);
|
|
+ size += 20000;
|
|
+
|
|
+ size_t heap_size = static_cast<size_t>(-1);
|
|
+ MallocExtension::instance()->GetNumericProperty("generic.heap_size",
|
|
+ &heap_size);
|
|
+
|
|
+
|
|
+ CHECK_LT(heap_size, 1*1024*1024*1024);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ printf("PASS\n");
|
|
+ return 0;
|
|
+}
|
|
diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c
|
|
--- gperftools-2.0/src/tests/malloc_extension_c_test.c 2012-02-03 14:18:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c 2013-06-04 10:16:58.077841689 -0400
|
|
@@ -59,6 +59,16 @@
|
|
g_delete_hook_calls++;
|
|
}
|
|
|
|
+static
|
|
+void *forced_malloc(size_t size)
|
|
+{
|
|
+ void *rv = malloc(size);
|
|
+ if (!rv) {
|
|
+ FAIL("malloc is not supposed to fail here");
|
|
+ }
|
|
+ return rv;
|
|
+}
|
|
+
|
|
void TestMallocHook(void) {
|
|
/* TODO(csilvers): figure out why we get:
|
|
* E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken!
|
|
@@ -78,8 +88,9 @@
|
|
if (!MallocHook_AddDeleteHook(&TestDeleteHook)) {
|
|
FAIL("Failed to add delete hook");
|
|
}
|
|
- free(malloc(10));
|
|
- free(malloc(20));
|
|
+
|
|
+ free(forced_malloc(10));
|
|
+ free(forced_malloc(20));
|
|
if (g_new_hook_calls != 2) {
|
|
FAIL("Wrong number of calls to the new hook");
|
|
}
|
|
Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190
|
|
Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190
|
|
Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190
|
|
Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190
|
|
diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc
|
|
--- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2013-06-04 10:20:21.147844736 -0400
|
|
+++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc 2013-06-04 10:16:58.073841689 -0400
|
|
@@ -725,7 +725,7 @@
|
|
// Note the ... in the hook signature: we don't care what arguments
|
|
// the hook takes.
|
|
#define MAKE_HOOK_CALLBACK(hook_type) \
|
|
- static int g_##hook_type##_calls = 0; \
|
|
+ static volatile int g_##hook_type##_calls = 0; \
|
|
static void IncrementCallsTo##hook_type(...) { \
|
|
g_##hook_type##_calls++; \
|
|
} \
|
|
@@ -760,7 +760,7 @@
|
|
CHECK((p % sizeof(void*)) == 0);
|
|
CHECK((p % sizeof(double)) == 0);
|
|
|
|
- // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
|
|
+ // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
|
|
// alignment for large enough objects
|
|
if (size >= kMinAlign) {
|
|
CHECK((p % kMinAlign) == 0);
|
|
Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh
|
|
--- gperftools-2.0/src/tests/tcmalloc_unittest.sh 1969-12-31 19:00:00.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh 2013-06-04 10:16:58.075841689 -0400
|
|
@@ -0,0 +1,68 @@
|
|
+#!/bin/sh
|
|
+
|
|
+# Copyright (c) 2013, Google Inc.
|
|
+# All rights reserved.
|
|
+#
|
|
+# Redistribution and use in source and binary forms, with or without
|
|
+# modification, are permitted provided that the following conditions are
|
|
+# met:
|
|
+#
|
|
+# * Redistributions of source code must retain the above copyright
|
|
+# notice, this list of conditions and the following disclaimer.
|
|
+# * Redistributions in binary form must reproduce the above
|
|
+# copyright notice, this list of conditions and the following disclaimer
|
|
+# in the documentation and/or other materials provided with the
|
|
+# distribution.
|
|
+# * Neither the name of Google Inc. nor the names of its
|
|
+# contributors may be used to endorse or promote products derived from
|
|
+# this software without specific prior written permission.
|
|
+#
|
|
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
+
|
|
+# ---
|
|
+# Author: Adhemerval Zanella
|
|
+#
|
|
+# Runs the tcmalloc_unittest with various environment variables.
|
|
+# This is necessary because tuning some environment variables
|
|
+# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program
|
|
+# behavior, just performance.
|
|
+
|
|
+BINDIR="${BINDIR:-.}"
|
|
+TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest"
|
|
+
|
|
+TMPDIR=/tmp/tcmalloc_unittest
|
|
+rm -rf $TMPDIR || exit 2
|
|
+mkdir $TMPDIR || exit 3
|
|
+
|
|
+# $1: value for the TCMALLOC_TRANSFER_NUM_OBJ env. var. (empty: leave it unchanged)
|
|
+run_check_transfer_num_obj() {
|
|
+ [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1"
|
|
+
|
|
+ echo -n "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... "
|
|
+ if $TCMALLOC_UNITTEST > $TMPDIR/output 2>&1; then
|
|
+ echo "OK"
|
|
+ else
|
|
+ echo "FAILED"
|
|
+ echo "Output from the failed run:"
|
|
+ echo "----"
|
|
+ cat $TMPDIR/output
|
|
+ echo "----"
|
|
+ exit 4
|
|
+ fi
|
|
+}
|
|
+
|
|
+run_check_transfer_num_obj ""
|
|
+run_check_transfer_num_obj "40"
|
|
+run_check_transfer_num_obj "4096"
|
|
+
|
|
+echo "PASS"
|
|
Only in gperftools-2.0/src: thread_cache.cc.svn-r190
|
|
Only in gperftools-2.0/src: thread_cache.h.svn-r190
|
|
diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h
|
|
--- gperftools-2.0/src/windows/mingw.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/mingw.h 2013-06-04 10:16:57.682841683 -0400
|
|
@@ -60,6 +60,8 @@
|
|
// pretend the pthreads wrapper doesn't exist, even when it does.
|
|
#undef HAVE_PTHREAD
|
|
|
|
+#define HAVE_PID_T
|
|
+
|
|
#include "windows/port.h"
|
|
|
|
#endif /* __MINGW32__ */
|
|
diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc
|
|
--- gperftools-2.0/src/windows/patch_functions.cc 2012-02-03 14:18:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/patch_functions.cc 2013-06-04 10:16:57.683841683 -0400
|
|
@@ -85,7 +85,7 @@
|
|
#include <windows.h>
|
|
#include <stdio.h>
|
|
#include <malloc.h> // for _msize and _expand
|
|
-#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
|
|
+#include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
|
|
#include <set>
|
|
#include <map>
|
|
#include <vector>
|
|
Only in gperftools-2.0/src/windows: port.cc.svn-r190
|
|
diff -urP gperftools-2.0/src/windows/port.h gperftools-2.0-svn218/src/windows/port.h
|
|
--- gperftools-2.0/src/windows/port.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/port.h 2013-06-04 10:16:57.683841683 -0400
|
|
@@ -390,7 +390,10 @@
|
|
|
|
/* ----------------------------------- SYSTEM/PROCESS */
|
|
|
|
+#ifndef HAVE_PID_T
|
|
typedef int pid_t;
|
|
+#endif
|
|
+
|
|
#if __STDC__ && !defined(__MINGW32__)
|
|
inline pid_t getpid(void) { return _getpid(); }
|
|
#endif
|
|
diff -urP gperftools-2.0/src/windows/preamble_patcher.cc gperftools-2.0-svn218/src/windows/preamble_patcher.cc
|
|
--- gperftools-2.0/src/windows/preamble_patcher.cc 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/preamble_patcher.cc 2013-06-04 10:16:57.601841682 -0400
|
|
@@ -103,6 +103,7 @@
|
|
new_target = target + 2 + relative_offset;
|
|
} else if (target[0] == ASM_JMP32ABS_0 &&
|
|
target[1] == ASM_JMP32ABS_1) {
|
|
+ jmp32rel:
|
|
// Visual studio seems to sometimes do it this way instead of the
|
|
// previous way. Not sure what the rules are, but it was happening
|
|
// with operator new in some binaries.
|
|
@@ -118,6 +119,18 @@
|
|
memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
|
|
}
|
|
new_target = reinterpret_cast<unsigned char*>(*new_target_v);
|
|
+ } else if (kIs64BitBinary && target[0] == ASM_REXW
|
|
+ && target[1] == ASM_JMP32ABS_0
|
|
+ && target[2] == ASM_JMP32ABS_1) {
|
|
+ // In Visual Studio 2012 we're seeing a jump like this:
|
|
+ // rex.W jmpq *0x11d019(%rip)
|
|
+ //
|
|
+ // according to docs I have, rex prefix is actually unneeded and
|
|
+ // can be ignored. I.e. docs say for jumps like that operand
|
|
+ // already defaults to 64-bit. But clearly it breaks abs. jump
|
|
+ // detection above and we just skip rex
|
|
+ target++;
|
|
+ goto jmp32rel;
|
|
} else {
|
|
break;
|
|
}
|
|
@@ -535,6 +548,12 @@
|
|
return (*(target) & 0x70) == 0x70 && instruction_size == 2;
|
|
}
|
|
|
|
+bool PreamblePatcher::IsShortJump(
|
|
+ unsigned char* target,
|
|
+ unsigned int instruction_size) {
|
|
+ return target[0] == 0xeb && instruction_size == 2;
|
|
+}
|
|
+
|
|
bool PreamblePatcher::IsNearConditionalJump(
|
|
unsigned char* target,
|
|
unsigned int instruction_size) {
|
|
@@ -575,7 +594,9 @@
|
|
unsigned char* target,
|
|
unsigned int* target_bytes,
|
|
unsigned int target_size) {
|
|
- unsigned char* original_jump_dest = (source + 2) + source[1];
|
|
+ // note: rel8 offset is signed. Thus we need to ask for signed char
|
|
+ // to get negative offsets right
|
|
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
|
|
unsigned char* stub_jump_from = target + 6;
|
|
__int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
|
|
if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
|
|
@@ -597,6 +618,36 @@
|
|
reinterpret_cast<void*>(&fixup_jump_offset), 4);
|
|
}
|
|
|
|
+ return SIDESTEP_SUCCESS;
|
|
+}
|
|
+
|
|
+SideStepError PreamblePatcher::PatchShortJump(
|
|
+ unsigned char* source,
|
|
+ unsigned int instruction_size,
|
|
+ unsigned char* target,
|
|
+ unsigned int* target_bytes,
|
|
+ unsigned int target_size) {
|
|
+ // note: rel8 offset is _signed_. Thus we need signed char here.
|
|
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
|
|
+ unsigned char* stub_jump_from = target + 5;
|
|
+ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
|
|
+ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
|
|
+ SIDESTEP_ASSERT(false &&
|
|
+ "Unable to fix up short jump because target"
|
|
+ " is too far away.");
|
|
+ return SIDESTEP_JUMP_INSTRUCTION;
|
|
+ }
|
|
+
|
|
+ *target_bytes = 5;
|
|
+ if (target_size > *target_bytes) {
|
|
+ // Convert the short jump to a near jump.
|
|
+ //
|
|
+ // e9 xx xx xx xx = jmp rel32off
|
|
+ target[0] = 0xe9;
|
|
+ memcpy(reinterpret_cast<void*>(target + 1),
|
|
+ reinterpret_cast<void*>(&fixup_jump_offset), 4);
|
|
+ }
|
|
+
|
|
return SIDESTEP_SUCCESS;
|
|
}
|
|
|
|
diff -urP gperftools-2.0/src/windows/preamble_patcher.h gperftools-2.0-svn218/src/windows/preamble_patcher.h
|
|
--- gperftools-2.0/src/windows/preamble_patcher.h 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/preamble_patcher.h 2013-06-04 10:16:57.601841682 -0400
|
|
@@ -467,6 +467,8 @@
|
|
static bool IsShortConditionalJump(unsigned char* target,
|
|
unsigned int instruction_size);
|
|
|
|
+ static bool IsShortJump(unsigned char *target, unsigned int instruction_size);
|
|
+
|
|
// Helper routine that determines if a target instruction is a near
|
|
// conditional jump.
|
|
//
|
|
@@ -547,6 +549,12 @@
|
|
unsigned int* target_bytes,
|
|
unsigned int target_size);
|
|
|
|
+ static SideStepError PatchShortJump(unsigned char* source,
|
|
+ unsigned int instruction_size,
|
|
+ unsigned char* target,
|
|
+ unsigned int* target_bytes,
|
|
+ unsigned int target_size);
|
|
+
|
|
// Helper routine that converts an instruction that will convert various
|
|
// jump-like instructions to corresponding instructions in the target buffer.
|
|
// What this routine does is fix up the relative offsets contained in jump
|
|
diff -urP gperftools-2.0/src/windows/preamble_patcher_with_stub.cc gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc
|
|
--- gperftools-2.0/src/windows/preamble_patcher_with_stub.cc 2012-02-02 16:36:23.000000000 -0500
|
|
+++ gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc 2013-06-04 10:16:57.682841683 -0400
|
|
@@ -150,6 +150,11 @@
|
|
preamble_stub + stub_bytes,
|
|
&jump_bytes,
|
|
stub_size - stub_bytes);
|
|
+ } else if (IsShortJump(target + preamble_bytes, cur_bytes)) {
|
|
+ jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes,
|
|
+ preamble_stub + stub_bytes,
|
|
+ &jump_bytes,
|
|
+ stub_size - stub_bytes);
|
|
} else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
|
|
IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
|
|
IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
|
|
Only in gperftools-2.0/src/windows: TODO.svn-r190
|