gperftools/gperftools-2.0-svn190-to-svn218.patch
2013-06-04 12:01:26 -04:00

1973 lines
83 KiB
Diff

Only in gperftools-2.0: aclocal.m4
Only in gperftools-2.0: aclocal.m4.svn-r190
diff -urP gperftools-2.0/autogen.sh gperftools-2.0-svn218/autogen.sh
--- gperftools-2.0/autogen.sh 2013-06-04 10:20:21.135844736 -0400
+++ gperftools-2.0-svn218/autogen.sh 2013-06-04 10:16:58.887841701 -0400
@@ -1,54 +1,3 @@
#!/bin/sh
-# Before using, you should figure out all the .m4 macros that your
-# configure.m4 script needs and make sure they exist in the m4/
-# directory.
-#
-# These are the files that this script might edit:
-# aclocal.m4 configure Makefile.in src/config.h.in \
-# depcomp config.guess config.sub install-sh missing mkinstalldirs \
-# ltmain.sh
-#
-# Here's a command you can run to see what files aclocal will import:
-# aclocal -I ../autoconf --output=- | sed -n 's/^m4_include..\([^]]*\).*/\1/p'
-
-set -ex
-rm -rf autom4te.cache
-
-trap 'rm -f aclocal.m4.tmp' EXIT
-
-# Returns the first binary in $* that exists, or the last arg, if none exists.
-WhichOf() {
- for candidate in "$@"; do
- if "$candidate" --version >/dev/null 2>&1; then
- echo "$candidate"
- return
- fi
- done
- echo "$candidate" # the last one in $@
-}
-
-# Use version 1.9 of aclocal and automake if available.
-ACLOCAL=`WhichOf aclocal-1.9 aclocal`
-AUTOMAKE=`WhichOf automake-1.9 automake`
-LIBTOOLIZE=`WhichOf glibtoolize libtoolize15 libtoolize14 libtoolize`
-
-# aclocal tries to overwrite aclocal.m4 even if the contents haven't
-# changed, which is annoying when the file is not open for edit (in
-# p4). We work around this by writing to a temp file and just
-# updating the timestamp if the file hasn't change.
-"$ACLOCAL" --force -I m4 --output=aclocal.m4.tmp
-if cmp aclocal.m4.tmp aclocal.m4; then
- touch aclocal.m4 # pretend that we regenerated the file
- rm -f aclocal.m4.tmp
-else
- mv aclocal.m4.tmp aclocal.m4 # we did set -e above, so we die if this fails
-fi
-
-grep -q '^[^#]*AC_PROG_LIBTOOL' configure.ac && "$LIBTOOLIZE" -c -f
-autoconf -f -W all,no-obsolete
-autoheader -f -W all
-"$AUTOMAKE" -a -c -f -W all
-
-rm -rf autom4te.cache
-exit 0
+autoreconf -i
Only in gperftools-2.0: autogen.sh.svn-r190
Only in gperftools-2.0: compile
Only in gperftools-2.0: config.guess
Only in gperftools-2.0: config.sub
Only in gperftools-2.0: configure
diff -urP gperftools-2.0/configure.ac gperftools-2.0-svn218/configure.ac
--- gperftools-2.0/configure.ac 2013-06-04 10:20:21.138844736 -0400
+++ gperftools-2.0-svn218/configure.ac 2013-06-04 10:16:58.805841700 -0400
@@ -99,28 +99,7 @@
[gpt_cv_objcopy_weaken=no])
AM_CONDITIONAL(HAVE_OBJCOPY_WEAKEN, test $gpt_cv_objcopy_weaken = yes)
-case $host_os in
- *mingw*)
- # Disabling fast install keeps libtool from creating wrapper scripts
- # around the executables it builds. Such scripts have caused failures on
- # MinGW. Using this option means an extra link step is executed during
- # "make install".
- _LT_SET_OPTION([LT_INIT],[disable-fast-install])
-AC_DIAGNOSE([obsolete],[AC_DISABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
-the `disable-fast-install' option into LT_INIT's first parameter.])
-
- ;;
- *)
- _LT_SET_OPTION([LT_INIT],[fast-install])
-AC_DIAGNOSE([obsolete],[AC_ENABLE_FAST_INSTALL: Remove this warning and the call to _LT_SET_OPTION when you put
-the `fast-install' option into LT_INIT's first parameter.])
-
- ;;
-esac
-
-LT_INIT
-AC_SUBST(LIBTOOL_DEPS)
-AM_CONDITIONAL(USE_LIBTOOL, test "x$LIBTOOL" != "x")
+LT_INIT([])
AC_C_INLINE
AX_C___ATTRIBUTE__
@@ -134,6 +113,7 @@
AC_CHECK_TYPES([Elf32_Versym],,, [#include <elf.h>]) # for vdso_support.h
AC_CHECK_FUNCS(sbrk) # for tcmalloc to get memory
AC_CHECK_FUNCS(geteuid) # for turning off services when run as root
+AC_CHECK_FUNCS(fork) # for the pthread_atfork setup
AC_CHECK_HEADERS(features.h) # for vdso_support.h
AC_CHECK_HEADERS(malloc.h) # some systems define stuff there, others not
AC_CHECK_HEADERS(sys/malloc.h) # where some versions of OS X put malloc.h
@@ -183,6 +163,11 @@
# This workaround comes from
# http://cygwin.com/ml/cygwin/2004-11/msg00138.html
case "$host" in
+ *-*-mingw*)
+ dnl mingw doesn't have mmap, not worth
+ dnl checking. Especially given that mingw can be a
+ dnl cross-compiler
+ ;;
*-*-cygwin*)
ac_cv_func_mmap_fixed_mapped=yes
AC_DEFINE(HAVE_MMAP, 1,
@@ -310,10 +295,18 @@
# Note, however, that our code tickles a bug in gcc < 4.1.2
# involving TLS and -fPIC (which our libraries will use) on x86:
# http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
+#
+# Also, mingw compiles __thread, but the resulting code fails to work
+# correctly in at least some not-so-ancient versions:
+# http://mingw-users.1079350.n2.nabble.com/gcc-4-4-multi-threaded-exception-handling-amp-thread-specifier-not-working-td3440749.html
AC_MSG_CHECKING([for __thread])
AC_LINK_IFELSE([AC_LANG_PROGRAM([#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && ((__GNUC__ < 4) || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) || (__GNUC__ == 4 && __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ < 2))
#error gcc has this bug: http://gcc.gnu.org/ml/gcc-bugs/2006-09/msg02275.html
-#endif], [static __thread int p = 0])],
+#endif
+#if defined(__MINGW32__)
+#error mingw doesn't really support tls
+#endif
+], [static __thread int p = 0])],
[AC_DEFINE(HAVE_TLS, 1,
Define to 1 if compiler supports __thread)
AC_MSG_RESULT([yes])],
Only in gperftools-2.0: configure.ac.svn-r190
Only in gperftools-2.0: configure.svn-r190
Only in gperftools-2.0: depcomp
Only in gperftools-2.0/doc: cpuprofile.html.svn-r190
Only in gperftools-2.0/doc: heapprofile.html.svn-r190
Only in gperftools-2.0/doc: pprof.see_also.svn-r190
diff -urP gperftools-2.0/INSTALL gperftools-2.0-svn218/INSTALL
--- gperftools-2.0/INSTALL 2012-02-03 14:40:32.000000000 -0500
+++ gperftools-2.0-svn218/INSTALL 2013-06-04 10:16:58.886841701 -0400
@@ -8,6 +8,28 @@
Perftools-Specific Install Notes
================================
+*** Building from source repository
+
+As of 2.1 gperftools does not have configure and other autotools
+products checked into its source repository. This is common practice
+for projects using autotools.
+
+NOTE: Source releases (.tar.gz that you download from
+code.google.com/p/gperftools) still have all required files just as
+before. Nothing has changed w.r.t. building from .tar.gz releases.
+
+But, in order to build gperftools checked out from subversion
+repository you need to have autoconf, automake and libtool
+installed. And before running ./configure you have to generate it (and
+a bunch of other files) by running ./autogen.sh script. That script
+will take care of calling correct autotools programs in correct order.
+
+If you're a maintainer then it's business as usual too. Just run make
+dist (or, preferably, make distcheck) and it'll produce .tar.gz or
+.tar.bz2 with all autotools magic already included, so that users can
+build our software without having autotools.
+
+
*** NOTE FOR 64-BIT LINUX SYSTEMS
The glibc built-in stack-unwinder on 64-bit systems has some problems
Only in gperftools-2.0: install-sh
Only in gperftools-2.0: libtool
Only in gperftools-2.0: ltmain.sh
Only in gperftools-2.0/m4: libtool.m4
Only in gperftools-2.0/m4: libtool.m4.svn-r190
Only in gperftools-2.0/m4: lt~obsolete.m4
Only in gperftools-2.0/m4: ltoptions.m4
Only in gperftools-2.0/m4: ltsugar.m4
Only in gperftools-2.0/m4: ltversion.m4
diff -urP gperftools-2.0/Makefile.am gperftools-2.0-svn218/Makefile.am
--- gperftools-2.0/Makefile.am 2013-06-04 10:20:21.140844736 -0400
+++ gperftools-2.0-svn218/Makefile.am 2013-06-04 10:16:58.887841701 -0400
@@ -221,7 +221,7 @@
src/windows/preamble_patcher.cc \
src/windows/preamble_patcher_with_stub.cc
# patch_functions.cc uses Psapi.lib. MSVC has a #pragma for that, but not us.
-libwindows_la_LIBADD = -lPsapi
+libwindows_la_LIBADD = -lpsapi
SPINLOCK_INCLUDES = src/base/spinlock.h \
src/base/spinlock_internal.h \
@@ -238,6 +238,7 @@
noinst_LTLIBRARIES += libspinlock.la
libspinlock_la_SOURCES = src/base/spinlock.cc \
src/base/spinlock_internal.cc \
+ src/base/atomicops-internals-x86.cc \
$(SPINLOCK_INCLUDES)
LIBSPINLOCK = libwindows.la libspinlock.la libsysinfo.la liblogging.la
@@ -355,7 +356,7 @@
$(STACKTRACE_INCLUDES)
libstacktrace_la_LIBADD = $(UNWIND_LIBS) $(LIBSPINLOCK)
STACKTRACE_SYMBOLS = '(GetStackTrace|GetStackFrames|GetStackTraceWithContext|GetStackFramesWithContext)'
-libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS)
+libstacktrace_la_LDFLAGS = -export-symbols-regex $(STACKTRACE_SYMBOLS) $(AM_LDFLAGS)
### Unittests
TESTS += stacktrace_unittest
@@ -468,7 +469,7 @@
-DNO_HEAP_CHECK \
$(PTHREAD_CFLAGS) -DNDEBUG \
$(AM_CXXFLAGS) $(NO_EXCEPTIONS)
-libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS)
+libtcmalloc_minimal_internal_la_LDFLAGS = $(PTHREAD_CFLAGS) $(AM_LDFLAGS)
libtcmalloc_minimal_internal_la_LIBADD = $(PTHREAD_LIBS) $(LIBSPINLOCK)
lib_LTLIBRARIES += libtcmalloc_minimal.la
@@ -477,7 +478,7 @@
libtcmalloc_minimal_la_CXXFLAGS = -DNO_TCMALLOC_SAMPLES \
$(PTHREAD_CFLAGS) -DNDEBUG $(AM_CXXFLAGS)
# -version-info gets passed to libtool
-libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@
+libtcmalloc_minimal_la_LDFLAGS = $(PTHREAD_CFLAGS) -version-info @TCMALLOC_SO_VERSION@ $(AM_LDFLAGS)
libtcmalloc_minimal_la_LIBADD = libtcmalloc_minimal_internal.la $(PTHREAD_LIBS)
# For windows, we're playing around with trying to do some stacktrace
@@ -539,6 +540,12 @@
tcmalloc_minimal_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
tcmalloc_minimal_large_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+TESTS += tcmalloc_minimal_large_heap_fragmentation_unittest
+tcmalloc_minimal_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
+tcmalloc_minimal_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
+tcmalloc_minimal_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+tcmalloc_minimal_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC_MINIMAL) $(PTHREAD_LIBS)
+
# This tests it works to LD_PRELOAD libtcmalloc (tests maybe_threads.cc)
# In theory this should work under mingw, but mingw has trouble running
# shell scripts that end in .exe. And it doesn't seem to build shared
@@ -898,8 +905,16 @@
### Unittests
-TESTS += tcmalloc_unittest
-TCMALLOC_UNITTEST_INCLUDES = src/config_for_unittests.h \
+TESTS += tcmalloc_unittest.sh$(EXEEXT)
+tcmalloc_unittest_sh_SOURCES = src/tests/tcmalloc_unittest.sh
+noinst_SCRIPTS += $(tcmalloc_unittest_sh_SOURCES)
+tcmalloc_unittest.sh$(EXEEXT): $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) \
+ tcmalloc_unittest
+ rm -f $@
+ cp -p $(top_srcdir)/$(tcmalloc_unittest_sh_SOURCES) $@
+
+noinst_PROGRAMS += tcmalloc_unittest
+tcmalloc_unittest_INCLUDES = src/config_for_unittests.h \
src/gperftools/malloc_extension.h
tcmalloc_unittest_SOURCES = src/tests/tcmalloc_unittest.cc \
src/tcmalloc.h \
@@ -956,6 +971,12 @@
tcmalloc_large_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
tcmalloc_large_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
+TESTS += tcmalloc_large_heap_fragmentation_unittest
+tcmalloc_large_heap_fragmentation_unittest_SOURCES = src/tests/large_heap_fragmentation_unittest.cc
+tcmalloc_large_heap_fragmentation_unittest_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
+tcmalloc_large_heap_fragmentation_unittest_LDFLAGS = $(PTHREAD_CFLAGS) $(TCMALLOC_FLAGS)
+tcmalloc_large_heap_fragmentation_unittest_LDADD = $(LIBTCMALLOC) $(PTHREAD_LIBS)
+
TESTS += raw_printer_test
raw_printer_test_SOURCES = src/tests/raw_printer_test.cc
raw_printer_test_CXXFLAGS = $(PTHREAD_CFLAGS) $(AM_CXXFLAGS)
Only in gperftools-2.0: Makefile.am.svn-r190
Only in gperftools-2.0: Makefile.in
Only in gperftools-2.0: Makefile.in.svn-r190
Only in gperftools-2.0: missing
Only in gperftools-2.0: mkinstalldirs
Only in gperftools-2.0: NEWS.svn-r190
diff -urP gperftools-2.0/src/base/atomicops.h gperftools-2.0-svn218/src/base/atomicops.h
--- gperftools-2.0/src/base/atomicops.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/atomicops.h 2013-06-04 10:16:58.375841694 -0400
@@ -50,6 +50,16 @@
// implementations on other archtectures will cause your code to break. If you
// do not know what you are doing, avoid these routines, and use a Mutex.
//
+// The following lower-level operations are typically useful only to people
+// implementing higher-level synchronization operations like spinlocks,
+// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
+// a store with appropriate memory-ordering instructions. "Acquire" operations
+// ensure that no later memory access can be reordered ahead of the operation.
+// "Release" operations ensure that no previous memory access can be reordered
+// after the operation. "Barrier" operations have both "Acquire" and "Release"
+// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
+// access.
+//
// It is incorrect to make direct assignments to/from an atomic variable.
// You should use one of the Load or Store routines. The NoBarrier
// versions are provided when no barriers are needed:
@@ -95,10 +105,10 @@
#include "base/atomicops-internals-arm-v6plus.h"
#elif defined(ARMV3)
#include "base/atomicops-internals-arm-generic.h"
-#elif defined(_WIN32)
-#include "base/atomicops-internals-windows.h"
#elif defined(__GNUC__) && (defined(__i386) || defined(__x86_64__))
#include "base/atomicops-internals-x86.h"
+#elif defined(_WIN32)
+#include "base/atomicops-internals-windows.h"
#elif defined(__linux__) && defined(__PPC__)
#include "base/atomicops-internals-linuxppc.h"
#else
@@ -149,6 +159,18 @@
reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
}
+inline AtomicWord Acquire_AtomicExchange(volatile AtomicWord* ptr,
+                                         AtomicWord new_value) {
+  return Acquire_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
+
+inline AtomicWord Release_AtomicExchange(volatile AtomicWord* ptr,
+                                         AtomicWord new_value) {
+  return Release_AtomicExchange(
+      reinterpret_cast<volatile AtomicWordCastType*>(ptr), new_value);
+}
+
// Atomically increment *ptr by "increment". Returns the new value of
// *ptr with the increment applied. This routine implies no memory
// barriers.
@@ -164,17 +186,6 @@
reinterpret_cast<volatile AtomicWordCastType*>(ptr), increment);
}
-// ------------------------------------------------------------------------
-// These following lower-level operations are typically useful only to people
-// implementing higher-level synchronization operations like spinlocks,
-// mutexes, and condition-variables. They combine CompareAndSwap(), a load, or
-// a store with appropriate memory-ordering instructions. "Acquire" operations
-// ensure that no later memory access can be reordered ahead of the operation.
-// "Release" operations ensure that no previous memory access can be reordered
-// after the operation. "Barrier" operations have both "Acquire" and "Release"
-// semantics. A MemoryBarrier() has "Barrier" semantics, but does no memory
-// access.
-// ------------------------------------------------------------------------
inline AtomicWord Acquire_CompareAndSwap(volatile AtomicWord* ptr,
AtomicWord old_value,
AtomicWord new_value) {
@@ -250,6 +261,8 @@
Atomic32 old_value,
Atomic32 new_value);
Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
+Atomic32 Release_AtomicExchange(volatile Atomic32* ptr, Atomic32 new_value);
Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr, Atomic32 increment);
Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment);
@@ -271,6 +284,8 @@
Atomic64 old_value,
Atomic64 new_value);
Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
+Atomic64 Release_AtomicExchange(volatile Atomic64* ptr, Atomic64 new_value);
Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr, Atomic64 increment);
diff -urP gperftools-2.0/src/base/atomicops-internals-arm-generic.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h
--- gperftools-2.0/src/base/atomicops-internals-arm-generic.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-generic.h 2013-06-04 10:16:58.378841694 -0400
@@ -89,6 +89,18 @@
return old_value;
}
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
for (;;) {
@@ -176,6 +188,18 @@
return 0;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ // pLinuxKernelCmpxchg already has acquire and release barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
NotImplementedFatalError("NoBarrier_AtomicIncrement");
diff -urP gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h
--- gperftools-2.0/src/base/atomicops-internals-arm-v6plus.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/atomicops-internals-arm-v6plus.h 2013-06-04 10:16:58.372841694 -0400
@@ -94,6 +94,28 @@
return old;
}
+inline void MemoryBarrier() {
+#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)
+  // ARMv6 has no dmb; mcr *reads* its register, so zero is passed as an input.
+  __asm__ __volatile__("mcr p15,0,%0,c7,c10,5" : : "r"(0) : "memory");
+#else
+  __asm__ __volatile__("dmb" : : : "memory");
+#endif
+}
+
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+ MemoryBarrier();
+ return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+                                       Atomic32 new_value) {
+  MemoryBarrier();
+  return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
Atomic32 tmp, res;
@@ -110,10 +132,6 @@
return res;
}
-inline void MemoryBarrier() {
- __asm__ __volatile__("dmb" : : : "memory");
-}
-
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
Atomic32 tmp, res;
@@ -220,6 +238,19 @@
return old;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ Atomic64 old_value = NoBarrier_AtomicExchange(ptr, new_value);
+ MemoryBarrier();
+ return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ MemoryBarrier();
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
int store_failed;
@@ -303,6 +334,18 @@
return 0;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ NotImplementedFatalError("Acquire_AtomicExchange");
+ return 0;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ NotImplementedFatalError("Release_AtomicExchange");
+ return 0;
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
NotImplementedFatalError("NoBarrier_AtomicIncrement");
diff -urP gperftools-2.0/src/base/atomicops-internals-linuxppc.h gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h
--- gperftools-2.0/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:20:21.141844736 -0400
+++ gperftools-2.0-svn218/src/base/atomicops-internals-linuxppc.h 2013-06-04 10:16:58.371841694 -0400
@@ -163,6 +163,26 @@
return old_value;
}
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
+ Atomic32 new_value) {
+ Atomic32 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap32Acquire(old_value, new_value,
+ const_cast<Atomic32*>(ptr)));
+ return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
+ Atomic32 new_value) {
+ Atomic32 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap32Release(old_value, new_value,
+ const_cast<Atomic32*>(ptr)));
+ return old_value;
+}
+
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
Atomic32 increment) {
return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
@@ -294,6 +314,26 @@
return old_value;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
+ Atomic64 new_value) {
+ Atomic64 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap64Acquire(old_value, new_value,
+ const_cast<Atomic64*>(ptr)));
+ return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
+ Atomic64 new_value) {
+ Atomic64 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap64Release(old_value, new_value,
+ const_cast<Atomic64*>(ptr)));
+ return old_value;
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
Atomic64 increment) {
return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
Only in gperftools-2.0/src/base: atomicops-internals-linuxppc.h.svn-r190
diff -urP gperftools-2.0/src/base/atomicops-internals-macosx.h gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h
--- gperftools-2.0/src/base/atomicops-internals-macosx.h 2012-02-02 16:36:22.000000000 -0500
+++ gperftools-2.0-svn218/src/base/atomicops-internals-macosx.h 2013-06-04 10:16:58.378841694 -0400
@@ -132,6 +132,21 @@
return old_value;
}
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32 *ptr,
+ Atomic32 new_value) {
+ Atomic32 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap32Barrier(old_value, new_value,
+ const_cast<Atomic32*>(ptr)));
+ return old_value;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32 *ptr,
+ Atomic32 new_value) {
+ return Acquire_AtomicExchange(ptr, new_value);
+}
+
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32 *ptr,
Atomic32 increment) {
return OSAtomicAdd32(increment, const_cast<Atomic32*>(ptr));
@@ -217,6 +232,21 @@
return old_value;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64 *ptr,
+ Atomic64 new_value) {
+ Atomic64 old_value;
+ do {
+ old_value = *ptr;
+ } while (!OSAtomicCompareAndSwap64Barrier(old_value, new_value,
+ const_cast<Atomic64*>(ptr)));
+ return old_value;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64 *ptr,
+ Atomic64 new_value) {
+ return Acquire_AtomicExchange(ptr, new_value);
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64 *ptr,
Atomic64 increment) {
return OSAtomicAdd64(increment, const_cast<Atomic64*>(ptr));
diff -urP gperftools-2.0/src/base/atomicops-internals-windows.h gperftools-2.0-svn218/src/base/atomicops-internals-windows.h
--- gperftools-2.0/src/base/atomicops-internals-windows.h 2013-06-04 10:20:21.142844736 -0400
+++ gperftools-2.0-svn218/src/base/atomicops-internals-windows.h 2013-06-04 10:16:58.378841694 -0400
@@ -137,6 +137,18 @@
return static_cast<Atomic32>(result);
}
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ // FastInterlockedExchange has both acquire and release memory barriers.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ // FastInterlockedExchange has both acquire and release memory barriers.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
return FastInterlockedExchangeAdd(
@@ -188,8 +200,7 @@
}
inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
- NoBarrier_AtomicExchange(ptr, value);
- // acts as a barrier in this implementation
+ Acquire_AtomicExchange(ptr, value);
}
inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
@@ -478,6 +489,18 @@
#endif // defined(_WIN64) || defined(__MINGW64__)
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ // FastInterlockedExchange has both acquire and release memory barriers.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ // FastInterlockedExchange has both acquire and release memory barriers.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
Atomic64 old_value,
Atomic64 new_value) {
Only in gperftools-2.0/src/base: atomicops-internals-windows.h.svn-r190
diff -urP gperftools-2.0/src/base/atomicops-internals-x86.h gperftools-2.0-svn218/src/base/atomicops-internals-x86.h
--- gperftools-2.0/src/base/atomicops-internals-x86.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/atomicops-internals-x86.h 2013-06-04 10:16:58.373841694 -0400
@@ -89,6 +89,21 @@
return new_value; // Now it's the previous value.
}
+inline Atomic32 Acquire_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ Atomic32 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+ __asm__ __volatile__("lfence" : : : "memory");
+ }
+ return old_val;
+}
+
+inline Atomic32 Release_AtomicExchange(volatile Atomic32* ptr,
+ Atomic32 new_value) {
+ // xchgl already has release memory barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
Atomic32 increment) {
Atomic32 temp = increment;
@@ -152,7 +167,7 @@
__asm__ __volatile__("mfence" : : : "memory");
} else { // mfence is faster but not present on PIII
Atomic32 x = 0;
- NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII
+ Acquire_AtomicExchange(&x, 0);
}
}
@@ -161,8 +176,7 @@
*ptr = value;
__asm__ __volatile__("mfence" : : : "memory");
} else {
- NoBarrier_AtomicExchange(ptr, value);
- // acts as a barrier on PIII
+ Acquire_AtomicExchange(ptr, value);
}
}
#endif
@@ -213,6 +227,21 @@
return new_value; // Now it's the previous value.
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_value);
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+ __asm__ __volatile__("lfence" : : : "memory");
+ }
+ return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_value) {
+ // xchgq already has release memory barrier semantics.
+ return NoBarrier_AtomicExchange(ptr, new_value);
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 temp = increment;
@@ -334,6 +363,20 @@
return old_val;
}
+inline Atomic64 Acquire_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_val) {
+ Atomic64 old_val = NoBarrier_AtomicExchange(ptr, new_val);
+ if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
+ __asm__ __volatile__("lfence" : : : "memory");
+ }
+ return old_val;
+}
+
+inline Atomic64 Release_AtomicExchange(volatile Atomic64* ptr,
+ Atomic64 new_val) {
+ return NoBarrier_AtomicExchange(ptr, new_val);
+}
+
inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
Atomic64 increment) {
Atomic64 old_val, new_val;
diff -urP gperftools-2.0/src/base/basictypes.h gperftools-2.0-svn218/src/base/basictypes.h
--- gperftools-2.0/src/base/basictypes.h 2013-06-04 10:20:21.142844736 -0400
+++ gperftools-2.0-svn218/src/base/basictypes.h 2013-06-04 10:16:58.372841694 -0400
@@ -334,10 +334,13 @@
#if defined(HAVE___ATTRIBUTE__)
# if (defined(__i386__) || defined(__x86_64__))
# define CACHELINE_ALIGNED __attribute__((aligned(64)))
-# elif defined(__arm__)
-# define CACHELINE_ALIGNED __attribute__((aligned(32)))
# elif (defined(__PPC__) || defined(__PPC64__))
# define CACHELINE_ALIGNED __attribute__((aligned(16)))
+# elif (defined(__arm__))
+# define CACHELINE_ALIGNED __attribute__((aligned(64)))
+ // some ARMs have shorter cache lines (ARM1176JZF-S is 32 bytes for example) but obviously 64-byte aligned implies 32-byte aligned
+# else
+# error Could not determine cache line length - unknown architecture
# endif
#else
# define CACHELINE_ALIGNED
Only in gperftools-2.0/src/base: basictypes.h.svn-r190
Only in gperftools-2.0/src/base: cycleclock.h.svn-r190
diff -urP gperftools-2.0/src/base/linux_syscall_support.h gperftools-2.0-svn218/src/base/linux_syscall_support.h
--- gperftools-2.0/src/base/linux_syscall_support.h 2013-06-04 10:20:21.142844736 -0400
+++ gperftools-2.0-svn218/src/base/linux_syscall_support.h 2013-06-04 10:16:58.379841694 -0400
@@ -148,6 +148,8 @@
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
#include <string.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
@@ -404,24 +406,24 @@
};
#elif defined(__x86_64__)
struct kernel_stat {
- unsigned long st_dev;
- unsigned long st_ino;
- unsigned long st_nlink;
+ uint64_t st_dev;
+ uint64_t st_ino;
+ uint64_t st_nlink;
unsigned st_mode;
unsigned st_uid;
unsigned st_gid;
unsigned __pad0;
- unsigned long st_rdev;
- long st_size;
- long st_blksize;
- long st_blocks;
- unsigned long st_atime_;
- unsigned long st_atime_nsec_;
- unsigned long st_mtime_;
- unsigned long st_mtime_nsec_;
- unsigned long st_ctime_;
- unsigned long st_ctime_nsec_;
- long __unused[3];
+ uint64_t st_rdev;
+ int64_t st_size;
+ int64_t st_blksize;
+ int64_t st_blocks;
+ uint64_t st_atime_;
+ uint64_t st_atime_nsec_;
+ uint64_t st_mtime_;
+ uint64_t st_mtime_nsec_;
+ uint64_t st_ctime_;
+ uint64_t st_ctime_nsec_;
+ int64_t __unused[3];
};
#elif defined(__PPC__)
struct kernel_stat {
@@ -1013,74 +1015,141 @@
* location (e.g. when using the clone() system call with the CLONE_VM
* option).
*/
+ #undef LSS_ENTRYPOINT
+ #define LSS_ENTRYPOINT "syscall\n"
+
+ /* The x32 ABI has 32 bit longs, but the syscall interface is 64 bit.
+ * We need to explicitly cast to an unsigned 64 bit type to avoid implicit
+ * sign extension. We can't cast pointers directly because those are
+ * 32 bits, and gcc will dump ugly warnings about casting from a pointer
+ * to an integer of a different size.
+ */
+ #undef LSS_SYSCALL_ARG
+ #define LSS_SYSCALL_ARG(a) ((uint64_t)(uintptr_t)(a))
+ #undef _LSS_RETURN
+ #define _LSS_RETURN(type, res, cast) \
+ do { \
+ if ((uint64_t)(res) >= (uint64_t)(-4095)) { \
+ LSS_ERRNO = -(res); \
+ res = -1; \
+ } \
+ return (type)(cast)(res); \
+ } while (0)
+ #undef LSS_RETURN
+ #define LSS_RETURN(type, res) _LSS_RETURN(type, res, uintptr_t)
+
+ #undef _LSS_BODY
+ #define _LSS_BODY(nr, type, name, cast, ...) \
+ long long __res; \
+ __asm__ __volatile__(LSS_BODY_ASM##nr LSS_ENTRYPOINT \
+ : "=a" (__res) \
+ : "0" (__NR_##name) LSS_BODY_ARG##nr(__VA_ARGS__) \
+ : LSS_BODY_CLOBBER##nr "r11", "rcx", "memory"); \
+ _LSS_RETURN(type, __res, cast)
#undef LSS_BODY
- #define LSS_BODY(type,name, ...) \
- long __res; \
- __asm__ __volatile__("syscall" : "=a" (__res) : "0" (__NR_##name), \
- ##__VA_ARGS__ : "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res)
+ #define LSS_BODY(nr, type, name, args...) \
+ _LSS_BODY(nr, type, name, uintptr_t, ## args)
+
+ #undef LSS_BODY_ASM0
+ #undef LSS_BODY_ASM1
+ #undef LSS_BODY_ASM2
+ #undef LSS_BODY_ASM3
+ #undef LSS_BODY_ASM4
+ #undef LSS_BODY_ASM5
+ #undef LSS_BODY_ASM6
+ #define LSS_BODY_ASM0
+ #define LSS_BODY_ASM1 LSS_BODY_ASM0
+ #define LSS_BODY_ASM2 LSS_BODY_ASM1
+ #define LSS_BODY_ASM3 LSS_BODY_ASM2
+ #define LSS_BODY_ASM4 LSS_BODY_ASM3 "movq %5,%%r10;"
+ #define LSS_BODY_ASM5 LSS_BODY_ASM4 "movq %6,%%r8;"
+ #define LSS_BODY_ASM6 LSS_BODY_ASM5 "movq %7,%%r9;"
+
+ #undef LSS_BODY_CLOBBER0
+ #undef LSS_BODY_CLOBBER1
+ #undef LSS_BODY_CLOBBER2
+ #undef LSS_BODY_CLOBBER3
+ #undef LSS_BODY_CLOBBER4
+ #undef LSS_BODY_CLOBBER5
+ #undef LSS_BODY_CLOBBER6
+ #define LSS_BODY_CLOBBER0
+ #define LSS_BODY_CLOBBER1 LSS_BODY_CLOBBER0
+ #define LSS_BODY_CLOBBER2 LSS_BODY_CLOBBER1
+ #define LSS_BODY_CLOBBER3 LSS_BODY_CLOBBER2
+ #define LSS_BODY_CLOBBER4 LSS_BODY_CLOBBER3 "r10",
+ #define LSS_BODY_CLOBBER5 LSS_BODY_CLOBBER4 "r8",
+ #define LSS_BODY_CLOBBER6 LSS_BODY_CLOBBER5 "r9",
+
+ #undef LSS_BODY_ARG0
+ #undef LSS_BODY_ARG1
+ #undef LSS_BODY_ARG2
+ #undef LSS_BODY_ARG3
+ #undef LSS_BODY_ARG4
+ #undef LSS_BODY_ARG5
+ #undef LSS_BODY_ARG6
+ #define LSS_BODY_ARG0()
+ #define LSS_BODY_ARG1(arg1) \
+ LSS_BODY_ARG0(), "D" (arg1)
+ #define LSS_BODY_ARG2(arg1, arg2) \
+ LSS_BODY_ARG1(arg1), "S" (arg2)
+ #define LSS_BODY_ARG3(arg1, arg2, arg3) \
+ LSS_BODY_ARG2(arg1, arg2), "d" (arg3)
+ #define LSS_BODY_ARG4(arg1, arg2, arg3, arg4) \
+ LSS_BODY_ARG3(arg1, arg2, arg3), "r" (arg4)
+ #define LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5) \
+ LSS_BODY_ARG4(arg1, arg2, arg3, arg4), "r" (arg5)
+ #define LSS_BODY_ARG6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ LSS_BODY_ARG5(arg1, arg2, arg3, arg4, arg5), "r" (arg6)
+
#undef _syscall0
#define _syscall0(type,name) \
type LSS_NAME(name)() { \
- LSS_BODY(type, name); \
+ LSS_BODY(0, type, name); \
}
#undef _syscall1
#define _syscall1(type,name,type1,arg1) \
type LSS_NAME(name)(type1 arg1) { \
- LSS_BODY(type, name, "D" ((long)(arg1))); \
+ LSS_BODY(1, type, name, LSS_SYSCALL_ARG(arg1)); \
}
#undef _syscall2
#define _syscall2(type,name,type1,arg1,type2,arg2) \
type LSS_NAME(name)(type1 arg1, type2 arg2) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2))); \
+ LSS_BODY(2, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2));\
}
#undef _syscall3
#define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3) { \
- LSS_BODY(type, name, "D" ((long)(arg1)), "S" ((long)(arg2)), \
- "d" ((long)(arg3))); \
+ LSS_BODY(3, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3)); \
}
#undef _syscall4
#define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)) : "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
+ LSS_BODY(4, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4));\
}
#undef _syscall5
#define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
type5,arg5) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
type5 arg5) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)) : \
- "r8", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
+ LSS_BODY(5, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+ LSS_SYSCALL_ARG(arg5)); \
}
#undef _syscall6
#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4, \
type5,arg5,type6,arg6) \
type LSS_NAME(name)(type1 arg1, type2 arg2, type3 arg3, type4 arg4, \
type5 arg5, type6 arg6) { \
- long __res; \
- __asm__ __volatile__("movq %5,%%r10; movq %6,%%r8; movq %7,%%r9;" \
- "syscall" : \
- "=a" (__res) : "0" (__NR_##name), \
- "D" ((long)(arg1)), "S" ((long)(arg2)), "d" ((long)(arg3)), \
- "r" ((long)(arg4)), "r" ((long)(arg5)), "r" ((long)(arg6)) : \
- "r8", "r9", "r10", "r11", "rcx", "memory"); \
- LSS_RETURN(type, __res); \
+ LSS_BODY(6, type, name, LSS_SYSCALL_ARG(arg1), LSS_SYSCALL_ARG(arg2), \
+ LSS_SYSCALL_ARG(arg3), LSS_SYSCALL_ARG(arg4), \
+ LSS_SYSCALL_ARG(arg5), LSS_SYSCALL_ARG(arg6));\
}
LSS_INLINE int LSS_NAME(clone)(int (*fn)(void *), void *child_stack,
int flags, void *arg, int *parent_tidptr,
void *newtls, int *child_tidptr) {
- long __res;
+ long long __res;
{
__asm__ __volatile__(/* if (fn == NULL)
* return -EINVAL;
@@ -1145,8 +1214,13 @@
"1:\n"
: "=a" (__res)
: "0"(-EINVAL), "i"(__NR_clone), "i"(__NR_exit),
- "r"(fn), "S"(child_stack), "D"(flags), "r"(arg),
- "d"(parent_tidptr), "g"(newtls), "g"(child_tidptr)
+ "r"(LSS_SYSCALL_ARG(fn)),
+ "S"(LSS_SYSCALL_ARG(child_stack)),
+ "D"(LSS_SYSCALL_ARG(flags)),
+ "r"(LSS_SYSCALL_ARG(arg)),
+ "d"(LSS_SYSCALL_ARG(parent_tidptr)),
+ "r"(LSS_SYSCALL_ARG(newtls)),
+ "r"(LSS_SYSCALL_ARG(child_tidptr))
: "rsp", "memory", "r8", "r10", "r11", "rcx");
}
LSS_RETURN(int, __res);
@@ -1159,7 +1233,7 @@
* Unfortunately, we cannot just reference the glibc version of this
* function, as glibc goes out of its way to make it inaccessible.
*/
- void (*res)(void);
+ long long res;
__asm__ __volatile__("call 2f\n"
"0:.align 16\n"
"1:movq %1,%%rax\n"
@@ -1168,7 +1242,7 @@
"addq $(1b-0b),%0\n"
: "=a" (res)
: "i" (__NR_rt_sigreturn));
- return res;
+ return (void (*)(void))(uintptr_t)res;
}
#elif defined(__arm__)
/* Most definitions of _syscallX() neglect to mark "memory" as being
@@ -1797,8 +1871,16 @@
LSS_INLINE _syscall0(pid_t, _gettid)
LSS_INLINE _syscall2(int, kill, pid_t, p,
int, s)
- LSS_INLINE _syscall3(off_t, lseek, int, f,
- off_t, o, int, w)
+ #if defined(__x86_64__)
+ /* Need to make sure off_t isn't truncated to 32-bits under x32. */
+ LSS_INLINE off_t LSS_NAME(lseek)(int f, off_t o, int w) {
+ _LSS_BODY(3, off_t, lseek, off_t, LSS_SYSCALL_ARG(f), (uint64_t)(o),
+ LSS_SYSCALL_ARG(w));
+ }
+ #else
+ LSS_INLINE _syscall3(off_t, lseek, int, f,
+ off_t, o, int, w)
+ #endif
LSS_INLINE _syscall2(int, munmap, void*, s,
size_t, l)
LSS_INLINE _syscall5(void*, _mremap, void*, o,
@@ -1835,10 +1917,13 @@
int, t, int, p)
#endif
#if defined(__x86_64__)
- LSS_INLINE _syscall6(void*, mmap, void*, s,
- size_t, l, int, p,
- int, f, int, d,
- __off64_t, o)
+ /* Need to make sure __off64_t isn't truncated to 32-bits under x32. */
+ LSS_INLINE void* LSS_NAME(mmap)(void *s, size_t l, int p, int f, int d,
+ __off64_t o) {
+ LSS_BODY(6, void*, mmap, LSS_SYSCALL_ARG(s), LSS_SYSCALL_ARG(l),
+ LSS_SYSCALL_ARG(p), LSS_SYSCALL_ARG(f),
+ LSS_SYSCALL_ARG(d), (uint64_t)(o));
+ }
LSS_INLINE int LSS_NAME(sigaction)(int signum,
const struct kernel_sigaction *act,
Only in gperftools-2.0/src/base: linux_syscall_support.h.svn-r190
Only in gperftools-2.0/src/base: linuxthreads.cc.svn-r190
diff -urP gperftools-2.0/src/base/spinlock.h gperftools-2.0-svn218/src/base/spinlock.h
--- gperftools-2.0/src/base/spinlock.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/spinlock.h 2013-06-04 10:16:58.374841694 -0400
@@ -31,11 +31,6 @@
* Author: Sanjay Ghemawat
*/
-//
-// Fast spinlocks (at least on x86, a lock/unlock pair is approximately
-// half the cost of a Mutex because the unlock just does a store instead
-// of a compare-and-swap which is expensive).
-
// SpinLock is async signal safe.
// If used within a signal handler, all lock holders
// should block the signal even outside the signal handler.
@@ -95,10 +90,9 @@
// TODO(csilvers): uncomment the annotation when we figure out how to
// support this macro with 0 args (see thread_annotations.h)
inline void Unlock() /*UNLOCK_FUNCTION()*/ {
- uint64 wait_cycles =
- static_cast<uint64>(base::subtle::NoBarrier_Load(&lockword_));
ANNOTATE_RWLOCK_RELEASED(this, 1);
- base::subtle::Release_Store(&lockword_, kSpinLockFree);
+ uint64 wait_cycles = static_cast<uint64>(
+ base::subtle::Release_AtomicExchange(&lockword_, kSpinLockFree));
if (wait_cycles != kSpinLockHeld) {
// Collect contentionz profile info, and speed the wakeup of any waiter.
// The wait_cycles value indicates how long this thread spent waiting
Only in gperftools-2.0/src/base: spinlock_internal.cc.svn-r190
Only in gperftools-2.0/src/base: sysinfo.cc.svn-r190
diff -urP gperftools-2.0/src/base/sysinfo.h gperftools-2.0-svn218/src/base/sysinfo.h
--- gperftools-2.0/src/base/sysinfo.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/base/sysinfo.h 2013-06-04 10:16:58.375841694 -0400
@@ -38,7 +38,7 @@
#include <time.h>
#if (defined(_WIN32) || defined(__MINGW32__)) && (!defined(__CYGWIN__) && !defined(__CYGWIN32__))
#include <windows.h> // for DWORD
-#include <TlHelp32.h> // for CreateToolhelp32Snapshot
+#include <tlhelp32.h> // for CreateToolhelp32Snapshot
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h> // for pid_t
diff -urP gperftools-2.0/src/central_freelist.h gperftools-2.0-svn218/src/central_freelist.h
--- gperftools-2.0/src/central_freelist.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/central_freelist.h 2013-06-04 10:16:57.724841684 -0400
@@ -79,6 +79,16 @@
// page full of 5-byte objects would have 2 bytes memory overhead).
size_t OverheadBytes();
+ // Lock/Unlock the internal SpinLock. Used on the pthread_atfork call
+ // to set the lock in a consistent state before the fork.
+ void Lock() {
+ lock_.Lock();
+ }
+
+ void Unlock() {
+ lock_.Unlock();
+ }
+
private:
// TransferCache is used to cache transfers of
// sizemap.num_objects_to_move(size_class) back and forth between
diff -urP gperftools-2.0/src/common.cc gperftools-2.0-svn218/src/common.cc
--- gperftools-2.0/src/common.cc 2013-06-04 10:20:21.143844736 -0400
+++ gperftools-2.0-svn218/src/common.cc 2013-06-04 10:16:57.724841684 -0400
@@ -30,12 +30,32 @@
// ---
// Author: Sanjay Ghemawat <opensource@google.com>
+#include <stdlib.h> // for getenv and strtol
#include "config.h"
#include "common.h"
#include "system-alloc.h"
+#include "base/spinlock.h"
namespace tcmalloc {
+// Define the maximum number of objects per size class to transfer between
+// thread and central caches.
+static int32 FLAGS_tcmalloc_transfer_num_objects;
+
+static const int32 kDefaultTransferNumObjecs = 32768;
+
+// The init function is provided to explicitly initialize the variable value
+// from the env. var to avoid C++ global construction that might defer its
+// initialization after a malloc/new call.
+static inline void InitTCMallocTransferNumObjects()
+{
+ if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
+ const char *envval = getenv("TCMALLOC_TRANSFER_NUM_OBJ");
+ FLAGS_tcmalloc_transfer_num_objects = !envval ? kDefaultTransferNumObjecs :
+ strtol(envval, NULL, 10);
+ }
+}
+
// Note: the following only works for "n"s that fit in 32-bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
@@ -90,13 +110,16 @@
// - We go to the central freelist too often and we have to acquire
// its lock each time.
// This value strikes a balance between the constraints above.
- if (num > 32) num = 32;
+ if (num > FLAGS_tcmalloc_transfer_num_objects)
+ num = FLAGS_tcmalloc_transfer_num_objects;
return num;
}
// Initialize the mapping arrays
void SizeMap::Init() {
+ InitTCMallocTransferNumObjects();
+
// Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
if (ClassIndex(0) < 0) {
Log(kCrash, __FILE__, __LINE__,
@@ -189,12 +212,56 @@
// Metadata allocator -- keeps stats about how many bytes allocated.
static uint64_t metadata_system_bytes_ = 0;
+static const size_t kMetadataAllocChunkSize = 8*1024*1024;
+static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
+// usually malloc uses larger alignments, but because metadata cannot
+// have any fancy SIMD types, aligning on pointer size seems fine
+static const size_t kMetadataAllignment = sizeof(void *);
+
+static char *metadata_chunk_alloc_;
+static size_t metadata_chunk_avail_;
+
+static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);
+
void* MetaDataAlloc(size_t bytes) {
- void* result = TCMalloc_SystemAlloc(bytes, NULL);
- if (result != NULL) {
- metadata_system_bytes_ += bytes;
+ if (bytes >= kMetadataAllocChunkSize) {
+ void *rv = TCMalloc_SystemAlloc(bytes,
+ NULL, kMetadataAllignment);
+ if (rv != NULL) {
+ metadata_system_bytes_ += bytes;
+ }
+ return rv;
}
- return result;
+
+ SpinLockHolder h(&metadata_alloc_lock);
+
+ // The following computes the padding needed to align the chunk pointer:
+ // the address is treated as an integer, negated, and masked down to its
+ // low log_2(kMetadataAllignment) bits, so that address + padding is 0
+ // modulo kMetadataAllignment. Note, we negate before masking higher bits
+ // off, otherwise we'd have to mask them off after negation anyway.
+ intptr_t alignment = -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) & (kMetadataAllignment-1);
+
+ if (metadata_chunk_avail_ < bytes + alignment) {
+ size_t real_size;
+ void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
+ &real_size, kMetadataAllignment);
+ if (ptr == NULL) {
+ return NULL;
+ }
+
+ metadata_chunk_alloc_ = static_cast<char *>(ptr);
+ metadata_chunk_avail_ = real_size;
+
+ alignment = 0;
+ }
+
+ void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
+ bytes += alignment;
+ metadata_chunk_alloc_ += bytes;
+ metadata_chunk_avail_ -= bytes;
+ metadata_system_bytes_ += bytes;
+ return rv;
}
uint64_t metadata_system_bytes() { return metadata_system_bytes_; }
Only in gperftools-2.0/src: common.cc.svn-r190
diff -urP gperftools-2.0/src/common.h gperftools-2.0-svn218/src/common.h
--- gperftools-2.0/src/common.h 2013-06-04 10:20:21.143844736 -0400
+++ gperftools-2.0-svn218/src/common.h 2013-06-04 10:16:58.382841694 -0400
@@ -80,7 +80,7 @@
static const size_t kMinAlign = 16;
#elif defined(TCMALLOC_ALIGN_8BYTES)
static const size_t kPageShift = 13;
-static const size_t kNumClasses = 93;
+static const size_t kNumClasses = 95;
// Unless we force to use 8 bytes alignment we use an alignment of
// at least 16 bytes to statisfy requirements for some SSE types.
// Keep in mind when using the 16 bytes alignment you can have a space
@@ -88,7 +88,7 @@
static const size_t kMinAlign = 8;
#else
static const size_t kPageShift = 13;
-static const size_t kNumClasses = 86;
+static const size_t kNumClasses = 88;
static const size_t kMinAlign = 16;
#endif
static const size_t kMaxThreadCacheSize = 4 << 20;
Only in gperftools-2.0/src: common.h.svn-r190
diff -urP gperftools-2.0/src/config.h.in gperftools-2.0-svn218/src/config.h.in
--- gperftools-2.0/src/config.h.in 2013-06-04 10:20:21.143844736 -0400
+++ gperftools-2.0-svn218/src/config.h.in 2013-06-04 10:16:57.816841685 -0400
@@ -56,6 +56,9 @@
/* Define to 1 if you have the <features.h> header file. */
#undef HAVE_FEATURES_H
+/* Define to 1 if you have the `fork' function. */
+#undef HAVE_FORK
+
/* Define to 1 if you have the `geteuid' function. */
#undef HAVE_GETEUID
Only in gperftools-2.0/src: config.h.in.svn-r190
Only in gperftools-2.0/src: debugallocation.cc.svn-r190
Only in gperftools-2.0/src: getpc.h.svn-r190
Only in gperftools-2.0/src/gperftools: malloc_extension.h.svn-r190
Only in gperftools-2.0/src/gperftools: tcmalloc.h.in.svn-r190
Only in gperftools-2.0/src: heap-checker.cc.svn-r190
Only in gperftools-2.0/src: heap-profiler.cc.svn-r190
Only in gperftools-2.0/src: heap-profile-table.cc.svn-r190
Only in gperftools-2.0/src: malloc_extension.cc.svn-r190
Only in gperftools-2.0/src: malloc_hook-inl.h.svn-r190
Only in gperftools-2.0/src: memory_region_map.cc.svn-r190
diff -urP gperftools-2.0/src/page_heap.cc gperftools-2.0-svn218/src/page_heap.cc
--- gperftools-2.0/src/page_heap.cc 2013-06-04 10:20:21.145844736 -0400
+++ gperftools-2.0-svn218/src/page_heap.cc 2013-06-04 10:16:58.070841689 -0400
@@ -108,6 +108,8 @@
return AllocLarge(n); // May be NULL
}
+static const size_t kForcedCoalesceInterval = 128*1024*1024;
+
Span* PageHeap::New(Length n) {
ASSERT(Check());
ASSERT(n > 0);
@@ -116,6 +118,38 @@
if (result != NULL)
return result;
+ if (stats_.free_bytes != 0 && stats_.unmapped_bytes != 0
+ && stats_.free_bytes + stats_.unmapped_bytes >= stats_.system_bytes / 4
+ && (stats_.system_bytes / kForcedCoalesceInterval
+ != (stats_.system_bytes + (n << kPageShift)) / kForcedCoalesceInterval)) {
+ // We're about to grow heap, but there are lots of free pages.
+ // tcmalloc's design decision to keep unmapped and free spans
+ // separately and never coalesce them means that sometimes there
+ // can be free pages span of sufficient size, but it consists of
+ // "segments" of different type so page heap search cannot find
+ // it. In order to prevent growing heap and wasting memory in such
+ // case we're going to unmap all free pages. So that all free
+ // spans are maximally coalesced.
+ //
+ // We're also limiting 'rate' of going into this path to be at
+ // most once per 128 megs of heap growth. Otherwise programs that
+ // grow heap frequently (and that means by small amount) could be
+ // penalized with higher count of minor page faults.
+ //
+ // See also large_heap_fragmentation_unittest.cc and
+ // https://code.google.com/p/gperftools/issues/detail?id=368
+ ReleaseAtLeastNPages(static_cast<Length>(0x7fffffff));
+
+ // then try again. If we are forced to grow heap because of large
+ // spans fragmentation and not because of problem described above,
+ // then at the very least we've just unmapped free but
+ // insufficiently big large spans back to OS. So in case of really
+ // unlucky memory fragmentation we'll be consuming virtual address
+ // space, but not real memory
+ result = SearchFreeAndLargeLists(n);
+ if (result != NULL) return result;
+ }
+
// Grow the heap and try again.
if (!GrowHeap(n)) {
ASSERT(Check());
Only in gperftools-2.0/src: page_heap.cc.svn-r190
Only in gperftools-2.0/src: page_heap.h.svn-r190
Only in gperftools-2.0/src: pprof.svn-r190
Only in gperftools-2.0/src: profiler.cc.svn-r190
diff -urP gperftools-2.0/src/static_vars.cc gperftools-2.0-svn218/src/static_vars.cc
--- gperftools-2.0/src/static_vars.cc 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/static_vars.cc 2013-06-04 10:16:57.817841685 -0400
@@ -39,6 +39,39 @@
namespace tcmalloc {
+#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
+// The following two functions are registered via pthread_atfork to make
+// sure the central_cache locks remain in a consistent state in the forked
+// version of the thread.
+
+static
+void CentralCacheLockAll()
+{
+ Static::pageheap_lock()->Lock();
+ for (int i = 0; i < kNumClasses; ++i)
+ Static::central_cache()[i].Lock();
+}
+
+static
+void CentralCacheUnlockAll()
+{
+ for (int i = 0; i < kNumClasses; ++i)
+ Static::central_cache()[i].Unlock();
+ Static::pageheap_lock()->Unlock();
+}
+#endif
+
+static inline
+void SetupAtForkLocksHandler()
+{
+#if defined(HAVE_FORK) && defined(HAVE_PTHREAD)
+ pthread_atfork(CentralCacheLockAll, // parent calls before fork
+ CentralCacheUnlockAll, // parent calls after fork
+ CentralCacheUnlockAll); // child calls after fork
+#endif
+}
+
+
SpinLock Static::pageheap_lock_(SpinLock::LINKER_INITIALIZED);
SizeMap Static::sizemap_;
CentralFreeListPadded Static::central_cache_[kNumClasses];
@@ -49,6 +82,7 @@
StackTrace* Static::growth_stacks_ = NULL;
PageHeap* Static::pageheap_ = NULL;
+
void Static::InitStaticVars() {
sizemap_.Init();
span_allocator_.Init();
@@ -61,6 +95,8 @@
for (int i = 0; i < kNumClasses; ++i) {
central_cache_[i].Init(i);
}
+ SetupAtForkLocksHandler();
+
// It's important to have PageHeap allocated, not in static storage,
// so that HeapLeakChecker does not consider all the byte patterns stored
// in is caches as pointers that are sources of heap object liveness,
Only in gperftools-2.0/src: static_vars.h.svn-r190
Only in gperftools-2.0/src: symbolize.cc.svn-r190
Only in gperftools-2.0/src: system-alloc.cc.svn-r190
Only in gperftools-2.0/src: system-alloc.h.svn-r190
Only in gperftools-2.0/src: tcmalloc.cc.svn-r190
diff -urP gperftools-2.0/src/tests/atomicops_unittest.cc gperftools-2.0-svn218/src/tests/atomicops_unittest.cc
--- gperftools-2.0/src/tests/atomicops_unittest.cc 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/tests/atomicops_unittest.cc 2013-06-04 10:16:58.072841689 -0400
@@ -38,13 +38,14 @@
#define GG_ULONGLONG(x) static_cast<uint64>(x)
template <class AtomicType>
-static void TestAtomicIncrement() {
+static void TestAtomicIncrement(AtomicType (*atomic_increment_func)
+ (volatile AtomicType*, AtomicType)) {
// For now, we just test single threaded execution
- // use a guard value to make sure the NoBarrier_AtomicIncrement doesn't go
+ // use a guard value to make sure the atomic_increment_func doesn't go
// outside the expected address bounds. This is in particular to
// test that some future change to the asm code doesn't cause the
- // 32-bit NoBarrier_AtomicIncrement doesn't do the wrong thing on 64-bit
+ // 32-bit atomic_increment_func to do the wrong thing on 64-bit
// machines.
struct {
AtomicType prev_word;
@@ -60,47 +61,47 @@
s.count = 0;
s.next_word = next_word_value;
- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, 1));
+ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, 1));
ASSERT_EQ(1, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, 2));
+ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, 2));
ASSERT_EQ(3, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(6, base::subtle::NoBarrier_AtomicIncrement(&s.count, 3));
+ ASSERT_EQ(6, (*atomic_increment_func)(&s.count, 3));
ASSERT_EQ(6, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(3, base::subtle::NoBarrier_AtomicIncrement(&s.count, -3));
+ ASSERT_EQ(3, (*atomic_increment_func)(&s.count, -3));
ASSERT_EQ(3, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -2));
+ ASSERT_EQ(1, (*atomic_increment_func)(&s.count, -2));
ASSERT_EQ(1, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
+ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, -1));
ASSERT_EQ(0, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(-1, base::subtle::NoBarrier_AtomicIncrement(&s.count, -1));
+ ASSERT_EQ(-1, (*atomic_increment_func)(&s.count, -1));
ASSERT_EQ(-1, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(-5, base::subtle::NoBarrier_AtomicIncrement(&s.count, -4));
+ ASSERT_EQ(-5, (*atomic_increment_func)(&s.count, -4));
ASSERT_EQ(-5, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
- ASSERT_EQ(0, base::subtle::NoBarrier_AtomicIncrement(&s.count, 5));
+ ASSERT_EQ(0, (*atomic_increment_func)(&s.count, 5));
ASSERT_EQ(0, s.count);
ASSERT_EQ(prev_word_value, s.prev_word);
ASSERT_EQ(next_word_value, s.next_word);
@@ -111,9 +112,10 @@
template <class AtomicType>
-static void TestCompareAndSwap() {
+static void TestCompareAndSwap(AtomicType (*compare_and_swap_func)
+ (volatile AtomicType*, AtomicType, AtomicType)) {
AtomicType value = 0;
- AtomicType prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 1);
+ AtomicType prev = (*compare_and_swap_func)(&value, 0, 1);
ASSERT_EQ(1, value);
ASSERT_EQ(0, prev);
@@ -122,21 +124,22 @@
const AtomicType k_test_val = (GG_ULONGLONG(1) <<
(NUM_BITS(AtomicType) - 2)) + 11;
value = k_test_val;
- prev = base::subtle::NoBarrier_CompareAndSwap(&value, 0, 5);
+ prev = (*compare_and_swap_func)(&value, 0, 5);
ASSERT_EQ(k_test_val, value);
ASSERT_EQ(k_test_val, prev);
value = k_test_val;
- prev = base::subtle::NoBarrier_CompareAndSwap(&value, k_test_val, 5);
+ prev = (*compare_and_swap_func)(&value, k_test_val, 5);
ASSERT_EQ(5, value);
ASSERT_EQ(k_test_val, prev);
}
template <class AtomicType>
-static void TestAtomicExchange() {
+static void TestAtomicExchange(AtomicType (*atomic_exchange_func)
+ (volatile AtomicType*, AtomicType)) {
AtomicType value = 0;
- AtomicType new_value = base::subtle::NoBarrier_AtomicExchange(&value, 1);
+ AtomicType new_value = (*atomic_exchange_func)(&value, 1);
ASSERT_EQ(1, value);
ASSERT_EQ(0, new_value);
@@ -145,28 +148,29 @@
const AtomicType k_test_val = (GG_ULONGLONG(1) <<
(NUM_BITS(AtomicType) - 2)) + 11;
value = k_test_val;
- new_value = base::subtle::NoBarrier_AtomicExchange(&value, k_test_val);
+ new_value = (*atomic_exchange_func)(&value, k_test_val);
ASSERT_EQ(k_test_val, value);
ASSERT_EQ(k_test_val, new_value);
value = k_test_val;
- new_value = base::subtle::NoBarrier_AtomicExchange(&value, 5);
+ new_value = (*atomic_exchange_func)(&value, 5);
ASSERT_EQ(5, value);
ASSERT_EQ(k_test_val, new_value);
}
template <class AtomicType>
-static void TestAtomicIncrementBounds() {
+static void TestAtomicIncrementBounds(AtomicType (*atomic_increment_func)
+ (volatile AtomicType*, AtomicType)) {
// Test increment at the half-width boundary of the atomic type.
// It is primarily for testing at the 32-bit boundary for 64-bit atomic type.
AtomicType test_val = GG_ULONGLONG(1) << (NUM_BITS(AtomicType) / 2);
AtomicType value = test_val - 1;
- AtomicType new_value = base::subtle::NoBarrier_AtomicIncrement(&value, 1);
+ AtomicType new_value = (*atomic_increment_func)(&value, 1);
ASSERT_EQ(test_val, value);
ASSERT_EQ(value, new_value);
- base::subtle::NoBarrier_AtomicIncrement(&value, -1);
+ (*atomic_increment_func)(&value, -1);
ASSERT_EQ(test_val - 1, value);
}
@@ -222,16 +226,28 @@
template <class AtomicType>
static void TestAtomicOps() {
- TestCompareAndSwap<AtomicType>();
- TestAtomicExchange<AtomicType>();
- TestAtomicIncrementBounds<AtomicType>();
+ TestCompareAndSwap<AtomicType>(base::subtle::NoBarrier_CompareAndSwap);
+ TestCompareAndSwap<AtomicType>(base::subtle::Acquire_CompareAndSwap);
+ TestCompareAndSwap<AtomicType>(base::subtle::Release_CompareAndSwap);
+
+ TestAtomicExchange<AtomicType>(base::subtle::NoBarrier_AtomicExchange);
+ TestAtomicExchange<AtomicType>(base::subtle::Acquire_AtomicExchange);
+ TestAtomicExchange<AtomicType>(base::subtle::Release_AtomicExchange);
+
+ TestAtomicIncrementBounds<AtomicType>(
+ base::subtle::NoBarrier_AtomicIncrement);
+ TestAtomicIncrementBounds<AtomicType>(
+ base::subtle::Barrier_AtomicIncrement);
+
TestStore<AtomicType>();
TestLoad<AtomicType>();
}
int main(int argc, char** argv) {
- TestAtomicIncrement<AtomicWord>();
- TestAtomicIncrement<Atomic32>();
+ TestAtomicIncrement<AtomicWord>(base::subtle::NoBarrier_AtomicIncrement);
+ TestAtomicIncrement<AtomicWord>(base::subtle::Barrier_AtomicIncrement);
+ TestAtomicIncrement<Atomic32>(base::subtle::NoBarrier_AtomicIncrement);
+ TestAtomicIncrement<Atomic32>(base::subtle::Barrier_AtomicIncrement);
TestAtomicOps<AtomicWord>();
TestAtomicOps<Atomic32>();
@@ -248,8 +264,10 @@
// If we ever *do* want to enable this, try adding -msse (or -mmmx?)
// to the CXXFLAGS in Makefile.am.
#if 0 and defined(BASE_HAS_ATOMIC64)
- TestAtomicIncrement<base::subtle::Atomic64>();
- TestAtomicOps<base::subtle::Atomic64>();
+ TestAtomicIncrement<base::subtle::Atomic64>(
+ base::subtle::NoBarrier_AtomicIncrement);
+ TestAtomicIncrement<base::subtle::Atomic64>(
+ base::subtle::Barrier_AtomicIncrement);
#endif
printf("PASS\n");
Only in gperftools-2.0/src/tests: getpc_test.cc.svn-r190
diff -urP gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc
--- gperftools-2.0/src/tests/large_heap_fragmentation_unittest.cc 1969-12-31 19:00:00.000000000 -0500
+++ gperftools-2.0-svn218/src/tests/large_heap_fragmentation_unittest.cc 2013-06-04 10:16:58.073841689 -0400
@@ -0,0 +1,62 @@
+// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// This is a unit test for exercising fragmentation of large (over 1
+// meg) page spans. It makes sure that allocations/releases of
+// increasing memory chunks do not blowup memory
+// usage. See also https://code.google.com/p/gperftools/issues/detail?id=368
+
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "base/logging.h"
+#include "common.h"
+#include <gperftools/malloc_extension.h>
+
+
+int main (int argc, char** argv) {
+ for (int pass = 1; pass <= 3; pass++) {
+ size_t size = 100*1024*1024;
+ while (size < 500*1024*1024) {
+ void *ptr = malloc(size);
+ free(ptr);
+ size += 20000;
+
+ size_t heap_size = static_cast<size_t>(-1);
+ MallocExtension::instance()->GetNumericProperty("generic.heap_size",
+ &heap_size);
+
+
+ CHECK_LT(heap_size, 1*1024*1024*1024);
+ }
+ }
+
+ printf("PASS\n");
+ return 0;
+}
diff -urP gperftools-2.0/src/tests/malloc_extension_c_test.c gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c
--- gperftools-2.0/src/tests/malloc_extension_c_test.c 2012-02-03 14:18:23.000000000 -0500
+++ gperftools-2.0-svn218/src/tests/malloc_extension_c_test.c 2013-06-04 10:16:58.077841689 -0400
@@ -59,6 +59,16 @@
g_delete_hook_calls++;
}
+static
+void *forced_malloc(size_t size)
+{
+ void *rv = malloc(size);
+ if (!rv) {
+ FAIL("malloc is not supposed to fail here");
+ }
+ return rv;
+}
+
void TestMallocHook(void) {
/* TODO(csilvers): figure out why we get:
* E0100 00:00:00.000000 7383 malloc_hook.cc:244] RAW: google_malloc section is missing, thus InHookCaller is broken!
@@ -78,8 +88,9 @@
if (!MallocHook_AddDeleteHook(&TestDeleteHook)) {
FAIL("Failed to add delete hook");
}
- free(malloc(10));
- free(malloc(20));
+
+ free(forced_malloc(10));
+ free(forced_malloc(20));
if (g_new_hook_calls != 2) {
FAIL("Wrong number of calls to the new hook");
}
Only in gperftools-2.0/src/tests: malloc_hook_test.cc.svn-r190
Only in gperftools-2.0/src/tests: markidle_unittest.cc.svn-r190
Only in gperftools-2.0/src/tests: page_heap_test.cc.svn-r190
Only in gperftools-2.0/src/tests: profiler_unittest.sh.svn-r190
diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.cc gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc
--- gperftools-2.0/src/tests/tcmalloc_unittest.cc 2013-06-04 10:20:21.147844736 -0400
+++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.cc 2013-06-04 10:16:58.073841689 -0400
@@ -725,7 +725,7 @@
// Note the ... in the hook signature: we don't care what arguments
// the hook takes.
#define MAKE_HOOK_CALLBACK(hook_type) \
- static int g_##hook_type##_calls = 0; \
+ static volatile int g_##hook_type##_calls = 0; \
static void IncrementCallsTo##hook_type(...) { \
g_##hook_type##_calls++; \
} \
@@ -760,7 +760,7 @@
CHECK((p % sizeof(void*)) == 0);
CHECK((p % sizeof(double)) == 0);
- // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
+ // Must have 16-byte (or 8-byte in case of -DTCMALLOC_ALIGN_8BYTES)
// alignment for large enough objects
if (size >= kMinAlign) {
CHECK((p % kMinAlign) == 0);
Only in gperftools-2.0/src/tests: tcmalloc_unittest.cc.svn-r190
diff -urP gperftools-2.0/src/tests/tcmalloc_unittest.sh gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh
--- gperftools-2.0/src/tests/tcmalloc_unittest.sh 1969-12-31 19:00:00.000000000 -0500
+++ gperftools-2.0-svn218/src/tests/tcmalloc_unittest.sh 2013-06-04 10:16:58.075841689 -0400
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# Copyright (c) 2013, Google Inc.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ---
+# Author: Adhemerval Zanella
+#
+# Runs the tcmalloc_unittest with various environment variables.
+# This is necessary because tuning some environment variables
+# (TCMALLOC_TRANSFER_NUM_OBJ for instance) should not change program
+# behavior, just performance.
+
+BINDIR="${BINDIR:-.}"
+TCMALLOC_UNITTEST="${1:-$BINDIR}/tcmalloc_unittest"
+
+TMPDIR=/tmp/tcmalloc_unittest
+rm -rf $TMPDIR || exit 2
+mkdir $TMPDIR || exit 3
+
+# $1: value to export as TCMALLOC_TRANSFER_NUM_OBJ (empty: leave it unchanged).
+run_check_transfer_num_obj() {
+  [ -n "$1" ] && export TCMALLOC_TRANSFER_NUM_OBJ="$1"
+  # printf rather than "echo -n": POSIX leaves echo's -n handling unspecified.
+  printf '%s' "Testing $TCMALLOC_UNITTEST with TCMALLOC_TRANSFER_NUM_OBJ=$1 ... "
+  if "$TCMALLOC_UNITTEST" > "$TMPDIR/output" 2>&1; then
+    echo "OK"
+  else
+    echo "FAILED"
+    echo "Output from the failed run:"
+    echo "----"
+    cat "$TMPDIR/output"
+    echo "----"
+    exit 4
+  fi
+}
+
+run_check_transfer_num_obj ""
+run_check_transfer_num_obj "40"
+run_check_transfer_num_obj "4096"
+
+echo "PASS"
Only in gperftools-2.0/src: thread_cache.cc.svn-r190
Only in gperftools-2.0/src: thread_cache.h.svn-r190
diff -urP gperftools-2.0/src/windows/mingw.h gperftools-2.0-svn218/src/windows/mingw.h
--- gperftools-2.0/src/windows/mingw.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/mingw.h 2013-06-04 10:16:57.682841683 -0400
@@ -60,6 +60,8 @@
// pretend the pthreads wrapper doesn't exist, even when it does.
#undef HAVE_PTHREAD
+#define HAVE_PID_T
+
#include "windows/port.h"
#endif /* __MINGW32__ */
diff -urP gperftools-2.0/src/windows/patch_functions.cc gperftools-2.0-svn218/src/windows/patch_functions.cc
--- gperftools-2.0/src/windows/patch_functions.cc 2012-02-03 14:18:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/patch_functions.cc 2013-06-04 10:16:57.683841683 -0400
@@ -85,7 +85,7 @@
#include <windows.h>
#include <stdio.h>
#include <malloc.h> // for _msize and _expand
-#include <Psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
+#include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc.
#include <set>
#include <map>
#include <vector>
Only in gperftools-2.0/src/windows: port.cc.svn-r190
diff -urP gperftools-2.0/src/windows/port.h gperftools-2.0-svn218/src/windows/port.h
--- gperftools-2.0/src/windows/port.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/port.h 2013-06-04 10:16:57.683841683 -0400
@@ -390,7 +390,10 @@
/* ----------------------------------- SYSTEM/PROCESS */
+#ifndef HAVE_PID_T
typedef int pid_t;
+#endif
+
#if __STDC__ && !defined(__MINGW32__)
inline pid_t getpid(void) { return _getpid(); }
#endif
diff -urP gperftools-2.0/src/windows/preamble_patcher.cc gperftools-2.0-svn218/src/windows/preamble_patcher.cc
--- gperftools-2.0/src/windows/preamble_patcher.cc 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/preamble_patcher.cc 2013-06-04 10:16:57.601841682 -0400
@@ -103,6 +103,7 @@
new_target = target + 2 + relative_offset;
} else if (target[0] == ASM_JMP32ABS_0 &&
target[1] == ASM_JMP32ABS_1) {
+ jmp32rel:
// Visual studio seems to sometimes do it this way instead of the
// previous way. Not sure what the rules are, but it was happening
// with operator new in some binaries.
@@ -118,6 +119,18 @@
memcpy(&new_target_v, reinterpret_cast<void*>(target + 2), 4);
}
new_target = reinterpret_cast<unsigned char*>(*new_target_v);
+ } else if (kIs64BitBinary && target[0] == ASM_REXW
+ && target[1] == ASM_JMP32ABS_0
+ && target[2] == ASM_JMP32ABS_1) {
+ // In Visual Studio 2012 we're seeing jumps like this:
+ // rex.W jmpq *0x11d019(%rip)
+ //
+ // According to the docs I have, the REX prefix is actually unneeded
+ // and can be ignored, i.e. for jumps like this the operand already
+ // defaults to 64-bit. But it clearly breaks the absolute jump
+ // detection above, so we just skip the REX byte.
+ target++;
+ goto jmp32rel;
} else {
break;
}
@@ -535,6 +548,12 @@
return (*(target) & 0x70) == 0x70 && instruction_size == 2;
}
+bool PreamblePatcher::IsShortJump(
+ unsigned char* target,
+ unsigned int instruction_size) {
+ return target[0] == 0xeb && instruction_size == 2;
+}
+
bool PreamblePatcher::IsNearConditionalJump(
unsigned char* target,
unsigned int instruction_size) {
@@ -575,7 +594,9 @@
unsigned char* target,
unsigned int* target_bytes,
unsigned int target_size) {
- unsigned char* original_jump_dest = (source + 2) + source[1];
+ // note: rel8 offset is signed. Thus we need to cast to signed char
+ // to handle negative offsets correctly
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
unsigned char* stub_jump_from = target + 6;
__int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
@@ -597,6 +618,36 @@
reinterpret_cast<void*>(&fixup_jump_offset), 4);
}
+ return SIDESTEP_SUCCESS;
+}
+
+SideStepError PreamblePatcher::PatchShortJump(
+ unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size) {
+ // note: rel8 offset is _signed_. Thus we need signed char here.
+ unsigned char* original_jump_dest = (source + 2) + static_cast<signed char>(source[1]);
+ unsigned char* stub_jump_from = target + 5;
+ __int64 fixup_jump_offset = original_jump_dest - stub_jump_from;
+ if (fixup_jump_offset > INT_MAX || fixup_jump_offset < INT_MIN) {
+ SIDESTEP_ASSERT(false &&
+ "Unable to fix up short jump because target"
+ " is too far away.");
+ return SIDESTEP_JUMP_INSTRUCTION;
+ }
+
+ *target_bytes = 5;
+ if (target_size > *target_bytes) {
+ // Convert the short jump to a near jump.
+ //
+ // e9 xx xx xx xx = jmp rel32off
+ target[0] = 0xe9;
+ memcpy(reinterpret_cast<void*>(target + 1),
+ reinterpret_cast<void*>(&fixup_jump_offset), 4);
+ }
+
return SIDESTEP_SUCCESS;
}
diff -urP gperftools-2.0/src/windows/preamble_patcher.h gperftools-2.0-svn218/src/windows/preamble_patcher.h
--- gperftools-2.0/src/windows/preamble_patcher.h 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/preamble_patcher.h 2013-06-04 10:16:57.601841682 -0400
@@ -467,6 +467,8 @@
static bool IsShortConditionalJump(unsigned char* target,
unsigned int instruction_size);
+ static bool IsShortJump(unsigned char *target, unsigned int instruction_size);
+
// Helper routine that determines if a target instruction is a near
// conditional jump.
//
@@ -547,6 +549,12 @@
unsigned int* target_bytes,
unsigned int target_size);
+ static SideStepError PatchShortJump(unsigned char* source,
+ unsigned int instruction_size,
+ unsigned char* target,
+ unsigned int* target_bytes,
+ unsigned int target_size);
+
// Helper routine that converts an instruction that will convert various
// jump-like instructions to corresponding instructions in the target buffer.
// What this routine does is fix up the relative offsets contained in jump
diff -urP gperftools-2.0/src/windows/preamble_patcher_with_stub.cc gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc
--- gperftools-2.0/src/windows/preamble_patcher_with_stub.cc 2012-02-02 16:36:23.000000000 -0500
+++ gperftools-2.0-svn218/src/windows/preamble_patcher_with_stub.cc 2013-06-04 10:16:57.682841683 -0400
@@ -150,6 +150,11 @@
preamble_stub + stub_bytes,
&jump_bytes,
stub_size - stub_bytes);
+ } else if (IsShortJump(target + preamble_bytes, cur_bytes)) {
+ jump_ret = PatchShortJump(target + preamble_bytes, cur_bytes,
+ preamble_stub + stub_bytes,
+ &jump_bytes,
+ stub_size - stub_bytes);
} else if (IsNearConditionalJump(target + preamble_bytes, cur_bytes) ||
IsNearRelativeJump(target + preamble_bytes, cur_bytes) ||
IsNearAbsoluteCall(target + preamble_bytes, cur_bytes) ||
Only in gperftools-2.0/src/windows: TODO.svn-r190