From 5d225ec2025ba21e6cabdb8eb91bbb63b57f0ced Mon Sep 17 00:00:00 2001 From: "Richard M. Shaw" Date: Thu, 16 May 2013 15:48:07 -0500 Subject: [PATCH] Update to latest upstream release. Use new internal atomics as they are now faster than TBB. --- 589.patch | 362 +++++++++++++++++++++++ OpenImageIO-1.1.2-use_external_tbb.patch | 68 ----- OpenImageIO-1.1.3-SHA1_undef_ref.patch | 14 - OpenImageIO.spec | 41 +-- 4 files changed, 377 insertions(+), 108 deletions(-) create mode 100644 589.patch delete mode 100644 OpenImageIO-1.1.2-use_external_tbb.patch delete mode 100644 OpenImageIO-1.1.3-SHA1_undef_ref.patch diff --git a/589.patch b/589.patch new file mode 100644 index 0000000..8fd97a7 --- /dev/null +++ b/589.patch @@ -0,0 +1,362 @@ +From a58d0ff4935ef14f32f01d4de362bba242f07e0c Mon Sep 17 00:00:00 2001 +From: Larry Gritz +Date: Sat, 4 May 2013 10:22:12 -0700 +Subject: [PATCH] spinlock tweaks that finally make it as good or better than + TBB. + +--- + src/include/thread.h | 89 ++++++++++++++++-------------------- + src/libOpenImageIO/atomic_test.cpp | 9 ++-- + src/libOpenImageIO/spinlock_test.cpp | 22 +++++++-- + src/libtexture/imagecache_pvt.h | 2 +- + 4 files changed, 62 insertions(+), 60 deletions(-) + +diff --git a/src/include/thread.h b/src/include/thread.h +index 28645fc..2cd03c1 100644 +--- a/src/include/thread.h ++++ b/src/include/thread.h +@@ -78,16 +78,22 @@ + // Some day, we hope this is all replaced by use of std::atomic<>. + #if USE_TBB + # include +- using tbb::atomic; + # include ++# define USE_TBB_ATOMIC 1 ++# define USE_TBB_SPINLOCK 1 ++#else ++# define USE_TBB_ATOMIC 0 ++# define USE_TBB_SPINLOCK 0 + #endif + ++ + #if defined(_MSC_VER) && !USE_TBB + # include + # include + # pragma intrinsic (_InterlockedExchangeAdd) + # pragma intrinsic (_InterlockedCompareExchange) + # pragma intrinsic (_InterlockedCompareExchange64) ++# pragma intrinsic (_ReadWriteBarrier) + # if defined(_WIN64) + # pragma intrinsic(_InterlockedExchangeAdd64) + # endif +@@ -105,10 +111,6 @@ + # endif + #endif + +-#ifdef __APPLE__ +-# include +-#endif +- + #if defined(__GNUC__) && (defined(_GLIBCXX_ATOMIC_BUILTINS) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 401)) + #if !defined(__FreeBSD__) || defined(__x86_64__) + #define USE_GCC_ATOMICS +@@ -230,9 +232,6 @@ class thread_specific_ptr { + #elif USE_TBB + atomic *a = (atomic *)at; + return a->fetch_and_add (x); +-#elif defined(no__APPLE__) +- // Apple, not inline for Intel (only PPC?) +- return OSAtomicAdd32Barrier (x, at) - x; + #elif defined(_MSC_VER) + // Windows + return _InterlockedExchangeAdd ((volatile LONG *)at, x); +@@ -251,9 +250,6 @@ class thread_specific_ptr { + #elif USE_TBB + atomic *a = (atomic *)at; + return a->fetch_and_add (x); +-#elif defined(no__APPLE__) +- // Apple, not inline for Intel (only PPC?) +- return OSAtomicAdd64Barrier (x, at) - x; + #elif defined(_MSC_VER) + // Windows + # if defined(_WIN64) +@@ -282,8 +278,6 @@ class thread_specific_ptr { + #elif USE_TBB + atomic *a = (atomic *)at; + return a->compare_and_swap (newval, compareval) == newval; +-#elif defined(no__APPLE__) +- return OSAtomicCompareAndSwap32Barrier (compareval, newval, at); + #elif defined(_MSC_VER) + return (_InterlockedCompareExchange ((volatile LONG *)at, newval, compareval) == compareval); + #else +@@ -301,8 +295,6 @@ class thread_specific_ptr { + #elif USE_TBB + atomic *a = (atomic *)at; + return a->compare_and_swap (newval, compareval) == newval; +-#elif defined(no__APPLE__) +- return OSAtomicCompareAndSwap64Barrier (compareval, newval, at); + #elif defined(_MSC_VER) + return (_InterlockedCompareExchange64 ((volatile LONGLONG *)at, newval, compareval) == compareval); + #else +@@ -317,9 +309,7 @@ class thread_specific_ptr { + inline void + yield () + { +-#if USE_TBB +- __TBB_Yield (); +-#elif defined(__GNUC__) ++#if defined(__GNUC__) + sched_yield (); + #elif defined(_MSC_VER) + SwitchToThread (); +@@ -334,12 +324,12 @@ class thread_specific_ptr { + inline void + pause (int delay) + { +-#if USE_TBB +- __TBB_Pause(delay); +-#elif defined(__GNUC__) ++#if defined(__GNUC__) + for (int i = 0; i < delay; ++i) { + __asm__ __volatile__("pause;"); + } ++#elif USE_TBB ++ __TBB_Pause(delay); + #elif defined(_MSC_VER) + for (int i = 0; i < delay; ++i) { + #if defined (_WIN64) +@@ -369,14 +359,17 @@ class atomic_backoff { + yield(); + } + } ++ + private: + int m_count; + }; + + + +-#if (! USE_TBB) +-// If we're not using TBB, we need to define our own atomic<>. ++#if USE_TBB_ATOMIC ++using tbb::atomic; ++#else ++// If we're not using TBB's atomic, we need to define our own atomic<>. + + + /// Atomic integer. Increment, decrement, add, and subtract in a +@@ -456,7 +449,7 @@ class atomic { + }; + + +-#endif /* ! USE_TBB */ ++#endif /* ! USE_TBB_ATOMIC */ + + + #ifdef NOTHREADS +@@ -478,7 +471,7 @@ class atomic { + typedef null_mutex spin_mutex; + typedef null_lock spin_lock; + +-#elif USE_TBB ++#elif USE_TBB_SPINLOCK + + // Use TBB's spin locks + typedef tbb::spin_mutex spin_mutex; +@@ -529,63 +522,61 @@ class spin_mutex { + /// Acquire the lock, spin until we have it. + /// + void lock () { +-#if defined(no__APPLE__) +- // OS X has dedicated spin lock routines, may as well use them. +- OSSpinLockLock ((OSSpinLock *)&m_locked); +-#else + // To avoid spinning too tightly, we use the atomic_backoff to + // provide increasingly longer pauses, and if the lock is under + // lots of contention, eventually yield the timeslice. + atomic_backoff backoff; ++ + // Try to get ownership of the lock. Though experimentation, we + // found that OIIO_UNLIKELY makes this just a bit faster on + // gcc x86/x86_64 systems. + while (! OIIO_UNLIKELY(try_lock())) { + do { + backoff(); +- } while (*(volatile int *)&m_locked); ++ } while (m_locked); ++ + // The full try_lock() involves a compare_and_swap, which + // writes memory, and that will lock the bus. But a normal + // read of m_locked will let us spin until the value + // changes, without locking the bus. So it's faster to + // check in this manner until the mutex appears to be free. + } +-#endif + } + + /// Release the lock that we hold. + /// + void unlock () { +-#if defined(no__APPLE__) +- OSSpinLockUnlock ((OSSpinLock *)&m_locked); +-#elif defined(__GNUC__) +- // GCC gives us an intrinsic that is even better, an atomic +- // assignment of 0 with "release" barrier semantics. +- __sync_lock_release ((volatile int *)&m_locked); ++#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) ++ // Fastest way to do it is with a store with "release" semantics ++ __asm__ __volatile__("": : :"memory"); ++ m_locked = 0; ++ // N.B. GCC gives us an intrinsic that is even better, an atomic ++ // assignment of 0 with "release" barrier semantics: ++ // __sync_lock_release (&m_locked); ++ // But empirically we found it not as performant as the above. ++#elif defined(_MSC_VER) ++ _ReadWriteBarrier(); ++ m_locked = 0; + #else + // Otherwise, just assign zero to the atomic (but that's a full + // memory barrier). +- m_locked = 0; ++ *(atomic_int *)&m_locked = 0; + #endif + } + + /// Try to acquire the lock. Return true if we have it, false if + /// somebody else is holding the lock. + bool try_lock () { +-#if defined(no__APPLE__) +- return OSSpinLockTry ((OSSpinLock *)&m_locked); +-#else +-# if USE_TBB ++#if USE_TBB_ATOMIC + // TBB's compare_and_swap returns the original value +- return m_locked.compare_and_swap (0, 1) == 0; +-# elif defined(__GNUC__) ++ return (*(atomic_int *)&m_locked).compare_and_swap (0, 1) == 0; ++#elif defined(__GNUC__) + // GCC gives us an intrinsic that is even better -- an atomic + // exchange with "acquire" barrier semantics. +- return __sync_lock_test_and_set ((volatile int *)&m_locked, 1) == 0; +-# else ++ return __sync_lock_test_and_set (&m_locked, 1) == 0; ++#else + // Our compare_and_swap returns true if it swapped +- return m_locked.bool_compare_and_swap (0, 1); +-# endif ++ return atomic_compare_and_exchange (&m_locked, 0, 1); + #endif + } + +@@ -603,7 +594,7 @@ class spin_mutex { + }; + + private: +- atomic_int m_locked; ///< Atomic counter is zero if nobody holds the lock ++ volatile int m_locked; ///< Atomic counter is zero if nobody holds the lock + }; + + +diff --git a/src/libOpenImageIO/atomic_test.cpp b/src/libOpenImageIO/atomic_test.cpp +index 2c1e807..42d469a 100644 +--- a/src/libOpenImageIO/atomic_test.cpp ++++ b/src/libOpenImageIO/atomic_test.cpp +@@ -49,7 +49,7 @@ + // and decrementing the crap out of it, and make sure it has the right + // value at the end. + +-static int iterations = 160000000; ++static int iterations = 40000000; + static int numthreads = 16; + static int ntrials = 1; + static bool verbose = false; +@@ -184,16 +184,15 @@ int main (int argc, char *argv[]) + + static int threadcounts[] = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 64, 128, 1024, 1<<30 }; + for (int i = 0; threadcounts[i] <= numthreads; ++i) { +- int nt = threadcounts[i]; ++ int nt = wedge ? threadcounts[i] : numthreads; + int its = iterations/nt; + + double range; + double t = time_trial (boost::bind(test_atomics,nt,its), + ntrials, &range); + +- std::cout << Strutil::format ("%2d\t%s\t%5.1fs, range %.1f\t(%d iters/thread)\n", +- nt, Strutil::timeintervalformat(t), +- t, range, its); ++ std::cout << Strutil::format ("%2d\t%5.1f range %.2f\t(%d iters/thread)\n", ++ nt, t, range, its); + if (! wedge) + break; // don't loop if we're not wedging + } +diff --git a/src/libOpenImageIO/spinlock_test.cpp b/src/libOpenImageIO/spinlock_test.cpp +index 60c192b..64adbce 100644 +--- a/src/libOpenImageIO/spinlock_test.cpp ++++ b/src/libOpenImageIO/spinlock_test.cpp +@@ -50,7 +50,7 @@ + // accumulated value is equal to iterations*threads, then the spin locks + // worked. + +-static int iterations = 160000000; ++static int iterations = 40000000; + static int numthreads = 16; + static int ntrials = 1; + static bool verbose = false; +@@ -58,6 +58,7 @@ + + static spin_mutex print_mutex; // make the prints not clobber each other + volatile long long accum = 0; ++float faccum = 0; + spin_mutex mymutex; + + +@@ -71,10 +72,22 @@ + std::cout << "thread " << boost::this_thread::get_id() + << ", accum = " << accum << "\n"; + } ++#if 1 + for (int i = 0; i < iterations; ++i) { + spin_lock lock (mymutex); + accum += 1; + } ++#else ++ // Alternate one that mixes in some math to make longer lock hold time, ++ // and also more to do between locks. Interesting contrast in timings. ++ float last = 0.0f; ++ for (int i = 0; i < iterations; ++i) { ++ last = fmodf (sinf(last), 1.0f); ++ spin_lock lock (mymutex); ++ accum += 1; ++ faccum = fmod (sinf(faccum+last), 1.0f); ++ } ++#endif + } + + +@@ -134,16 +147,15 @@ int main (int argc, char *argv[]) + + static int threadcounts[] = { 1, 2, 4, 8, 12, 16, 20, 24, 28, 32, 64, 128, 1024, 1<<30 }; + for (int i = 0; threadcounts[i] <= numthreads; ++i) { +- int nt = threadcounts[i]; ++ int nt = wedge ? threadcounts[i] : numthreads; + int its = iterations/nt; + + double range; + double t = time_trial (boost::bind(test_spinlock,nt,its), + ntrials, &range); + +- std::cout << Strutil::format ("%2d\t%s\t%5.1fs, range %.1f\t(%d iters/thread)\n", +- nt, Strutil::timeintervalformat(t), +- t, range, its); ++ std::cout << Strutil::format ("%2d\t%5.1f range %.2f\t(%d iters/thread)\n", ++ nt, t, range, its); + if (! wedge) + break; // don't loop if we're not wedging + } +diff --git a/src/libtexture/imagecache_pvt.h b/src/libtexture/imagecache_pvt.h +index 5d29782..3a49616 100644 +--- a/src/libtexture/imagecache_pvt.h ++++ b/src/libtexture/imagecache_pvt.h +@@ -1003,7 +1003,7 @@ class ImageCacheImpl : public ImageCache { + newval = oldval + incr; + // Now try to atomically swap it, and repeat until we've + // done it with nobody else interfering. +-# if USE_TBB ++# if USE_TBB_ATOMIC + } while (llstat->compare_and_swap (*llnewval,*lloldval) != *lloldval); + # else + } while (llstat->bool_compare_and_swap (*llnewval,*lloldval)); +-- +1.8.1.6 + diff --git a/OpenImageIO-1.1.2-use_external_tbb.patch b/OpenImageIO-1.1.2-use_external_tbb.patch deleted file mode 100644 index d8cc28f..0000000 --- a/OpenImageIO-1.1.2-use_external_tbb.patch +++ /dev/null @@ -1,68 +0,0 @@ -diff -Naur oiio-Release-1.1.2.orig/src/CMakeLists.txt oiio-Release-1.1.2/src/CMakeLists.txt ---- oiio-Release-1.1.2.orig/src/CMakeLists.txt 2012-12-05 12:46:56.000000000 -0600 -+++ oiio-Release-1.1.2/src/CMakeLists.txt 2013-01-02 15:52:43.941560982 -0600 -@@ -83,6 +83,8 @@ - set (PYTHON_VERSION 2.6) - set (USE_EXTERNAL_PUGIXML OFF CACHE BOOL - "Use an externally built shared library version of the pugixml library") -+set (USE_EXTERNAL_TBB OFF CACHE BOOL -+ "Use system TBB library instead of bundled.") - - set (SOVERSION ${OIIO_VERSION_MAJOR}.${OIIO_VERSION_MINOR} - CACHE STRING "Set the SO version in the SO name of the output library") -diff -Naur oiio-Release-1.1.2.orig/src/include/CMakeLists.txt oiio-Release-1.1.2/src/include/CMakeLists.txt ---- oiio-Release-1.1.2.orig/src/include/CMakeLists.txt 2012-12-05 12:46:56.000000000 -0600 -+++ oiio-Release-1.1.2/src/include/CMakeLists.txt 2013-01-02 15:52:43.940561015 -0600 -@@ -22,7 +22,7 @@ - install (FILES ${public_headers} DESTINATION ${INCLUDE_INSTALL_DIR} - COMPONENT developer) - --if (USE_TBB) -+if (USE_TBB AND NOT USE_EXTERNAL_TBB) - install (DIRECTORY tbb DESTINATION ${INCLUDE_INSTALL_DIR} - COMPONENT developer) - endif () -diff -Naur oiio-Release-1.1.2.orig/src/libOpenImageIO/CMakeLists.txt oiio-Release-1.1.2/src/libOpenImageIO/CMakeLists.txt ---- oiio-Release-1.1.2.orig/src/libOpenImageIO/CMakeLists.txt 2012-12-05 12:46:56.000000000 -0600 -+++ oiio-Release-1.1.2/src/libOpenImageIO/CMakeLists.txt 2013-01-02 15:52:43.941560982 -0600 -@@ -62,7 +62,13 @@ - endif () - - # Include our own TBB if using it --if (USE_TBB) -+if (USE_TBB AND USE_EXTERNAL_TBB) -+ message (STATUS "System TBB library will be used.") -+ find_package (TBB REQUIRED) -+ include_directories (${TBB_INCLUDE_DIRS}) -+ set (libOpenImageIO_srcs ${libOpenImageIO_srcs}) -+elseif (USE_TBB AND NOT USE_EXTERNAL_TBB) -+ message (STATUS "Built-in TBB library will be used.") - set (libOpenImageIO_srcs ${libOpenImageIO_srcs} ../libutil/tbb_misc.cpp) - endif () - -@@ -202,7 +208,11 @@ - ${VISIBILITY_COMMAND} ${VISIBILITY_MAP_COMMAND} - ${Boost_LIBRARIES}) - -- -+# Link against system TBB library if specified -+if (USE_TBB AND USE_EXTERNAL_TBB) -+ message (STATUS "Linking TBB: ${TBB_LIBRARIES}") -+ target_link_libraries (OpenImageIO ${TBB_LIBRARIES}) -+endif () - - # Include OpenColorIO if using it - if (USE_OCIO AND OCIO_FOUND) -diff -Naur oiio-Release-1.1.2.orig/src/libutil/tbb_misc.cpp oiio-Release-1.1.2/src/libutil/tbb_misc.cpp ---- oiio-Release-1.1.2.orig/src/libutil/tbb_misc.cpp 2012-12-05 12:46:56.000000000 -0600 -+++ oiio-Release-1.1.2/src/libutil/tbb_misc.cpp 2013-01-02 15:53:10.403678615 -0600 -@@ -30,8 +30,7 @@ - // an executing program. - - #include "tbb/tbb_stddef.h" --// Out-of-line TBB assertion handling routines are instantiated here. --#include "tbb/tbb_assert_impl.h" -+#include "tbb/tbb_machine.h" - - #include "tbb/tbb_misc.h" - #include diff --git a/OpenImageIO-1.1.3-SHA1_undef_ref.patch b/OpenImageIO-1.1.3-SHA1_undef_ref.patch deleted file mode 100644 index 972895b..0000000 --- a/OpenImageIO-1.1.3-SHA1_undef_ref.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff -Naur oiio-Release-1.1.3.orig/src/libutil/SHA1.cpp oiio-Release-1.1.3/src/libutil/SHA1.cpp ---- oiio-Release-1.1.3.orig/src/libutil/SHA1.cpp 2013-01-09 19:13:37.000000000 -0600 -+++ oiio-Release-1.1.3/src/libutil/SHA1.cpp 2013-01-15 07:53:27.479132623 -0600 -@@ -8,9 +8,9 @@ - - // If compiling with MFC, you might want to add #include "StdAfx.h" - -+#include "SHA1.h" - #include "hash.h" - #include "dassert.h" --#include "SHA1.h" - - #ifdef SHA1_UTILITY_FUNCTIONS - #define SHA1_MAX_FILE_BUFFER 8000 diff --git a/OpenImageIO.spec b/OpenImageIO.spec index f7c5398..0de2c4a 100644 --- a/OpenImageIO.spec +++ b/OpenImageIO.spec @@ -1,28 +1,20 @@ -%global githash1 g0b78dec -%global githash2 0d48631 -%global githash3 9bf4356 - Name: OpenImageIO -Version: 1.1.3 -Release: 7%{?dist} +Version: 1.1.10 +Release: 1%{?dist} Summary: Library for reading and writing images Group: Development/Libraries License: BSD URL: https://sites.google.com/site/openimageio/home -#Source0: https://download.github.com/%{name}-oiio-Release-%{version}-0-%{githash1}.tar.gz Source0: https://download.github.com/oiio-Release-%{version}.tar.gz # Images for test suite -#Source1: %{name}-oiio-images-%{githash3}.tar.gz +Source1: oiio-images.tar.gz Source101: FindTBB.cmake -Patch0: OpenImageIO-1.1.2-use_external_tbb.patch -Patch2: OpenImageIO-ppc.patch -# https://github.com/OpenImageIO/oiio/issues/473 -Patch3: OpenImageIO-1.1.3-SHA1_undef_ref.patch -# https://github.com/The11ers/oiio/commit/010754d2a9b4b41f658a7752046c9217abaf98fc -Patch4: oiio-arm.patch +Patch0: OpenImageIO-ppc.patch +Patch1: 589.patch +Patch2: oiio-arm.patch BuildRequires: cmake txt2man BuildRequires: qt4-devel @@ -87,14 +79,12 @@ Development files for package %{name} %prep -#setup -q -n %{name}-oiio-%{githash2} %setup -q -n oiio-Release-%{version} -%patch0 -p1 -b .exttbb -%ifarch ppc %{power64} -%patch2 -p1 -b .ppc +%ifarch ppc ppc64 +%patch0 -p1 -b .ppc %endif -%patch3 -p1 -b .sha1 -%patch4 -p1 -b .arm +%patch1 -p1 -b .spinlocks +%patch2 -p1 -b .arm # Install FindTBB.cmake install %{SOURCE101} src/cmake/modules/ @@ -122,13 +112,8 @@ rm -rf build/linux && mkdir -p build/linux && pushd build/linux -DPYLIB_INSTALL_DIR:PATH=%{python_sitearch} \ -DINSTALL_DOCS:BOOL=FALSE \ -DUSE_EXTERNAL_PUGIXML:BOOL=TRUE \ -%ifarch x86_64 - -DUSE_TBB:BOOL=TRUE \ - -DUSE_EXTERNAL_TBB=TRUE \ -%else -DUSE_TBB:BOOL=FALSE \ -%endif -%ifarch ppc %{power64} +%ifarch ppc ppc64 -DNOTHREADS:BOOL=TRUE \ %endif ../../src @@ -150,6 +135,7 @@ cp -a doc/*.1 %{buildroot}%{_mandir}/man1 %check +# Not all tests pass on linux #pushd build/linux && make test @@ -175,6 +161,9 @@ cp -a doc/*.1 %{buildroot}%{_mandir}/man1 %changelog +* Tue Apr 23 2013 Richard Shaw - 1.1.10-1 +- Update to latest upstream release. + * Sun Mar 31 2013 Peter Robinson 1.1.3-7 - Add upstream patch to fix FTBFS on ARM (RHBZ 924932)