From 83e5c415dab11f7932e4ea68b4ac8843448db45d Mon Sep 17 00:00:00 2001 From: Carlos O'Donell Date: Mon, 17 Oct 2016 22:19:05 -0400 Subject: [PATCH] glibc-2.24.90-11 - Add prototype support for detecting invalid IFUNC calls (swbz#20019). - New POSIX thread condition variable implementation (swbz#13165). --- glibc-swbz13165.patch | 6998 +++++++++++++++++++++++++++++++++++++++++ glibc-swbz20019.patch | 33 + glibc.spec | 15 +- 3 files changed, 7045 insertions(+), 1 deletion(-) create mode 100644 glibc-swbz13165.patch create mode 100644 glibc-swbz20019.patch diff --git a/glibc-swbz13165.patch b/glibc-swbz13165.patch new file mode 100644 index 0000000..7443e85 --- /dev/null +++ b/glibc-swbz13165.patch @@ -0,0 +1,6998 @@ +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: New condvar implementation that provides stronger ordering guarantees. +From: Torvald Riegel +X-Patchwork-Id: 13085 +Message-Id: <1465937639.31784.7.camel@localhost.localdomain> +To: GLIBC Devel +Cc: "Carlos O'Donell" , David Miller , + Darren Hart +Date: Tue, 14 Jun 2016 22:53:59 +0200 + +I've now tested this patch successfully using our existing tests on ppc, +ppc64, ppc64le, s390x, and aarch64. I couldn't test on s390 due to +https://www.sourceware.org/ml/libc-alpha/2016-06/msg00545.html. + +The attached patch is a minor revision that just fixes some formatting +and adds an include (revealing by testing on s390x). + +I think this patch is ready for wider testing, for example in Rawhide or +perhaps even by committing it on trunk. +Some more testing of the overflow case in the atomic monotonic 64b +counters we use on archs that just have 32b atomic ops (see +pthread_cond_common.c) would be good; I plan to do this later, as it +requires creating new tests and doesn't affect the core condvar +algorithm really. 
+ +I'd still appreciate further reviews, even if this is just about whether +the algorithm is accessible enough and the comments do a good-enough job +of explaining it. + +Thoughts? + +On Thu, 2016-05-26 at 15:21 +0200, Torvald Riegel wrote: +> This replaces the current condvar with a new algorithm that's different +> than the one I proposed last year. I wasn't able to fix the +> futex-spurious-wake-up-related issue in last year's algorithm without +> the risk of decreasing performance significantly. +> +> I would appreciate testing on weak memory model architectures such as +> ARM and POWER. I have tested on x86 and x86_64. +> I would of course also appreciate more sets of eyes on the algorithm. +> I'm not aware of any issues, but reviews never hurt; even if you just +> would like to see more detailed comments, please let me know. +> +> Once we have that, testing in Rawhide should be the next step. We still +> have a month until the feature freeze deadline, so I'd like to get this +> done this cycle if possible. +> +> pre-v9 sparc and hppa are currently broken by this patch. +> For sparc, I'd like to get feedback by sparc maintainers regarding how +> they would like to deal with the lack of proper atomics support in +> pre-v9 sparc; it's a recurring theme, so maybe we should tackle that in +> a more foundational way than just creating custom pre-v9 sparc for all +> synchronization algorithms. +> Regarding hppa, I might leave it to the hppa maintainer to use the new +> condvar (it has to support old LinuxThreads initializers). Or I'll do +> it later if he tells me I should do it myself ;) +> +> This patch doesn't do anything in terms of support for PI and real time. +> I believe we'd need a significantly adapted implementation (and perhaps +> algorithm) to support it; for example, we're short on space in +> pthread_cond_t, so even sticking in a PI mutex as replacement for the +> currently used condvar-internal lock would require jumping through +> hoops.
+> +> +> 2016-05-26 Torvald Riegel +> +> [BZ #13165] +> * nptl/pthread_cond_broadcast.c (__pthread_cond_broadcast): Rewrite to +> use new algorithm. +> * nptl/pthread_cond_destroy.c (__pthread_cond_destroy): Likewise. +> * nptl/pthread_cond_init.c (__pthread_cond_init): Likewise. +> * nptl/pthread_cond_signal.c (__pthread_cond_signal): Likewise. +> * nptl/pthread_cond_wait.c (__pthread_cond_wait): Likewise. +> (__pthread_cond_timedwait): Move here from pthread_cond_timedwait.c. +> (__condvar_confirm_wakeup, __condvar_cancel_waiting, +> __condvar_cleanup_waiting, __condvar_dec_grefs, +> __pthread_cond_wait_common): New. +> (__condvar_cleanup): Remove. +> * nptl/pthread_condattr_getclock.c (pthread_condattr_getclock): Adapt. +> * nptl/pthread_condattr_setclock.c (pthread_condattr_setclock): +> Likewise. +> * nptl/pthread_condattr_getpshared.c (pthread_condattr_getpshared): +> Likewise. +> * nptl/pthread_condattr_init.c (pthread_condattr_init): Likewise. +> * nptl/tst-cond1.c: Add comment. +> * nptl/tst-cond20.c (do_test): Adapt. +> * nptl/tst-cond22.c (do_test): Likewise. +> * sysdeps/aarch64/nptl/bits/pthreadtypes.h (pthread_cond_t): Adapt +> structure. +> * sysdeps/arm/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/ia64/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/m68k/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/microblaze/nptl/bits/pthreadtypes.h (pthread_cond_t): +> Likewise. +> * sysdeps/mips/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/nios2/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/s390/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/sh/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/tile/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h (pthread_cond_t): +> Likewise. +> * sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h (pthread_cond_t): +> Likewise.
+> * sysdeps/x86/bits/pthreadtypes.h (pthread_cond_t): Likewise. +> * sysdeps/nptl/internaltypes.h (COND_NWAITERS_SHIFT): Remove. +> (COND_CLOCK_BITS): Adapt. +> * sysdeps/nptl/pthread.h (PTHREAD_COND_INITIALIZER): Adapt. +> * sysdeps/unix/sysv/linux/hppa/internaltypes.h (cond_compat_clear, +> cond_compat_check_and_clear): Adapt. +> * sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c: Remove file ... +> * sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c +> (__pthread_cond_timedwait): ... and move here. +> * nptl/DESIGN-condvar.txt: Remove file. +> * nptl/lowlevelcond.sym: Likewise. +> * nptl/pthread_cond_timedwait.c: Likewise. +> * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_broadcast.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_signal.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_timedwait.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_wait.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_broadcast.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_signal.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S: Likewise. +> * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_wait.S: Likewise. +> * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise. +> * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise. +> * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise. +> * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise. + + +commit 1846a30fb9728dbc22730e56a9eaa3a996a4ef08 +Author: Torvald Riegel +Date: Wed May 25 23:43:36 2016 +0200 + + New condvar implementation that provides stronger ordering guarantees. 
+ + This is a new implementation for condition variables, required + after http://austingroupbugs.net/view.php?id=609 to fix bug 13165. In + essence, we need to be stricter in which waiters a signal or broadcast + is required to wake up; this couldn't be solved using the old algorithm. + ISO C++ made a similar clarification, so this also fixes a bug in + current libstdc++, for example. + + We can't use the old algorithm anymore because futexes do not guarantee + to wake in FIFO order. Thus, when we wake, we can't simply let any + waiter grab a signal, but we need to ensure that one of the waiters + happening before the signal is woken up. This is something the previous + algorithm violated (see bug 13165). + + There's another issue specific to condvars: ABA issues on the underlying + futexes. Unlike mutexes that have just three states, or semaphores that + have no tokens or a limited number of them, the state of a condvar is + the *order* of the waiters. A waiter on a semaphore can grab a token + whenever one is available; a condvar waiter must only consume a signal + if it is eligible to do so as determined by the relative order of the + waiter and the signal. + Therefore, this new algorithm maintains two groups of waiters: Those + eligible to consume signals (G1), and those that have to wait until + previous waiters have consumed signals (G2). Once G1 is empty, G2 + becomes the new G1. 64b counters are used to avoid ABA issues. + + This condvar doesn't yet use a requeue optimization (ie, on a broadcast, + waking just one thread and requeueing all others on the futex of the + mutex supplied by the program). I don't think doing the requeue is + necessarily the right approach (but I haven't done real measurements + yet): + * If a program expects to wake many threads at the same time and make + that scalable, a condvar isn't great anyway because of how it requires + waiters to operate mutually exclusive (due to the mutex usage). 
Thus, a + thundering herd problem is a scalability problem with or without the + optimization. Using something like a semaphore might be more + appropriate in such a case. + * The scalability problem is actually at the mutex side; the condvar + could help (and it tries to with the requeue optimization), but it + should be the mutex that decides how that is done, and whether it is done + at all. + * Forcing all but one waiter into the kernel-side wait queue of the + mutex prevents/avoids the use of lock elision on the mutex. Thus, it + prevents the only cure against the underlying scalability problem + inherent to condvars. + * If condvars use short critical sections (ie, hold the mutex just to + check a binary flag or such), which they should do ideally, then forcing + all those waiters to proceed serially with kernel-based hand-off (ie, + futex ops in the mutex' contended state, via the futex wait queues) will + be less efficient than just letting a scalable mutex implementation take + care of it. Our current mutex impl doesn't employ spinning at all, but + if critical sections are short, spinning can be much better. + * Doing the requeue stuff requires all waiters to always drive the mutex + into the contended state. This leads to each waiter having to call + futex_wake after lock release, even if this wouldn't be necessary. + + [BZ #13165] + * nptl/pthread_cond_broadcast.c (__pthread_cond_broadcast): Rewrite to + use new algorithm. + * nptl/pthread_cond_destroy.c (__pthread_cond_destroy): Likewise. + * nptl/pthread_cond_init.c (__pthread_cond_init): Likewise. + * nptl/pthread_cond_signal.c (__pthread_cond_signal): Likewise. + * nptl/pthread_cond_wait.c (__pthread_cond_wait): Likewise. + (__pthread_cond_timedwait): Move here from pthread_cond_timedwait.c. + (__condvar_confirm_wakeup, __condvar_cancel_waiting, + __condvar_cleanup_waiting, __condvar_dec_grefs, + __pthread_cond_wait_common): New. + (__condvar_cleanup): Remove.
+ * nptl/pthread_condattr_getclock.c (pthread_condattr_getclock): Adapt. + * nptl/pthread_condattr_setclock.c (pthread_condattr_setclock): + Likewise. + * nptl/pthread_condattr_getpshared.c (pthread_condattr_getpshared): + Likewise. + * nptl/pthread_condattr_init.c (pthread_condattr_init): Likewise. + * nptl/tst-cond1.c: Add comment. + * nptl/tst-cond20.c (do_test): Adapt. + * nptl/tst-cond22.c (do_test): Likewise. + * sysdeps/aarch64/nptl/bits/pthreadtypes.h (pthread_cond_t): Adapt + structure. + * sysdeps/arm/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/ia64/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/m68k/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/microblaze/nptl/bits/pthreadtypes.h (pthread_cond_t): + Likewise. + * sysdeps/mips/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/nios2/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/s390/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/sh/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/tile/nptl/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h (pthread_cond_t): + Likewise. + * sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h (pthread_cond_t): + Likewise. + * sysdeps/x86/bits/pthreadtypes.h (pthread_cond_t): Likewise. + * sysdeps/nptl/internaltypes.h (COND_NWAITERS_SHIFT): Remove. + (COND_CLOCK_BITS): Adapt. + * sysdeps/nptl/pthread.h (PTHREAD_COND_INITIALIZER): Adapt. + * sysdeps/unix/sysv/linux/hppa/internaltypes.h (cond_compat_clear, + cond_compat_check_and_clear): Adapt. + * sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c: Remove file ... + * sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c + (__pthread_cond_timedwait): ... and move here. + * nptl/DESIGN-condvar.txt: Remove file. + * nptl/lowlevelcond.sym: Likewise. + * nptl/pthread_cond_timedwait.c: Likewise.
+ * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_broadcast.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_broadcast.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_timedwait.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i586/pthread_cond_wait.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_broadcast.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S: Likewise. + * sysdeps/unix/sysv/linux/i386/i686/pthread_cond_wait.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S: Likewise. + * sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: Likewise. + +Index: glibc-2.24-256-g5140d03/nptl/DESIGN-condvar.txt +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/DESIGN-condvar.txt ++++ /dev/null +@@ -1,134 +0,0 @@ +-Conditional Variable pseudocode. +-================================ +- +- int pthread_cond_timedwait (pthread_cond_t *cv, pthread_mutex_t *mutex); +- int pthread_cond_signal (pthread_cond_t *cv); +- int pthread_cond_broadcast (pthread_cond_t *cv); +- +-struct pthread_cond_t { +- +- unsigned int cond_lock; +- +- internal mutex +- +- uint64_t total_seq; +- +- Total number of threads using the conditional variable. +- +- uint64_t wakeup_seq; +- +- sequence number for next wakeup. +- +- uint64_t woken_seq; +- +- sequence number of last woken thread. 
+- +- uint32_t broadcast_seq; +- +-} +- +- +-struct cv_data { +- +- pthread_cond_t *cv; +- +- uint32_t bc_seq +- +-} +- +- +- +-cleanup_handler(cv_data) +-{ +- cv = cv_data->cv; +- lll_lock(cv->lock); +- +- if (cv_data->bc_seq == cv->broadcast_seq) { +- ++cv->wakeup_seq; +- ++cv->woken_seq; +- } +- +- /* make sure no signal gets lost. */ +- FUTEX_WAKE(cv->wakeup_seq, ALL); +- +- lll_unlock(cv->lock); +-} +- +- +-cond_timedwait(cv, mutex, timeout): +-{ +- lll_lock(cv->lock); +- mutex_unlock(mutex); +- +- cleanup_push +- +- ++cv->total_seq; +- val = seq = cv->wakeup_seq; +- cv_data.bc = cv->broadcast_seq; +- cv_data.cv = cv; +- +- while (1) { +- +- lll_unlock(cv->lock); +- +- enable_async(&cv_data); +- +- ret = FUTEX_WAIT(cv->wakeup_seq, val, timeout); +- +- restore_async +- +- lll_lock(cv->lock); +- +- if (bc != cv->broadcast_seq) +- goto bc_out; +- +- val = cv->wakeup_seq; +- +- if (val != seq && cv->woken_seq != val) { +- ret = 0; +- break; +- } +- +- if (ret == TIMEDOUT) { +- ++cv->wakeup_seq; +- break; +- } +- } +- +- ++cv->woken_seq; +- +- bc_out: +- lll_unlock(cv->lock); +- +- cleanup_pop +- +- mutex_lock(mutex); +- +- return ret; +-} +- +-cond_signal(cv) +-{ +- lll_lock(cv->lock); +- +- if (cv->total_seq > cv->wakeup_seq) { +- ++cv->wakeup_seq; +- FUTEX_WAKE(cv->wakeup_seq, 1); +- } +- +- lll_unlock(cv->lock); +-} +- +-cond_broadcast(cv) +-{ +- lll_lock(cv->lock); +- +- if (cv->total_seq > cv->wakeup_seq) { +- cv->wakeup_seq = cv->total_seq; +- cv->woken_seq = cv->total_seq; +- ++cv->broadcast_seq; +- FUTEX_WAKE(cv->wakeup_seq, ALL); +- } +- +- lll_unlock(cv->lock); +-} +Index: glibc-2.24-256-g5140d03/nptl/Makefile +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/Makefile ++++ glibc-2.24-256-g5140d03/nptl/Makefile +@@ -84,7 +84,7 @@ libpthread-routines = nptl-init vars eve + pthread_rwlockattr_getkind_np \ + pthread_rwlockattr_setkind_np \ + pthread_cond_init pthread_cond_destroy \ +- 
pthread_cond_wait pthread_cond_timedwait \ ++ pthread_cond_wait \ + pthread_cond_signal pthread_cond_broadcast \ + old_pthread_cond_init old_pthread_cond_destroy \ + old_pthread_cond_wait old_pthread_cond_timedwait \ +@@ -187,7 +187,6 @@ CFLAGS-pthread_timedjoin.c = -fexception + CFLAGS-pthread_once.c = $(uses-callbacks) -fexceptions \ + -fasynchronous-unwind-tables + CFLAGS-pthread_cond_wait.c = -fexceptions -fasynchronous-unwind-tables +-CFLAGS-pthread_cond_timedwait.c = -fexceptions -fasynchronous-unwind-tables + CFLAGS-sem_wait.c = -fexceptions -fasynchronous-unwind-tables + CFLAGS-sem_timedwait.c = -fexceptions -fasynchronous-unwind-tables + +@@ -308,8 +307,7 @@ test-xfail-tst-once5 = yes + # Files which must not be linked with libpthread. + tests-nolibpthread = tst-unload + +-gen-as-const-headers = pthread-errnos.sym \ +- lowlevelcond.sym lowlevelrwlock.sym \ ++gen-as-const-headers = pthread-errnos.sym lowlevelrwlock.sym \ + unwindbuf.sym \ + lowlevelrobustlock.sym pthread-pi-defines.sym + +Index: glibc-2.24-256-g5140d03/nptl/lowlevelcond.sym +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/lowlevelcond.sym ++++ /dev/null +@@ -1,16 +0,0 @@ +-#include +-#include +-#include +-#include +- +--- +- +-cond_lock offsetof (pthread_cond_t, __data.__lock) +-cond_futex offsetof (pthread_cond_t, __data.__futex) +-cond_nwaiters offsetof (pthread_cond_t, __data.__nwaiters) +-total_seq offsetof (pthread_cond_t, __data.__total_seq) +-wakeup_seq offsetof (pthread_cond_t, __data.__wakeup_seq) +-woken_seq offsetof (pthread_cond_t, __data.__woken_seq) +-dep_mutex offsetof (pthread_cond_t, __data.__mutex) +-broadcast_seq offsetof (pthread_cond_t, __data.__broadcast_seq) +-nwaiters_shift COND_NWAITERS_SHIFT +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_broadcast.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_broadcast.c ++++ 
glibc-2.24-256-g5140d03/nptl/pthread_cond_broadcast.c +@@ -19,72 +19,71 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include ++#include + + #include +-#include + ++#include "pthread_cond_common.c" + ++ ++/* We do the following steps from __pthread_cond_signal in one critical ++ section: (1) signal all waiters in G1, (2) close G1 so that it can become ++ the new G2 and make G2 the new G1, and (3) signal all waiters in the new ++ G1. We don't need to do all these steps if there are no waiters in G1 ++ and/or G2. See __pthread_cond_signal for further details. */ + int + __pthread_cond_broadcast (pthread_cond_t *cond) + { + LIBC_PROBE (cond_broadcast, 1, cond); + +- int pshared = (cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- /* Make sure we are alone. */ +- lll_lock (cond->__data.__lock, pshared); ++ unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs); ++ if (wrefs >> 3 == 0) ++ return 0; ++ int private = __condvar_get_private (wrefs); ++ ++ __condvar_acquire_lock (cond, private); ++ ++ unsigned long long int wseq = __condvar_load_wseq_relaxed (cond); ++ unsigned int g2 = wseq & 1; ++ unsigned int g1 = g2 ^ 1; ++ wseq >>= 1; ++ bool do_futex_wake = false; + +- /* Are there any waiters to be woken? */ +- if (cond->__data.__total_seq > cond->__data.__wakeup_seq) ++ /* Step (1): signal all waiters remaining in G1. */ ++ if (cond->__data.__g_size[g1] != 0) + { +- /* Yes. Mark them all as woken. */ +- cond->__data.__wakeup_seq = cond->__data.__total_seq; +- cond->__data.__woken_seq = cond->__data.__total_seq; +- cond->__data.__futex = (unsigned int) cond->__data.__total_seq * 2; +- int futex_val = cond->__data.__futex; +- /* Signal that a broadcast happened. */ +- ++cond->__data.__broadcast_seq; +- +- /* We are done. */ +- lll_unlock (cond->__data.__lock, pshared); +- +- /* Wake everybody. 
*/ +- pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex; +- +- /* Do not use requeue for pshared condvars. */ +- if (mut == (void *) ~0l +- || PTHREAD_MUTEX_PSHARED (mut) & PTHREAD_MUTEX_PSHARED_BIT) +- goto wake_all; +- +-#if (defined lll_futex_cmp_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- if (USE_REQUEUE_PI (mut)) +- { +- if (lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, INT_MAX, +- &mut->__data.__lock, futex_val, +- LLL_PRIVATE) == 0) +- return 0; +- } +- else +-#endif +- /* lll_futex_requeue returns 0 for success and non-zero +- for errors. */ +- if (!__builtin_expect (lll_futex_requeue (&cond->__data.__futex, 1, +- INT_MAX, &mut->__data.__lock, +- futex_val, LLL_PRIVATE), 0)) +- return 0; +- +-wake_all: +- lll_futex_wake (&cond->__data.__futex, INT_MAX, pshared); +- return 0; ++ /* Add as many signals as the remaining size of the group. */ ++ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, ++ cond->__data.__g_size[g1] << 1); ++ cond->__data.__g_size[g1] = 0; ++ ++ /* We need to wake G1 waiters before we quiesce G1 below. */ ++ /* TODO Only set it if there are indeed futex waiters. We could ++ also try to move this out of the critical section in cases when ++ G2 is empty (and we don't need to quiesce). */ ++ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); + } + +- /* We are done. */ +- lll_unlock (cond->__data.__lock, pshared); ++ /* G1 is complete. Step (2) is next unless there are no waiters in G2, in ++ which case we can stop. */ ++ if (__condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) ++ { ++ /* Step (3): Send signals to all waiters in the old G2 / new G1. */ ++ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, ++ cond->__data.__g_size[g1] << 1); ++ cond->__data.__g_size[g1] = 0; ++ /* TODO Only set it if there are indeed futex waiters. 
*/ ++ do_futex_wake = true; ++ } ++ ++ __condvar_release_lock (cond, private); ++ ++ if (do_futex_wake) ++ futex_wake (cond->__data.__g_signals + g1, INT_MAX, private); + + return 0; + } +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_common.c +=================================================================== +--- /dev/null ++++ glibc-2.24-256-g5140d03/nptl/pthread_cond_common.c +@@ -0,0 +1,466 @@ ++/* pthread_cond_common -- shared code for condition variable. ++ Copyright (C) 2016 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++/* We need 3 least-significant bits on __wrefs for something else. 
*/ ++#define __PTHREAD_COND_MAX_GROUP_SIZE ((unsigned) 1 << 29) ++ ++#if __HAVE_64B_ATOMICS == 1 ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_load_wseq_relaxed (pthread_cond_t *cond) ++{ ++ return atomic_load_relaxed (&cond->__data.__wseq); ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val) ++{ ++ return atomic_fetch_add_acquire (&cond->__data.__wseq, val); ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) ++{ ++ return atomic_fetch_xor_release (&cond->__data.__wseq, val); ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_load_g1_start_relaxed (pthread_cond_t *cond) ++{ ++ return atomic_load_relaxed (&cond->__data.__g1_start); ++} ++ ++static void __attribute__ ((unused)) ++__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) ++{ ++ atomic_store_relaxed (&cond->__data.__g1_start, ++ atomic_load_relaxed (&cond->__data.__g1_start) + val); ++} ++ ++#else ++ ++/* We use two 64b counters: __wseq and __g1_start. They are monotonically ++ increasing and single-writer-multiple-readers counters, so we can implement ++ load, fetch-and-add, and fetch-and-xor operations even when we just have ++ 32b atomics. Values we add or xor are less than or equal to 1<<31 (*), ++ so we only have to make overflow-and-addition atomic wrt. to concurrent ++ load operations and xor operations. To do that, we split each counter into ++ two 32b values of which we reserve the MSB of each to represent an ++ overflow from the lower-order half to the higher-order half. ++ ++ In the common case, the state is (higher-order / lower-order half, and . 
is ++ basically concatenation of the bits): ++ 0.h / 0.l = h.l ++ ++ When we add a value of x that overflows (i.e., 0.l + x == 1.L), we run the ++ following steps S1-S4 (the values these represent are on the right-hand ++ side): ++ S1: 0.h / 1.L == (h+1).L ++ S2: 1.(h+1) / 1.L == (h+1).L ++ S3: 1.(h+1) / 0.L == (h+1).L ++ S4: 0.(h+1) / 0.L == (h+1).L ++ If the MSB of the higher-order half is set, readers will ignore the ++ overflow bit in the lower-order half. ++ ++ To get an atomic snapshot in load operations, we exploit that the ++ higher-order half is monotonically increasing; if we load a value V from ++ it, then read the lower-order half, and then read the higher-order half ++ again and see the same value V, we know that both halves have existed in ++ the sequence of values the full counter had. This is similar to the ++ validated reads in the time-based STMs in GCC's libitm (e.g., ++ method_ml_wt). ++ ++ The xor operation needs to be an atomic read-modify-write. The write ++ itself is not an issue as it affects just the lower-order half but not bits ++ used in the add operation. To make the full fetch-and-xor atomic, we ++ exploit that concurrently, the value can increase by at most 1<<31 (*): The ++ xor operation is only called while having acquired the lock, so not more ++ than __PTHREAD_COND_MAX_GROUP_SIZE waiters can enter concurrently and thus ++ increment __wseq. Therefore, if the xor operation observes a value of ++ __wseq, then the value it applies the modification to later on can be ++ derived (see below). ++ ++ One benefit of this scheme is that this makes load operations ++ obstruction-free because unlike if we would just lock the counter, readers ++ can almost always interpret a snapshot of each half. Readers can be ++ forced to read a new snapshot when the read is concurrent with an overflow. ++ However, overflows will happen infrequently, so load operations are ++ practically lock-free.
++ ++ (*) The highest value we add is __PTHREAD_COND_MAX_GROUP_SIZE << 2 to ++ __g1_start (the two extra bits are for the lock in the two LSBs of ++ __g1_start). */ ++ ++typedef struct ++{ ++ unsigned int low; ++ unsigned int high; ++} _condvar_lohi; ++ ++static uint64_t ++__condvar_fetch_add_64_relaxed (_condvar_lohi *lh, unsigned int op) ++{ ++ /* S1. Note that this is an atomic read-modify-write so it extends the ++ release sequence of release MO store at S3. */ ++ unsigned int l = atomic_fetch_add_relaxed (&lh->low, op); ++ unsigned int h = atomic_load_relaxed (&lh->high); ++ uint64_t result = ((uint64_t) h << 31) | l; ++ l += op; ++ if ((l >> 31) > 0) ++ { ++ /* Overflow. Need to increment higher-order half. Note that all ++ add operations are ordered in happens-before. */ ++ h++; ++ /* S2. Release MO to synchronize with the loads of the higher-order half ++ in the load operation. See __condvar_load_64_relaxed. */ ++ atomic_store_release (&lh->high, h | ((unsigned int) 1 << 31)); ++ l ^= (unsigned int) 1 << 31; ++ /* S3. See __condvar_load_64_relaxed. */ ++ atomic_store_release (&lh->low, l); ++ /* S4. Likewise. */ ++ atomic_store_release (&lh->high, h); ++ } ++ return result; ++} ++ ++static uint64_t ++__condvar_load_64_relaxed (_condvar_lohi *lh) ++{ ++ unsigned int h, l, h2; ++ do ++ { ++ /* This load and the second one below to the same location read from the ++ stores in the overflow handling of the add operation or the ++ initializing stores (which is a simple special case because ++ initialization always completely happens before further use). ++ Because no two stores to the higher-order half write the same value, ++ the loop ensures that if we continue to use the snapshot, this load ++ and the second one read from the same store operation. All candidate ++ store operations have release MO. 
++ If we read from S2 in the first load, then we will see the value of ++ S1 on the next load (because we synchronize with S2), or a value ++ later in modification order. We correctly ignore the lower-half's ++ overflow bit in this case. If we read from S4, then we will see the ++ value of S3 in the next load (or a later value), which does not have ++ the overflow bit set anymore. ++ */ ++ h = atomic_load_acquire (&lh->high); ++ /* This will read from the release sequence of S3 (i.e, either the S3 ++ store or the read-modify-writes at S1 following S3 in modification ++ order). Thus, the read synchronizes with S3, and the following load ++ of the higher-order half will read from the matching S2 (or a later ++ value). ++ Thus, if we read a lower-half value here that already overflowed and ++ belongs to an increased higher-order half value, we will see the ++ latter and h and h2 will not be equal. */ ++ l = atomic_load_acquire (&lh->low); ++ /* See above. */ ++ h2 = atomic_load_relaxed (&lh->high); ++ } ++ while (h != h2); ++ if (((l >> 31) > 0) && ((h >> 31) > 0)) ++ l ^= (unsigned int) 1 << 31; ++ return ((uint64_t) (h & ~((unsigned int) 1 << 31)) << 31) + l; ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_load_wseq_relaxed (pthread_cond_t *cond) ++{ ++ return __condvar_load_64_relaxed ((_condvar_lohi *) &cond->__data.__wseq32); ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_fetch_add_wseq_acquire (pthread_cond_t *cond, unsigned int val) ++{ ++ uint64_t r = __condvar_fetch_add_64_relaxed ++ ((_condvar_lohi *) &cond->__data.__wseq32, val); ++ atomic_thread_fence_acquire (); ++ return r; ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_fetch_xor_wseq_release (pthread_cond_t *cond, unsigned int val) ++{ ++ _condvar_lohi *lh = (_condvar_lohi *) &cond->__data.__wseq32; ++ /* First, get the current value. See __condvar_load_64_relaxed. 
*/ ++ unsigned int h, l, h2; ++ do ++ { ++ h = atomic_load_acquire (&lh->high); ++ l = atomic_load_acquire (&lh->low); ++ h2 = atomic_load_relaxed (&lh->high); ++ } ++ while (h != h2); ++ if (((l >> 31) > 0) && ((h >> 31) == 0)) ++ h++; ++ h &= ~((unsigned int) 1 << 31); ++ l &= ~((unsigned int) 1 << 31); ++ ++ /* Now modify. Due to the coherence rules, the prior load will read a value ++ earlier in modification order than the following fetch-xor. ++ This uses release MO to make the full operation have release semantics ++ (all other operations access the lower-order half). */ ++ unsigned int l2 = atomic_fetch_xor_release (&lh->low, val) ++ & ~((unsigned int) 1 << 31); ++ if (l2 < l) ++ /* The lower-order half overflowed in the meantime. This happened exactly ++ once due to the limit on concurrent waiters (see above). */ ++ h++; ++ return ((uint64_t) h << 31) + l2; ++} ++ ++static uint64_t __attribute__ ((unused)) ++__condvar_load_g1_start_relaxed (pthread_cond_t *cond) ++{ ++ return __condvar_load_64_relaxed ++ ((_condvar_lohi *) &cond->__data.__g1_start32); ++} ++ ++static void __attribute__ ((unused)) ++__condvar_add_g1_start_relaxed (pthread_cond_t *cond, unsigned int val) ++{ ++ ignore_value (__condvar_fetch_add_64_relaxed ++ ((_condvar_lohi *) &cond->__data.__g1_start32, val)); ++} ++ ++#endif /* !__HAVE_64B_ATOMICS */ ++ ++ ++/* The lock that signalers use. See pthread_cond_wait_common for uses. ++ The lock is our normal three-state lock: not acquired (0) / acquired (1) / ++ acquired-with-futex_wake-request (2). However, we need to preserve the ++ other bits in the unsigned int used for the lock, and therefore it is a ++ little more complex. 
*/ ++static void __attribute__ ((unused)) ++__condvar_acquire_lock (pthread_cond_t *cond, int private) ++{ ++ unsigned int s = atomic_load_relaxed (&cond->__data.__g1_orig_size); ++ while ((s & 3) == 0) ++ { ++ if (atomic_compare_exchange_weak_acquire (&cond->__data.__g1_orig_size, ++ &s, s | 1)) ++ return; ++ /* TODO Spinning and back-off. */ ++ } ++ /* We can't change from not acquired to acquired, so try to change to ++ acquired-with-futex-wake-request and do a futex wait if we cannot change ++ from not acquired. */ ++ while (1) ++ { ++ while ((s & 3) != 2) ++ { ++ if (atomic_compare_exchange_weak_acquire ++ (&cond->__data.__g1_orig_size, &s, (s & ~(unsigned int) 3) | 2)) ++ { ++ if ((s & 3) == 0) ++ return; ++ break; ++ } ++ /* TODO Back off. */ ++ } ++ futex_wait_simple (&cond->__data.__g1_orig_size, ++ (s & ~(unsigned int) 3) | 2, private); ++ /* Reload so we see a recent value. */ ++ s = atomic_load_relaxed (&cond->__data.__g1_orig_size); ++ } ++} ++ ++/* See __condvar_acquire_lock. */ ++static void __attribute__ ((unused)) ++__condvar_release_lock (pthread_cond_t *cond, int private) ++{ ++ if ((atomic_fetch_and_release (&cond->__data.__g1_orig_size, ++ ~(unsigned int) 3) & 3) ++ == 2) ++ futex_wake (&cond->__data.__g1_orig_size, 1, private); ++} ++ ++/* Only use this when having acquired the lock. */ ++static unsigned int __attribute__ ((unused)) ++__condvar_get_orig_size (pthread_cond_t *cond) ++{ ++ return atomic_load_relaxed (&cond->__data.__g1_orig_size) >> 2; ++} ++ ++/* Only use this when having acquired the lock. */ ++static void __attribute__ ((unused)) ++__condvar_set_orig_size (pthread_cond_t *cond, unsigned int size) ++{ ++ /* We have acquired the lock, but might get one concurrent update due to a ++ lock state change from acquired to acquired-with-futex_wake-request. ++ The store with relaxed MO is fine because there will be no further ++ changes to the lock bits nor the size, and we will subsequently release ++ the lock with release MO. 
*/ ++ unsigned int s; ++ s = (atomic_load_relaxed (&cond->__data.__g1_orig_size) & 3) ++ | (size << 2); ++ if ((atomic_exchange_relaxed (&cond->__data.__g1_orig_size, s) & 3) ++ != (s & 3)) ++ atomic_store_relaxed (&cond->__data.__g1_orig_size, (size << 2) | 2); ++} ++ ++/* Returns FUTEX_SHARED or FUTEX_PRIVATE based on the provided __wrefs ++ value. */ ++static int __attribute__ ((unused)) ++__condvar_get_private (int flags) ++{ ++ if ((flags & __PTHREAD_COND_SHARED_MASK) == 0) ++ return FUTEX_PRIVATE; ++ else ++ return FUTEX_SHARED; ++} ++ ++/* This closes G1 (whose index is in G1INDEX), waits for all futex waiters to ++ leave G1, converts G1 into a fresh G2, and then switches group roles so that ++ the former G2 becomes the new G1 ending at the current __wseq value when we ++ eventually make the switch (WSEQ is just an observation of __wseq by the ++ signaler). ++ If G2 is empty, it will not switch groups because then it would create an ++ empty G1 which would require switching groups again on the next signal. ++ Returns false iff groups were not switched because G2 was empty. */ ++static bool __attribute__ ((unused)) ++__condvar_quiesce_and_switch_g1 (pthread_cond_t *cond, uint64_t wseq, ++ unsigned int *g1index, int private) ++{ ++ const unsigned int maxspin = 0; ++ unsigned int g1 = *g1index; ++ ++ /* If there is no waiter in G2, we don't do anything. The expression may ++ look odd but remember that __g_size might hold a negative value, so ++ putting the expression this way avoids relying on implementation-defined ++ behavior. ++ Note that this works correctly for a zero-initialized condvar too. */ ++ unsigned int old_orig_size = __condvar_get_orig_size (cond); ++ uint64_t old_g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; ++ if (((unsigned) (wseq - old_g1_start - old_orig_size) ++ + cond->__data.__g_size[g1 ^ 1]) == 0) ++ return false; ++ ++ /* Now try to close and quiesce G1. 
We have to consider the following kinds ++ of waiters: ++ * Waiters from less recent groups than G1 are not affected because ++ nothing will change for them apart from __g1_start getting larger. ++ * New waiters arriving concurrently with the group switching will all go ++ into G2 until we atomically make the switch. Waiters existing in G2 ++ are not affected. ++ * Waiters in G1 will be closed out immediately by setting a flag in ++ __g_signals, which will prevent waiters from blocking using a futex on ++ __g_signals and also notifies them that the group is closed. As a ++ result, they will eventually remove their group reference, allowing us ++ to close switch group roles. */ ++ ++ /* First, set the closed flag on __g_signals. This tells waiters that are ++ about to wait that they shouldn't do that anymore. This basically ++ serves as an advance notification of the upcoming change to __g1_start; ++ waiters interpret it as if __g1_start was larger than their waiter ++ sequence position. This allows us to change __g1_start after waiting ++ for all existing waiters with group references to leave, which in turn ++ makes recovery after stealing a signal simpler because it then can be ++ skipped if __g1_start indicates that the group is closed (otherwise, ++ we would have to recover always because waiters don't know how big their ++ groups are). Relaxed MO is fine. */ ++ atomic_fetch_or_relaxed (cond->__data.__g_signals + g1, 1); ++ ++ /* Wait until there are no group references anymore. The fetch-or operation ++ injects us into the modification order of __g_refs; release MO ensures ++ that waiters incrementing __g_refs after our fetch-or see the previous ++ changes to __g_signals and to __g1_start that had to happen before we can ++ switch this G1 and alias with an older group (we have two groups, so ++ aliasing requires switching group roles twice). Note that nobody else ++ can have set the wake-request flag, so we do not have to act upon it. 
++ ++ Also note that it is harmless if older waiters or waiters from this G1 ++ get a group reference after we have quiesced the group because it will ++ remain closed for them either because of the closed flag in __g_signals ++ or the later update to __g1_start. New waiters will never arrive here ++ but instead continue to go into the still current G2. */ ++ unsigned r = atomic_fetch_or_release (cond->__data.__g_refs + g1, 0); ++ while ((r >> 1) > 0) ++ { ++ for (unsigned int spin = maxspin; ((r >> 1) > 0) && (spin > 0); spin--) ++ { ++ /* TODO Back off. */ ++ r = atomic_load_relaxed (cond->__data.__g_refs + g1); ++ } ++ if ((r >> 1) > 0) ++ { ++ /* There is still a waiter after spinning. Set the wake-request ++ flag and block. Relaxed MO is fine because this is just about ++ this futex word. */ ++ r = atomic_fetch_or_relaxed (cond->__data.__g_refs + g1, 1); ++ ++ if ((r >> 1) > 0) ++ futex_wait_simple (cond->__data.__g_refs + g1, r, private); ++ /* Reload here so we eventually see the most recent value even if we ++ do not spin. */ ++ r = atomic_load_relaxed (cond->__data.__g_refs + g1); ++ } ++ } ++ /* Acquire MO so that we synchronize with the release operation that waiters ++ use to decrement __g_refs and thus happen after the waiters we waited ++ for. */ ++ atomic_thread_fence_acquire (); ++ ++ /* Update __g1_start, which finishes closing this group. The value we add ++ will never be negative because old_orig_size can only be zero when we ++ switch groups the first time after a condvar was initialized, in which ++ case G1 will be at index 1 and we will add a value of 1. See above for ++ why this takes place after waiting for quiescence of the group. ++ Relaxed MO is fine because the change comes with no additional ++ constraints that others would have to observe. */ ++ __condvar_add_g1_start_relaxed (cond, ++ (old_orig_size << 1) + (g1 == 1 ? 
1 : - 1)); ++ ++ /* Now reopen the group, thus enabling waiters to again block using the ++ futex controlled by __g_signals. Release MO so that observers that see ++ no signals (and thus can block) also see the write to __g1_start and thus ++ that this is now a new group (see __pthread_cond_wait_common for the ++ matching acquire MO loads). */ ++ atomic_store_release (cond->__data.__g_signals + g1, 0); ++ ++ /* At this point, the old G1 is now a valid new G2 (but not in use yet). ++ No old waiter can either grab a signal or acquire a reference without ++ noticing that __g1_start is larger. ++ We can now publish the group switch by flipping the G2 index in __wseq. ++ Release MO so that this synchronizes with the acquire MO operation ++ waiters use to obtain a position in the waiter sequence. */ ++ wseq = __condvar_fetch_xor_wseq_release (cond, 1) >> 1; ++ g1 ^= 1; ++ *g1index ^= 1; ++ ++ /* These values are just observed by signalers, and thus protected by the ++ lock. */ ++ unsigned int orig_size = wseq - (old_g1_start + old_orig_size); ++ __condvar_set_orig_size (cond, orig_size); ++ /* Use an addition to not lose track of cancellations in what was ++ previously G2. */ ++ cond->__data.__g_size[g1] += orig_size; ++ ++ /* The new G1's size may be zero because of cancellations during its time ++ as G2. If this happens, there are no waiters that have to receive a ++ signal, so we do not need to add any and return false. */ ++ if (cond->__data.__g_size[g1] == 0) ++ return false; ++ ++ return true; ++} +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_destroy.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_destroy.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_cond_destroy.c +@@ -20,66 +20,42 @@ + #include + #include "pthreadP.h" + #include ++#include ++#include + ++#include "pthread_cond_common.c" + ++/* See __pthread_cond_wait for a high-level description of the algorithm. 
++ ++ A correct program must make sure that no waiters are blocked on the condvar ++ when it is destroyed, and that there are no concurrent signals or ++ broadcasts. To wake waiters reliably, the program must signal or ++ broadcast while holding the mutex or after having held the mutex. It must ++ also ensure that no signal or broadcast are still pending to unblock ++ waiters; IOW, because waiters can wake up spuriously, the program must ++ effectively ensure that destruction happens after the execution of those ++ signal or broadcast calls. ++ Thus, we can assume that all waiters that are still accessing the condvar ++ have been woken. We wait until they have confirmed to have woken up by ++ decrementing __wrefs. */ + int + __pthread_cond_destroy (pthread_cond_t *cond) + { +- int pshared = (cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- + LIBC_PROBE (cond_destroy, 1, cond); + +- /* Make sure we are alone. */ +- lll_lock (cond->__data.__lock, pshared); +- +- if (cond->__data.__total_seq > cond->__data.__wakeup_seq) ++ /* Set the wake request flag. We could also spin, but destruction that is ++ concurrent with still-active waiters is probably neither common nor ++ performance critical. Acquire MO to synchronize with waiters confirming ++ that they finished. */ ++ unsigned int wrefs = atomic_fetch_or_acquire (&cond->__data.__wrefs, 4); ++ int private = __condvar_get_private (wrefs); ++ while (wrefs >> 3 != 0) + { +- /* If there are still some waiters which have not been +- woken up, this is an application bug. */ +- lll_unlock (cond->__data.__lock, pshared); +- return EBUSY; ++ futex_wait_simple (&cond->__data.__wrefs, wrefs, private); ++ /* See above. */ ++ wrefs = atomic_load_acquire (&cond->__data.__wrefs); + } +- +- /* Tell pthread_cond_*wait that this condvar is being destroyed. 
*/ +- cond->__data.__total_seq = -1ULL; +- +- /* If there are waiters which have been already signalled or +- broadcasted, but still are using the pthread_cond_t structure, +- pthread_cond_destroy needs to wait for them. */ +- unsigned int nwaiters = cond->__data.__nwaiters; +- +- if (nwaiters >= (1 << COND_NWAITERS_SHIFT)) +- { +- /* Wake everybody on the associated mutex in case there are +- threads that have been requeued to it. +- Without this, pthread_cond_destroy could block potentially +- for a long time or forever, as it would depend on other +- thread's using the mutex. +- When all threads waiting on the mutex are woken up, pthread_cond_wait +- only waits for threads to acquire and release the internal +- condvar lock. */ +- if (cond->__data.__mutex != NULL +- && cond->__data.__mutex != (void *) ~0l) +- { +- pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex; +- lll_futex_wake (&mut->__data.__lock, INT_MAX, +- PTHREAD_MUTEX_PSHARED (mut)); +- } +- +- do +- { +- lll_unlock (cond->__data.__lock, pshared); +- +- lll_futex_wait (&cond->__data.__nwaiters, nwaiters, pshared); +- +- lll_lock (cond->__data.__lock, pshared); +- +- nwaiters = cond->__data.__nwaiters; +- } +- while (nwaiters >= (1 << COND_NWAITERS_SHIFT)); +- } +- ++ /* The memory the condvar occupies can now be reused. */ + return 0; + } + versioned_symbol (libpthread, __pthread_cond_destroy, +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_init.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_init.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_cond_init.c +@@ -19,25 +19,25 @@ + #include + #include "pthreadP.h" + #include ++#include + + ++/* See __pthread_cond_wait for details. 
*/ + int + __pthread_cond_init (pthread_cond_t *cond, const pthread_condattr_t *cond_attr) + { + struct pthread_condattr *icond_attr = (struct pthread_condattr *) cond_attr; + +- cond->__data.__lock = LLL_LOCK_INITIALIZER; +- cond->__data.__futex = 0; +- cond->__data.__nwaiters = (icond_attr != NULL +- ? ((icond_attr->value >> 1) +- & ((1 << COND_NWAITERS_SHIFT) - 1)) +- : CLOCK_REALTIME); +- cond->__data.__total_seq = 0; +- cond->__data.__wakeup_seq = 0; +- cond->__data.__woken_seq = 0; +- cond->__data.__mutex = (icond_attr == NULL || (icond_attr->value & 1) == 0 +- ? NULL : (void *) ~0l); +- cond->__data.__broadcast_seq = 0; ++ memset (cond, 0, sizeof (pthread_cond_t)); ++ /* Iff not equal to ~0l, this is a PTHREAD_PROCESS_PRIVATE condvar. */ ++ if (icond_attr != NULL && (icond_attr->value & 1) != 0) ++ cond->__data.__wrefs |= __PTHREAD_COND_SHARED_MASK; ++ int clockid = (icond_attr != NULL ++ ? ((icond_attr->value >> 1) & ((1 << COND_CLOCK_BITS) - 1)) ++ : CLOCK_REALTIME); ++ /* If 0, CLOCK_REALTIME is used; CLOCK_MONOTONIC otherwise. */ ++ if (clockid != CLOCK_REALTIME) ++ cond->__data.__wrefs |= __PTHREAD_COND_CLOCK_MONOTONIC_MASK; + + LIBC_PROBE (cond_init, 2, cond, cond_attr); + +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_signal.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_signal.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_cond_signal.c +@@ -19,62 +19,79 @@ + #include + #include + #include +-#include ++#include + #include + #include ++#include ++#include + + #include +-#include + #include + ++#include "pthread_cond_common.c" + ++/* See __pthread_cond_wait for a high-level description of the algorithm. */ + int + __pthread_cond_signal (pthread_cond_t *cond) + { +- int pshared = (cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- + LIBC_PROBE (cond_signal, 1, cond); + +- /* Make sure we are alone. 
*/ +- lll_lock (cond->__data.__lock, pshared); +- +- /* Are there any waiters to be woken? */ +- if (cond->__data.__total_seq > cond->__data.__wakeup_seq) ++ /* First check whether there are waiters. Relaxed MO is fine for that for ++ the same reasons that relaxed MO is fine when observing __wseq (see ++ below). */ ++ unsigned int wrefs = atomic_load_relaxed (&cond->__data.__wrefs); ++ if (wrefs >> 3 == 0) ++ return 0; ++ int private = __condvar_get_private (wrefs); ++ ++ __condvar_acquire_lock (cond, private); ++ ++ /* Load the waiter sequence number, which represents our relative ordering ++ to any waiters. Relaxed MO is sufficient for that because: ++ 1) We can pick any position that is allowed by external happens-before ++ constraints. In particular, if another __pthread_cond_wait call ++ happened before us, this waiter must be eligible for being woken by ++ us. The only way to establish such a happens-before is by signaling ++ while having acquired the mutex associated with the condvar and ++ ensuring that the signal's critical section happens after the waiter. ++ Thus, the mutex ensures that we see that waiter's __wseq increase. ++ 2) Once we pick a position, we do not need to communicate this to the ++ program via a happens-before that we set up: First, any wake-up could ++ be a spurious wake-up, so the program must not interpret a wake-up as ++ an indication that the waiter happened before a particular signal; ++ second, a program cannot detect whether a waiter has not yet been ++ woken (i.e., it cannot distinguish between a non-woken waiter and one ++ that has been woken but hasn't resumed execution yet), and thus it ++ cannot try to deduce that a signal happened before a particular ++ waiter. */ ++ unsigned long long int wseq = __condvar_load_wseq_relaxed (cond); ++ unsigned int g1 = (wseq & 1) ^ 1; ++ wseq >>= 1; ++ bool do_futex_wake = false; ++ ++ /* If G1 is still receiving signals, we put the signal there. 
If not, we ++ check if G2 has waiters, and if so, quiesce and switch G1 to the former ++ G2; if this results in a new G1 with waiters (G2 might have cancellations ++ already, see __condvar_quiesce_and_switch_g1), we put the signal in the ++ new G1. */ ++ if ((cond->__data.__g_size[g1] != 0) ++ || __condvar_quiesce_and_switch_g1 (cond, wseq, &g1, private)) + { +- /* Yes. Mark one of them as woken. */ +- ++cond->__data.__wakeup_seq; +- ++cond->__data.__futex; +- +-#if (defined lll_futex_cmp_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- pthread_mutex_t *mut = cond->__data.__mutex; +- +- if (USE_REQUEUE_PI (mut) +- /* This can only really fail with a ENOSYS, since nobody can modify +- futex while we have the cond_lock. */ +- && lll_futex_cmp_requeue_pi (&cond->__data.__futex, 1, 0, +- &mut->__data.__lock, +- cond->__data.__futex, pshared) == 0) +- { +- lll_unlock (cond->__data.__lock, pshared); +- return 0; +- } +- else +-#endif +- /* Wake one. */ +- if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex, +- 1, 1, +- &cond->__data.__lock, +- pshared), 0)) +- return 0; +- +- /* Fallback if neither of them work. */ +- lll_futex_wake (&cond->__data.__futex, 1, pshared); ++ /* Add a signal. Relaxed MO is fine because signaling does not need to ++ establish a happens-before relation (see above). We do not mask the ++ release-MO store when initializing a group in ++ __condvar_quiesce_and_switch_g1 because we use an atomic ++ read-modify-write and thus extend that store's release sequence. */ ++ atomic_fetch_add_relaxed (cond->__data.__g_signals + g1, 2); ++ cond->__data.__g_size[g1]--; ++ /* TODO Only set it if there are indeed futex waiters. */ ++ do_futex_wake = true; + } + +- /* We are done. 
*/ +- lll_unlock (cond->__data.__lock, pshared); ++ __condvar_release_lock (cond, private); ++ ++ if (do_futex_wake) ++ futex_wake (cond->__data.__g_signals + g1, 1, private); + + return 0; + } +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_timedwait.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_timedwait.c ++++ /dev/null +@@ -1,268 +0,0 @@ +-/* Copyright (C) 2003-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Martin Schwidefsky , 2003. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-#ifndef HAVE_CLOCK_GETTIME_VSYSCALL +-# undef INTERNAL_VSYSCALL +-# define INTERNAL_VSYSCALL INTERNAL_SYSCALL +-# undef INLINE_VSYSCALL +-# define INLINE_VSYSCALL INLINE_SYSCALL +-#else +-# include +-#endif +- +-/* Cleanup handler, defined in pthread_cond_wait.c. 
*/ +-extern void __condvar_cleanup (void *arg) +- __attribute__ ((visibility ("hidden"))); +- +-struct _condvar_cleanup_buffer +-{ +- int oldtype; +- pthread_cond_t *cond; +- pthread_mutex_t *mutex; +- unsigned int bc_seq; +-}; +- +-int +-__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, +- const struct timespec *abstime) +-{ +- struct _pthread_cleanup_buffer buffer; +- struct _condvar_cleanup_buffer cbuffer; +- int result = 0; +- +- /* Catch invalid parameters. */ +- if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) +- return EINVAL; +- +- int pshared = (cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- +-#if (defined lll_futex_timed_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- int pi_flag = 0; +-#endif +- +- /* Make sure we are alone. */ +- lll_lock (cond->__data.__lock, pshared); +- +- /* Now we can release the mutex. */ +- int err = __pthread_mutex_unlock_usercnt (mutex, 0); +- if (err) +- { +- lll_unlock (cond->__data.__lock, pshared); +- return err; +- } +- +- /* We have one new user of the condvar. */ +- ++cond->__data.__total_seq; +- ++cond->__data.__futex; +- cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT; +- +- /* Work around the fact that the kernel rejects negative timeout values +- despite them being valid. */ +- if (__glibc_unlikely (abstime->tv_sec < 0)) +- goto timeout; +- +- /* Remember the mutex we are using here. If there is already a +- different address store this is a bad user bug. Do not store +- anything for pshared condvars. */ +- if (cond->__data.__mutex != (void *) ~0l) +- cond->__data.__mutex = mutex; +- +- /* Prepare structure passed to cancellation handler. */ +- cbuffer.cond = cond; +- cbuffer.mutex = mutex; +- +- /* Before we block we enable cancellation. Therefore we have to +- install a cancellation handler. */ +- __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer); +- +- /* The current values of the wakeup counter. 
The "woken" counter +- must exceed this value. */ +- unsigned long long int val; +- unsigned long long int seq; +- val = seq = cond->__data.__wakeup_seq; +- /* Remember the broadcast counter. */ +- cbuffer.bc_seq = cond->__data.__broadcast_seq; +- +- while (1) +- { +-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \ +- || !defined lll_futex_timed_wait_bitset) +- struct timespec rt; +- { +-# ifdef __NR_clock_gettime +- INTERNAL_SYSCALL_DECL (err); +- (void) INTERNAL_VSYSCALL (clock_gettime, err, 2, +- (cond->__data.__nwaiters +- & ((1 << COND_NWAITERS_SHIFT) - 1)), +- &rt); +- /* Convert the absolute timeout value to a relative timeout. */ +- rt.tv_sec = abstime->tv_sec - rt.tv_sec; +- rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec; +-# else +- /* Get the current time. So far we support only one clock. */ +- struct timeval tv; +- (void) __gettimeofday (&tv, NULL); +- +- /* Convert the absolute timeout value to a relative timeout. */ +- rt.tv_sec = abstime->tv_sec - tv.tv_sec; +- rt.tv_nsec = abstime->tv_nsec - tv.tv_usec * 1000; +-# endif +- } +- if (rt.tv_nsec < 0) +- { +- rt.tv_nsec += 1000000000; +- --rt.tv_sec; +- } +- /* Did we already time out? */ +- if (__glibc_unlikely (rt.tv_sec < 0)) +- { +- if (cbuffer.bc_seq != cond->__data.__broadcast_seq) +- goto bc_out; +- +- goto timeout; +- } +-#endif +- +- unsigned int futex_val = cond->__data.__futex; +- +- /* Prepare to wait. Release the condvar futex. */ +- lll_unlock (cond->__data.__lock, pshared); +- +- /* Enable asynchronous cancellation. Required by the standard. */ +- cbuffer.oldtype = __pthread_enable_asynccancel (); +- +-/* REQUEUE_PI was implemented after FUTEX_CLOCK_REALTIME, so it is sufficient +- to check just the former. */ +-#if (defined lll_futex_timed_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- /* If pi_flag remained 1 then it means that we had the lock and the mutex +- but a spurious waker raced ahead of us. Give back the mutex before +- going into wait again. 
*/ +- if (pi_flag) +- { +- __pthread_mutex_cond_lock_adjust (mutex); +- __pthread_mutex_unlock_usercnt (mutex, 0); +- } +- pi_flag = USE_REQUEUE_PI (mutex); +- +- if (pi_flag) +- { +- unsigned int clockbit = (cond->__data.__nwaiters & 1 +- ? 0 : FUTEX_CLOCK_REALTIME); +- err = lll_futex_timed_wait_requeue_pi (&cond->__data.__futex, +- futex_val, abstime, clockbit, +- &mutex->__data.__lock, +- pshared); +- pi_flag = (err == 0); +- } +- else +-#endif +- +- { +-#if (!defined __ASSUME_FUTEX_CLOCK_REALTIME \ +- || !defined lll_futex_timed_wait_bitset) +- /* Wait until woken by signal or broadcast. */ +- err = lll_futex_timed_wait (&cond->__data.__futex, +- futex_val, &rt, pshared); +-#else +- unsigned int clockbit = (cond->__data.__nwaiters & 1 +- ? 0 : FUTEX_CLOCK_REALTIME); +- err = lll_futex_timed_wait_bitset (&cond->__data.__futex, futex_val, +- abstime, clockbit, pshared); +-#endif +- } +- +- /* Disable asynchronous cancellation. */ +- __pthread_disable_asynccancel (cbuffer.oldtype); +- +- /* We are going to look at shared data again, so get the lock. */ +- lll_lock (cond->__data.__lock, pshared); +- +- /* If a broadcast happened, we are done. */ +- if (cbuffer.bc_seq != cond->__data.__broadcast_seq) +- goto bc_out; +- +- /* Check whether we are eligible for wakeup. */ +- val = cond->__data.__wakeup_seq; +- if (val != seq && cond->__data.__woken_seq != val) +- break; +- +- /* Not woken yet. Maybe the time expired? */ +- if (__glibc_unlikely (err == -ETIMEDOUT)) +- { +- timeout: +- /* Yep. Adjust the counters. */ +- ++cond->__data.__wakeup_seq; +- ++cond->__data.__futex; +- +- /* The error value. */ +- result = ETIMEDOUT; +- break; +- } +- } +- +- /* Another thread woken up. */ +- ++cond->__data.__woken_seq; +- +- bc_out: +- +- cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT; +- +- /* If pthread_cond_destroy was called on this variable already, +- notify the pthread_cond_destroy caller all waiters have left +- and it can be successfully destroyed. 
*/ +- if (cond->__data.__total_seq == -1ULL +- && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT)) +- lll_futex_wake (&cond->__data.__nwaiters, 1, pshared); +- +- /* We are done with the condvar. */ +- lll_unlock (cond->__data.__lock, pshared); +- +- /* The cancellation handling is back to normal, remove the handler. */ +- __pthread_cleanup_pop (&buffer, 0); +- +- /* Get the mutex before returning. */ +-#if (defined lll_futex_timed_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- if (pi_flag) +- { +- __pthread_mutex_cond_lock_adjust (mutex); +- err = 0; +- } +- else +-#endif +- err = __pthread_mutex_cond_lock (mutex); +- +- return err ?: result; +-} +- +-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, +- GLIBC_2_3_2); +Index: glibc-2.24-256-g5140d03/nptl/pthread_cond_wait.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_cond_wait.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_cond_wait.c +@@ -19,219 +19,653 @@ + #include + #include + #include +-#include ++#include + #include + #include +-#include ++#include ++#include ++#include ++#include + + #include + #include ++#include ++ ++#include "pthread_cond_common.c" ++ + + struct _condvar_cleanup_buffer + { +- int oldtype; ++ uint64_t wseq; + pthread_cond_t *cond; + pthread_mutex_t *mutex; +- unsigned int bc_seq; ++ int private; + }; + + +-void +-__attribute__ ((visibility ("hidden"))) +-__condvar_cleanup (void *arg) ++/* Decrease the waiter reference count. */ ++static void ++__condvar_confirm_wakeup (pthread_cond_t *cond, int private) + { +- struct _condvar_cleanup_buffer *cbuffer = +- (struct _condvar_cleanup_buffer *) arg; +- unsigned int destroying; +- int pshared = (cbuffer->cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- +- /* We are going to modify shared data. 
*/ +- lll_lock (cbuffer->cond->__data.__lock, pshared); +- +- if (cbuffer->bc_seq == cbuffer->cond->__data.__broadcast_seq) +- { +- /* This thread is not waiting anymore. Adjust the sequence counters +- appropriately. We do not increment WAKEUP_SEQ if this would +- bump it over the value of TOTAL_SEQ. This can happen if a thread +- was woken and then canceled. */ +- if (cbuffer->cond->__data.__wakeup_seq +- < cbuffer->cond->__data.__total_seq) ++ /* If destruction is pending (i.e., the wake-request flag is nonzero) and we ++ are the last waiter (prior value of __wrefs was 1 << 3), then wake any ++ threads waiting in pthread_cond_destroy. Release MO to synchronize with ++ these threads. Don't bother clearing the wake-up request flag. */ ++ if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3) ++ futex_wake (&cond->__data.__wrefs, INT_MAX, private); ++} ++ ++ ++/* Cancel waiting after having registered as a waiter previously. SEQ is our ++ position and G is our group index. ++ The goal of cancellation is to make our group smaller if that is still ++ possible. If we are in a closed group, this is not possible anymore; in ++ this case, we need to send a replacement signal for the one we effectively ++ consumed because the signal should have gotten consumed by another waiter ++ instead; we must not both cancel waiting and consume a signal. ++ ++ Must not be called while still holding a reference on the group. ++ ++ Does not return a value; if a signal was consumed, a replacement is sent. ++ ++ On some kind of timeouts, we may be able to pretend that a signal we ++ effectively consumed happened before the timeout (i.e., similarly to first ++ spinning on signals before actually checking whether the timeout has ++ passed already). Doing this would allow us to skip sending a replacement ++ signal, but this case might happen rarely because the end of the timeout ++ must race with someone else sending a signal. Therefore, we don't bother ++ trying to optimize this. 
*/ ++static void ++__condvar_cancel_waiting (pthread_cond_t *cond, uint64_t seq, unsigned int g, ++ int private) ++{ ++ bool consumed_signal = false; ++ ++ /* No deadlock with group switching is possible here because we do ++ not hold a reference on the group. */ ++ __condvar_acquire_lock (cond, private); ++ ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> 1; ++ if (g1_start > seq) ++ { ++ /* Our group is closed, so someone provided enough signals for it. ++ Thus, we effectively consumed a signal. */ ++ consumed_signal = true; ++ } ++ else ++ { ++ if (g1_start + __condvar_get_orig_size (cond) <= seq) + { +- ++cbuffer->cond->__data.__wakeup_seq; +- ++cbuffer->cond->__data.__futex; ++ /* We are in the current G2 and thus cannot have consumed a signal. ++ Reduce its effective size or handle overflow. Remember that in ++ G2, unsigned int size is zero or a negative value. */ ++ if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > 0) ++ { ++ cond->__data.__g_size[g]--; ++ } ++ else ++ { ++ /* Cancellations would overflow the maximum group size. Just ++ wake up everyone spuriously to create a clean state. This ++ also means we do not consume a signal someone else sent. */ ++ __condvar_release_lock (cond, private); ++ __pthread_cond_broadcast (cond); ++ return; ++ } ++ } ++ else ++ { ++ /* We are in current G1. If the group's size is zero, someone put ++ a signal in the group that nobody else but us can consume. */ ++ if (cond->__data.__g_size[g] == 0) ++ consumed_signal = true; ++ else ++ { ++ /* Otherwise, we decrease the size of the group. This is ++ equivalent to atomically putting in a signal just for us and ++ consuming it right away. We do not consume a signal sent ++ by someone else. We also cannot have consumed a futex ++ wake-up because if we were cancelled or timed out in a futex ++ call, the futex will wake another waiter.
*/ ++ cond->__data.__g_size[g]--; ++ } + } +- ++cbuffer->cond->__data.__woken_seq; + } + +- cbuffer->cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT; ++ __condvar_release_lock (cond, private); + +- /* If pthread_cond_destroy was called on this variable already, +- notify the pthread_cond_destroy caller all waiters have left +- and it can be successfully destroyed. */ +- destroying = 0; +- if (cbuffer->cond->__data.__total_seq == -1ULL +- && cbuffer->cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT)) ++ if (consumed_signal) + { +- lll_futex_wake (&cbuffer->cond->__data.__nwaiters, 1, pshared); +- destroying = 1; ++ /* We effectively consumed a signal even though we didn't want to. ++ Therefore, we need to send a replacement signal. ++ If we would want to optimize this, we could do what ++ pthread_cond_signal does right in the critical section above. */ ++ __pthread_cond_signal (cond); + } ++} + +- /* We are done. */ +- lll_unlock (cbuffer->cond->__data.__lock, pshared); ++/* Wake up any signalers that might be waiting. */ ++static void ++__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private) ++{ ++ /* Release MO to synchronize-with the acquire load in ++ __condvar_quiesce_and_switch_g1. */ ++ if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3) ++ { ++ /* Clear the wake-up request flag before waking up. We do not need more ++ than relaxed MO and it doesn't matter if we apply this for an aliased ++ group because we wake all futex waiters right after clearing the ++ flag. */ ++ atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1); ++ futex_wake (cond->__data.__g_refs + g, INT_MAX, private); ++ } ++} + +- /* Wake everybody to make sure no condvar signal gets lost. */ +- if (! destroying) +- lll_futex_wake (&cbuffer->cond->__data.__futex, INT_MAX, pshared); ++/* Clean-up for cancellation of waiters waiting for normal signals. 
We cancel ++ our registration as a waiter, confirm we have woken up, and re-acquire the ++ mutex. */ ++static void ++__condvar_cleanup_waiting (void *arg) ++{ ++ struct _condvar_cleanup_buffer *cbuffer = ++ (struct _condvar_cleanup_buffer *) arg; ++ pthread_cond_t *cond = cbuffer->cond; ++ unsigned g = cbuffer->wseq & 1; + +- /* Get the mutex before returning unless asynchronous cancellation +- is in effect. We don't try to get the mutex if we already own it. */ +- if (!(USE_REQUEUE_PI (cbuffer->mutex)) +- || ((cbuffer->mutex->__data.__lock & FUTEX_TID_MASK) +- != THREAD_GETMEM (THREAD_SELF, tid))) +- { +- __pthread_mutex_cond_lock (cbuffer->mutex); +- } +- else +- __pthread_mutex_cond_lock_adjust (cbuffer->mutex); +-} ++ __condvar_dec_grefs (cond, g, cbuffer->private); + ++ __condvar_cancel_waiting (cond, cbuffer->wseq >> 1, g, cbuffer->private); ++ /* FIXME With the current cancellation implementation, it is possible that ++ a thread is cancelled after it has returned from a syscall. This could ++ result in a cancelled waiter consuming a futex wake-up that is then ++ causing another waiter in the same group to not wake up. To work around ++ this issue until we have fixed cancellation, just add a futex wake-up ++ conservatively. */ ++ futex_wake (cond->__data.__g_signals + g, 1, cbuffer->private); ++ ++ __condvar_confirm_wakeup (cond, cbuffer->private); ++ ++ /* XXX If locking the mutex fails, should we just stop execution? This ++ might be better than silently ignoring the error. */ ++ __pthread_mutex_cond_lock (cbuffer->mutex); ++} + +-int +-__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) ++/* This condvar implementation guarantees that all calls to signal and ++ broadcast and all of the three virtually atomic parts of each call to wait ++ (i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re- ++ acquiring the mutex) happen in some total order that is consistent with the ++ happens-before relations in the calling program. 
However, this order does ++ not necessarily result in additional happens-before relations being ++ established (which aligns well with spurious wake-ups being allowed). ++ ++ All waiters acquire a certain position in a 64b waiter sequence (__wseq). ++ This sequence determines which waiters are allowed to consume signals. ++ A broadcast is equal to sending as many signals as are unblocked waiters. ++ When a signal arrives, it samples the current value of __wseq with a ++ relaxed-MO load (i.e., the position the next waiter would get). (This is ++ sufficient because it is consistent with happens-before; the caller can ++ enforce stronger ordering constraints by calling signal while holding the ++ mutex.) Only waiters with a position less than the __wseq value observed ++ by the signal are eligible to consume this signal. ++ ++ This would be straight-forward to implement if waiters would just spin but ++ we need to let them block using futexes. Futexes give no guarantee of ++ waking in FIFO order, so we cannot reliably wake eligible waiters if we ++ just use a single futex. Also, futex words are 32b in size, but we need ++ to distinguish more than 1<<32 states because we need to represent the ++ order of wake-up (and thus which waiters are eligible to consume signals); ++ blocking in a futex is not atomic with a waiter determining its position in ++ the waiter sequence, so we need the futex word to reliably notify waiters ++ that they should not attempt to block anymore because they have been ++ already signaled in the meantime. While an ABA issue on a 32b value will ++ be rare, ignoring it when we are aware of it is not the right thing to do ++ either. ++ ++ Therefore, we use a 64b counter to represent the waiter sequence (on ++ architectures which only support 32b atomics, we use a few bits less). 
++ To deal with the blocking using futexes, we maintain two groups of waiters: ++ * Group G1 consists of waiters that are all eligible to consume signals; ++ incoming signals will always signal waiters in this group until all ++ waiters in G1 have been signaled. ++ * Group G2 consists of waiters that arrive when a G1 is present and still ++ contains waiters that have not been signaled. When all waiters in G1 ++ are signaled and a new signal arrives, the new signal will convert G2 ++ into the new G1 and create a new G2 for future waiters. ++ ++ We cannot allocate new memory because of process-shared condvars, so we ++ have just two slots of groups that change their role between G1 and G2. ++ Each has a separate futex word, a number of signals available for ++ consumption, a size (number of waiters in the group that have not been ++ signaled), and a reference count. ++ ++ The group reference count is used to maintain the number of waiters that ++ are using the group's futex. Before a group can change its role, the ++ reference count must show that no waiters are using the futex anymore; this ++ prevents ABA issues on the futex word. ++ ++ To represent which intervals in the waiter sequence the groups cover (and ++ thus also which group slot contains G1 or G2), we use a 64b counter to ++ designate the start position of G1 (inclusive), and a single bit in the ++ waiter sequence counter to represent which group slot currently contains ++ G2. This allows us to switch group roles atomically wrt. waiters obtaining ++ a position in the waiter sequence. The G1 start position allows waiters to ++ figure out whether they are in a group that has already been completely ++ signaled (i.e., if the current G1 starts at a later position than the ++ waiter's position).
Waiters cannot determine whether they are currently ++ in G2 or G1 -- but they do not have to because all they are interested in ++ is whether there are available signals, and they always start in G2 (whose ++ group slot they know because of the bit in the waiter sequence). Signalers ++ will simply fill the right group until it is completely signaled and can ++ be closed (they do not switch group roles until they really have to to ++ decrease the likelihood of having to wait for waiters still holding a ++ reference on the now-closed G1). ++ ++ Signalers maintain the initial size of G1 to be able to determine where ++ G2 starts (G2 is always open-ended until it becomes G1). They track the ++ remaining size of a group; when waiters cancel waiting (due to PThreads ++ cancellation or timeouts), they will decrease this remaining size as well. ++ ++ To implement condvar destruction requirements (i.e., that ++ pthread_cond_destroy can be called as soon as all waiters have been ++ signaled), waiters increment a reference count before starting to wait and ++ decrement it after they stopped waiting but right before they acquire the ++ mutex associated with the condvar. ++ ++ pthread_cond_t thus consists of the following (bits that are used for ++ flags and are not part of the primary value of each field but necessary ++ to make some things atomic or because there was no space for them ++ elsewhere in the data structure): ++ ++ __wseq: Waiter sequence counter ++ * LSB is index of current G2. ++ * Waiters fetch-add while having acquired the mutex associated with the ++ condvar. Signalers load it and fetch-xor it concurrently. ++ __g1_start: Starting position of G1 (inclusive) ++ * LSB is index of current G2. ++ * Modified by signalers while having acquired the condvar-internal lock ++ and observed concurrently by waiters. ++ __g1_orig_size: Initial size of G1 ++ * The two least-significant bits represent the condvar-internal lock.
++ * Only accessed while having acquired the condvar-internal lock. ++ __wrefs: Waiter reference counter. ++ * Bit 2 is true if waiters should run futex_wake when they remove the ++ last reference. pthread_cond_destroy uses this as futex word. ++ * Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC). ++ * Bit 0 is true iff this is a process-shared condvar. ++ * Simple reference count used by both waiters and pthread_cond_destroy. ++ For each of the two groups, we have: ++ __g_refs: Futex waiter reference count. ++ * LSB is true if waiters should run futex_wake when they remove the ++ last reference. ++ * Reference count used by waiters concurrently with signalers that have ++ acquired the condvar-internal lock. ++ __g_signals: The number of signals that can still be consumed. ++ * Used as a futex word by waiters. Used concurrently by waiters and ++ signalers. ++ * LSB is true iff this group has been completely signaled (i.e., it is ++ closed). ++ __g_size: Waiters remaining in this group (i.e., which have not been ++ signaled yet). ++ * Accessed by signalers and waiters that cancel waiting (both do so only ++ when having acquired the condvar-internal lock). ++ * The size of G2 is always zero because it cannot be determined until ++ the group becomes G1. ++ * Although this is of unsigned type, we rely on using unsigned overflow ++ rules to make this hold effectively negative values too (in ++ particular, when waiters in G2 cancel waiting). ++ ++ A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields ++ a condvar that has G2 starting at position 0 and a G1 that is closed. ++ ++ Because waiters do not claim ownership of a group right when obtaining a ++ position in __wseq but only reference count the group when using futexes ++ to block, it can happen that a group gets closed before a waiter can ++ increment the reference count. Therefore, waiters have to check whether ++ their group is already closed using __g1_start.
They also have to perform ++ this check when spinning when trying to grab a signal from __g_signals. ++ Note that for these checks, using relaxed MO to load __g1_start is ++ sufficient because if a waiter can see a sufficiently large value, it could ++ have also consumed a signal in the waiter's group. ++ ++ Waiters try to grab a signal from __g_signals without holding a reference ++ count, which can lead to stealing a signal from a more recent group after ++ their own group was already closed. They cannot always detect whether they ++ in fact did because they do not know when they stole, but they can ++ conservatively add a signal back to the group they stole from; if they ++ did so unnecessarily, all that happens is a spurious wake-up. To make this ++ even less likely, __g1_start contains the index of the current G2 too, ++ which allows waiters to check if there is aliasing on the group slots; if ++ there wasn't, they didn't steal from the current G1, which means that the ++ G1 they stole from must have been already closed and they do not need to ++ fix anything. ++ ++ It is essential that the last field in pthread_cond_t is __g_signals[1]: ++ The previous condvar used a pointer-sized field in pthread_cond_t, so a ++ PTHREAD_COND_INITIALIZER from that condvar implementation might only ++ initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes ++ in total instead of the 48 we need). __g_signals[1] is not accessed before ++ the first group switch (G2 starts at index 0), which will set its value to ++ zero after a harmless fetch-or whose return value is ignored. This ++ effectively completes initialization. ++ ++ ++ Limitations: ++ * This condvar isn't designed to allow for more than ++ __PTHREAD_COND_MAX_GROUP_SIZE * (1 << 31) calls to __pthread_cond_wait. ++ * More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not ++ supported.
++ * Beyond what is allowed as errors by POSIX or documented, we can also ++ return the following errors: ++ * EPERM if MUTEX is a recursive mutex and the caller doesn't own it. ++ * EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike ++ for other errors, this can happen when we re-acquire the mutex; this ++ isn't allowed by POSIX (which requires all errors to virtually happen ++ before we release the mutex or change the condvar state), but there's ++ nothing we can do really. ++ * When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors ++ returned by __pthread_tpp_change_priority. We will already have ++ released the mutex in such cases, so the caller cannot expect to own ++ MUTEX. ++ ++ Other notes: ++ * Instead of the normal mutex unlock / lock functions, we use ++ __pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m) ++ because those will not change the mutex-internal users count, so that it ++ can be detected when a condvar is still associated with a particular ++ mutex because there is a waiter blocked on this condvar using this mutex. ++*/ ++static __always_inline int ++__pthread_cond_wait_common (pthread_cond_t *cond, pthread_mutex_t *mutex, ++ const struct timespec *abstime) + { +- struct _pthread_cleanup_buffer buffer; +- struct _condvar_cleanup_buffer cbuffer; ++ const int maxspin = 0; + int err; +- int pshared = (cond->__data.__mutex == (void *) ~0l) +- ? LLL_SHARED : LLL_PRIVATE; +- +-#if (defined lll_futex_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- int pi_flag = 0; +-#endif ++ int result = 0; + + LIBC_PROBE (cond_wait, 2, cond, mutex); + +- /* Make sure we are alone. */ +- lll_lock (cond->__data.__lock, pshared); +- +- /* Now we can release the mutex. */ ++ /* Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an ++ atomic operation because signals and broadcasts may update the group ++ switch without acquiring the mutex. 
We do not need release MO here ++ because we do not need to establish any happens-before relation with ++ signalers (see __pthread_cond_signal); modification order alone ++ establishes a total order of waiters/signals. We do need acquire MO ++ to synchronize with group reinitialization in ++ __condvar_quiesce_and_switch_g1. */ ++ uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, 2); ++ /* Find our group's index. We always go into what was G2 when we acquired ++ our position. */ ++ unsigned int g = wseq & 1; ++ uint64_t seq = wseq >> 1; ++ ++ /* Increase the waiter reference count. Relaxed MO is sufficient because ++ we only need to synchronize when decrementing the reference count. */ ++ unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, 8); ++ int private = __condvar_get_private (flags); ++ ++ /* Now that we are registered as a waiter, we can release the mutex. ++ Waiting on the condvar must be atomic with releasing the mutex, so if ++ the mutex is used to establish a happens-before relation with any ++ signaler, the waiter must be visible to the latter; thus, we release the ++ mutex after registering as waiter. ++ If releasing the mutex fails, we just cancel our registration as a ++ waiter and confirm that we have woken up. */ + err = __pthread_mutex_unlock_usercnt (mutex, 0); +- if (__glibc_unlikely (err)) ++ if (__glibc_unlikely (err != 0)) + { +- lll_unlock (cond->__data.__lock, pshared); ++ __condvar_cancel_waiting (cond, seq, g, private); ++ __condvar_confirm_wakeup (cond, private); + return err; + } + +- /* We have one new user of the condvar. */ +- ++cond->__data.__total_seq; +- ++cond->__data.__futex; +- cond->__data.__nwaiters += 1 << COND_NWAITERS_SHIFT; +- +- /* Remember the mutex we are using here. If there is already a +- different address store this is a bad user bug. Do not store +- anything for pshared condvars. 
*/ +- if (cond->__data.__mutex != (void *) ~0l) +- cond->__data.__mutex = mutex; +- +- /* Prepare structure passed to cancellation handler. */ +- cbuffer.cond = cond; +- cbuffer.mutex = mutex; +- +- /* Before we block we enable cancellation. Therefore we have to +- install a cancellation handler. */ +- __pthread_cleanup_push (&buffer, __condvar_cleanup, &cbuffer); +- +- /* The current values of the wakeup counter. The "woken" counter +- must exceed this value. */ +- unsigned long long int val; +- unsigned long long int seq; +- val = seq = cond->__data.__wakeup_seq; +- /* Remember the broadcast counter. */ +- cbuffer.bc_seq = cond->__data.__broadcast_seq; ++ /* Now wait until a signal is available in our group or it is closed. ++ Acquire MO so that if we observe a value of zero written after group ++ switching in __condvar_quiesce_and_switch_g1, we synchronize with that ++ store and will see the prior update of __g1_start done while switching ++ groups too. */ ++ unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g); + + do + { +- unsigned int futex_val = cond->__data.__futex; +- /* Prepare to wait. Release the condvar futex. */ +- lll_unlock (cond->__data.__lock, pshared); +- +- /* Enable asynchronous cancellation. Required by the standard. */ +- cbuffer.oldtype = __pthread_enable_asynccancel (); +- +-#if (defined lll_futex_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- /* If pi_flag remained 1 then it means that we had the lock and the mutex +- but a spurious waker raced ahead of us. Give back the mutex before +- going into wait again. */ +- if (pi_flag) ++ while (1) + { +- __pthread_mutex_cond_lock_adjust (mutex); +- __pthread_mutex_unlock_usercnt (mutex, 0); ++ /* Spin-wait first. ++ Note that spinning first without checking whether a timeout ++ passed might lead to what looks like a spurious wake-up even ++ though we should return ETIMEDOUT (e.g., if the caller provides ++ an absolute timeout that is clearly in the past). 
However, ++ (1) spurious wake-ups are allowed, (2) it seems unlikely that a ++ user will (ab)use pthread_cond_wait as a check for whether a ++ point in time is in the past, and (3) spinning first without ++ having to compare against the current time seems to be the right ++ choice from a performance perspective for most use cases. */ ++ unsigned int spin = maxspin; ++ while (signals == 0 && spin > 0) ++ { ++ /* Check that we are not spinning on a group that's already ++ closed. */ ++ if (seq < (__condvar_load_g1_start_relaxed (cond) >> 1)) ++ goto done; ++ ++ /* TODO Back off. */ ++ ++ /* Reload signals. See above for MO. */ ++ signals = atomic_load_acquire (cond->__data.__g_signals + g); ++ spin--; ++ } ++ ++ /* If our group will be closed as indicated by the flag on signals, ++ don't bother grabbing a signal. */ ++ if (signals & 1) ++ goto done; ++ ++ /* If there is an available signal, don't block. */ ++ if (signals != 0) ++ break; ++ ++ /* No signals available after spinning, so prepare to block. ++ We first acquire a group reference and use acquire MO for that so ++ that we synchronize with the dummy read-modify-write in ++ __condvar_quiesce_and_switch_g1 if we read from that. In turn, ++ in this case this will make us see the closed flag on __g_signals ++ that designates a concurrent attempt to reuse the group's slot. ++ We use acquire MO for the __g_signals check to make the ++ __g1_start check work (see spinning above). ++ Note that the group reference acquisition will not mask the ++ release MO when decrementing the reference count because we use ++ an atomic read-modify-write operation and thus extend the release ++ sequence. */ ++ atomic_fetch_add_acquire (cond->__data.__g_refs + g, 2); ++ if (((atomic_load_acquire (cond->__data.__g_signals + g) & 1) != 0) ++ || (seq < (__condvar_load_g1_start_relaxed (cond) >> 1))) ++ { ++ /* Our group is closed. Wake up any signalers that might be ++ waiting. 
*/ ++ __condvar_dec_grefs (cond, g, private); ++ goto done; ++ } ++ ++ // Now block. ++ struct _pthread_cleanup_buffer buffer; ++ struct _condvar_cleanup_buffer cbuffer; ++ cbuffer.wseq = wseq; ++ cbuffer.cond = cond; ++ cbuffer.mutex = mutex; ++ cbuffer.private = private; ++ __pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer); ++ ++ if (abstime == NULL) ++ { ++ /* Block without a timeout. */ ++ err = futex_wait_cancelable ( ++ cond->__data.__g_signals + g, 0, private); ++ } ++ else ++ { ++ /* Block, but with a timeout. ++ Work around the fact that the kernel rejects negative timeout ++ values despite them being valid. */ ++ if (__glibc_unlikely (abstime->tv_sec < 0)) ++ err = ETIMEDOUT; ++ ++ else if ((flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK) != 0) ++ { ++ /* CLOCK_MONOTONIC is requested. */ ++ struct timespec rt; ++ if (__clock_gettime (CLOCK_MONOTONIC, &rt) != 0) ++ __libc_fatal ("clock_gettime does not support " ++ "CLOCK_MONOTONIC"); ++ /* Convert the absolute timeout value to a relative ++ timeout. */ ++ rt.tv_sec = abstime->tv_sec - rt.tv_sec; ++ rt.tv_nsec = abstime->tv_nsec - rt.tv_nsec; ++ if (rt.tv_nsec < 0) ++ { ++ rt.tv_nsec += 1000000000; ++ --rt.tv_sec; ++ } ++ /* Did we already time out? */ ++ if (__glibc_unlikely (rt.tv_sec < 0)) ++ err = ETIMEDOUT; ++ else ++ err = futex_reltimed_wait_cancelable ++ (cond->__data.__g_signals + g, 0, &rt, private); ++ } ++ else ++ { ++ /* Use CLOCK_REALTIME. */ ++ err = futex_abstimed_wait_cancelable ++ (cond->__data.__g_signals + g, 0, abstime, private); ++ } ++ } ++ ++ __pthread_cleanup_pop (&buffer, 0); ++ ++ if (__glibc_unlikely (err == ETIMEDOUT)) ++ { ++ __condvar_dec_grefs (cond, g, private); ++ /* If we timed out, we effectively cancel waiting. Note that ++ we have decremented __g_refs before cancellation, so that a ++ deadlock between waiting for quiescence of our group in ++ __condvar_quiesce_and_switch_g1 and us trying to acquire ++ the lock during cancellation is not possible. 
*/ ++ __condvar_cancel_waiting (cond, seq, g, private); ++ result = ETIMEDOUT; ++ goto done; ++ } ++ else ++ __condvar_dec_grefs (cond, g, private); ++ ++ /* Reload signals. See above for MO. */ ++ signals = atomic_load_acquire (cond->__data.__g_signals + g); + } +- pi_flag = USE_REQUEUE_PI (mutex); + +- if (pi_flag) ++ } ++ /* Try to grab a signal. Use acquire MO so that we see an up-to-date value ++ of __g1_start below (see spinning above for a similar case). In ++ particular, if we steal from a more recent group, we will also see a ++ more recent __g1_start below. */ ++ while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g, ++ &signals, signals - 2)); ++ ++ /* We consumed a signal but we could have consumed from a more recent group ++ that aliased with ours due to being in the same group slot. If this ++ might be the case our group must be closed as visible through ++ __g1_start. */ ++ uint64_t g1_start = __condvar_load_g1_start_relaxed (cond); ++ if (seq < (g1_start >> 1)) ++ { ++ /* We potentially stole a signal from a more recent group but we do not ++ know which group we really consumed from. ++ We do not care about groups older than current G1 because they are ++ closed; we could have stolen from these, but then we just add a ++ spurious wake-up for the current groups. ++ We will never steal a signal from current G2 that was really intended ++ for G2 because G2 never receives signals (until it becomes G1). We ++ could have stolen a signal from G2 that was conservatively added by a ++ previous waiter that also thought it stole a signal -- but given that ++ that signal was added unnecessarily, it's not a problem if we steal ++ it. ++ Thus, the remaining case is that we could have stolen from the current ++ G1, where "current" means the __g1_start value we observed. However, ++ if the current G1 does not have the same slot index as we do, we did ++ not steal from it and do not need to undo that. 
This is the reason ++ for putting a bit with G2's index into __g1_start as well. */ ++ if (((g1_start & 1) ^ 1) == g) + { +- err = lll_futex_wait_requeue_pi (&cond->__data.__futex, +- futex_val, &mutex->__data.__lock, +- pshared); +- +- pi_flag = (err == 0); ++ /* We have to conservatively undo our potential mistake of stealing ++ a signal. We can stop trying to do that when the current G1 ++ changes because other spinning waiters will notice this too and ++ __condvar_quiesce_and_switch_g1 has checked that there are no ++ futex waiters anymore before switching G1. ++ Relaxed MO is fine for the __g1_start load because we need to ++ merely be able to observe this fact and not have to observe ++ something else as well. ++ ??? Would it help to spin for a little while to see whether the ++ current G1 gets closed? This might be worthwhile if the group is ++ small or close to being closed. */ ++ unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g); ++ while (__condvar_load_g1_start_relaxed (cond) == g1_start) ++ { ++ /* Try to add a signal. We don't need to acquire the lock ++ because at worst we can cause a spurious wake-up. If the ++ group is in the process of being closed (LSB is true), this ++ has an effect similar to us adding a signal. */ ++ if (((s & 1) != 0) ++ || atomic_compare_exchange_weak_relaxed ++ (cond->__data.__g_signals + g, &s, s + 2)) ++ { ++ /* If we added a signal, we also need to add a wake-up on ++ the futex. We also need to do that if we skipped adding ++ a signal because the group is being closed because ++ while __condvar_quiesce_and_switch_g1 could have closed ++ the group, it might still be waiting for futex waiters to ++ leave (and one of those waiters might be the one we stole ++ the signal from, which caused it to block using the ++ futex). */ ++ futex_wake (cond->__data.__g_signals + g, 1, private); ++ break; ++ } ++ /* TODO Back off. */ ++ } + } +- else +-#endif +- /* Wait until woken by signal or broadcast.
*/ +- lll_futex_wait (&cond->__data.__futex, futex_val, pshared); +- +- /* Disable asynchronous cancellation. */ +- __pthread_disable_asynccancel (cbuffer.oldtype); +- +- /* We are going to look at shared data again, so get the lock. */ +- lll_lock (cond->__data.__lock, pshared); +- +- /* If a broadcast happened, we are done. */ +- if (cbuffer.bc_seq != cond->__data.__broadcast_seq) +- goto bc_out; +- +- /* Check whether we are eligible for wakeup. */ +- val = cond->__data.__wakeup_seq; + } +- while (val == seq || cond->__data.__woken_seq == val); + +- /* Another thread woken up. */ +- ++cond->__data.__woken_seq; ++ done: + +- bc_out: +- +- cond->__data.__nwaiters -= 1 << COND_NWAITERS_SHIFT; +- +- /* If pthread_cond_destroy was called on this varaible already, +- notify the pthread_cond_destroy caller all waiters have left +- and it can be successfully destroyed. */ +- if (cond->__data.__total_seq == -1ULL +- && cond->__data.__nwaiters < (1 << COND_NWAITERS_SHIFT)) +- lll_futex_wake (&cond->__data.__nwaiters, 1, pshared); ++ /* Confirm that we have been woken. We do that before acquiring the mutex ++ to allow for execution of pthread_cond_destroy while having acquired the ++ mutex. */ ++ __condvar_confirm_wakeup (cond, private); ++ ++ /* Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT, ++ which is set to ETIMEDOUT if a timeout occurred, or zero otherwise. */ ++ err = __pthread_mutex_cond_lock (mutex); ++ /* XXX Abort on errors that are disallowed by POSIX? */ ++ return (err != 0) ? err : result; ++} + +- /* We are done with the condvar. */ +- lll_unlock (cond->__data.__lock, pshared); + +- /* The cancellation handling is back to normal, remove the handler. */ +- __pthread_cleanup_pop (&buffer, 0); ++/* See __pthread_cond_wait_common. */ ++int ++__pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) ++{ ++ return __pthread_cond_wait_common (cond, mutex, NULL); ++} + +- /* Get the mutex before returning. Not needed for PI.
*/ +-#if (defined lll_futex_wait_requeue_pi \ +- && defined __ASSUME_REQUEUE_PI) +- if (pi_flag) +- { +- __pthread_mutex_cond_lock_adjust (mutex); +- return 0; +- } +- else +-#endif +- return __pthread_mutex_cond_lock (mutex); ++/* See __pthread_cond_wait_common. */ ++int ++__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, ++ const struct timespec *abstime) ++{ ++ /* Check parameter validity. This should also tell the compiler that ++ it can assume that abstime is not NULL. */ ++ if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000) ++ return EINVAL; ++ return __pthread_cond_wait_common (cond, mutex, abstime); + } + + versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, + GLIBC_2_3_2); ++versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, ++ GLIBC_2_3_2); +Index: glibc-2.24-256-g5140d03/nptl/pthread_condattr_getclock.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_condattr_getclock.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_condattr_getclock.c +@@ -23,6 +23,6 @@ int + pthread_condattr_getclock (const pthread_condattr_t *attr, clockid_t *clock_id) + { + *clock_id = (((((const struct pthread_condattr *) attr)->value) >> 1) +- & ((1 << COND_NWAITERS_SHIFT) - 1)); ++ & ((1 << COND_CLOCK_BITS) - 1)); + return 0; + } +Index: glibc-2.24-256-g5140d03/nptl/pthread_condattr_getpshared.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_condattr_getpshared.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_condattr_getpshared.c +@@ -22,7 +22,8 @@ + int + pthread_condattr_getpshared (const pthread_condattr_t *attr, int *pshared) + { +- *pshared = ((const struct pthread_condattr *) attr)->value & 1; ++ *pshared = (((const struct pthread_condattr *) attr)->value & 1 ++ ? 
PTHREAD_PROCESS_SHARED : PTHREAD_PROCESS_PRIVATE); + + return 0; + } +Index: glibc-2.24-256-g5140d03/nptl/pthread_condattr_init.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_condattr_init.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_condattr_init.c +@@ -23,7 +23,9 @@ + int + __pthread_condattr_init (pthread_condattr_t *attr) + { +- memset (attr, '\0', sizeof (*attr)); ++ struct pthread_condattr *iattr = (struct pthread_condattr *) attr; ++ /* Default is not pshared and CLOCK_REALTIME. */ ++ iattr-> value = CLOCK_REALTIME << 1; + + return 0; + } +Index: glibc-2.24-256-g5140d03/nptl/pthread_condattr_setclock.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/pthread_condattr_setclock.c ++++ glibc-2.24-256-g5140d03/nptl/pthread_condattr_setclock.c +@@ -18,7 +18,7 @@ + + #include + #include +-#include ++#include + #include + #include + #include "pthreadP.h" +@@ -33,12 +33,17 @@ pthread_condattr_setclock (pthread_conda + in the pthread_cond_t structure needs to be adjusted. */ + return EINVAL; + ++ /* If we do not support waiting using CLOCK_MONOTONIC, return an error. */ ++ if (clock_id == CLOCK_MONOTONIC ++ && !futex_supports_exact_relative_timeouts()) ++ return ENOTSUP; ++ + /* Make sure the value fits in the bits we reserved. 
*/ +- assert (clock_id < (1 << COND_NWAITERS_SHIFT)); ++ assert (clock_id < (1 << COND_CLOCK_BITS)); + + int *valuep = &((struct pthread_condattr *) attr)->value; + +- *valuep = ((*valuep & ~(((1 << COND_NWAITERS_SHIFT) - 1) << 1)) ++ *valuep = ((*valuep & ~(((1 << COND_CLOCK_BITS) - 1) << 1)) + | (clock_id << 1)); + + return 0; +Index: glibc-2.24-256-g5140d03/nptl/tst-cond1.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/tst-cond1.c ++++ glibc-2.24-256-g5140d03/nptl/tst-cond1.c +@@ -73,6 +73,9 @@ do_test (void) + + puts ("parent: wait for condition"); + ++ /* This test will fail on spurious wake-ups, which are allowed; however, ++ the current implementation shouldn't produce spurious wake-ups in the ++ scenario we are testing here. */ + err = pthread_cond_wait (&cond, &mut); + if (err != 0) + error (EXIT_FAILURE, err, "parent: cannot wait fir signal"); +Index: glibc-2.24-256-g5140d03/nptl/tst-cond20.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/tst-cond20.c ++++ glibc-2.24-256-g5140d03/nptl/tst-cond20.c +@@ -96,7 +96,10 @@ do_test (void) + + for (i = 0; i < ROUNDS; ++i) + { +- pthread_cond_wait (&cond2, &mut); ++ /* Make sure we discard spurious wake-ups. 
*/ ++ do ++ pthread_cond_wait (&cond2, &mut); ++ while (count != N); + + if (i & 1) + pthread_mutex_unlock (&mut); +Index: glibc-2.24-256-g5140d03/nptl/tst-cond22.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/nptl/tst-cond22.c ++++ glibc-2.24-256-g5140d03/nptl/tst-cond22.c +@@ -106,10 +106,11 @@ do_test (void) + status = 1; + } + +- printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n", +- c.__data.__lock, c.__data.__futex, c.__data.__total_seq, +- c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex, +- c.__data.__nwaiters, c.__data.__broadcast_seq); ++ printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n", ++ c.__data.__wseq, c.__data.__g1_start, ++ c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], ++ c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], ++ c.__data.__g1_orig_size, c.__data.__wrefs); + + if (pthread_create (&th, NULL, tf, (void *) 1l) != 0) + { +@@ -148,10 +149,11 @@ do_test (void) + status = 1; + } + +- printf ("cond = { %d, %x, %lld, %lld, %lld, %p, %u, %u }\n", +- c.__data.__lock, c.__data.__futex, c.__data.__total_seq, +- c.__data.__wakeup_seq, c.__data.__woken_seq, c.__data.__mutex, +- c.__data.__nwaiters, c.__data.__broadcast_seq); ++ printf ("cond = { %llu, %llu, %u/%u/%u, %u/%u/%u, %u, %u }\n", ++ c.__data.__wseq, c.__data.__g1_start, ++ c.__data.__g_signals[0], c.__data.__g_refs[0], c.__data.__g_size[0], ++ c.__data.__g_signals[1], c.__data.__g_refs[1], c.__data.__g_size[1], ++ c.__data.__g1_orig_size, c.__data.__wrefs); + + return status; + } +Index: glibc-2.24-256-g5140d03/sysdeps/aarch64/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/aarch64/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/aarch64/nptl/bits/pthreadtypes.h +@@ -90,17 +90,32 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- 
__extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; +- long int __align; ++ __extension__ long long int __align; + } pthread_cond_t; + + typedef union +Index: glibc-2.24-256-g5140d03/sysdeps/arm/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/arm/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/arm/nptl/bits/pthreadtypes.h +@@ -93,14 +93,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int 
__g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/ia64/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/ia64/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/ia64/nptl/bits/pthreadtypes.h +@@ -90,17 +90,32 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; +- long int __align; ++ __extension__ long long int __align; + } pthread_cond_t; + + typedef union +Index: glibc-2.24-256-g5140d03/sysdeps/m68k/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/m68k/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/m68k/nptl/bits/pthreadtypes.h +@@ -88,19 +88,35 @@ typedef union + + + /* Data structure for conditional variable handling. The structure of +- the attribute type is deliberately not exposed. */ ++ the attribute type is not exposed on purpose. 
*/ + typedef union + { + struct + { +- int __lock __attribute__ ((__aligned__ (4))); +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ /* Enforce proper alignment of fields used as futex words. */ ++ unsigned int __g_refs[2] __attribute__ ((__aligned__ (4))); ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/microblaze/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/microblaze/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/microblaze/nptl/bits/pthreadtypes.h +@@ -91,14 +91,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ 
unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/mips/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/mips/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/mips/nptl/bits/pthreadtypes.h +@@ -117,19 +117,34 @@ typedef union + + + /* Data structure for conditional variable handling. The structure of +- the attribute type is deliberately not exposed. */ ++ the attribute type is not exposed on purpose. */ + typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/nios2/nptl/bits/pthreadtypes.h +=================================================================== +--- 
glibc-2.24-256-g5140d03.orig/sysdeps/nios2/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/nios2/nptl/bits/pthreadtypes.h +@@ -88,19 +88,34 @@ typedef union + + + /* Data structure for conditional variable handling. The structure of +- the attribute type is deliberately not exposed. */ ++ the attribute type is not exposed on purpose. */ + typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/nptl/internaltypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/nptl/internaltypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/nptl/internaltypes.h +@@ -68,20 +68,13 @@ struct pthread_condattr + { + /* Combination of values: + +- Bit 0 : flag whether conditional variable will be sharable between +- processes. +- +- Bit 1-7: clock ID. */ ++ Bit 0 : flag whether conditional variable will be ++ sharable between processes. ++ Bit 1-COND_CLOCK_BITS: Clock ID. COND_CLOCK_BITS is the number of bits ++ needed to represent the ID of the clock. 
*/ + int value; + }; +- +- +-/* The __NWAITERS field is used as a counter and to house the number +- of bits for other purposes. COND_CLOCK_BITS is the number +- of bits needed to represent the ID of the clock. COND_NWAITERS_SHIFT +- is the number of bits reserved for other purposes like the clock. */ +-#define COND_CLOCK_BITS 1 +-#define COND_NWAITERS_SHIFT 1 ++#define COND_CLOCK_BITS 1 + + + /* Read-write lock variable attribute data structure. */ +Index: glibc-2.24-256-g5140d03/sysdeps/nptl/pthread.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/nptl/pthread.h ++++ glibc-2.24-256-g5140d03/sysdeps/nptl/pthread.h +@@ -184,7 +184,7 @@ enum + + + /* Conditional variable handling. */ +-#define PTHREAD_COND_INITIALIZER { { 0, 0, 0, 0, 0, (void *) 0, 0, 0 } } ++#define PTHREAD_COND_INITIALIZER { { {0}, {0}, {0, 0}, {0, 0}, 0, 0, {0, 0} } } + + + /* Cleanup buffers */ +Index: glibc-2.24-256-g5140d03/sysdeps/s390/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/s390/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/s390/nptl/bits/pthreadtypes.h +@@ -142,14 +142,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned 
int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/sh/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/sh/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/sh/nptl/bits/pthreadtypes.h +@@ -93,14 +93,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/tile/nptl/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/tile/nptl/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/tile/nptl/bits/pthreadtypes.h +@@ -122,14 +122,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- 
unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/alpha/bits/pthreadtypes.h +@@ -84,19 +84,34 @@ typedef union + + + /* Data structure for conditional variable handling. The structure of +- the attribute type is deliberately not exposed. */ ++ the attribute type is not exposed on purpose. 
*/ + typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/hppa/internaltypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/hppa/internaltypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/hppa/internaltypes.h +@@ -46,32 +46,38 @@ fails because __initializer is zero, and + is correctly. */ + + #define cond_compat_clear(var) \ +-({ \ +- int tmp = 0; \ +- var->__data.__lock = 0; \ +- var->__data.__futex = 0; \ +- var->__data.__mutex = NULL; \ +- /* Clear __initializer last, to indicate initialization is done. 
*/ \ +- __asm__ __volatile__ ("stw,ma %1,0(%0)" \ +- : : "r" (&var->__data.__initializer), "r" (tmp) : "memory"); \ ++({ \ ++ int tmp = 0; \ ++ var->__data.__wseq = 0; \ ++ var->__data.__signals_sent = 0; \ ++ var->__data.__confirmed = 0; \ ++ var->__data.__generation = 0; \ ++ var->__data.__mutex = NULL; \ ++ var->__data.__quiescence_waiters = 0; \ ++ var->__data.__clockid = 0; \ ++ /* Clear __initializer last, to indicate initialization is done. */ \ ++ /* This synchronizes-with the acquire load below. */ \ ++ atomic_store_release (&var->__data.__initializer, 0); \ + }) + + #define cond_compat_check_and_clear(var) \ + ({ \ +- int ret; \ +- volatile int *value = &var->__data.__initializer; \ +- if ((ret = atomic_compare_and_exchange_val_acq(value, 2, 1))) \ ++ int v; \ ++ int *value = &var->__data.__initializer; \ ++ /* This synchronizes-with the release store above. */ \ ++ while ((v = atomic_load_acquire (value)) != 0) \ + { \ +- if (ret == 1) \ ++ if (v == 1 \ ++ /* Relaxed MO is fine; it only matters who's first. */ \ ++ && atomic_compare_exchange_acquire_weak_relaxed (value, 1, 2)) \ + { \ +- /* Initialize structure. */ \ ++ /* We're first; initialize structure. */ \ + cond_compat_clear (var); \ ++ break; \ + } \ + else \ +- { \ +- /* Yield until structure is initialized. */ \ +- while (*value == 2) sched_yield (); \ +- } \ ++ /* Yield before we re-check initialization status. */ \ ++ sched_yield (); \ + } \ + }) + +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/hppa/pthread_cond_timedwait.c ++++ /dev/null +@@ -1,41 +0,0 @@ +-/* Copyright (C) 2009-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Carlos O'Donell , 2009. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library. If not, see +- . */ +- +-#ifndef INCLUDED_SELF +-# define INCLUDED_SELF +-# include +-#else +-# include +-# include +-# include +-# include +-int +-__pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, +- const struct timespec *abstime) +-{ +- cond_compat_check_and_clear (cond); +- return __pthread_cond_timedwait_internal (cond, mutex, abstime); +-} +-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, +- GLIBC_2_3_2); +-# undef versioned_symbol +-# define versioned_symbol(lib, local, symbol, version) +-# undef __pthread_cond_timedwait +-# define __pthread_cond_timedwait __pthread_cond_timedwait_internal +-# include_next +-#endif +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c ++++ glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/hppa/pthread_cond_wait.c +@@ -32,9 +32,22 @@ __pthread_cond_wait (pthread_cond_t *con + } + versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, + GLIBC_2_3_2); ++int ++__pthread_cond_timedwait (cond, mutex, abstime) ++ pthread_cond_t *cond; ++ pthread_mutex_t *mutex; ++ const struct timespec *abstime; ++{ ++ cond_compat_check_and_clear (cond); ++ return 
__pthread_cond_timedwait_internal (cond, mutex, abstime); ++} ++versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, ++ GLIBC_2_3_2); + # undef versioned_symbol + # define versioned_symbol(lib, local, symbol, version) + # undef __pthread_cond_wait + # define __pthread_cond_wait __pthread_cond_wait_internal ++# undef __pthread_cond_timedwait ++# define __pthread_cond_timedwait __pthread_cond_timedwait_internal + # include_next + #endif +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/i386/i686/pthread_cond_timedwait.S ++++ /dev/null +@@ -1,20 +0,0 @@ +-/* Copyright (C) 2003-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2003. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#define HAVE_CMOV 1 +-#include "../pthread_cond_timedwait.S" +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/i386/pthread_cond_broadcast.S ++++ /dev/null +@@ -1,241 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. 
+- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- .text +- +- /* int pthread_cond_broadcast (pthread_cond_t *cond) */ +- .globl __pthread_cond_broadcast +- .type __pthread_cond_broadcast, @function +- .align 16 +-__pthread_cond_broadcast: +- cfi_startproc +- pushl %ebx +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebx, 0) +- pushl %esi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%esi, 0) +- pushl %edi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%edi, 0) +- pushl %ebp +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebp, 0) +- cfi_remember_state +- +- movl 20(%esp), %ebx +- +- LIBC_PROBE (cond_broadcast, 1, %edx) +- +- /* Get internal lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jnz 1f +- +-2: addl $cond_futex, %ebx +- movl total_seq+4-cond_futex(%ebx), %eax +- movl total_seq-cond_futex(%ebx), %ebp +- cmpl wakeup_seq+4-cond_futex(%ebx), %eax +- ja 3f +- jb 4f +- cmpl wakeup_seq-cond_futex(%ebx), %ebp +- jna 4f +- +- /* Cause all currently waiting threads to recognize they are +- woken up. 
*/ +-3: movl %ebp, wakeup_seq-cond_futex(%ebx) +- movl %eax, wakeup_seq-cond_futex+4(%ebx) +- movl %ebp, woken_seq-cond_futex(%ebx) +- movl %eax, woken_seq-cond_futex+4(%ebx) +- addl %ebp, %ebp +- addl $1, broadcast_seq-cond_futex(%ebx) +- movl %ebp, (%ebx) +- +- /* Get the address of the mutex used. */ +- movl dep_mutex-cond_futex(%ebx), %edi +- +- /* Unlock. */ +- LOCK +- subl $1, cond_lock-cond_futex(%ebx) +- jne 7f +- +- /* Don't use requeue for pshared condvars. */ +-8: cmpl $-1, %edi +- je 9f +- +- /* Do not use requeue for pshared condvars. */ +- testl $PS_BIT, MUTEX_KIND(%edi) +- jne 9f +- +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. */ +- movl MUTEX_KIND(%edi), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- je 81f +- +- /* Wake up all threads. */ +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %ecx +-#else +- movl %gs:PRIVATE_FUTEX, %ecx +- orl $FUTEX_CMP_REQUEUE, %ecx +-#endif +- movl $SYS_futex, %eax +- movl $0x7fffffff, %esi +- movl $1, %edx +- /* Get the address of the futex involved. */ +-# if MUTEX_FUTEX != 0 +- addl $MUTEX_FUTEX, %edi +-# endif +-/* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for sysenter. +- ENTER_KERNEL */ +- int $0x80 +- +- /* For any kind of error, which mainly is EAGAIN, we try again +- with WAKE. The general test also covers running on old +- kernels. */ +- cmpl $0xfffff001, %eax +- jae 9f +- +-6: xorl %eax, %eax +- popl %ebp +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebp) +- popl %edi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%edi) +- popl %esi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%esi) +- popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- ret +- +- cfi_restore_state +- +-81: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx +- movl $SYS_futex, %eax +- movl $0x7fffffff, %esi +- movl $1, %edx +- /* Get the address of the futex involved. 
*/ +-# if MUTEX_FUTEX != 0 +- addl $MUTEX_FUTEX, %edi +-# endif +- int $0x80 +- +- /* For any kind of error, which mainly is EAGAIN, we try again +- with WAKE. The general test also covers running on old +- kernels. */ +- cmpl $0xfffff001, %eax +- jb 6b +- jmp 9f +- +- /* Initial locking failed. */ +-1: +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 2b +- +- .align 16 +- /* Unlock. */ +-4: LOCK +- subl $1, cond_lock-cond_futex(%ebx) +- je 6b +- +- /* Unlock in loop requires wakeup. */ +-5: leal cond_lock-cond_futex(%ebx), %eax +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 6b +- +- /* Unlock in loop requires wakeup. */ +-7: leal cond_lock-cond_futex(%ebx), %eax +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 8b +- +-9: /* The futex requeue functionality is not available. 
*/ +- movl $0x7fffffff, %edx +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $SYS_futex, %eax +- ENTER_KERNEL +- jmp 6b +- cfi_endproc +- .size __pthread_cond_broadcast, .-__pthread_cond_broadcast +-versioned_symbol (libpthread, __pthread_cond_broadcast, pthread_cond_broadcast, +- GLIBC_2_3_2) +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/i386/pthread_cond_signal.S ++++ /dev/null +@@ -1,216 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- .text +- +- /* int pthread_cond_signal (pthread_cond_t *cond) */ +- .globl __pthread_cond_signal +- .type __pthread_cond_signal, @function +- .align 16 +-__pthread_cond_signal: +- +- cfi_startproc +- pushl %ebx +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebx, 0) +- pushl %edi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%edi, 0) +- cfi_remember_state +- +- movl 12(%esp), %edi +- +- LIBC_PROBE (cond_signal, 1, %edi) +- +- /* Get internal lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%edi) +-#else +- cmpxchgl %edx, cond_lock(%edi) +-#endif +- jnz 1f +- +-2: leal cond_futex(%edi), %ebx +- movl total_seq+4(%edi), %eax +- movl total_seq(%edi), %ecx +- cmpl wakeup_seq+4(%edi), %eax +-#if cond_lock != 0 +- /* Must use leal to preserve the flags. */ +- leal cond_lock(%edi), %edi +-#endif +- ja 3f +- jb 4f +- cmpl wakeup_seq-cond_futex(%ebx), %ecx +- jbe 4f +- +- /* Bump the wakeup number. */ +-3: addl $1, wakeup_seq-cond_futex(%ebx) +- adcl $0, wakeup_seq-cond_futex+4(%ebx) +- addl $1, (%ebx) +- +- /* Wake up one thread. */ +- pushl %esi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%esi, 0) +- pushl %ebp +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebp, 0) +- +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- sete %cl +- je 8f +- +- movl dep_mutex-cond_futex(%ebx), %edx +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. 
*/ +- movl MUTEX_KIND(%edx), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- je 9f +- +-8: subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE_OP, %ecx +- movl $SYS_futex, %eax +- movl $1, %edx +- movl $1, %esi +- movl $FUTEX_OP_CLEAR_WAKE_IF_GT_ONE, %ebp +- /* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for +- sysenter. +- ENTER_KERNEL */ +- int $0x80 +- popl %ebp +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebp) +- popl %esi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%esi) +- +- /* For any kind of error, we try again with WAKE. +- The general test also covers running on old kernels. */ +- cmpl $-4095, %eax +- jae 7f +- +-6: xorl %eax, %eax +- popl %edi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%edi) +- popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- ret +- +- cfi_restore_state +- +-9: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx +- movl $SYS_futex, %eax +- movl $1, %edx +- xorl %esi, %esi +- movl dep_mutex-cond_futex(%ebx), %edi +- movl (%ebx), %ebp +- /* FIXME: Until Ingo fixes 4G/4G vDSO, 6 arg syscalls are broken for +- sysenter. +- ENTER_KERNEL */ +- int $0x80 +- popl %ebp +- popl %esi +- +- leal -cond_futex(%ebx), %edi +- +- /* For any kind of error, we try again with WAKE. +- The general test also covers running on old kernels. */ +- cmpl $-4095, %eax +- jb 4f +- +-7: +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- orl $FUTEX_WAKE, %ecx +- +- movl $SYS_futex, %eax +- /* %edx should be 1 already from $FUTEX_WAKE_OP syscall. +- movl $1, %edx */ +- ENTER_KERNEL +- +- /* Unlock. Note that at this point %edi always points to +- cond_lock. */ +-4: LOCK +- subl $1, (%edi) +- je 6b +- +- /* Unlock in loop requires wakeup. 
*/ +-5: movl %edi, %eax +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 6b +- +- /* Initial locking failed. */ +-1: +-#if cond_lock == 0 +- movl %edi, %edx +-#else +- leal cond_lock(%edi), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%edi) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 2b +- +- cfi_endproc +- .size __pthread_cond_signal, .-__pthread_cond_signal +-versioned_symbol (libpthread, __pthread_cond_signal, pthread_cond_signal, +- GLIBC_2_3_2) +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/i386/pthread_cond_timedwait.S ++++ /dev/null +@@ -1,974 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- .text +- +-/* int pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, +- const struct timespec *abstime) */ +- .globl __pthread_cond_timedwait +- .type __pthread_cond_timedwait, @function +- .align 16 +-__pthread_cond_timedwait: +-.LSTARTCODE: +- cfi_startproc +-#ifdef SHARED +- cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect, +- DW.ref.__gcc_personality_v0) +- cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART) +-#else +- cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0) +- cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) +-#endif +- +- pushl %ebp +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebp, 0) +- pushl %edi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%edi, 0) +- pushl %esi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%esi, 0) +- pushl %ebx +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebx, 0) +- +- movl 20(%esp), %ebx +- movl 28(%esp), %ebp +- +- LIBC_PROBE (cond_timedwait, 3, %ebx, 24(%esp), %ebp) +- +- cmpl $1000000000, 4(%ebp) +- movl $EINVAL, %eax +- jae 18f +- +- /* Stack frame: +- +- esp + 32 +- +--------------------------+ +- esp + 24 | timeout value | +- +--------------------------+ +- esp + 20 | futex pointer | +- +--------------------------+ +- esp + 16 | pi-requeued flag | +- +--------------------------+ +- esp + 12 | old broadcast_seq value | +- +--------------------------+ +- esp + 4 | old wake_seq value | +- +--------------------------+ +- esp + 0 | old cancellation mode | +- +--------------------------+ +- */ +- +-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME +-# ifdef PIC +- LOAD_PIC_REG (cx) +- cmpl $0, __have_futex_clock_realtime@GOTOFF(%ecx) +-# else +- cmpl $0, __have_futex_clock_realtime +-# endif +- je .Lreltmo +-#endif +- +- /* Get internal lock. 
*/ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jnz 1f +- +- /* Store the reference to the mutex. If there is already a +- different value in there this is a bad user bug. */ +-2: cmpl $-1, dep_mutex(%ebx) +- movl 24(%esp), %eax +- je 17f +- movl %eax, dep_mutex(%ebx) +- +- /* Unlock the mutex. */ +-17: xorl %edx, %edx +- call __pthread_mutex_unlock_usercnt +- +- testl %eax, %eax +- jne 16f +- +- addl $1, total_seq(%ebx) +- adcl $0, total_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- addl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +-#ifdef __ASSUME_FUTEX_CLOCK_REALTIME +-# define FRAME_SIZE 24 +-#else +-# define FRAME_SIZE 32 +-#endif +- subl $FRAME_SIZE, %esp +- cfi_adjust_cfa_offset(FRAME_SIZE) +- cfi_remember_state +- +- /* Get and store current wakeup_seq value. */ +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- movl broadcast_seq(%ebx), %eax +- movl %edi, 4(%esp) +- movl %edx, 8(%esp) +- movl %eax, 12(%esp) +- +- /* Reset the pi-requeued flag. */ +- movl $0, 16(%esp) +- +- cmpl $0, (%ebp) +- movl $-ETIMEDOUT, %esi +- js 6f +- +-8: movl cond_futex(%ebx), %edi +- movl %edi, 20(%esp) +- +- /* Unlock. */ +- LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 3f +- +-.LcleanupSTART: +-4: call __pthread_enable_asynccancel +- movl %eax, (%esp) +- +- leal (%ebp), %esi +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- sete %cl +- je 40f +- +- movl dep_mutex(%ebx), %edi +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. */ +- movl MUTEX_KIND(%edi), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 40f +- +- movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx +- /* The following only works like this because we only support +- two clocks, represented using a single bit. 
*/ +- testl $1, cond_nwaiters(%ebx) +- /* XXX Need to implement using sete instead of a jump. */ +- jne 42f +- orl $FUTEX_CLOCK_REALTIME, %ecx +- +-42: movl 20(%esp), %edx +- addl $cond_futex, %ebx +-.Ladd_cond_futex_pi: +- movl $SYS_futex, %eax +- ENTER_KERNEL +- subl $cond_futex, %ebx +-.Lsub_cond_futex_pi: +- movl %eax, %esi +- /* Set the pi-requeued flag only if the kernel has returned 0. The +- kernel does not hold the mutex on ETIMEDOUT or any other error. */ +- cmpl $0, %eax +- sete 16(%esp) +- je 41f +- +- /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns +- successfully, it has already locked the mutex for us and the +- pi_flag (16(%esp)) is set to denote that fact. However, if another +- thread changed the futex value before we entered the wait, the +- syscall may return an EAGAIN and the mutex is not locked. We go +- ahead with a success anyway since later we look at the pi_flag to +- decide if we got the mutex or not. The sequence numbers then make +- sure that only one of the threads actually wake up. We retry using +- normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal +- and PI futexes don't mix. +- +- Note that we don't check for EAGAIN specifically; we assume that the +- only other error the futex function could return is EAGAIN (barring +- the ETIMEOUT of course, for the timeout case in futex) since +- anything else would mean an error in our function. It is too +- expensive to do that check for every call (which is quite common in +- case of a large number of threads), so it has been skipped. */ +- cmpl $-ENOSYS, %eax +- jne 41f +- xorl %ecx, %ecx +- +-40: subl $1, %ecx +- movl $0, 16(%esp) +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAIT_BITSET, %ecx +- /* The following only works like this because we only support +- two clocks, represented using a single bit. 
*/ +- testl $1, cond_nwaiters(%ebx) +- jne 30f +- orl $FUTEX_CLOCK_REALTIME, %ecx +-30: +- movl 20(%esp), %edx +- movl $0xffffffff, %ebp +- addl $cond_futex, %ebx +-.Ladd_cond_futex: +- movl $SYS_futex, %eax +- ENTER_KERNEL +- subl $cond_futex, %ebx +-.Lsub_cond_futex: +- movl 28+FRAME_SIZE(%esp), %ebp +- movl %eax, %esi +- +-41: movl (%esp), %eax +- call __pthread_disable_asynccancel +-.LcleanupEND: +- +- /* Lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jnz 5f +- +-6: movl broadcast_seq(%ebx), %eax +- cmpl 12(%esp), %eax +- jne 23f +- +- movl woken_seq(%ebx), %eax +- movl woken_seq+4(%ebx), %ecx +- +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- +- cmpl 8(%esp), %edx +- jne 7f +- cmpl 4(%esp), %edi +- je 15f +- +-7: cmpl %ecx, %edx +- jne 9f +- cmp %eax, %edi +- jne 9f +- +-15: cmpl $-ETIMEDOUT, %esi +- je 28f +- +- /* We need to go back to futex_wait. If we're using requeue_pi, then +- release the mutex we had acquired and go back. */ +- movl 16(%esp), %edx +- test %edx, %edx +- jz 8b +- +- /* Adjust the mutex values first and then unlock it. The unlock +- should always succeed or else the kernel did not lock the mutex +- correctly. */ +- movl dep_mutex(%ebx), %eax +- call __pthread_mutex_cond_lock_adjust +- movl dep_mutex(%ebx), %eax +- xorl %edx, %edx +- call __pthread_mutex_unlock_usercnt +- jmp 8b +- +-28: addl $1, wakeup_seq(%ebx) +- adcl $0, wakeup_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- movl $ETIMEDOUT, %esi +- jmp 14f +- +-23: xorl %esi, %esi +- jmp 24f +- +-9: xorl %esi, %esi +-14: addl $1, woken_seq(%ebx) +- adcl $0, woken_seq+4(%ebx) +- +-24: subl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +- /* Wake up a thread which wants to destroy the condvar object. 
*/ +- movl total_seq(%ebx), %eax +- andl total_seq+4(%ebx), %eax +- cmpl $0xffffffff, %eax +- jne 25f +- movl cond_nwaiters(%ebx), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 25f +- +- addl $cond_nwaiters, %ebx +- movl $SYS_futex, %eax +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_nwaiters(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $1, %edx +- ENTER_KERNEL +- subl $cond_nwaiters, %ebx +- +-25: LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 10f +- +-11: movl 24+FRAME_SIZE(%esp), %eax +- /* With requeue_pi, the mutex lock is held in the kernel. */ +- movl 16(%esp), %ecx +- testl %ecx, %ecx +- jnz 27f +- +- call __pthread_mutex_cond_lock +-26: addl $FRAME_SIZE, %esp +- cfi_adjust_cfa_offset(-FRAME_SIZE) +- +- /* We return the result of the mutex_lock operation if it failed. */ +- testl %eax, %eax +-#ifdef HAVE_CMOV +- cmovel %esi, %eax +-#else +- jne 22f +- movl %esi, %eax +-22: +-#endif +- +-18: popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- popl %esi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%esi) +- popl %edi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%edi) +- popl %ebp +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebp) +- +- ret +- +- cfi_restore_state +- +-27: call __pthread_mutex_cond_lock_adjust +- xorl %eax, %eax +- jmp 26b +- +- cfi_adjust_cfa_offset(-FRAME_SIZE); +- /* Initial locking failed. */ +-1: +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 2b +- +- /* The initial unlocking of the mutex failed. 
*/ +-16: +- LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 18b +- +- movl %eax, %esi +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- +- movl %esi, %eax +- jmp 18b +- +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +- /* Unlock in loop requires wakeup. */ +-3: +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 4b +- +- /* Locking in loop failed. */ +-5: +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 6b +- +- /* Unlock after loop requires wakeup. */ +-10: +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 11b +- +-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME +- cfi_adjust_cfa_offset(-FRAME_SIZE) +-.Lreltmo: +- /* Get internal lock. 
*/ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-# if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-# else +- cmpxchgl %edx, cond_lock(%ebx) +-# endif +- jnz 101f +- +- /* Store the reference to the mutex. If there is already a +- different value in there this is a bad user bug. */ +-102: cmpl $-1, dep_mutex(%ebx) +- movl 24(%esp), %eax +- je 117f +- movl %eax, dep_mutex(%ebx) +- +- /* Unlock the mutex. */ +-117: xorl %edx, %edx +- call __pthread_mutex_unlock_usercnt +- +- testl %eax, %eax +- jne 16b +- +- addl $1, total_seq(%ebx) +- adcl $0, total_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- addl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +- subl $FRAME_SIZE, %esp +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +- /* Get and store current wakeup_seq value. */ +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- movl broadcast_seq(%ebx), %eax +- movl %edi, 4(%esp) +- movl %edx, 8(%esp) +- movl %eax, 12(%esp) +- +- /* Reset the pi-requeued flag. */ +- movl $0, 16(%esp) +- +- /* Get the current time. */ +-108: movl %ebx, %edx +-# ifdef __NR_clock_gettime +- /* Get the clock number. */ +- movl cond_nwaiters(%ebx), %ebx +- andl $((1 << nwaiters_shift) - 1), %ebx +- /* Only clocks 0 and 1 are allowed so far. Both are handled in the +- kernel. */ +- leal 24(%esp), %ecx +- movl $__NR_clock_gettime, %eax +- ENTER_KERNEL +- movl %edx, %ebx +- +- /* Compute relative timeout. */ +- movl (%ebp), %ecx +- movl 4(%ebp), %edx +- subl 24(%esp), %ecx +- subl 28(%esp), %edx +-# else +- /* Get the current time. */ +- leal 24(%esp), %ebx +- xorl %ecx, %ecx +- movl $__NR_gettimeofday, %eax +- ENTER_KERNEL +- movl %edx, %ebx +- +- /* Compute relative timeout. */ +- movl 28(%esp), %eax +- movl $1000, %edx +- mul %edx /* Milli seconds to nano seconds. 
*/ +- movl (%ebp), %ecx +- movl 4(%ebp), %edx +- subl 24(%esp), %ecx +- subl %eax, %edx +-# endif +- jns 112f +- addl $1000000000, %edx +- subl $1, %ecx +-112: testl %ecx, %ecx +- movl $-ETIMEDOUT, %esi +- js 106f +- +- /* Store relative timeout. */ +-121: movl %ecx, 24(%esp) +- movl %edx, 28(%esp) +- +- movl cond_futex(%ebx), %edi +- movl %edi, 20(%esp) +- +- /* Unlock. */ +- LOCK +-# if cond_lock == 0 +- subl $1, (%ebx) +-# else +- subl $1, cond_lock(%ebx) +-# endif +- jne 103f +- +-.LcleanupSTART2: +-104: call __pthread_enable_asynccancel +- movl %eax, (%esp) +- +- leal 24(%esp), %esi +-# if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-# endif +- cmpl $-1, dep_mutex(%ebx) +- sete %cl +- subl $1, %ecx +-# ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-# else +- andl %gs:PRIVATE_FUTEX, %ecx +-# endif +-# if FUTEX_WAIT != 0 +- addl $FUTEX_WAIT, %ecx +-# endif +- movl 20(%esp), %edx +- addl $cond_futex, %ebx +-.Ladd_cond_futex2: +- movl $SYS_futex, %eax +- ENTER_KERNEL +- subl $cond_futex, %ebx +-.Lsub_cond_futex2: +- movl %eax, %esi +- +-141: movl (%esp), %eax +- call __pthread_disable_asynccancel +-.LcleanupEND2: +- +- +- /* Lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-# if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-# else +- cmpxchgl %edx, cond_lock(%ebx) +-# endif +- jnz 105f +- +-106: movl broadcast_seq(%ebx), %eax +- cmpl 12(%esp), %eax +- jne 23b +- +- movl woken_seq(%ebx), %eax +- movl woken_seq+4(%ebx), %ecx +- +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- +- cmpl 8(%esp), %edx +- jne 107f +- cmpl 4(%esp), %edi +- je 115f +- +-107: cmpl %ecx, %edx +- jne 9b +- cmp %eax, %edi +- jne 9b +- +-115: cmpl $-ETIMEDOUT, %esi +- je 28b +- +- jmp 8b +- +- cfi_adjust_cfa_offset(-FRAME_SIZE) +- /* Initial locking failed. 
*/ +-101: +-# if cond_lock == 0 +- movl %ebx, %edx +-# else +- leal cond_lock(%ebx), %edx +-# endif +-# if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-# endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-# if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-# endif +- call __lll_lock_wait +- jmp 102b +- +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +- /* Unlock in loop requires wakeup. */ +-103: +-# if cond_lock == 0 +- movl %ebx, %eax +-# else +- leal cond_lock(%ebx), %eax +-# endif +-# if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-# endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-# if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-# endif +- call __lll_unlock_wake +- jmp 104b +- +- /* Locking in loop failed. */ +-105: +-# if cond_lock == 0 +- movl %ebx, %edx +-# else +- leal cond_lock(%ebx), %edx +-# endif +-# if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-# endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-# if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-# endif +- call __lll_lock_wait +- jmp 106b +-#endif +- +- .size __pthread_cond_timedwait, .-__pthread_cond_timedwait +-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, +- GLIBC_2_3_2) +- +- +- .type __condvar_tw_cleanup2, @function +-__condvar_tw_cleanup2: +- subl $cond_futex, %ebx +- .size __condvar_tw_cleanup2, .-__condvar_tw_cleanup2 +- .type __condvar_tw_cleanup, @function +-__condvar_tw_cleanup: +- movl %eax, %esi +- +- /* Get internal lock. 
*/ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jz 1f +- +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- +-1: movl broadcast_seq(%ebx), %eax +- cmpl 12(%esp), %eax +- jne 3f +- +- /* We increment the wakeup_seq counter only if it is lower than +- total_seq. If this is not the case the thread was woken and +- then canceled. In this case we ignore the signal. */ +- movl total_seq(%ebx), %eax +- movl total_seq+4(%ebx), %edi +- cmpl wakeup_seq+4(%ebx), %edi +- jb 6f +- ja 7f +- cmpl wakeup_seq(%ebx), %eax +- jbe 7f +- +-6: addl $1, wakeup_seq(%ebx) +- adcl $0, wakeup_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- +-7: addl $1, woken_seq(%ebx) +- adcl $0, woken_seq+4(%ebx) +- +-3: subl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +- /* Wake up a thread which wants to destroy the condvar object. 
*/ +- xorl %edi, %edi +- movl total_seq(%ebx), %eax +- andl total_seq+4(%ebx), %eax +- cmpl $0xffffffff, %eax +- jne 4f +- movl cond_nwaiters(%ebx), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 4f +- +- addl $cond_nwaiters, %ebx +- movl $SYS_futex, %eax +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_nwaiters(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $1, %edx +- ENTER_KERNEL +- subl $cond_nwaiters, %ebx +- movl $1, %edi +- +-4: LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- je 2f +- +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- +- /* Wake up all waiters to make sure no signal gets lost. */ +-2: testl %edi, %edi +- jnz 5f +- addl $cond_futex, %ebx +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $SYS_futex, %eax +- movl $0x7fffffff, %edx +- ENTER_KERNEL +- +- /* Lock the mutex only if we don't own it already. This only happens +- in case of PI mutexes, if we got cancelled after a successful +- return of the futex syscall and before disabling async +- cancellation. */ +-5: movl 24+FRAME_SIZE(%esp), %eax +- movl MUTEX_KIND(%eax), %ebx +- andl $(ROBUST_BIT|PI_BIT), %ebx +- cmpl $PI_BIT, %ebx +- jne 8f +- +- movl (%eax), %ebx +- andl $TID_MASK, %ebx +- cmpl %ebx, %gs:TID +- jne 8f +- /* We managed to get the lock. 
Fix it up before returning. */ +- call __pthread_mutex_cond_lock_adjust +- jmp 9f +- +-8: call __pthread_mutex_cond_lock +- +-9: movl %esi, (%esp) +-.LcallUR: +- call _Unwind_Resume +- hlt +-.LENDCODE: +- cfi_endproc +- .size __condvar_tw_cleanup, .-__condvar_tw_cleanup +- +- +- .section .gcc_except_table,"a",@progbits +-.LexceptSTART: +- .byte DW_EH_PE_omit # @LPStart format (omit) +- .byte DW_EH_PE_omit # @TType format (omit) +- .byte DW_EH_PE_sdata4 # call-site format +- # DW_EH_PE_sdata4 +- .uleb128 .Lcstend-.Lcstbegin +-.Lcstbegin: +- .long .LcleanupSTART-.LSTARTCODE +- .long .Ladd_cond_futex_pi-.LcleanupSTART +- .long __condvar_tw_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .Ladd_cond_futex_pi-.LSTARTCODE +- .long .Lsub_cond_futex_pi-.Ladd_cond_futex_pi +- .long __condvar_tw_cleanup2-.LSTARTCODE +- .uleb128 0 +- .long .Lsub_cond_futex_pi-.LSTARTCODE +- .long .Ladd_cond_futex-.Lsub_cond_futex_pi +- .long __condvar_tw_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .Ladd_cond_futex-.LSTARTCODE +- .long .Lsub_cond_futex-.Ladd_cond_futex +- .long __condvar_tw_cleanup2-.LSTARTCODE +- .uleb128 0 +- .long .Lsub_cond_futex-.LSTARTCODE +- .long .LcleanupEND-.Lsub_cond_futex +- .long __condvar_tw_cleanup-.LSTARTCODE +- .uleb128 0 +-#ifndef __ASSUME_FUTEX_CLOCK_REALTIME +- .long .LcleanupSTART2-.LSTARTCODE +- .long .Ladd_cond_futex2-.LcleanupSTART2 +- .long __condvar_tw_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .Ladd_cond_futex2-.LSTARTCODE +- .long .Lsub_cond_futex2-.Ladd_cond_futex2 +- .long __condvar_tw_cleanup2-.LSTARTCODE +- .uleb128 0 +- .long .Lsub_cond_futex2-.LSTARTCODE +- .long .LcleanupEND2-.Lsub_cond_futex2 +- .long __condvar_tw_cleanup-.LSTARTCODE +- .uleb128 0 +-#endif +- .long .LcallUR-.LSTARTCODE +- .long .LENDCODE-.LcallUR +- .long 0 +- .uleb128 0 +-.Lcstend: +- +- +-#ifdef SHARED +- .hidden DW.ref.__gcc_personality_v0 +- .weak DW.ref.__gcc_personality_v0 +- .section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits +- .align 4 +- .type 
DW.ref.__gcc_personality_v0, @object +- .size DW.ref.__gcc_personality_v0, 4 +-DW.ref.__gcc_personality_v0: +- .long __gcc_personality_v0 +-#endif +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/i386/pthread_cond_wait.S ++++ /dev/null +@@ -1,642 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- +- .text +- +-/* int pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) */ +- .globl __pthread_cond_wait +- .type __pthread_cond_wait, @function +- .align 16 +-__pthread_cond_wait: +-.LSTARTCODE: +- cfi_startproc +-#ifdef SHARED +- cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect, +- DW.ref.__gcc_personality_v0) +- cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART) +-#else +- cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0) +- cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) +-#endif +- +- pushl %ebp +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebp, 0) +- pushl %edi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%edi, 0) +- pushl %esi +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%esi, 0) +- pushl %ebx +- cfi_adjust_cfa_offset(4) +- cfi_rel_offset(%ebx, 0) +- +- xorl %esi, %esi +- movl 20(%esp), %ebx +- +- LIBC_PROBE (cond_wait, 2, 24(%esp), %ebx) +- +- /* Get internal lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jnz 1f +- +- /* Store the reference to the mutex. If there is already a +- different value in there this is a bad user bug. */ +-2: cmpl $-1, dep_mutex(%ebx) +- movl 24(%esp), %eax +- je 15f +- movl %eax, dep_mutex(%ebx) +- +- /* Unlock the mutex. */ +-15: xorl %edx, %edx +- call __pthread_mutex_unlock_usercnt +- +- testl %eax, %eax +- jne 12f +- +- addl $1, total_seq(%ebx) +- adcl $0, total_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- addl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +-#define FRAME_SIZE 20 +- subl $FRAME_SIZE, %esp +- cfi_adjust_cfa_offset(FRAME_SIZE) +- cfi_remember_state +- +- /* Get and store current wakeup_seq value. 
*/ +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- movl broadcast_seq(%ebx), %eax +- movl %edi, 4(%esp) +- movl %edx, 8(%esp) +- movl %eax, 12(%esp) +- +- /* Reset the pi-requeued flag. */ +-8: movl $0, 16(%esp) +- movl cond_futex(%ebx), %ebp +- +- /* Unlock. */ +- LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 3f +- +-.LcleanupSTART: +-4: call __pthread_enable_asynccancel +- movl %eax, (%esp) +- +- xorl %ecx, %ecx +- cmpl $-1, dep_mutex(%ebx) +- sete %cl +- je 18f +- +- movl dep_mutex(%ebx), %edi +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. */ +- movl MUTEX_KIND(%edi), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 18f +- +- movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %ecx +- movl %ebp, %edx +- xorl %esi, %esi +- addl $cond_futex, %ebx +-.Ladd_cond_futex_pi: +- movl $SYS_futex, %eax +- ENTER_KERNEL +- subl $cond_futex, %ebx +-.Lsub_cond_futex_pi: +- /* Set the pi-requeued flag only if the kernel has returned 0. The +- kernel does not hold the mutex on error. */ +- cmpl $0, %eax +- sete 16(%esp) +- je 19f +- +- /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns +- successfully, it has already locked the mutex for us and the +- pi_flag (16(%esp)) is set to denote that fact. However, if another +- thread changed the futex value before we entered the wait, the +- syscall may return an EAGAIN and the mutex is not locked. We go +- ahead with a success anyway since later we look at the pi_flag to +- decide if we got the mutex or not. The sequence numbers then make +- sure that only one of the threads actually wake up. We retry using +- normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal +- and PI futexes don't mix. 
+- +- Note that we don't check for EAGAIN specifically; we assume that the +- only other error the futex function could return is EAGAIN since +- anything else would mean an error in our function. It is too +- expensive to do that check for every call (which is quite common in +- case of a large number of threads), so it has been skipped. */ +- cmpl $-ENOSYS, %eax +- jne 19f +- xorl %ecx, %ecx +- +-18: subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +-#if FUTEX_WAIT != 0 +- addl $FUTEX_WAIT, %ecx +-#endif +- movl %ebp, %edx +- addl $cond_futex, %ebx +-.Ladd_cond_futex: +- movl $SYS_futex, %eax +- ENTER_KERNEL +- subl $cond_futex, %ebx +-.Lsub_cond_futex: +- +-19: movl (%esp), %eax +- call __pthread_disable_asynccancel +-.LcleanupEND: +- +- /* Lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jnz 5f +- +-6: movl broadcast_seq(%ebx), %eax +- cmpl 12(%esp), %eax +- jne 16f +- +- movl woken_seq(%ebx), %eax +- movl woken_seq+4(%ebx), %ecx +- +- movl wakeup_seq(%ebx), %edi +- movl wakeup_seq+4(%ebx), %edx +- +- cmpl 8(%esp), %edx +- jne 7f +- cmpl 4(%esp), %edi +- je 22f +- +-7: cmpl %ecx, %edx +- jne 9f +- cmp %eax, %edi +- je 22f +- +-9: addl $1, woken_seq(%ebx) +- adcl $0, woken_seq+4(%ebx) +- +- /* Unlock */ +-16: subl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +- /* Wake up a thread which wants to destroy the condvar object. 
*/ +- movl total_seq(%ebx), %eax +- andl total_seq+4(%ebx), %eax +- cmpl $0xffffffff, %eax +- jne 17f +- movl cond_nwaiters(%ebx), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 17f +- +- addl $cond_nwaiters, %ebx +- movl $SYS_futex, %eax +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_nwaiters(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $1, %edx +- ENTER_KERNEL +- subl $cond_nwaiters, %ebx +- +-17: LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 10f +- +- /* With requeue_pi, the mutex lock is held in the kernel. */ +-11: movl 24+FRAME_SIZE(%esp), %eax +- movl 16(%esp), %ecx +- testl %ecx, %ecx +- jnz 21f +- +- call __pthread_mutex_cond_lock +-20: addl $FRAME_SIZE, %esp +- cfi_adjust_cfa_offset(-FRAME_SIZE); +- +-14: popl %ebx +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebx) +- popl %esi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%esi) +- popl %edi +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%edi) +- popl %ebp +- cfi_adjust_cfa_offset(-4) +- cfi_restore(%ebp) +- +- /* We return the result of the mutex_lock operation. */ +- ret +- +- cfi_restore_state +- +-21: call __pthread_mutex_cond_lock_adjust +- xorl %eax, %eax +- jmp 20b +- +- cfi_adjust_cfa_offset(-FRAME_SIZE); +- +- /* We need to go back to futex_wait. If we're using requeue_pi, then +- release the mutex we had acquired and go back. */ +-22: movl 16(%esp), %edx +- test %edx, %edx +- jz 8b +- +- /* Adjust the mutex values first and then unlock it. The unlock +- should always succeed or else the kernel did not lock the mutex +- correctly. */ +- movl dep_mutex(%ebx), %eax +- call __pthread_mutex_cond_lock_adjust +- movl dep_mutex(%ebx), %eax +- xorl %edx, %edx +- call __pthread_mutex_unlock_usercnt +- jmp 8b +- +- /* Initial locking failed. 
*/ +-1: +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 2b +- +- /* The initial unlocking of the mutex failed. */ +-12: +- LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- jne 14b +- +- movl %eax, %esi +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- +- movl %esi, %eax +- jmp 14b +- +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +- /* Unlock in loop requires wakeup. */ +-3: +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 4b +- +- /* Locking in loop failed. */ +-5: +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- jmp 6b +- +- /* Unlock after loop requires wakeup. 
*/ +-10: +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- jmp 11b +- +- .size __pthread_cond_wait, .-__pthread_cond_wait +-versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, +- GLIBC_2_3_2) +- +- +- .type __condvar_w_cleanup2, @function +-__condvar_w_cleanup2: +- subl $cond_futex, %ebx +- .size __condvar_w_cleanup2, .-__condvar_w_cleanup2 +-.LSbl4: +- .type __condvar_w_cleanup, @function +-__condvar_w_cleanup: +- movl %eax, %esi +- +- /* Get internal lock. */ +- movl $1, %edx +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %edx, (%ebx) +-#else +- cmpxchgl %edx, cond_lock(%ebx) +-#endif +- jz 1f +- +-#if cond_lock == 0 +- movl %ebx, %edx +-#else +- leal cond_lock(%ebx), %edx +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_lock_wait +- +-1: movl broadcast_seq(%ebx), %eax +- cmpl 12(%esp), %eax +- jne 3f +- +- /* We increment the wakeup_seq counter only if it is lower than +- total_seq. If this is not the case the thread was woken and +- then canceled. In this case we ignore the signal. */ +- movl total_seq(%ebx), %eax +- movl total_seq+4(%ebx), %edi +- cmpl wakeup_seq+4(%ebx), %edi +- jb 6f +- ja 7f +- cmpl wakeup_seq(%ebx), %eax +- jbe 7f +- +-6: addl $1, wakeup_seq(%ebx) +- adcl $0, wakeup_seq+4(%ebx) +- addl $1, cond_futex(%ebx) +- +-7: addl $1, woken_seq(%ebx) +- adcl $0, woken_seq+4(%ebx) +- +-3: subl $(1 << nwaiters_shift), cond_nwaiters(%ebx) +- +- /* Wake up a thread which wants to destroy the condvar object. 
*/ +- xorl %edi, %edi +- movl total_seq(%ebx), %eax +- andl total_seq+4(%ebx), %eax +- cmpl $0xffffffff, %eax +- jne 4f +- movl cond_nwaiters(%ebx), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 4f +- +- addl $cond_nwaiters, %ebx +- movl $SYS_futex, %eax +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_nwaiters(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $1, %edx +- ENTER_KERNEL +- subl $cond_nwaiters, %ebx +- movl $1, %edi +- +-4: LOCK +-#if cond_lock == 0 +- subl $1, (%ebx) +-#else +- subl $1, cond_lock(%ebx) +-#endif +- je 2f +- +-#if cond_lock == 0 +- movl %ebx, %eax +-#else +- leal cond_lock(%ebx), %eax +-#endif +-#if (LLL_SHARED-LLL_PRIVATE) > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex(%ebx) +- setne %cl +- subl $1, %ecx +- andl $(LLL_SHARED-LLL_PRIVATE), %ecx +-#if LLL_PRIVATE != 0 +- addl $LLL_PRIVATE, %ecx +-#endif +- call __lll_unlock_wake +- +- /* Wake up all waiters to make sure no signal gets lost. */ +-2: testl %edi, %edi +- jnz 5f +- addl $cond_futex, %ebx +-#if FUTEX_PRIVATE_FLAG > 255 +- xorl %ecx, %ecx +-#endif +- cmpl $-1, dep_mutex-cond_futex(%ebx) +- sete %cl +- subl $1, %ecx +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %ecx +-#else +- andl %gs:PRIVATE_FUTEX, %ecx +-#endif +- addl $FUTEX_WAKE, %ecx +- movl $SYS_futex, %eax +- movl $0x7fffffff, %edx +- ENTER_KERNEL +- +- /* Lock the mutex only if we don't own it already. This only happens +- in case of PI mutexes, if we got cancelled after a successful +- return of the futex syscall and before disabling async +- cancellation. */ +-5: movl 24+FRAME_SIZE(%esp), %eax +- movl MUTEX_KIND(%eax), %ebx +- andl $(ROBUST_BIT|PI_BIT), %ebx +- cmpl $PI_BIT, %ebx +- jne 8f +- +- movl (%eax), %ebx +- andl $TID_MASK, %ebx +- cmpl %ebx, %gs:TID +- jne 8f +- /* We managed to get the lock. 
Fix it up before returning. */ +- call __pthread_mutex_cond_lock_adjust +- jmp 9f +- +-8: call __pthread_mutex_cond_lock +- +-9: movl %esi, (%esp) +-.LcallUR: +- call _Unwind_Resume +- hlt +-.LENDCODE: +- cfi_endproc +- .size __condvar_w_cleanup, .-__condvar_w_cleanup +- +- +- .section .gcc_except_table,"a",@progbits +-.LexceptSTART: +- .byte DW_EH_PE_omit # @LPStart format (omit) +- .byte DW_EH_PE_omit # @TType format (omit) +- .byte DW_EH_PE_sdata4 # call-site format +- # DW_EH_PE_sdata4 +- .uleb128 .Lcstend-.Lcstbegin +-.Lcstbegin: +- .long .LcleanupSTART-.LSTARTCODE +- .long .Ladd_cond_futex_pi-.LcleanupSTART +- .long __condvar_w_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .Ladd_cond_futex_pi-.LSTARTCODE +- .long .Lsub_cond_futex_pi-.Ladd_cond_futex_pi +- .long __condvar_w_cleanup2-.LSTARTCODE +- .uleb128 0 +- .long .Lsub_cond_futex_pi-.LSTARTCODE +- .long .Ladd_cond_futex-.Lsub_cond_futex_pi +- .long __condvar_w_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .Ladd_cond_futex-.LSTARTCODE +- .long .Lsub_cond_futex-.Ladd_cond_futex +- .long __condvar_w_cleanup2-.LSTARTCODE +- .uleb128 0 +- .long .Lsub_cond_futex-.LSTARTCODE +- .long .LcleanupEND-.Lsub_cond_futex +- .long __condvar_w_cleanup-.LSTARTCODE +- .uleb128 0 +- .long .LcallUR-.LSTARTCODE +- .long .LENDCODE-.LcallUR +- .long 0 +- .uleb128 0 +-.Lcstend: +- +-#ifdef SHARED +- .hidden DW.ref.__gcc_personality_v0 +- .weak DW.ref.__gcc_personality_v0 +- .section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits +- .align 4 +- .type DW.ref.__gcc_personality_v0, @object +- .size DW.ref.__gcc_personality_v0, 4 +-DW.ref.__gcc_personality_v0: +- .long __gcc_personality_v0 +-#endif +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/powerpc/bits/pthreadtypes.h +@@ -123,19 
+123,34 @@ typedef union + + + /* Data structure for conditional variable handling. The structure of +- the attribute type is deliberately not exposed. */ ++ the attribute type is not exposed on purpose. */ + typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/x86_64/pthread_cond_broadcast.S ++++ /dev/null +@@ -1,177 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. 
+- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- .text +- +- /* int pthread_cond_broadcast (pthread_cond_t *cond) */ +-ENTRY(__pthread_cond_broadcast) +- +- LIBC_PROBE (cond_broadcast, 1, %rdi) +- +- /* Get internal lock. */ +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jnz 1f +- +-2: addq $cond_futex, %rdi +- movq total_seq-cond_futex(%rdi), %r9 +- cmpq wakeup_seq-cond_futex(%rdi), %r9 +- jna 4f +- +- /* Cause all currently waiting threads to recognize they are +- woken up. */ +- movq %r9, wakeup_seq-cond_futex(%rdi) +- movq %r9, woken_seq-cond_futex(%rdi) +- addq %r9, %r9 +- movl %r9d, (%rdi) +- incl broadcast_seq-cond_futex(%rdi) +- +- /* Get the address of the mutex used. */ +- mov dep_mutex-cond_futex(%rdi), %R8_LP +- +- /* Unlock. */ +- LOCK +- decl cond_lock-cond_futex(%rdi) +- jne 7f +- +-8: cmp $-1, %R8_LP +- je 9f +- +- /* Do not use requeue for pshared condvars. */ +- testl $PS_BIT, MUTEX_KIND(%r8) +- jne 9f +- +- /* Requeue to a PI mutex if the PI bit is set. */ +- movl MUTEX_KIND(%r8), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- je 81f +- +- /* Wake up all threads. */ +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $(FUTEX_CMP_REQUEUE|FUTEX_PRIVATE_FLAG), %esi +-#else +- movl %fs:PRIVATE_FUTEX, %esi +- orl $FUTEX_CMP_REQUEUE, %esi +-#endif +- movl $SYS_futex, %eax +- movl $1, %edx +- movl $0x7fffffff, %r10d +- syscall +- +- /* For any kind of error, which mainly is EAGAIN, we try again +- with WAKE. 
The general test also covers running on old +- kernels. */ +- cmpq $-4095, %rax +- jae 9f +- +-10: xorl %eax, %eax +- retq +- +- /* Wake up all threads. */ +-81: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi +- movl $SYS_futex, %eax +- movl $1, %edx +- movl $0x7fffffff, %r10d +- syscall +- +- /* For any kind of error, which mainly is EAGAIN, we try again +- with WAKE. The general test also covers running on old +- kernels. */ +- cmpq $-4095, %rax +- jb 10b +- jmp 9f +- +- .align 16 +- /* Unlock. */ +-4: LOCK +- decl cond_lock-cond_futex(%rdi) +- jne 5f +- +-6: xorl %eax, %eax +- retq +- +- /* Initial locking failed. */ +-1: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- jmp 2b +- +- /* Unlock in loop requires wakeup. */ +-5: addq $cond_lock-cond_futex, %rdi +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- jmp 6b +- +- /* Unlock in loop requires wakeup. */ +-7: addq $cond_lock-cond_futex, %rdi +- cmp $-1, %R8_LP +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- subq $cond_lock-cond_futex, %rdi +- jmp 8b +- +-9: /* The futex requeue functionality is not available. 
*/ +- cmp $-1, %R8_LP +- movl $0x7fffffff, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- jmp 10b +-END(__pthread_cond_broadcast) +- +-versioned_symbol (libpthread, __pthread_cond_broadcast, pthread_cond_broadcast, +- GLIBC_2_3_2) +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/x86_64/pthread_cond_signal.S ++++ /dev/null +@@ -1,161 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +- +- .text +- +-ENTRY(__pthread_cond_signal) +- +- LIBC_PROBE (cond_signal, 1, %rdi) +- +- /* Get internal lock. 
*/ +- movq %rdi, %r8 +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jnz 1f +- +-2: addq $cond_futex, %rdi +- movq total_seq(%r8), %rcx +- cmpq wakeup_seq(%r8), %rcx +- jbe 4f +- +- /* Bump the wakeup number. */ +- addq $1, wakeup_seq(%r8) +- addl $1, (%rdi) +- +- /* Wake up one thread. */ +- LP_OP(cmp) $-1, dep_mutex(%r8) +- movl $FUTEX_WAKE_OP, %esi +- movl $1, %edx +- movl $SYS_futex, %eax +- je 8f +- +- /* Get the address of the mutex used. */ +- mov dep_mutex(%r8), %RCX_LP +- movl MUTEX_KIND(%rcx), %r11d +- andl $(ROBUST_BIT|PI_BIT), %r11d +- cmpl $PI_BIT, %r11d +- je 9f +- +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $(FUTEX_WAKE_OP|FUTEX_PRIVATE_FLAG), %esi +-#else +- orl %fs:PRIVATE_FUTEX, %esi +-#endif +- +-8: movl $1, %r10d +-#if cond_lock != 0 +- addq $cond_lock, %r8 +-#endif +- movl $FUTEX_OP_CLEAR_WAKE_IF_GT_ONE, %r9d +- syscall +-#if cond_lock != 0 +- subq $cond_lock, %r8 +-#endif +- /* For any kind of error, we try again with WAKE. +- The general test also covers running on old kernels. */ +- cmpq $-4095, %rax +- jae 7f +- +- xorl %eax, %eax +- retq +- +- /* Wake up one thread and requeue none in the PI Mutex case. */ +-9: movl $(FUTEX_CMP_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi +- movq %rcx, %r8 +- xorq %r10, %r10 +- movl (%rdi), %r9d // XXX Can this be right? +- syscall +- +- leaq -cond_futex(%rdi), %r8 +- +- /* For any kind of error, we try again with WAKE. +- The general test also covers running on old kernels. */ +- cmpq $-4095, %rax +- jb 4f +- +-7: +-#ifdef __ASSUME_PRIVATE_FUTEX +- andl $FUTEX_PRIVATE_FLAG, %esi +-#else +- andl %fs:PRIVATE_FUTEX, %esi +-#endif +- orl $FUTEX_WAKE, %esi +- movl $SYS_futex, %eax +- /* %rdx should be 1 already from $FUTEX_WAKE_OP syscall. +- movl $1, %edx */ +- syscall +- +- /* Unlock. 
*/ +-4: LOCK +-#if cond_lock == 0 +- decl (%r8) +-#else +- decl cond_lock(%r8) +-#endif +- jne 5f +- +-6: xorl %eax, %eax +- retq +- +- /* Initial locking failed. */ +-1: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- jmp 2b +- +- /* Unlock in loop requires wakeup. */ +-5: +- movq %r8, %rdi +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- jmp 6b +-END(__pthread_cond_signal) +- +-versioned_symbol (libpthread, __pthread_cond_signal, pthread_cond_signal, +- GLIBC_2_3_2) +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S ++++ /dev/null +@@ -1,623 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. +- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . 
*/ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +- +- .text +- +- +-/* int pthread_cond_timedwait (pthread_cond_t *cond, pthread_mutex_t *mutex, +- const struct timespec *abstime) */ +- .globl __pthread_cond_timedwait +- .type __pthread_cond_timedwait, @function +- .align 16 +-__pthread_cond_timedwait: +-.LSTARTCODE: +- cfi_startproc +-#ifdef SHARED +- cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect, +- DW.ref.__gcc_personality_v0) +- cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART) +-#else +- cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0) +- cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) +-#endif +- +- pushq %r12 +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%r12, 0) +- pushq %r13 +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%r13, 0) +- pushq %r14 +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%r14, 0) +- pushq %r15 +- cfi_adjust_cfa_offset(8) +- cfi_rel_offset(%r15, 0) +-#define FRAME_SIZE (32+8) +- subq $FRAME_SIZE, %rsp +- cfi_adjust_cfa_offset(FRAME_SIZE) +- cfi_remember_state +- +- LIBC_PROBE (cond_timedwait, 3, %rdi, %rsi, %rdx) +- +- cmpq $1000000000, 8(%rdx) +- movl $EINVAL, %eax +- jae 48f +- +- /* Stack frame: +- +- rsp + 48 +- +--------------------------+ +- rsp + 32 | timeout value | +- +--------------------------+ +- rsp + 24 | old wake_seq value | +- +--------------------------+ +- rsp + 16 | mutex pointer | +- +--------------------------+ +- rsp + 8 | condvar pointer | +- +--------------------------+ +- rsp + 4 | old broadcast_seq value | +- +--------------------------+ +- rsp + 0 | old cancellation mode | +- +--------------------------+ +- */ +- +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- +- /* Prepare structure passed to cancellation handler. */ +- movq %rdi, 8(%rsp) +- movq %rsi, 16(%rsp) +- movq %rdx, %r13 +- +- je 22f +- mov %RSI_LP, dep_mutex(%rdi) +- +-22: +- xorb %r15b, %r15b +- +- /* Get internal lock. 
*/ +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jnz 31f +- +- /* Unlock the mutex. */ +-32: movq 16(%rsp), %rdi +- xorl %esi, %esi +- callq __pthread_mutex_unlock_usercnt +- +- testl %eax, %eax +- jne 46f +- +- movq 8(%rsp), %rdi +- incq total_seq(%rdi) +- incl cond_futex(%rdi) +- addl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Get and store current wakeup_seq value. */ +- movq 8(%rsp), %rdi +- movq wakeup_seq(%rdi), %r9 +- movl broadcast_seq(%rdi), %edx +- movq %r9, 24(%rsp) +- movl %edx, 4(%rsp) +- +- cmpq $0, (%r13) +- movq $-ETIMEDOUT, %r14 +- js 36f +- +-38: movl cond_futex(%rdi), %r12d +- +- /* Unlock. */ +- LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- jne 33f +- +-.LcleanupSTART1: +-34: callq __pthread_enable_asynccancel +- movl %eax, (%rsp) +- +- movq %r13, %r10 +- movl $FUTEX_WAIT_BITSET, %esi +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- je 60f +- +- mov dep_mutex(%rdi), %R8_LP +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. */ +- movl MUTEX_KIND(%r8), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 61f +- +- movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi +- xorl %eax, %eax +- /* The following only works like this because we only support +- two clocks, represented using a single bit. */ +- testl $1, cond_nwaiters(%rdi) +- movl $FUTEX_CLOCK_REALTIME, %edx +- cmove %edx, %eax +- orl %eax, %esi +- movq %r12, %rdx +- addq $cond_futex, %rdi +- movl $SYS_futex, %eax +- syscall +- +- cmpl $0, %eax +- sete %r15b +- +-#ifdef __ASSUME_REQUEUE_PI +- jmp 62f +-#else +- je 62f +- +- /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns +- successfully, it has already locked the mutex for us and the +- pi_flag (%r15b) is set to denote that fact. 
However, if another +- thread changed the futex value before we entered the wait, the +- syscall may return an EAGAIN and the mutex is not locked. We go +- ahead with a success anyway since later we look at the pi_flag to +- decide if we got the mutex or not. The sequence numbers then make +- sure that only one of the threads actually wake up. We retry using +- normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal +- and PI futexes don't mix. +- +- Note that we don't check for EAGAIN specifically; we assume that the +- only other error the futex function could return is EAGAIN (barring +- the ETIMEOUT of course, for the timeout case in futex) since +- anything else would mean an error in our function. It is too +- expensive to do that check for every call (which is quite common in +- case of a large number of threads), so it has been skipped. */ +- cmpl $-ENOSYS, %eax +- jne 62f +- +- subq $cond_futex, %rdi +-#endif +- +-61: movl $(FUTEX_WAIT_BITSET|FUTEX_PRIVATE_FLAG), %esi +-60: xorb %r15b, %r15b +- xorl %eax, %eax +- /* The following only works like this because we only support +- two clocks, represented using a single bit. */ +- testl $1, cond_nwaiters(%rdi) +- movl $FUTEX_CLOCK_REALTIME, %edx +- movl $0xffffffff, %r9d +- cmove %edx, %eax +- orl %eax, %esi +- movq %r12, %rdx +- addq $cond_futex, %rdi +- movl $SYS_futex, %eax +- syscall +-62: movq %rax, %r14 +- +- movl (%rsp), %edi +- callq __pthread_disable_asynccancel +-.LcleanupEND1: +- +- /* Lock. */ +- movq 8(%rsp), %rdi +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jne 35f +- +-36: movl broadcast_seq(%rdi), %edx +- +- movq woken_seq(%rdi), %rax +- +- movq wakeup_seq(%rdi), %r9 +- +- cmpl 4(%rsp), %edx +- jne 53f +- +- cmpq 24(%rsp), %r9 +- jbe 45f +- +- cmpq %rax, %r9 +- ja 39f +- +-45: cmpq $-ETIMEDOUT, %r14 +- je 99f +- +- /* We need to go back to futex_wait. 
If we're using requeue_pi, then +- release the mutex we had acquired and go back. */ +- test %r15b, %r15b +- jz 38b +- +- /* Adjust the mutex values first and then unlock it. The unlock +- should always succeed or else the kernel did not lock the +- mutex correctly. */ +- movq %r8, %rdi +- callq __pthread_mutex_cond_lock_adjust +- xorl %esi, %esi +- callq __pthread_mutex_unlock_usercnt +- /* Reload cond_var. */ +- movq 8(%rsp), %rdi +- jmp 38b +- +-99: incq wakeup_seq(%rdi) +- incl cond_futex(%rdi) +- movl $ETIMEDOUT, %r14d +- jmp 44f +- +-53: xorq %r14, %r14 +- jmp 54f +- +-39: xorq %r14, %r14 +-44: incq woken_seq(%rdi) +- +-54: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Wake up a thread which wants to destroy the condvar object. */ +- cmpq $0xffffffffffffffff, total_seq(%rdi) +- jne 55f +- movl cond_nwaiters(%rdi), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 55f +- +- addq $cond_nwaiters, %rdi +- LP_OP(cmp) $-1, dep_mutex-cond_nwaiters(%rdi) +- movl $1, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- subq $cond_nwaiters, %rdi +- +-55: LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- jne 40f +- +- /* If requeue_pi is used the kernel performs the locking of the +- mutex. 
*/ +-41: movq 16(%rsp), %rdi +- testb %r15b, %r15b +- jnz 64f +- +- callq __pthread_mutex_cond_lock +- +-63: testq %rax, %rax +- cmoveq %r14, %rax +- +-48: addq $FRAME_SIZE, %rsp +- cfi_adjust_cfa_offset(-FRAME_SIZE) +- popq %r15 +- cfi_adjust_cfa_offset(-8) +- cfi_restore(%r15) +- popq %r14 +- cfi_adjust_cfa_offset(-8) +- cfi_restore(%r14) +- popq %r13 +- cfi_adjust_cfa_offset(-8) +- cfi_restore(%r13) +- popq %r12 +- cfi_adjust_cfa_offset(-8) +- cfi_restore(%r12) +- +- retq +- +- cfi_restore_state +- +-64: callq __pthread_mutex_cond_lock_adjust +- movq %r14, %rax +- jmp 48b +- +- /* Initial locking failed. */ +-31: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +- jmp 32b +- +- /* Unlock in loop requires wakeup. */ +-33: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- jmp 34b +- +- /* Locking in loop failed. */ +-35: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- jmp 36b +- +- /* Unlock after loop requires wakeup. */ +-40: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- jmp 41b +- +- /* The initial unlocking of the mutex failed. 
*/ +-46: movq 8(%rsp), %rdi +- movq %rax, (%rsp) +- LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- jne 47f +- +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- +-47: movq (%rsp), %rax +- jmp 48b +- +- .size __pthread_cond_timedwait, .-__pthread_cond_timedwait +-versioned_symbol (libpthread, __pthread_cond_timedwait, pthread_cond_timedwait, +- GLIBC_2_3_2) +- +- +- .align 16 +- .type __condvar_cleanup2, @function +-__condvar_cleanup2: +- /* Stack frame: +- +- rsp + 72 +- +--------------------------+ +- rsp + 64 | %r12 | +- +--------------------------+ +- rsp + 56 | %r13 | +- +--------------------------+ +- rsp + 48 | %r14 | +- +--------------------------+ +- rsp + 24 | unused | +- +--------------------------+ +- rsp + 16 | mutex pointer | +- +--------------------------+ +- rsp + 8 | condvar pointer | +- +--------------------------+ +- rsp + 4 | old broadcast_seq value | +- +--------------------------+ +- rsp + 0 | old cancellation mode | +- +--------------------------+ +- */ +- +- movq %rax, 24(%rsp) +- +- /* Get internal lock. */ +- movq 8(%rsp), %rdi +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jz 1f +- +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- +-1: movl broadcast_seq(%rdi), %edx +- cmpl 4(%rsp), %edx +- jne 3f +- +- /* We increment the wakeup_seq counter only if it is lower than +- total_seq. If this is not the case the thread was woken and +- then canceled. In this case we ignore the signal. 
*/ +- movq total_seq(%rdi), %rax +- cmpq wakeup_seq(%rdi), %rax +- jbe 6f +- incq wakeup_seq(%rdi) +- incl cond_futex(%rdi) +-6: incq woken_seq(%rdi) +- +-3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Wake up a thread which wants to destroy the condvar object. */ +- xorq %r12, %r12 +- cmpq $0xffffffffffffffff, total_seq(%rdi) +- jne 4f +- movl cond_nwaiters(%rdi), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 4f +- +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- leaq cond_nwaiters(%rdi), %rdi +- movl $1, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- subq $cond_nwaiters, %rdi +- movl $1, %r12d +- +-4: LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- je 2f +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- +- /* Wake up all waiters to make sure no signal gets lost. */ +-2: testq %r12, %r12 +- jnz 5f +- addq $cond_futex, %rdi +- LP_OP(cmp) $-1, dep_mutex-cond_futex(%rdi) +- movl $0x7fffffff, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- +- /* Lock the mutex only if we don't own it already. This only happens +- in case of PI mutexes, if we got cancelled after a successful +- return of the futex syscall and before disabling async +- cancellation. 
*/ +-5: movq 16(%rsp), %rdi +- movl MUTEX_KIND(%rdi), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 7f +- +- movl (%rdi), %eax +- andl $TID_MASK, %eax +- cmpl %eax, %fs:TID +- jne 7f +- /* We managed to get the lock. Fix it up before returning. */ +- callq __pthread_mutex_cond_lock_adjust +- jmp 8f +- +-7: callq __pthread_mutex_cond_lock +- +-8: movq 24(%rsp), %rdi +- movq FRAME_SIZE(%rsp), %r15 +- movq FRAME_SIZE+8(%rsp), %r14 +- movq FRAME_SIZE+16(%rsp), %r13 +- movq FRAME_SIZE+24(%rsp), %r12 +-.LcallUR: +- call _Unwind_Resume +- hlt +-.LENDCODE: +- cfi_endproc +- .size __condvar_cleanup2, .-__condvar_cleanup2 +- +- +- .section .gcc_except_table,"a",@progbits +-.LexceptSTART: +- .byte DW_EH_PE_omit # @LPStart format +- .byte DW_EH_PE_omit # @TType format +- .byte DW_EH_PE_uleb128 # call-site format +- .uleb128 .Lcstend-.Lcstbegin +-.Lcstbegin: +- .uleb128 .LcleanupSTART1-.LSTARTCODE +- .uleb128 .LcleanupEND1-.LcleanupSTART1 +- .uleb128 __condvar_cleanup2-.LSTARTCODE +- .uleb128 0 +- .uleb128 .LcallUR-.LSTARTCODE +- .uleb128 .LENDCODE-.LcallUR +- .uleb128 0 +- .uleb128 0 +-.Lcstend: +- +- +-#ifdef SHARED +- .hidden DW.ref.__gcc_personality_v0 +- .weak DW.ref.__gcc_personality_v0 +- .section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits +- .align LP_SIZE +- .type DW.ref.__gcc_personality_v0, @object +- .size DW.ref.__gcc_personality_v0, LP_SIZE +-DW.ref.__gcc_personality_v0: +- ASM_ADDR __gcc_personality_v0 +-#endif +Index: glibc-2.24-256-g5140d03/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S ++++ /dev/null +@@ -1,555 +0,0 @@ +-/* Copyright (C) 2002-2016 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- Contributed by Ulrich Drepper , 2002. 
+- +- The GNU C Library is free software; you can redistribute it and/or +- modify it under the terms of the GNU Lesser General Public +- License as published by the Free Software Foundation; either +- version 2.1 of the License, or (at your option) any later version. +- +- The GNU C Library is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +- Lesser General Public License for more details. +- +- You should have received a copy of the GNU Lesser General Public +- License along with the GNU C Library; if not, see +- . */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +- +- .text +- +-/* int pthread_cond_wait (pthread_cond_t *cond, pthread_mutex_t *mutex) */ +- .globl __pthread_cond_wait +- .type __pthread_cond_wait, @function +- .align 16 +-__pthread_cond_wait: +-.LSTARTCODE: +- cfi_startproc +-#ifdef SHARED +- cfi_personality(DW_EH_PE_pcrel | DW_EH_PE_sdata4 | DW_EH_PE_indirect, +- DW.ref.__gcc_personality_v0) +- cfi_lsda(DW_EH_PE_pcrel | DW_EH_PE_sdata4, .LexceptSTART) +-#else +- cfi_personality(DW_EH_PE_udata4, __gcc_personality_v0) +- cfi_lsda(DW_EH_PE_udata4, .LexceptSTART) +-#endif +- +-#define FRAME_SIZE (32+8) +- leaq -FRAME_SIZE(%rsp), %rsp +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +- /* Stack frame: +- +- rsp + 32 +- +--------------------------+ +- rsp + 24 | old wake_seq value | +- +--------------------------+ +- rsp + 16 | mutex pointer | +- +--------------------------+ +- rsp + 8 | condvar pointer | +- +--------------------------+ +- rsp + 4 | old broadcast_seq value | +- +--------------------------+ +- rsp + 0 | old cancellation mode | +- +--------------------------+ +- */ +- +- LIBC_PROBE (cond_wait, 2, %rdi, %rsi) +- +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- +- /* Prepare structure passed to cancellation handler. 
*/ +- movq %rdi, 8(%rsp) +- movq %rsi, 16(%rsp) +- +- je 15f +- mov %RSI_LP, dep_mutex(%rdi) +- +- /* Get internal lock. */ +-15: movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jne 1f +- +- /* Unlock the mutex. */ +-2: movq 16(%rsp), %rdi +- xorl %esi, %esi +- callq __pthread_mutex_unlock_usercnt +- +- testl %eax, %eax +- jne 12f +- +- movq 8(%rsp), %rdi +- incq total_seq(%rdi) +- incl cond_futex(%rdi) +- addl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Get and store current wakeup_seq value. */ +- movq 8(%rsp), %rdi +- movq wakeup_seq(%rdi), %r9 +- movl broadcast_seq(%rdi), %edx +- movq %r9, 24(%rsp) +- movl %edx, 4(%rsp) +- +- /* Unlock. */ +-8: movl cond_futex(%rdi), %edx +- LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- jne 3f +- +-.LcleanupSTART: +-4: callq __pthread_enable_asynccancel +- movl %eax, (%rsp) +- +- xorq %r10, %r10 +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- leaq cond_futex(%rdi), %rdi +- movl $FUTEX_WAIT, %esi +- je 60f +- +- mov dep_mutex-cond_futex(%rdi), %R8_LP +- /* Requeue to a non-robust PI mutex if the PI bit is set and +- the robust bit is not set. */ +- movl MUTEX_KIND(%r8), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 61f +- +- movl $(FUTEX_WAIT_REQUEUE_PI|FUTEX_PRIVATE_FLAG), %esi +- movl $SYS_futex, %eax +- syscall +- +- cmpl $0, %eax +- sete %r8b +- +-#ifdef __ASSUME_REQUEUE_PI +- jmp 62f +-#else +- je 62f +- +- /* When a futex syscall with FUTEX_WAIT_REQUEUE_PI returns +- successfully, it has already locked the mutex for us and the +- pi_flag (%r8b) is set to denote that fact. However, if another +- thread changed the futex value before we entered the wait, the +- syscall may return an EAGAIN and the mutex is not locked. We go +- ahead with a success anyway since later we look at the pi_flag to +- decide if we got the mutex or not. 
The sequence numbers then make +- sure that only one of the threads actually wake up. We retry using +- normal FUTEX_WAIT only if the kernel returned ENOSYS, since normal +- and PI futexes don't mix. +- +- Note that we don't check for EAGAIN specifically; we assume that the +- only other error the futex function could return is EAGAIN since +- anything else would mean an error in our function. It is too +- expensive to do that check for every call (which is quite common in +- case of a large number of threads), so it has been skipped. */ +- cmpl $-ENOSYS, %eax +- jne 62f +- +-# ifndef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAIT, %esi +-# endif +-#endif +- +-61: +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $(FUTEX_WAIT|FUTEX_PRIVATE_FLAG), %esi +-#else +- orl %fs:PRIVATE_FUTEX, %esi +-#endif +-60: xorb %r8b, %r8b +- movl $SYS_futex, %eax +- syscall +- +-62: movl (%rsp), %edi +- callq __pthread_disable_asynccancel +-.LcleanupEND: +- +- /* Lock. */ +- movq 8(%rsp), %rdi +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jnz 5f +- +-6: movl broadcast_seq(%rdi), %edx +- +- movq woken_seq(%rdi), %rax +- +- movq wakeup_seq(%rdi), %r9 +- +- cmpl 4(%rsp), %edx +- jne 16f +- +- cmpq 24(%rsp), %r9 +- jbe 19f +- +- cmpq %rax, %r9 +- jna 19f +- +- incq woken_seq(%rdi) +- +- /* Unlock */ +-16: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Wake up a thread which wants to destroy the condvar object. 
*/ +- cmpq $0xffffffffffffffff, total_seq(%rdi) +- jne 17f +- movl cond_nwaiters(%rdi), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 17f +- +- addq $cond_nwaiters, %rdi +- LP_OP(cmp) $-1, dep_mutex-cond_nwaiters(%rdi) +- movl $1, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- subq $cond_nwaiters, %rdi +- +-17: LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- jne 10f +- +- /* If requeue_pi is used the kernel performs the locking of the +- mutex. */ +-11: movq 16(%rsp), %rdi +- testb %r8b, %r8b +- jnz 18f +- +- callq __pthread_mutex_cond_lock +- +-14: leaq FRAME_SIZE(%rsp), %rsp +- cfi_adjust_cfa_offset(-FRAME_SIZE) +- +- /* We return the result of the mutex_lock operation. */ +- retq +- +- cfi_adjust_cfa_offset(FRAME_SIZE) +- +-18: callq __pthread_mutex_cond_lock_adjust +- xorl %eax, %eax +- jmp 14b +- +- /* We need to go back to futex_wait. If we're using requeue_pi, then +- release the mutex we had acquired and go back. */ +-19: testb %r8b, %r8b +- jz 8b +- +- /* Adjust the mutex values first and then unlock it. The unlock +- should always succeed or else the kernel did not lock the mutex +- correctly. */ +- movq 16(%rsp), %rdi +- callq __pthread_mutex_cond_lock_adjust +- movq %rdi, %r8 +- xorl %esi, %esi +- callq __pthread_mutex_unlock_usercnt +- /* Reload cond_var. */ +- movq 8(%rsp), %rdi +- jmp 8b +- +- /* Initial locking failed. */ +-1: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +- jmp 2b +- +- /* Unlock in loop requires wakeup. 
*/ +-3: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- /* The call preserves %rdx. */ +- callq __lll_unlock_wake +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- jmp 4b +- +- /* Locking in loop failed. */ +-5: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- jmp 6b +- +- /* Unlock after loop requires wakeup. */ +-10: +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- jmp 11b +- +- /* The initial unlocking of the mutex failed. */ +-12: movq %rax, %r10 +- movq 8(%rsp), %rdi +- LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- je 13f +- +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_unlock_wake +- +-13: movq %r10, %rax +- jmp 14b +- +- .size __pthread_cond_wait, .-__pthread_cond_wait +-versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait, +- GLIBC_2_3_2) +- +- +- .align 16 +- .type __condvar_cleanup1, @function +- .globl __condvar_cleanup1 +- .hidden __condvar_cleanup1 +-__condvar_cleanup1: +- /* Stack frame: +- +- rsp + 32 +- +--------------------------+ +- rsp + 24 | unused | +- +--------------------------+ +- rsp + 16 | mutex pointer | +- +--------------------------+ +- rsp + 8 | condvar pointer | +- +--------------------------+ +- rsp + 4 | old broadcast_seq value | +- +--------------------------+ +- rsp + 0 | old cancellation mode | +- +--------------------------+ +- */ +- +- movq 
%rax, 24(%rsp) +- +- /* Get internal lock. */ +- movq 8(%rsp), %rdi +- movl $1, %esi +- xorl %eax, %eax +- LOCK +-#if cond_lock == 0 +- cmpxchgl %esi, (%rdi) +-#else +- cmpxchgl %esi, cond_lock(%rdi) +-#endif +- jz 1f +- +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- callq __lll_lock_wait +-#if cond_lock != 0 +- subq $cond_lock, %rdi +-#endif +- +-1: movl broadcast_seq(%rdi), %edx +- cmpl 4(%rsp), %edx +- jne 3f +- +- /* We increment the wakeup_seq counter only if it is lower than +- total_seq. If this is not the case the thread was woken and +- then canceled. In this case we ignore the signal. */ +- movq total_seq(%rdi), %rax +- cmpq wakeup_seq(%rdi), %rax +- jbe 6f +- incq wakeup_seq(%rdi) +- incl cond_futex(%rdi) +-6: incq woken_seq(%rdi) +- +-3: subl $(1 << nwaiters_shift), cond_nwaiters(%rdi) +- +- /* Wake up a thread which wants to destroy the condvar object. */ +- xorl %ecx, %ecx +- cmpq $0xffffffffffffffff, total_seq(%rdi) +- jne 4f +- movl cond_nwaiters(%rdi), %eax +- andl $~((1 << nwaiters_shift) - 1), %eax +- jne 4f +- +- LP_OP(cmp) $-1, dep_mutex(%rdi) +- leaq cond_nwaiters(%rdi), %rdi +- movl $1, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- subq $cond_nwaiters, %rdi +- movl $1, %ecx +- +-4: LOCK +-#if cond_lock == 0 +- decl (%rdi) +-#else +- decl cond_lock(%rdi) +-#endif +- je 2f +-#if cond_lock != 0 +- addq $cond_lock, %rdi +-#endif +- LP_OP(cmp) $-1, dep_mutex-cond_lock(%rdi) +- movl $LLL_PRIVATE, %eax +- movl $LLL_SHARED, %esi +- cmovne %eax, %esi +- /* The call preserves %rcx. */ +- callq __lll_unlock_wake +- +- /* Wake up all waiters to make sure no signal gets lost. 
*/ +-2: testl %ecx, %ecx +- jnz 5f +- addq $cond_futex, %rdi +- LP_OP(cmp) $-1, dep_mutex-cond_futex(%rdi) +- movl $0x7fffffff, %edx +-#ifdef __ASSUME_PRIVATE_FUTEX +- movl $FUTEX_WAKE, %eax +- movl $(FUTEX_WAKE|FUTEX_PRIVATE_FLAG), %esi +- cmove %eax, %esi +-#else +- movl $0, %eax +- movl %fs:PRIVATE_FUTEX, %esi +- cmove %eax, %esi +- orl $FUTEX_WAKE, %esi +-#endif +- movl $SYS_futex, %eax +- syscall +- +- /* Lock the mutex only if we don't own it already. This only happens +- in case of PI mutexes, if we got cancelled after a successful +- return of the futex syscall and before disabling async +- cancellation. */ +-5: movq 16(%rsp), %rdi +- movl MUTEX_KIND(%rdi), %eax +- andl $(ROBUST_BIT|PI_BIT), %eax +- cmpl $PI_BIT, %eax +- jne 7f +- +- movl (%rdi), %eax +- andl $TID_MASK, %eax +- cmpl %eax, %fs:TID +- jne 7f +- /* We managed to get the lock. Fix it up before returning. */ +- callq __pthread_mutex_cond_lock_adjust +- jmp 8f +- +- +-7: callq __pthread_mutex_cond_lock +- +-8: movq 24(%rsp), %rdi +-.LcallUR: +- call _Unwind_Resume +- hlt +-.LENDCODE: +- cfi_endproc +- .size __condvar_cleanup1, .-__condvar_cleanup1 +- +- +- .section .gcc_except_table,"a",@progbits +-.LexceptSTART: +- .byte DW_EH_PE_omit # @LPStart format +- .byte DW_EH_PE_omit # @TType format +- .byte DW_EH_PE_uleb128 # call-site format +- .uleb128 .Lcstend-.Lcstbegin +-.Lcstbegin: +- .uleb128 .LcleanupSTART-.LSTARTCODE +- .uleb128 .LcleanupEND-.LcleanupSTART +- .uleb128 __condvar_cleanup1-.LSTARTCODE +- .uleb128 0 +- .uleb128 .LcallUR-.LSTARTCODE +- .uleb128 .LENDCODE-.LcallUR +- .uleb128 0 +- .uleb128 0 +-.Lcstend: +- +- +-#ifdef SHARED +- .hidden DW.ref.__gcc_personality_v0 +- .weak DW.ref.__gcc_personality_v0 +- .section .gnu.linkonce.d.DW.ref.__gcc_personality_v0,"aw",@progbits +- .align LP_SIZE +- .type DW.ref.__gcc_personality_v0, @object +- .size DW.ref.__gcc_personality_v0, LP_SIZE +-DW.ref.__gcc_personality_v0: +- ASM_ADDR __gcc_personality_v0 +-#endif +Index: 
glibc-2.24-256-g5140d03/sysdeps/x86/bits/pthreadtypes.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/sysdeps/x86/bits/pthreadtypes.h ++++ glibc-2.24-256-g5140d03/sysdeps/x86/bits/pthreadtypes.h +@@ -140,14 +140,29 @@ typedef union + { + struct + { +- int __lock; +- unsigned int __futex; +- __extension__ unsigned long long int __total_seq; +- __extension__ unsigned long long int __wakeup_seq; +- __extension__ unsigned long long int __woken_seq; +- void *__mutex; +- unsigned int __nwaiters; +- unsigned int __broadcast_seq; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __wseq; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __wseq32; ++ }; ++ __extension__ union ++ { ++ __extension__ unsigned long long int __g1_start; ++ struct { ++ unsigned int __low; ++ unsigned int __high; ++ } __g1_start32; ++ }; ++ unsigned int __g_refs[2]; ++ unsigned int __g_size[2]; ++ unsigned int __g1_orig_size; ++ unsigned int __wrefs; ++ unsigned int __g_signals[2]; ++#define __PTHREAD_COND_CLOCK_MONOTONIC_MASK 2 ++#define __PTHREAD_COND_SHARED_MASK 1 + } __data; + char __size[__SIZEOF_PTHREAD_COND_T]; + __extension__ long long int __align; +Index: glibc-2.24-256-g5140d03/include/atomic.h +=================================================================== +--- glibc-2.24-256-g5140d03.orig/include/atomic.h ++++ glibc-2.24-256-g5140d03/include/atomic.h +@@ -777,18 +777,22 @@ void __atomic_link_error (void); + # endif + + # ifndef atomic_fetch_xor_release ++/* Failing the atomic_compare_exchange_weak_release reloads the value in ++ __atg104_expected, so we need only do the XOR again and retry. 
*/ + # define atomic_fetch_xor_release(mem, operand) \ +- ({ __typeof (*(mem)) __atg104_old; \ +- __typeof (mem) __atg104_memp = (mem); \ ++ ({ __typeof (mem) __atg104_memp = (mem); \ ++ __typeof (*(mem)) __atg104_expected = (*__atg104_memp); \ ++ __typeof (*(mem)) __atg104_desired; \ + __typeof (*(mem)) __atg104_op = (operand); \ + \ + do \ +- __atg104_old = (*__atg104_memp); \ +- while (__builtin_expect \ +- (atomic_compare_and_exchange_bool_rel ( \ +- __atg104_memp, __atg104_old ^ __atg104_op, __atg104_old), 0));\ ++ __atg104_desired = __atg104_expected ^ __atg104_op; \ ++ while (__glibc_unlikely \ ++ (atomic_compare_exchange_weak_release ( \ ++ __atg104_memp, &__atg104_expected, __atg104_desired) \ ++ == 0)); \ + \ +- __atg104_old; }) ++ __atg104_expected; }) + #endif + + #endif /* !USE_ATOMIC_COMPILER_BUILTINS */ diff --git a/glibc-swbz20019.patch b/glibc-swbz20019.patch new file mode 100644 index 0000000..fcd7ad8 --- /dev/null +++ b/glibc-swbz20019.patch @@ -0,0 +1,33 @@ +Bug 20019 - NULL pointer dereference in libc.so.6 IFUNC due to uninitialized GOT + +Prototype patch by H.J. Lu to error +out when the IFUNC being resolved is for another map that hasn't +yet been relocated.
+ +--- a/sysdeps/x86_64/dl-machine.h ++++ a/sysdeps/x86_64/dl-machine.h +@@ -331,7 +331,23 @@ elf_machine_rela (struct link_map *map, const ElfW(Rela) *reloc, + 0) + && __builtin_expect (sym->st_shndx != SHN_UNDEF, 1) + && __builtin_expect (!skip_ifunc, 1)) +- value = ((ElfW(Addr) (*) (void)) value) (); ++ { ++# ifndef RTLD_BOOTSTRAP ++ if (sym_map != map ++ && sym_map->l_type != lt_executable ++ && !sym_map->l_relocated) ++ { ++ const char *strtab ++ = (const char *) D_PTR (map, l_info[DT_STRTAB]); ++ _dl_fatal_printf ("\ ++%s: Relink `%s' with `%s' for IFUNC symbol `%s'\n", ++ RTLD_PROGNAME, map->l_name, ++ sym_map->l_name, ++ strtab + refsym->st_name); ++ } ++#endif ++ value = ((ElfW(Addr) (*) (void)) value) (); ++ } + + switch (r_type) + { diff --git a/glibc.spec b/glibc.spec index 8e924aa..e01f944 100644 --- a/glibc.spec +++ b/glibc.spec @@ -1,6 +1,6 @@ %define glibcsrcdir glibc-2.24-256-g5140d03 %define glibcversion 2.24.90 -%define glibcrelease 10%{?dist} +%define glibcrelease 11%{?dist} # Pre-release tarballs are pulled in from git using a command that is # effectively: # @@ -262,6 +262,13 @@ Patch0059: glibc-c-utf8-locale.patch # Build libcrypt twice, with and without NSS. Patch0060: glibc-rh1324623.patch +# Bug 20019: Prototype patch to error on resolution of IFUNC +# for an uninitialized library. +Patch0061: glibc-swbz20019.patch + +# Bug 13165: New condvar implementation. +Patch0062: glibc-swbz13165.patch + ############################################################################## # # Patches from upstream @@ -881,6 +888,8 @@ microbenchmark tests on the system. %patch2038 -p1 %patch2110 -p1 %patch2112 -p1 +%patch0061 -p1 +%patch0062 -p1 ############################################################################## # %%prep - Additional prep required... @@ -2275,6 +2284,10 @@ rm -f *.filelist* %endif %changelog +* Mon Oct 17 2016 Carlos O'Donell - 2.24.90-11 +- Add prototype support for detecting invalid IFUNC calls (swbz#20019). 
+- New POSIX thread condition variable implementation (swbz#13165). + * Fri Oct 07 2016 Florian Weimer - 2.24.90-10 - Auto-sync with upstream master, commit 5140d036f9c16585448b5908c3a219bd96842161, fixing: