diff options
author | Thiago Macieira <[email protected]> | 2023-05-22 22:57:11 -0700 |
---|---|---|
committer | Mårten Nordheim <[email protected]> | 2025-06-27 14:50:31 +0000 |
commit | b8c09d2e3b1eabf3f908ca9e0633a4430a976f89 (patch) | |
tree | c6ce987d72f95cab2c5f645dc0c376754e2ef1a3 | |
parent | 4409a7c21399e3c602edeea62c409a18ef78148f (diff) |
We do it by using the sign bit of the atomic to indicate that there are
no waiters waiting. That way, the counter only becomes a zero when all
expected counters have counted down *and* there is at least one waiter.
That means the countDown() code remains unchanged. On x86-64 and
AArch64, there is no change in the number of instructions in the inline
portion of wait() either. The non-inline portion uses a
__atomic_and_fetch() instead of atomic_fetch_and() so compilers will
generate LOCK AND for x86-64, LDCLR for AArch64 ARMv8.1, and AMOAND.W
for RISC-V.
This is more efficient than the Standard Libraries' current
implementations, which use a separate, global atomic out of a pool,
meaning that there could be a collision between two or more latches (or
any other users of std::atomic waiting, such as std::semaphore and
std::barrier). Coupled with the fact that we futexWait() on the actual
latch's address (something libc++ can't / won't do), this implementation
should be overall much more efficient.
Change-Id: Ib5ce7a497e034ebabb2cfffd1761b02a44d548d3
Reviewed-by: Mårten Nordheim <[email protected]>
-rw-r--r-- | src/corelib/thread/qlatch.cpp | 31 | ||||
-rw-r--r-- | src/corelib/thread/qlatch_p.h | 9 |
2 files changed, 37 insertions, 3 deletions
diff --git a/src/corelib/thread/qlatch.cpp b/src/corelib/thread/qlatch.cpp index f91dfb1400a..0b4863267c0 100644 --- a/src/corelib/thread/qlatch.cpp +++ b/src/corelib/thread/qlatch.cpp @@ -59,6 +59,7 @@ namespace atomicwait = q20; \endcode In fact, the above is exactly what Qt::BlockingQueued connection does. + \section3 Synchronizing execution For this use-case, multiple threads must reach a particular state before @@ -84,6 +85,16 @@ namespace atomicwait = q20; \li count_down() is not \c{const} (libstdc++ implementation is). \endlist + \omit + \section2 Implementation details + + countDown() must call wakeUp() if the latch counter reaches zero and there + are threads waiting to be woken up. Or, conversely, countDown() needs to do + nothing after decrementing if the latch counter is still non-zero or there + are no waiters. Therefore, we choose the bits so that a non-zero + \c{counter} member implies no action required. + + \endomit */ /*! @@ -172,6 +183,26 @@ namespace atomicwait = q20; void QLatch::waitInternal(int current) noexcept { + // mark that there is a waiter -> clear the bit that there are no waiters + if (current & NoWaiters) { +#if __has_builtin(__atomic_and_fetch) + // Modern GCC and Clang are able to generate loop-free code for this + // operation on x86-64, ARMv8.1 and RISC-V. + if (__atomic_and_fetch(reinterpret_cast<int *>(&counter._q_value), ~NoWaiters, + int(std::memory_order_relaxed)) == 0) + return; +#else + // Do it in two steps, which is usually better than a compare_exchange + // loop. This is not exactly the same as above (it's not atomic!) but + // is correct for our purposes because the counter never changes from 0 + // once it reaches that. + counter.fetchAndAndRelaxed(~NoWaiters); + if (counter.loadRelaxed() == 0) + return; // no need to wait! 
+#endif + } + current &= ~NoWaiters; + auto waitLoop = [&](auto waiter) { do { waiter(current); diff --git a/src/corelib/thread/qlatch_p.h b/src/corelib/thread/qlatch_p.h index b407e3b6c7d..95890f7519d 100644 --- a/src/corelib/thread/qlatch_p.h +++ b/src/corelib/thread/qlatch_p.h @@ -28,12 +28,12 @@ class QLatch { public: constexpr explicit QLatch(int expected) noexcept - : counter(expected) + : counter(expected | NoWaiters) {} int pending() const noexcept { - return counter.loadAcquire(); + return (counter.loadAcquire() & CounterMask); } void countDown(int n = 1) noexcept @@ -53,7 +53,7 @@ public: void wait() noexcept // not const { - if (int current = counter.loadAcquire(); current != 0) { + if (int current = counter.loadAcquire(); (current & CounterMask) != 0) { waitInternal(current); QtTsan::latchWait(&counter); } @@ -72,6 +72,9 @@ public: void arrive_and_wait(int n = 1) noexcept { arriveAndWait(n); } private: + static constexpr int NoWaitersBit = 31; + static constexpr int NoWaiters = 1 << NoWaitersBit; + static constexpr int CounterMask = ~NoWaiters; QBasicAtomicInt counter; Q_DISABLE_COPY_MOVE(QLatch) |