diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index ec9c345ffdfb..a2d1d6d85ac1 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -97,20 +97,41 @@
 #define LW_FLAG_BITS			3
 #define LW_FLAG_MASK			(((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))
 		uint32		state = pg_atomic_read_u32(&lock->state);
+		uint32		excl = (state & LW_VAL_EXCLUSIVE) != 0;
+		uint32		shared = excl ? 0 : state & LW_SHARED_MASK;
 
 		ereport(LOG,
 				(errhidestmt(true),
@@ -244,8 +267,8 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
 				 errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
 								 MyProcPid,
 								 where, T_NAME(lock), lock,
-								 (state & LW_VAL_EXCLUSIVE) != 0,
-								 state & LW_SHARED_MASK,
+								 excl,
+								 shared,
 								 (state & LW_FLAG_HAS_WAITERS) != 0,
 								 pg_atomic_read_u32(&lock->nwaiters),
 								 (state & LW_FLAG_RELEASE_OK) != 0)));
@@ -750,14 +773,53 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
  * This function will not block waiting for a lock to become free - that's the
  * caller's job.
  *
+ * willwait: true if the caller is willing to wait for the lock to become free
+ *           false if the caller is not willing to wait.
+ *
  * Returns true if the lock isn't free and we need to wait.
  */
 static bool
-LWLockAttemptLock(LWLock *lock, LWLockMode mode)
+LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool willwait)
 {
 	uint32		old_state;
 
 	Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
+	/*
+	 * Optimized shared lock acquisition using atomic fetch-and-add.
+	 *
+	 * This optimization aims to lower the cost of acquiring shared locks
+	 * by reducing the number of atomic operations, which can be expensive
+	 * on systems with many CPU cores.
+	 *
+	 * It is only activated when willwait=true, ensuring that the reference
+	 * count does not grow unchecked and overflow into the LW_VAL_EXCLUSIVE bit.
+	 *
+	 * Three scenarios can occur when acquiring a shared lock:
+	 * 1) Lock is free: atomically increment reference count and acquire
+	 * 2) Lock held in shared mode: atomically increment reference count and acquire
+	 * 3) Lock held exclusively: atomically increment reference count but fail to acquire
+	 *
+	 * Scenarios 1 and 2 work as expected - we successfully increment the count
+	 * and acquire the lock.
+	 *
+	 * Scenario 3 is counterintuitive: we increment the reference count even though
+	 * we cannot acquire the lock due to the exclusive holder. This creates a
+	 * temporarily invalid reference count, but it's acceptable because:
+	 * - The LW_VAL_EXCLUSIVE flag takes precedence in determining lock state
+	 * - Each process retries at most twice before blocking on a semaphore
+	 * - This bounds the "overcounted" references to MAX_BACKENDS * 2
+	 * - The bound fits within LW_SHARED_MASK capacity
+	 * - The lock->state including "overcounted" references is reset when the exclusive
+	 *   lock is released.
+	 *
+	 * See LW_SHARED_MASK definition comments for additional details.
+	 */
+	if (willwait && mode == LW_SHARED)
+	{
+		old_state = pg_atomic_fetch_add_u32(&lock->state, LW_VAL_SHARED);
+		Assert((old_state & LW_LOCK_MASK) != LW_LOCK_MASK);
+		return (old_state & LW_VAL_EXCLUSIVE) != 0;
+	}
 
 	/*
 	 * Read once outside the loop, later iterations will get the newer value
@@ -1202,7 +1264,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		 * Try to grab the lock the first time, we're not in the waitqueue
 		 * yet/anymore.
 		 */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		if (!mustwait)
 		{
@@ -1225,7 +1287,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 		LWLockQueueSelf(lock, mode);
 
 		/* we're now guaranteed to be woken up if necessary */
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		/* ok, grabbed the lock the second time round, need to undo queueing */
 		if (!mustwait)
@@ -1256,6 +1318,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
 
 		for (;;)
 		{
+			/* When we are signaled, the previous holder has already cleared the lock bits of lock->state, including any overcounted shared references */
 			PGSemaphoreLock(proc->sem);
 			if (proc->lwWaiting == LW_WS_NOT_WAITING)
 				break;
@@ -1328,7 +1391,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
 	HOLD_INTERRUPTS();
 
 	/* Check for the lock */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, false);
 
 	if (mustwait)
 	{
@@ -1395,13 +1458,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 	 * NB: We're using nearly the same twice-in-a-row lock acquisition
 	 * protocol as LWLockAcquire(). Check its comments for details.
 	 */
-	mustwait = LWLockAttemptLock(lock, mode);
+	mustwait = LWLockAttemptLock(lock, mode, true);
 
 	if (mustwait)
 	{
 		LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);
 
-		mustwait = LWLockAttemptLock(lock, mode);
+		mustwait = LWLockAttemptLock(lock, mode, true);
 
 		if (mustwait)
 		{
@@ -1421,6 +1484,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
 
 			for (;;)
 			{
+				/* When we are signaled, the previous holder has already cleared the lock bits of lock->state, including any overcounted shared references */
 				PGSemaphoreLock(proc->sem);
 				if (proc->lwWaiting == LW_WS_NOT_WAITING)
 					break;
@@ -1803,7 +1867,15 @@ LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
 	 * others, even if we still have to wakeup other waiters.
 	 */
 	if (mode == LW_EXCLUSIVE)
-		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
+	{
+		/*
+		 * To release the exclusive lock, all bits of LW_LOCK_MASK,
+		 * including any "overcounted" increments from blocked readers,
+		 * are cleared.
+		 */
+		oldstate = pg_atomic_fetch_and_u32(&lock->state, ~LW_LOCK_MASK);
+		oldstate &= ~LW_LOCK_MASK;
+	}
 	else
 		oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);
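
For reference, below is a minimal standalone sketch (C11 atomics, not PostgreSQL code) of the protocol the patch implements: a shared acquisition is a single fetch-add that may leave a bounded, temporary overcount behind when an exclusive holder wins, and an exclusive release clears the whole lock field, erasing that overcount together with the exclusive bit. The names MAX_READERS, VAL_EXCLUSIVE, LOCK_MASK and the helper functions are illustrative stand-ins for MAX_BACKENDS, LW_VAL_EXCLUSIVE, LW_LOCK_MASK and the LWLock routines; the wait queue, semaphores and flag bits are omitted.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_READERS		((1u << 18) - 1)	/* (power of 2) - 1, like MAX_BACKENDS */
#define VAL_SHARED		1u
#define VAL_EXCLUSIVE	(MAX_READERS + 1)	/* single bit above the shared count */
#define LOCK_MASK		(MAX_READERS | VAL_EXCLUSIVE)

static _Atomic uint32_t state;

/*
 * Shared acquire: unconditionally bump the reference count.  If the
 * exclusive bit was already set we did NOT get the lock, but the
 * "overcounted" reference is harmless: the exclusive holder clears the
 * whole LOCK_MASK on release.
 */
static bool
shared_acquire(void)
{
	uint32_t	old = atomic_fetch_add(&state, VAL_SHARED);

	return (old & VAL_EXCLUSIVE) == 0;	/* true = acquired */
}

static void
shared_release(void)
{
	atomic_fetch_sub(&state, VAL_SHARED);
}

/* Exclusive acquire: single CAS that only succeeds when no lock bits are set. */
static bool
exclusive_acquire(void)
{
	uint32_t	expected = atomic_load(&state) & ~LOCK_MASK;

	return atomic_compare_exchange_strong(&state, &expected,
										  expected | VAL_EXCLUSIVE);
}

/* Exclusive release: clear the exclusive bit and any overcounted readers. */
static void
exclusive_release(void)
{
	atomic_fetch_and(&state, ~LOCK_MASK);
}

int
main(void)
{
	/* A reader gets the lock while it is free. */
	printf("reader acquires free lock: %d\n", shared_acquire());
	shared_release();

	/* A writer takes the lock; a reader's fetch-add now fails but still bumps the count. */
	printf("writer acquires: %d\n", exclusive_acquire());
	printf("reader while writer holds it: %d\n", shared_acquire());
	printf("overcounted readers: %u\n", (unsigned) (atomic_load(&state) & MAX_READERS));

	/* Releasing the exclusive lock wipes the overcount along with the exclusive bit. */
	exclusive_release();
	printf("lock bits after exclusive release: %u\n", (unsigned) (atomic_load(&state) & LOCK_MASK));
	return 0;
}

The single-threaded trace only demonstrates the state transitions; in the real patch the overcount stays bounded because a waiter calls LWLockAttemptLock() at most twice before sleeping on its semaphore, which is why the fast path is restricted to willwait=true callers.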