#define LW_FLAG_BITS 3
#define LW_FLAG_MASK (((1<<LW_FLAG_BITS)-1)<<(32-LW_FLAG_BITS))

- /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */
- #define LW_VAL_EXCLUSIVE (MAX_BACKENDS + 1)
+ /*
+  * LW_SHARED_MASK is still (power of 2) - 1, i.e. suitable for a mask.
+  *
+  * Originally, the LW_SHARED lock reference count was maintained in bits
+  * [MAX_BACKENDS_BITS-1:0] of LWLock.state, with a theoretical maximum of
+  * MAX_BACKENDS (when all MAX_BACKENDS processes hold the lock concurrently).
+  *
+  * To reduce lock acquisition overhead, LWLockAttemptLock now merges the
+  * read and update of the LW_SHARED lock's state into a single atomic
+  * fetch-add. This eliminates the need for separate atomic instructions -
+  * a critical improvement given the high cost of atomic operations on
+  * high-core-count systems.
+  *
+  * This optimization introduces a scenario where the reference count is
+  * temporarily incremented even though the reader fails to acquire the lock
+  * because it is held exclusively. However, since each process attempts lock
+  * acquisition at most *twice* before waiting on a semaphore, the reference
+  * count is bounded by MAX_BACKENDS * 2.
+  *
+  * To accommodate this upper bound:
+  * 1. LW_SHARED_MASK is extended by 1 bit
+  * 2. LW_VAL_EXCLUSIVE is left-shifted by 1 bit
+  */
+ #define LW_SHARED_MASK ((MAX_BACKENDS << 1) + 1)
+ #define LW_VAL_EXCLUSIVE (LW_SHARED_MASK + 1)
+ #define LW_LOCK_MASK (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)
#define LW_VAL_SHARED 1

- /* already (power of 2)-1, i.e. suitable for a mask */
- #define LW_SHARED_MASK MAX_BACKENDS
- #define LW_LOCK_MASK (MAX_BACKENDS | LW_VAL_EXCLUSIVE)
+ /* assumes MAX_BACKENDS is a (power of 2) - 1, checked below */


StaticAssertDecl(((MAX_BACKENDS + 1) & MAX_BACKENDS) == 0,
                 "MAX_BACKENDS + 1 needs to be a power of 2");

- StaticAssertDecl((MAX_BACKENDS & LW_FLAG_MASK) == 0,
-                  "MAX_BACKENDS and LW_FLAG_MASK overlap");
+ StaticAssertDecl((LW_SHARED_MASK & LW_FLAG_MASK) == 0,
+                  "LW_SHARED_MASK and LW_FLAG_MASK overlap");

StaticAssertDecl((LW_VAL_EXCLUSIVE & LW_FLAG_MASK) == 0,
                 "LW_VAL_EXCLUSIVE and LW_FLAG_MASK overlap");
@@ -237,15 +258,17 @@ PRINT_LWDEBUG(const char *where, LWLock *lock, LWLockMode mode)
    if (Trace_lwlocks)
    {
        uint32      state = pg_atomic_read_u32(&lock->state);
+       uint32      excl = (state & LW_VAL_EXCLUSIVE) != 0;
+       uint32      shared = excl ? 0 : state & LW_SHARED_MASK;

        ereport(LOG,
                (errhidestmt(true),
                 errhidecontext(true),
                 errmsg_internal("%d: %s(%s %p): excl %u shared %u haswaiters %u waiters %u rOK %d",
                                 MyProcPid,
                                 where, T_NAME(lock), lock,
-                                (state & LW_VAL_EXCLUSIVE) != 0,
-                                state & LW_SHARED_MASK,
+                                excl,
+                                shared,
                                 (state & LW_FLAG_HAS_WAITERS) != 0,
                                 pg_atomic_read_u32(&lock->nwaiters),
                                 (state & LW_FLAG_RELEASE_OK) != 0)));
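For illustration only (not part of the patch, constants assumed to match the layout above), the decoding added here can be reproduced standalone; it shows that a lock held exclusively is reported with zero shared holders even while a blocked reader has transiently bumped the count:

/* Standalone sketch of the excl/shared decoding used by PRINT_LWDEBUG. */
#include <assert.h>
#include <stdint.h>

#define LW_SHARED_MASK   0x7FFFFu   /* assumed layout */
#define LW_VAL_EXCLUSIVE 0x80000u

int
main(void)
{
    /* exclusively held, plus one transient overcount from a blocked reader */
    uint32_t state = LW_VAL_EXCLUSIVE + 1;
    uint32_t excl = (state & LW_VAL_EXCLUSIVE) != 0;
    uint32_t shared = excl ? 0 : (state & LW_SHARED_MASK);

    /* the trace line reports the overcount as zero shared holders */
    assert(excl == 1 && shared == 0);
    return 0;
}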
@@ -750,14 +773,53 @@ GetLWLockIdentifier(uint32 classId, uint16 eventId)
 * This function will not block waiting for a lock to become free - that's the
 * caller's job.
 *
+ * willwait: true if the caller is willing to wait for the lock to become free,
+ *           false if the caller is not willing to wait.
+ *
 * Returns true if the lock isn't free and we need to wait.
 */
static bool
- LWLockAttemptLock(LWLock *lock, LWLockMode mode)
+ LWLockAttemptLock(LWLock *lock, LWLockMode mode, bool willwait)
{
    uint32      old_state;

    Assert(mode == LW_EXCLUSIVE || mode == LW_SHARED);
+
+   /*
+    * Optimized shared lock acquisition using atomic fetch-and-add.
+    *
+    * This path lowers the cost of acquiring shared locks by reducing the
+    * number of atomic operations, which are expensive on systems with many
+    * CPU cores.
+    *
+    * It is only taken when willwait is true, which keeps the reference count
+    * from growing unchecked and overflowing into the LW_VAL_EXCLUSIVE bit.
+    *
+    * Three scenarios can occur when acquiring a shared lock:
+    * 1) Lock is free: atomically increment the reference count and acquire
+    * 2) Lock held in shared mode: atomically increment the reference count
+    *    and acquire
+    * 3) Lock held exclusively: atomically increment the reference count but
+    *    fail to acquire
+    *
+    * Scenarios 1 and 2 work as expected - we successfully increment the
+    * count and acquire the lock.
+    *
+    * Scenario 3 is counterintuitive: we increment the reference count even
+    * though we cannot acquire the lock because of the exclusive holder. This
+    * leaves a temporarily invalid reference count, which is acceptable
+    * because:
+    * - The LW_VAL_EXCLUSIVE bit takes precedence in determining lock state
+    * - Each process attempts the lock at most twice before blocking on a
+    *   semaphore, bounding the "overcounted" references to MAX_BACKENDS * 2,
+    *   which fits within LW_SHARED_MASK
+    * - The overcounted references are cleared from lock->state when the
+    *   exclusive lock is released
+    *
+    * See the comments above LW_SHARED_MASK for additional details.
+    */
+   if (willwait && mode == LW_SHARED)
+   {
+       old_state = pg_atomic_fetch_add_u32(&lock->state, LW_VAL_SHARED);
+       Assert((old_state & LW_LOCK_MASK) != LW_LOCK_MASK);
+       return (old_state & LW_VAL_EXCLUSIVE) != 0;
+   }

    /*
     * Read once outside the loop, later iterations will get the newer value
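The shared fast path added above can be modeled outside the server with C11 atomics. This is a simplified sketch rather than the server code, with LW_VAL_EXCLUSIVE assumed as in the layout comment; it walks through the three scenarios the comment describes:

/* Simplified model of the LW_SHARED fast path (one atomic fetch-add). */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define LW_VAL_SHARED    1u
#define LW_VAL_EXCLUSIVE 0x80000u   /* assumed layout */

/* Returns true if the caller must wait, mirroring LWLockAttemptLock. */
static bool
shared_fast_path(_Atomic uint32_t *state)
{
    /* one atomic op reads the old state and bumps the shared count */
    uint32_t old_state = atomic_fetch_add(state, LW_VAL_SHARED);

    return (old_state & LW_VAL_EXCLUSIVE) != 0;
}

int
main(void)
{
    _Atomic uint32_t state = 0;

    assert(!shared_fast_path(&state));  /* 1) free: acquired */
    assert(!shared_fast_path(&state));  /* 2) shared: acquired again */

    atomic_store(&state, LW_VAL_EXCLUSIVE);
    assert(shared_fast_path(&state));   /* 3) exclusive: must wait ... */
    /* ... but the count was still bumped by that failed attempt */
    assert(atomic_load(&state) == LW_VAL_EXCLUSIVE + LW_VAL_SHARED);
    return 0;
}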
@@ -1202,7 +1264,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
         * Try to grab the lock the first time, we're not in the waitqueue
         * yet/anymore.
         */
-       mustwait = LWLockAttemptLock(lock, mode);
+       mustwait = LWLockAttemptLock(lock, mode, true);

        if (!mustwait)
        {
@@ -1225,7 +1287,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)
        LWLockQueueSelf(lock, mode);

        /* we're now guaranteed to be woken up if necessary */
-       mustwait = LWLockAttemptLock(lock, mode);
+       mustwait = LWLockAttemptLock(lock, mode, true);

        /* ok, grabbed the lock the second time round, need to undo queueing */
        if (!mustwait)
@@ -1256,6 +1318,7 @@ LWLockAcquire(LWLock *lock, LWLockMode mode)

        for (;;)
        {
+           /* Any shared-count overcount we added has already been cleared,
+            * along with the rest of LW_LOCK_MASK, by the releasing holder. */
            PGSemaphoreLock(proc->sem);
            if (proc->lwWaiting == LW_WS_NOT_WAITING)
                break;
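The MAX_BACKENDS * 2 bound cited in the layout comment comes directly from this control flow: each backend attempts the lock once before LWLockQueueSelf() and once after, then blocks on its semaphore and adds nothing more until the holder's release has wiped the lock bits. A standalone sketch of that accounting, with the constants assumed as above and the protocol reduced to its effect on the counter:

/* Worst-case overcount accounting for sleeping shared acquirers. */
#include <assert.h>
#include <stdint.h>

#define LW_VAL_SHARED    1u
#define LW_SHARED_MASK   0x7FFFFu   /* assumed layout */
#define LW_VAL_EXCLUSIVE 0x80000u
#define LW_LOCK_MASK     (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)
#define MAX_BACKENDS     0x3FFFF    /* assumed, from postmaster.h */

int
main(void)
{
    uint32_t state = LW_VAL_EXCLUSIVE;  /* someone holds the lock exclusively */

    /* every backend attempts at most twice, then sleeps */
    for (uint32_t backend = 0; backend < MAX_BACKENDS; backend++)
    {
        state += LW_VAL_SHARED;         /* attempt before queueing */
        state += LW_VAL_SHARED;         /* attempt after queueing */
    }

    /* even in the worst case the count stays inside LW_SHARED_MASK */
    assert((state & LW_SHARED_MASK) == (uint32_t) MAX_BACKENDS * 2);
    assert((state & ~LW_LOCK_MASK) == 0);

    /* the exclusive holder's release clears all of it in one go */
    state &= ~LW_LOCK_MASK;
    assert(state == 0);
    return 0;
}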
@@ -1328,7 +1391,7 @@ LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
    HOLD_INTERRUPTS();

    /* Check for the lock */
-   mustwait = LWLockAttemptLock(lock, mode);
+   mustwait = LWLockAttemptLock(lock, mode, false);

    if (mustwait)
    {
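LWLockConditionalAcquire passes willwait=false and therefore stays on the existing compare-and-swap path: a caller that never queues and sleeps could retry arbitrarily often, so the two-attempts-per-backend bound on overcounting would no longer hold. This is also what the Assert in the fast path guards against. A deliberately exaggerated sketch (constants assumed as above) of what unbounded overcounting would eventually do:

/* Why the fetch-add path requires willwait: unbounded retries overflow. */
#include <assert.h>
#include <stdint.h>

#define LW_VAL_SHARED    1u
#define LW_SHARED_MASK   0x7FFFFu   /* assumed layout */
#define LW_VAL_EXCLUSIVE 0x80000u
#define LW_LOCK_MASK     (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)

int
main(void)
{
    uint32_t state = LW_VAL_EXCLUSIVE;

    /* a hypothetical spinner that never sleeps keeps bumping the count ... */
    while ((state & LW_LOCK_MASK) != LW_LOCK_MASK)
        state += LW_VAL_SHARED;

    /* ... until the next increment carries out of the lock field, leaving
     * residue that the exclusive holder's release could not clear */
    state += LW_VAL_SHARED;
    assert((state & ~LW_LOCK_MASK) != 0);
    return 0;
}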
@@ -1395,13 +1458,13 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
     * NB: We're using nearly the same twice-in-a-row lock acquisition
     * protocol as LWLockAcquire(). Check its comments for details.
     */
-   mustwait = LWLockAttemptLock(lock, mode);
+   mustwait = LWLockAttemptLock(lock, mode, true);

    if (mustwait)
    {
        LWLockQueueSelf(lock, LW_WAIT_UNTIL_FREE);

-       mustwait = LWLockAttemptLock(lock, mode);
+       mustwait = LWLockAttemptLock(lock, mode, true);

        if (mustwait)
        {
@@ -1421,6 +1484,7 @@ LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
14211484
14221485 for (;;)
14231486 {
+               /* Any shared-count overcount we added has already been cleared,
+                * along with the rest of LW_LOCK_MASK, by the releasing holder. */
                PGSemaphoreLock(proc->sem);
                if (proc->lwWaiting == LW_WS_NOT_WAITING)
                    break;
@@ -1803,7 +1867,15 @@ LWLockReleaseInternal(LWLock *lock, LWLockMode mode)
     * others, even if we still have to wakeup other waiters.
     */
    if (mode == LW_EXCLUSIVE)
-       oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
+   {
+       /*
+        * Releasing the exclusive lock clears every bit in LW_LOCK_MASK,
+        * which also discards any "overcounted" shared references left by
+        * readers that failed to acquire the lock while we held it.
+        *
+        * pg_atomic_fetch_and_u32() returns the value *before* the AND, so
+        * clear the same bits in our local copy as well to mimic the
+        * post-release value that pg_atomic_sub_fetch_u32() used to return
+        * for the checks below.
+        */
+       oldstate = pg_atomic_fetch_and_u32(&lock->state, ~LW_LOCK_MASK);
+       oldstate &= ~LW_LOCK_MASK;
+   }
    else
        oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);

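The release path can be modeled the same way. Again a sketch with assumed constants, not server code: pg_atomic_fetch_and_u32() returns the value before the AND, so the extra local mask is what keeps the post-release expectations of the code following this hunk unchanged:

/* Model of the exclusive-release path with C11 atomics. */
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

#define LW_VAL_SHARED    1u
#define LW_SHARED_MASK   0x7FFFFu   /* assumed layout */
#define LW_VAL_EXCLUSIVE 0x80000u
#define LW_LOCK_MASK     (LW_SHARED_MASK | LW_VAL_EXCLUSIVE)

int
main(void)
{
    /* exclusive holder plus two overcounts from readers that went to sleep */
    _Atomic uint32_t state = LW_VAL_EXCLUSIVE + 2 * LW_VAL_SHARED;

    /* one atomic op clears the exclusive bit and the stale reader counts */
    uint32_t oldstate = atomic_fetch_and(&state, ~LW_LOCK_MASK);

    /* fetch-and returns the pre-AND value; mask locally so later checks see
     * the post-release state, as pg_atomic_sub_fetch_u32() used to return */
    oldstate &= ~LW_LOCK_MASK;

    assert(atomic_load(&state) == 0);
    assert(oldstate == 0);
    return 0;
}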