
Commit 22058ae

melanieplageman authored and Commitfest Bot committed
Refactor goto into for loop in GetVictimBuffer()
GetVictimBuffer() implemented a loop that optimistically locks a clean
victim buffer using a goto. Future commits will add batch flushing
functionality to GetVictimBuffer(), and that new logic works better with
standard for-loop flow control. This commit is only a refactor and does
not introduce any new functionality.

Author: Melanie Plageman <[email protected]>
Reviewed-by: Nazir Bilal Yavuz <[email protected]>
Reviewed-by: Chao Li <[email protected]>
Discussion: https://postgr.es/m/2FA0BAC7-5413-4ABD-94CA-4398FE77750D%40gmail.com
Discussion: https://postgr.es/m/flat/CAAKRu_Yjn4mvN9NBxtmsCQSGwup45CoA4e05nhR7ADP-v0WCig%40mail.gmail.com
1 parent 50eb4e1 commit 22058ae

3 files changed: 112 additions, 103 deletions
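Reduced to its essentials, the change turns a backward goto retry into a for (;;) loop in which each failed attempt is a continue and success is a break. Below is a minimal sketch of that pattern only; claim_candidate(), try_prepare(), and release() are hypothetical stand-ins, not the real buffer-manager calls.

    #include <stdbool.h>

    /* Hypothetical stand-ins for the real buffer-manager calls. */
    extern int  claim_candidate(void);
    extern bool try_prepare(int buf);   /* may fail under concurrency */
    extern void release(int buf);

    /* Before: retry with a backward goto. */
    int
    pick_victim_goto(void)
    {
        int     buf;

    again:
        buf = claim_candidate();
        if (!try_prepare(buf))
        {
            release(buf);
            goto again;         /* prospective victim got used concurrently */
        }
        return buf;
    }

    /* After: the same retry logic with standard flow control. */
    int
    pick_victim_loop(void)
    {
        int     buf;

        for (;;)
        {
            buf = claim_candidate();
            if (!try_prepare(buf))
            {
                release(buf);
                continue;       /* try another candidate */
            }
            break;              /* success; fall out of the loop */
        }
        return buf;
    }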

src/backend/storage/buffer/bufmgr.c (88 additions, 101 deletions)
@@ -68,10 +68,6 @@
 #include "utils/timestamp.h"
 
 
-/* Note: these two macros only work on shared buffers, not local ones! */
-#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
-#define BufferGetLSN(bufHdr)	(PageGetLSN(BufHdrGetBlock(bufHdr)))
-
 /* Note: this macro only works on local buffers, not shared ones! */
 #define LocalBufHdrGetBlock(bufHdr) \
 	LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)]
@@ -2325,125 +2321,116 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context)
 	ReservePrivateRefCountEntry();
 	ResourceOwnerEnlarge(CurrentResourceOwner);
 
-	/* we return here if a prospective victim buffer gets used concurrently */
-again:
-
-	/*
-	 * Select a victim buffer.  The buffer is returned pinned and owned by
-	 * this backend.
-	 */
-	buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
-	buf = BufferDescriptorGetBuffer(buf_hdr);
-
-	/*
-	 * We shouldn't have any other pins for this buffer.
-	 */
-	CheckBufferIsPinnedOnce(buf);
-
-	/*
-	 * If the buffer was dirty, try to write it out.  There is a race
-	 * condition here, in that someone might dirty it after we released the
-	 * buffer header lock above, or even while we are writing it out (since
-	 * our share-lock won't prevent hint-bit updates).  We will recheck the
-	 * dirty bit after re-locking the buffer header.
-	 */
-	if (buf_state & BM_DIRTY)
+	/* Select a victim buffer using an optimistic locking scheme. */
+	for (;;)
 	{
-		LWLock	   *content_lock;
 
-		Assert(buf_state & BM_TAG_VALID);
-		Assert(buf_state & BM_VALID);
+		/* Attempt to claim a victim buffer. Buffer is returned pinned. */
+		buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
+		buf = BufferDescriptorGetBuffer(buf_hdr);
 
 		/*
-		 * We need a share-lock on the buffer contents to write it out (else
-		 * we might write invalid data, eg because someone else is compacting
-		 * the page contents while we write).  We must use a conditional lock
-		 * acquisition here to avoid deadlock.  Even though the buffer was not
-		 * pinned (and therefore surely not locked) when StrategyGetBuffer
-		 * returned it, someone else could have pinned and exclusive-locked it
-		 * by the time we get here.  If we try to get the lock unconditionally,
-		 * we'd block waiting for them; if they later block waiting for us,
-		 * deadlock ensues.  (This has been observed to happen when two
-		 * backends are both trying to split btree index pages, and the second
-		 * one just happens to be trying to split the page the first one got
-		 * from StrategyGetBuffer.)
+		 * We shouldn't have any other pins for this buffer.
 		 */
-		content_lock = BufferDescriptorGetContentLock(buf_hdr);
-		if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
-		{
-			/*
-			 * Someone else has locked the buffer, so give it up and loop back
-			 * to get another one.
-			 */
-			UnpinBuffer(buf_hdr);
-			goto again;
-		}
+		CheckBufferIsPinnedOnce(buf);
 
 		/*
-		 * If using a nondefault strategy, and writing the buffer would
-		 * require a WAL flush, let the strategy decide whether to go ahead
-		 * and write/reuse the buffer or to choose another victim.  We need a
-		 * lock to inspect the page LSN, so this can't be done inside
-		 * StrategyGetBuffer.
+		 * If the buffer was dirty, try to write it out.  There is a race
+		 * condition here, in that someone might dirty it after we released
+		 * the buffer header lock above, or even while we are writing it out
+		 * (since our share-lock won't prevent hint-bit updates).  We will
+		 * recheck the dirty bit after re-locking the buffer header.
 		 */
-		if (strategy != NULL)
+		if (buf_state & BM_DIRTY)
 		{
-			XLogRecPtr	lsn;
+			LWLock	   *content_lock;
 
-			/* Read the LSN while holding buffer header lock */
-			buf_state = LockBufHdr(buf_hdr);
-			lsn = BufferGetLSN(buf_hdr);
-			UnlockBufHdr(buf_hdr, buf_state);
+			Assert(buf_state & BM_TAG_VALID);
+			Assert(buf_state & BM_VALID);
 
-			if (XLogNeedsFlush(lsn)
-				&& StrategyRejectBuffer(strategy, buf_hdr, from_ring))
+			/*
+			 * We need a share-lock on the buffer contents to write it out
+			 * (else we might write invalid data, eg because someone else is
+			 * compacting the page contents while we write).  We must use a
+			 * conditional lock acquisition here to avoid deadlock.  Even
+			 * though the buffer was not pinned (and therefore surely not
+			 * locked) when StrategyGetBuffer returned it, someone else could
+			 * have pinned and exclusive-locked it by the time we get here. If
+			 * we try to get the lock unconditionally, we'd block waiting for
+			 * them; if they later block waiting for us, deadlock ensues.
+			 * (This has been observed to happen when two backends are both
+			 * trying to split btree index pages, and the second one just
+			 * happens to be trying to split the page the first one got from
+			 * StrategyGetBuffer.)
+			 */
+			content_lock = BufferDescriptorGetContentLock(buf_hdr);
+			if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
+			{
+				/*
+				 * Someone else has locked the buffer, so give it up and loop
+				 * back to get another one.
+				 */
+				UnpinBuffer(buf_hdr);
+				continue;
+			}
+
+			/*
+			 * If using a nondefault strategy, and writing the buffer would
+			 * require a WAL flush, let the strategy decide whether to go
+			 * ahead and write/reuse the buffer or to choose another victim.
+			 * We need the content lock to inspect the page LSN, so this can't
+			 * be done inside StrategyGetBuffer.
+			 */
+			if (StrategyRejectBuffer(strategy, buf_hdr, from_ring))
 			{
 				LWLockRelease(content_lock);
 				UnpinBuffer(buf_hdr);
-				goto again;
+				continue;
 			}
-		}
 
-		/* OK, do the I/O */
-		FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
-		LWLockRelease(content_lock);
+			/* OK, do the I/O */
+			FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
+			LWLockRelease(content_lock);
 
-		ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
-									  &buf_hdr->tag);
-	}
+			ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context,
+										  &buf_hdr->tag);
+		}
 
 
-	if (buf_state & BM_VALID)
-	{
+		if (buf_state & BM_VALID)
+		{
+			/*
+			 * When a BufferAccessStrategy is in use, blocks evicted from
+			 * shared buffers are counted as IOOP_EVICT in the corresponding
+			 * context (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted
+			 * by a strategy in two cases: 1) while initially claiming buffers
+			 * for the strategy ring 2) to replace an existing strategy ring
+			 * buffer because it is pinned or in use and cannot be reused.
+			 *
+			 * Blocks evicted from buffers already in the strategy ring are
+			 * counted as IOOP_REUSE in the corresponding strategy context.
+			 *
+			 * At this point, we can accurately count evictions and reuses,
+			 * because we have successfully claimed the valid buffer.
+			 * Previously, we may have been forced to release the buffer due
+			 * to concurrent pinners or erroring out.
+			 */
+			pgstat_count_io_op(IOOBJECT_RELATION, io_context,
+							   from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
+		}
+
 		/*
-		 * When a BufferAccessStrategy is in use, blocks evicted from shared
-		 * buffers are counted as IOOP_EVICT in the corresponding context
-		 * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
-		 * strategy in two cases: 1) while initially claiming buffers for the
-		 * strategy ring 2) to replace an existing strategy ring buffer
-		 * because it is pinned or in use and cannot be reused.
-		 *
-		 * Blocks evicted from buffers already in the strategy ring are
-		 * counted as IOOP_REUSE in the corresponding strategy context.
-		 *
-		 * At this point, we can accurately count evictions and reuses,
-		 * because we have successfully claimed the valid buffer. Previously,
-		 * we may have been forced to release the buffer due to concurrent
-		 * pinners or erroring out.
+		 * If the buffer has an entry in the buffer mapping table, delete it.
+		 * This can fail because another backend could have pinned or dirtied
+		 * the buffer.
 		 */
-		pgstat_count_io_op(IOOBJECT_RELATION, io_context,
-						   from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0);
-	}
+		if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
+		{
+			UnpinBuffer(buf_hdr);
+			continue;
+		}
 
-	/*
-	 * If the buffer has an entry in the buffer mapping table, delete it. This
-	 * can fail because another backend could have pinned or dirtied the
-	 * buffer.
-	 */
-	if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
-	{
-		UnpinBuffer(buf_hdr);
-		goto again;
+		break;
 	}
 
 	/* a final set of sanity checks */
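For readability, here is the control-flow skeleton of the refactored GetVictimBuffer() distilled from the additions above; comments, the I/O-statistics accounting, and the writeback scheduling are elided, so this is a reading aid for the hunk rather than standalone-compilable source.

    /* Select a victim buffer using an optimistic locking scheme. */
    for (;;)
    {
        /* Attempt to claim a victim buffer.  Buffer is returned pinned. */
        buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring);
        buf = BufferDescriptorGetBuffer(buf_hdr);
        CheckBufferIsPinnedOnce(buf);

        if (buf_state & BM_DIRTY)
        {
            /* Take the content lock conditionally; on contention, retry. */
            content_lock = BufferDescriptorGetContentLock(buf_hdr);
            if (!LWLockConditionalAcquire(content_lock, LW_SHARED))
            {
                UnpinBuffer(buf_hdr);
                continue;
            }

            /* Let a nondefault strategy reject a WAL-flush-requiring write. */
            if (StrategyRejectBuffer(strategy, buf_hdr, from_ring))
            {
                LWLockRelease(content_lock);
                UnpinBuffer(buf_hdr);
                continue;
            }

            /* Write the dirty victim out. */
            FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context);
            LWLockRelease(content_lock);
        }

        /* Drop the old buffer mapping entry; on interference, retry. */
        if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr))
        {
            UnpinBuffer(buf_hdr);
            continue;
        }

        break;              /* victim successfully claimed */
    }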

src/backend/storage/buffer/freelist.c (18 additions, 2 deletions)
@@ -15,6 +15,7 @@
  */
 #include "postgres.h"
 
+#include "access/xlog.h"
 #include "pgstat.h"
 #include "port/atomics.h"
 #include "storage/buf_internals.h"
@@ -779,12 +780,21 @@ IOContextForStrategy(BufferAccessStrategy strategy)
  * be written out and doing so would require flushing WAL too.  This gives us
  * a chance to choose a different victim.
  *
+ * The buffer must be pinned and content locked and the buffer header spinlock
+ * must not be held. We must hold the content lock to examine the LSN.
+ *
  * Returns true if buffer manager should ask for a new victim, and false
  * if this buffer should be written and re-used.
  */
 bool
 StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
 {
+	uint32		buf_state;
+	XLogRecPtr	lsn;
+
+	if (!strategy)
+		return false;
+
 	/* We only do this in bulkread mode */
 	if (strategy->btype != BAS_BULKREAD)
 		return false;
@@ -794,11 +804,17 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, BufferDesc *buf, bool from_ring)
 		strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
 		return false;
 
+	buf_state = LockBufHdr(buf);
+	lsn = BufferGetLSN(buf);
+	UnlockBufHdr(buf, buf_state);
+
+	if (XLogNeedsFlush(lsn))
+		return false;
+
 	/*
-	 * Remove the dirty buffer from the ring; necessary to prevent infinite
+	 * Remove the dirty buffer from the ring; necessary to prevent an infinite
 	 * loop if all ring members are dirty.
 	 */
 	strategy->buffers[strategy->current] = InvalidBuffer;
-
 	return true;
 }
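Read together with the bufmgr.c hunk, the net effect of this file's changes is that both the NULL-strategy guard and the page-LSN/WAL-flush test move from the caller into StrategyRejectBuffer(), which is also why freelist.c now includes access/xlog.h. Paraphrasing the two diffs (this is a summary of lines shown above, not new code), the caller side shrinks from:

    /* Before, in GetVictimBuffer(): the caller guarded and checked the LSN. */
    if (strategy != NULL)
    {
        XLogRecPtr  lsn;

        /* Read the LSN while holding buffer header lock */
        buf_state = LockBufHdr(buf_hdr);
        lsn = BufferGetLSN(buf_hdr);
        UnlockBufHdr(buf_hdr, buf_state);

        if (XLogNeedsFlush(lsn) &&
            StrategyRejectBuffer(strategy, buf_hdr, from_ring))
        {
            LWLockRelease(content_lock);
            UnpinBuffer(buf_hdr);
            continue;           /* formerly: goto again */
        }
    }

    /* After: a single call; StrategyRejectBuffer() handles NULL strategies
     * and the WAL-flush test internally. */
    if (StrategyRejectBuffer(strategy, buf_hdr, from_ring))
    {
        LWLockRelease(content_lock);
        UnpinBuffer(buf_hdr);
        continue;
    }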

src/include/storage/buf_internals.h (6 additions, 0 deletions)
@@ -421,6 +421,12 @@ ResourceOwnerForgetBufferIO(ResourceOwner owner, Buffer buffer)
 /*
  * Internal buffer management routines
  */
+
+
+/* Note: these two macros only work on shared buffers, not local ones! */
+#define BufHdrGetBlock(bufHdr)	((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ))
+#define BufferGetLSN(bufHdr)	(PageGetLSN(BufHdrGetBlock(bufHdr)))
+
 /* bufmgr.c */
 extern void WritebackContextInit(WritebackContext *context, int *max_pending);
 extern void IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context);
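Moving these two macros out of bufmgr.c makes them visible to freelist.c, whose StrategyRejectBuffer() now reads the page LSN itself. As an illustrative restatement of the definitions above (not new code): BufHdrGetBlock() locates a shared buffer's data block inside the BufferBlocks array by buf_id, and BufferGetLSN() reads the page LSN from that block; a consistent read requires the buffer header spinlock, which is exactly the sequence the new freelist.c code uses:

    /* Locked LSN read, as in the new StrategyRejectBuffer() */
    buf_state = LockBufHdr(buf);    /* pin down the header state */
    lsn = BufferGetLSN(buf);        /* PageGetLSN() on the buffer's block */
    UnlockBufHdr(buf, buf_state);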
