68 | 68 | #include "utils/timestamp.h" |
69 | 69 |
70 | 70 |
71 | | -/* Note: these two macros only work on shared buffers, not local ones! */ |
72 | | -#define BufHdrGetBlock(bufHdr) ((Block) (BufferBlocks + ((Size) (bufHdr)->buf_id) * BLCKSZ)) |
73 | | -#define BufferGetLSN(bufHdr) (PageGetLSN(BufHdrGetBlock(bufHdr))) |
74 | | - |
75 | 71 | /* Note: this macro only works on local buffers, not shared ones! */ |
76 | 72 | #define LocalBufHdrGetBlock(bufHdr) \ |
77 | 73 | LocalBufferBlockPointers[-((bufHdr)->buf_id + 2)] |
@@ -2325,125 +2321,116 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) |
2325 | 2321 | ReservePrivateRefCountEntry(); |
2326 | 2322 | ResourceOwnerEnlarge(CurrentResourceOwner); |
2327 | 2323 |
2328 | | - /* we return here if a prospective victim buffer gets used concurrently */ |
2329 | | -again: |
2330 | | - |
2331 | | - /* |
2332 | | - * Select a victim buffer. The buffer is returned pinned and owned by |
2333 | | - * this backend. |
2334 | | - */ |
2335 | | - buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring); |
2336 | | - buf = BufferDescriptorGetBuffer(buf_hdr); |
2337 | | - |
2338 | | - /* |
2339 | | - * We shouldn't have any other pins for this buffer. |
2340 | | - */ |
2341 | | - CheckBufferIsPinnedOnce(buf); |
2342 | | - |
2343 | | - /* |
2344 | | - * If the buffer was dirty, try to write it out. There is a race |
2345 | | - * condition here, in that someone might dirty it after we released the |
2346 | | - * buffer header lock above, or even while we are writing it out (since |
2347 | | - * our share-lock won't prevent hint-bit updates). We will recheck the |
2348 | | - * dirty bit after re-locking the buffer header. |
2349 | | - */ |
2350 | | - if (buf_state & BM_DIRTY) |
| 2324 | + /* Select a victim buffer using an optimistic locking scheme. */ |
| 2325 | + for (;;) |
2351 | 2326 | { |
2352 | | - LWLock *content_lock; |
2353 | 2327 |
2354 | | - Assert(buf_state & BM_TAG_VALID); |
2355 | | - Assert(buf_state & BM_VALID); |
| 2328 | + /* Attempt to claim a victim buffer. Buffer is returned pinned. */ |
| 2329 | + buf_hdr = StrategyGetBuffer(strategy, &buf_state, &from_ring); |
| 2330 | + buf = BufferDescriptorGetBuffer(buf_hdr); |
2356 | 2331 |
2357 | 2332 | /* |
2358 | | - * We need a share-lock on the buffer contents to write it out (else |
2359 | | - * we might write invalid data, eg because someone else is compacting |
2360 | | - * the page contents while we write). We must use a conditional lock |
2361 | | - * acquisition here to avoid deadlock. Even though the buffer was not |
2362 | | - * pinned (and therefore surely not locked) when StrategyGetBuffer |
2363 | | - * returned it, someone else could have pinned and exclusive-locked it |
2364 | | - * by the time we get here. If we try to get the lock unconditionally, |
2365 | | - * we'd block waiting for them; if they later block waiting for us, |
2366 | | - * deadlock ensues. (This has been observed to happen when two |
2367 | | - * backends are both trying to split btree index pages, and the second |
2368 | | - * one just happens to be trying to split the page the first one got |
2369 | | - * from StrategyGetBuffer.) |
| 2333 | + * We shouldn't have any other pins for this buffer. |
2370 | 2334 | */ |
2371 | | - content_lock = BufferDescriptorGetContentLock(buf_hdr); |
2372 | | - if (!LWLockConditionalAcquire(content_lock, LW_SHARED)) |
2373 | | - { |
2374 | | - /* |
2375 | | - * Someone else has locked the buffer, so give it up and loop back |
2376 | | - * to get another one. |
2377 | | - */ |
2378 | | - UnpinBuffer(buf_hdr); |
2379 | | - goto again; |
2380 | | - } |
| 2335 | + CheckBufferIsPinnedOnce(buf); |
2381 | 2336 |
2382 | 2337 | /* |
2383 | | - * If using a nondefault strategy, and writing the buffer would |
2384 | | - * require a WAL flush, let the strategy decide whether to go ahead |
2385 | | - * and write/reuse the buffer or to choose another victim. We need a |
2386 | | - * lock to inspect the page LSN, so this can't be done inside |
2387 | | - * StrategyGetBuffer. |
| 2338 | + * If the buffer was dirty, try to write it out. There is a race |
| 2339 | + * condition here, in that someone might dirty it after we released |
| 2340 | + * the buffer header lock above, or even while we are writing it out |
| 2341 | + * (since our share-lock won't prevent hint-bit updates). We will |
| 2342 | + * recheck the dirty bit after re-locking the buffer header. |
2388 | 2343 | */ |
2389 | | - if (strategy != NULL) |
| 2344 | + if (buf_state & BM_DIRTY) |
2390 | 2345 | { |
2391 | | - XLogRecPtr lsn; |
| 2346 | + LWLock *content_lock; |
2392 | 2347 |
2393 | | - /* Read the LSN while holding buffer header lock */ |
2394 | | - buf_state = LockBufHdr(buf_hdr); |
2395 | | - lsn = BufferGetLSN(buf_hdr); |
2396 | | - UnlockBufHdr(buf_hdr, buf_state); |
| 2348 | + Assert(buf_state & BM_TAG_VALID); |
| 2349 | + Assert(buf_state & BM_VALID); |
2397 | 2350 |
2398 | | - if (XLogNeedsFlush(lsn) |
2399 | | - && StrategyRejectBuffer(strategy, buf_hdr, from_ring)) |
| 2351 | + /* |
| 2352 | + * We need a share-lock on the buffer contents to write it out |
| 2353 | + * (else we might write invalid data, eg because someone else is |
| 2354 | + * compacting the page contents while we write). We must use a |
| 2355 | + * conditional lock acquisition here to avoid deadlock. Even |
| 2356 | + * though the buffer was not pinned (and therefore surely not |
| 2357 | + * locked) when StrategyGetBuffer returned it, someone else could |
| 2358 | + * have pinned and exclusive-locked it by the time we get here. If |
| 2359 | + * we try to get the lock unconditionally, we'd block waiting for |
| 2360 | + * them; if they later block waiting for us, deadlock ensues. |
| 2361 | + * (This has been observed to happen when two backends are both |
| 2362 | + * trying to split btree index pages, and the second one just |
| 2363 | + * happens to be trying to split the page the first one got from |
| 2364 | + * StrategyGetBuffer.) |
| 2365 | + */ |
| 2366 | + content_lock = BufferDescriptorGetContentLock(buf_hdr); |
| 2367 | + if (!LWLockConditionalAcquire(content_lock, LW_SHARED)) |
| 2368 | + { |
| 2369 | + /* |
| 2370 | + * Someone else has locked the buffer, so give it up and loop |
| 2371 | + * back to get another one. |
| 2372 | + */ |
| 2373 | + UnpinBuffer(buf_hdr); |
| 2374 | + continue; |
| 2375 | + } |
| 2376 | + |
| 2377 | + /* |
| 2378 | + * If using a nondefault strategy, and writing the buffer would |
| 2379 | + * require a WAL flush, let the strategy decide whether to go |
| 2380 | + * ahead and write/reuse the buffer or to choose another victim. |
| 2381 | + * We need the content lock to inspect the page LSN, so this can't |
| 2382 | + * be done inside StrategyGetBuffer. |
| 2383 | + */ |
| 2384 | + if (StrategyRejectBuffer(strategy, buf_hdr, from_ring)) |
2400 | 2385 | { |
2401 | 2386 | LWLockRelease(content_lock); |
2402 | 2387 | UnpinBuffer(buf_hdr); |
2403 | | - goto again; |
| 2388 | + continue; |
2404 | 2389 | } |
2405 | | - } |
2406 | 2390 |
2407 | | - /* OK, do the I/O */ |
2408 | | - FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context); |
2409 | | - LWLockRelease(content_lock); |
| 2391 | + /* OK, do the I/O */ |
| 2392 | + FlushBuffer(buf_hdr, NULL, IOOBJECT_RELATION, io_context); |
| 2393 | + LWLockRelease(content_lock); |
2410 | 2394 |
2411 | | - ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context, |
2412 | | - &buf_hdr->tag); |
2413 | | - } |
| 2395 | + ScheduleBufferTagForWriteback(&BackendWritebackContext, io_context, |
| 2396 | + &buf_hdr->tag); |
| 2397 | + } |
2414 | 2398 |
2415 | 2399 |
2416 | | - if (buf_state & BM_VALID) |
2417 | | - { |
| 2400 | + if (buf_state & BM_VALID) |
| 2401 | + { |
| 2402 | + /* |
| 2403 | + * When a BufferAccessStrategy is in use, blocks evicted from |
| 2404 | + * shared buffers are counted as IOOP_EVICT in the corresponding |
| 2405 | + * context (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted |
| 2406 | + * by a strategy in two cases: 1) while initially claiming buffers |
| 2407 | + * for the strategy ring 2) to replace an existing strategy ring |
| 2408 | + * buffer because it is pinned or in use and cannot be reused. |
| 2409 | + * |
| 2410 | + * Blocks evicted from buffers already in the strategy ring are |
| 2411 | + * counted as IOOP_REUSE in the corresponding strategy context. |
| 2412 | + * |
| 2413 | + * At this point, we can accurately count evictions and reuses, |
| 2414 | + * because we have successfully claimed the valid buffer. |
| 2415 | + * Previously, we may have been forced to release the buffer due |
| 2416 | + * to concurrent pinners or erroring out. |
| 2417 | + */ |
| 2418 | + pgstat_count_io_op(IOOBJECT_RELATION, io_context, |
| 2419 | + from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0); |
| 2420 | + } |
| 2421 | + |
2418 | 2422 | /* |
2419 | | - * When a BufferAccessStrategy is in use, blocks evicted from shared |
2420 | | - * buffers are counted as IOOP_EVICT in the corresponding context |
2421 | | - * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a |
2422 | | - * strategy in two cases: 1) while initially claiming buffers for the |
2423 | | - * strategy ring 2) to replace an existing strategy ring buffer |
2424 | | - * because it is pinned or in use and cannot be reused. |
2425 | | - * |
2426 | | - * Blocks evicted from buffers already in the strategy ring are |
2427 | | - * counted as IOOP_REUSE in the corresponding strategy context. |
2428 | | - * |
2429 | | - * At this point, we can accurately count evictions and reuses, |
2430 | | - * because we have successfully claimed the valid buffer. Previously, |
2431 | | - * we may have been forced to release the buffer due to concurrent |
2432 | | - * pinners or erroring out. |
| 2423 | + * If the buffer has an entry in the buffer mapping table, delete it. |
| 2424 | + * This can fail because another backend could have pinned or dirtied |
| 2425 | + * the buffer. |
2433 | 2426 | */ |
2434 | | - pgstat_count_io_op(IOOBJECT_RELATION, io_context, |
2435 | | - from_ring ? IOOP_REUSE : IOOP_EVICT, 1, 0); |
2436 | | - } |
| 2427 | + if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr)) |
| 2428 | + { |
| 2429 | + UnpinBuffer(buf_hdr); |
| 2430 | + continue; |
| 2431 | + } |
2437 | 2432 |
2438 | | - /* |
2439 | | - * If the buffer has an entry in the buffer mapping table, delete it. This |
2440 | | - * can fail because another backend could have pinned or dirtied the |
2441 | | - * buffer. |
2442 | | - */ |
2443 | | - if ((buf_state & BM_TAG_VALID) && !InvalidateVictimBuffer(buf_hdr)) |
2444 | | - { |
2445 | | - UnpinBuffer(buf_hdr); |
2446 | | - goto again; |
| 2433 | + break; |
2447 | 2434 | } |
2448 | 2435 |
2449 | 2436 | /* a final set of sanity checks */ |
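
For context, here is a minimal, self-contained sketch of the control-flow change this hunk makes: the old code retried via a "goto again" label, while the new code wraps victim selection in a for (;;) loop that continues on every failure path and falls out once a buffer is claimed. The helpers below (try_claim, buffer_is_dirty, try_share_lock, strategy_rejects, flush_buffer, unpin, release_lock) are hypothetical stand-ins for StrategyGetBuffer, the BM_DIRTY test, LWLockConditionalAcquire, StrategyRejectBuffer, FlushBuffer, UnpinBuffer, and LWLockRelease; this illustrates the loop structure only, not the bufmgr.c implementation.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for the PostgreSQL calls used in the hunk above. */
static int  try_claim(void)           { return rand() % 16; }     /* StrategyGetBuffer */
static bool buffer_is_dirty(int buf)  { return (buf & 1) != 0; }  /* buf_state & BM_DIRTY */
static bool try_share_lock(int buf)   { return (buf & 2) == 0; }  /* LWLockConditionalAcquire */
static bool strategy_rejects(int buf) { return (buf & 4) != 0; }  /* StrategyRejectBuffer */
static void flush_buffer(int buf)     { printf("flushed buffer %d\n", buf); }
static void unpin(int buf)            { (void) buf; }             /* UnpinBuffer */
static void release_lock(int buf)     { (void) buf; }             /* LWLockRelease */

/*
 * Sketch of the new retry loop: every failure path unpins the candidate and
 * "continue"s where the old code said "goto again"; the happy path falls out
 * of the loop with the claimed buffer.
 */
static int
get_victim(void)
{
    for (;;)
    {
        int buf = try_claim();                 /* candidate returned pinned */

        if (buffer_is_dirty(buf))
        {
            if (!try_share_lock(buf))          /* conditional acquire, to avoid deadlock */
            {
                unpin(buf);
                continue;                      /* was: goto again */
            }
            if (strategy_rejects(buf))         /* e.g. writing it would force a WAL flush */
            {
                release_lock(buf);
                unpin(buf);
                continue;                      /* was: goto again */
            }
            flush_buffer(buf);
            release_lock(buf);
        }

        return buf;                            /* claimed; proceed to the sanity checks */
    }
}

int
main(void)
{
    printf("victim = %d\n", get_victim());
    return 0;
}

Replacing the label with a structured loop keeps every retry path visible inside one block, so the later buffer-mapping retry (the BM_TAG_VALID / InvalidateVictimBuffer branch in the hunk) can use the same continue instead of a backwards goto.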