Skip to content

Commit 961a75b

Browse files
melanieplageman authored and Commitfest Bot committed
Allow on-access pruning to set pages all-visible
Many queries do not modify the underlying relation. For such queries, if on-access pruning occurs during the scan, we can check whether the page has become all-visible and update the visibility map accordingly. Previously, only vacuum and COPY FREEZE marked pages as all-visible or all-frozen. Supporting this requires passing information about whether the relation is modified from the executor down to the scan descriptor. This commit implements on-access VM setting for sequential scans as well as for the underlying heap relation in index scans and bitmap heap scans.
1 parent e918e79 commit 961a75b

File tree

16 files changed

+284
-39
lines changed

16 files changed

+284
-39
lines changed

src/backend/access/heap/heapam.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ heap_prepare_pagescan(TableScanDesc sscan)
555555
Buffer buffer = scan->rs_cbuf;
556556
BlockNumber block = scan->rs_cblock;
557557
Snapshot snapshot;
558+
Buffer *vmbuffer = NULL;
558559
Page page;
559560
int lines;
560561
bool all_visible;
@@ -569,7 +570,9 @@ heap_prepare_pagescan(TableScanDesc sscan)
569570
/*
570571
* Prune and repair fragmentation for the whole page, if possible.
571572
*/
572-
heap_page_prune_opt(scan->rs_base.rs_rd, buffer);
573+
if (sscan->rs_flags & SO_ALLOW_VM_SET)
574+
vmbuffer = &scan->rs_vmbuffer;
575+
heap_page_prune_opt(scan->rs_base.rs_rd, buffer, vmbuffer);
573576

574577
/*
575578
* We must hold share lock on the buffer content while examining tuple
@@ -1246,6 +1249,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
12461249
sizeof(TBMIterateResult));
12471250
}
12481251

1252+
scan->rs_vmbuffer = InvalidBuffer;
12491253

12501254
return (TableScanDesc) scan;
12511255
}
@@ -1284,6 +1288,12 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
12841288
scan->rs_cbuf = InvalidBuffer;
12851289
}
12861290

1291+
if (BufferIsValid(scan->rs_vmbuffer))
1292+
{
1293+
ReleaseBuffer(scan->rs_vmbuffer);
1294+
scan->rs_vmbuffer = InvalidBuffer;
1295+
}
1296+
12871297
/*
12881298
* SO_TYPE_BITMAPSCAN would be cleaned up here, but it does not hold any
12891299
* additional data vs a normal HeapScan
@@ -1316,6 +1326,9 @@ heap_endscan(TableScanDesc sscan)
13161326
if (BufferIsValid(scan->rs_cbuf))
13171327
ReleaseBuffer(scan->rs_cbuf);
13181328

1329+
if (BufferIsValid(scan->rs_vmbuffer))
1330+
ReleaseBuffer(scan->rs_vmbuffer);
1331+
13191332
/*
13201333
* Must free the read stream before freeing the BufferAccessStrategy.
13211334
*/

src/backend/access/heap/heapam_handler.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ heapam_index_fetch_begin(Relation rel)
8585

8686
hscan->xs_base.rel = rel;
8787
hscan->xs_cbuf = InvalidBuffer;
88+
hscan->xs_vmbuffer = InvalidBuffer;
8889

8990
return &hscan->xs_base;
9091
}
@@ -99,6 +100,12 @@ heapam_index_fetch_reset(IndexFetchTableData *scan)
99100
ReleaseBuffer(hscan->xs_cbuf);
100101
hscan->xs_cbuf = InvalidBuffer;
101102
}
103+
104+
if (BufferIsValid(hscan->xs_vmbuffer))
105+
{
106+
ReleaseBuffer(hscan->xs_vmbuffer);
107+
hscan->xs_vmbuffer = InvalidBuffer;
108+
}
102109
}
103110

104111
static void
@@ -138,7 +145,8 @@ heapam_index_fetch_tuple(struct IndexFetchTableData *scan,
138145
* Prune page, but only if we weren't already on this page
139146
*/
140147
if (prev_buf != hscan->xs_cbuf)
141-
heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf);
148+
heap_page_prune_opt(hscan->xs_base.rel, hscan->xs_cbuf,
149+
scan->modifies_base_rel ? NULL : &hscan->xs_vmbuffer);
142150
}
143151

144152
/* Obtain share-lock on the buffer so we can examine visibility */
@@ -2471,6 +2479,7 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
24712479
TBMIterateResult *tbmres;
24722480
OffsetNumber offsets[TBM_MAX_TUPLES_PER_PAGE];
24732481
int noffsets = -1;
2482+
Buffer *vmbuffer = NULL;
24742483

24752484
Assert(scan->rs_flags & SO_TYPE_BITMAPSCAN);
24762485
Assert(hscan->rs_read_stream);
@@ -2517,7 +2526,9 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
25172526
/*
25182527
* Prune and repair fragmentation for the whole page, if possible.
25192528
*/
2520-
heap_page_prune_opt(scan->rs_rd, buffer);
2529+
if (scan->rs_flags & SO_ALLOW_VM_SET)
2530+
vmbuffer = &hscan->rs_vmbuffer;
2531+
heap_page_prune_opt(scan->rs_rd, buffer, vmbuffer);
25212532

25222533
/*
25232534
* We must hold share lock on the buffer content while examining tuple

src/backend/access/heap/pruneheap.c

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,9 @@ static bool heap_page_will_set_vis(Relation relation,
188188
Buffer heap_buf,
189189
Buffer vmbuffer,
190190
bool blk_known_av,
191-
const PruneState *prstate,
191+
PruneReason reason,
192+
bool do_prune, bool do_freeze,
193+
PruneState *prstate,
192194
uint8 *vmflags,
193195
bool *do_set_pd_vis);
194196

@@ -203,9 +205,13 @@ static bool heap_page_will_set_vis(Relation relation,
203205
* if there's not any use in pruning.
204206
*
205207
* Caller must have pin on the buffer, and must *not* have a lock on it.
208+
*
209+
* If vmbuffer is not NULL, it is okay for pruning to set the visibility map if
210+
* the page is all-visible. We will take care of pinning and, if needed,
211+
* reading in the page of the visibility map.
206212
*/
207213
void
208-
heap_page_prune_opt(Relation relation, Buffer buffer)
214+
heap_page_prune_opt(Relation relation, Buffer buffer, Buffer *vmbuffer)
209215
{
210216
Page page = BufferGetPage(buffer);
211217
TransactionId prune_xid;
@@ -271,12 +277,21 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
271277
PruneFreezeParams params;
272278
PruneFreezeResult presult;
273279

280+
params.options = 0;
281+
params.vmbuffer = InvalidBuffer;
282+
283+
if (vmbuffer)
284+
{
285+
visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
286+
params.options = HEAP_PAGE_PRUNE_UPDATE_VIS;
287+
params.vmbuffer = *vmbuffer;
288+
}
289+
274290
params.relation = relation;
275291
params.buffer = buffer;
276292
params.reason = PRUNE_ON_ACCESS;
277293
params.vistest = vistest;
278294
params.cutoffs = NULL;
279-
params.vmbuffer = InvalidBuffer;
280295
params.blk_known_av = false;
281296

282297
/*
@@ -456,6 +471,9 @@ heap_page_will_freeze(Relation relation, Buffer buffer,
456471
* have examined this page’s VM bits (e.g., VACUUM in the previous
457472
* heap_vac_scan_next_block() call) and can pass that along.
458473
*
474+
* This should be called only after do_freeze has been decided (and do_prune
475+
* has been set), as these factor into our heuristic-based decision.
476+
*
459477
* Returns true if one or both VM bits should be set, along with the desired
460478
* flags in *vmflags. Also indicates via do_set_pd_vis whether PD_ALL_VISIBLE
461479
* should be set on the heap page.
@@ -466,7 +484,9 @@ heap_page_will_set_vis(Relation relation,
466484
Buffer heap_buf,
467485
Buffer vmbuffer,
468486
bool blk_known_av,
469-
const PruneState *prstate,
487+
PruneReason reason,
488+
bool do_prune, bool do_freeze,
489+
PruneState *prstate,
470490
uint8 *vmflags,
471491
bool *do_set_pd_vis)
472492
{
@@ -482,6 +502,23 @@ heap_page_will_set_vis(Relation relation,
482502
return false;
483503
}
484504

505+
/*
506+
* If this is an on-access call and we're not actually pruning, avoid
507+
* setting the visibility map if it would newly dirty the heap page or, if
508+
* the page is already dirty, if doing so would require including a
509+
* full-page image (FPI) of the heap page in the WAL. This situation
510+
* should be rare, as on-access pruning is only attempted when
511+
* pd_prune_xid is valid.
512+
*/
513+
if (reason == PRUNE_ON_ACCESS &&
514+
prstate->all_visible &&
515+
!do_prune && !do_freeze &&
516+
(!BufferIsDirty(heap_buf) || XLogCheckBufferNeedsBackup(heap_buf)))
517+
{
518+
prstate->all_visible = prstate->all_frozen = false;
519+
return false;
520+
}
521+
485522
if (prstate->all_visible && !PageIsAllVisible(heap_page))
486523
*do_set_pd_vis = true;
487524

@@ -505,6 +542,11 @@ heap_page_will_set_vis(Relation relation,
505542
* page-level bit is clear. However, it's possible that in vacuum the bit
506543
* got cleared after heap_vac_scan_next_block() was called, so we must
507544
* recheck with buffer lock before concluding that the VM is corrupt.
545+
*
546+
* This will never trigger for on-access pruning because it couldn't have
547+
* done a previous visibility map lookup and thus will always pass
548+
* blk_known_av as false. A future vacuum will have to take care of fixing
549+
* the corruption.
508550
*/
509551
else if (blk_known_av && !PageIsAllVisible(heap_page) &&
510552
visibilitymap_get_status(relation, heap_blk, &vmbuffer) != 0)
@@ -913,6 +955,14 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
913955
prstate.ndead > 0 ||
914956
prstate.nunused > 0;
915957

958+
/*
959+
* Even if we don't prune anything, if we found a new value for the
960+
* pd_prune_xid field or the page was marked full, we will update the hint
961+
* bit.
962+
*/
963+
do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
964+
PageIsFull(page);
965+
916966
/*
917967
* After processing all the live tuples on the page, if the newest xmin
918968
* amongst them is not visible to everyone, the page cannot be
@@ -923,14 +973,6 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
923973
!GlobalVisXidVisibleToAll(prstate.vistest, prstate.visibility_cutoff_xid))
924974
prstate.all_visible = prstate.all_frozen = false;
925975

926-
/*
927-
* Even if we don't prune anything, if we found a new value for the
928-
* pd_prune_xid field or the page was marked full, we will update the hint
929-
* bit.
930-
*/
931-
do_hint_prune = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
932-
PageIsFull(page);
933-
934976
/*
935977
* Decide if we want to go ahead with freezing according to the freeze
936978
* plans we prepared, or not.
@@ -974,6 +1016,7 @@ heap_page_prune_and_freeze(PruneFreezeParams *params,
9741016
*/
9751017
do_set_vm = heap_page_will_set_vis(params->relation,
9761018
blockno, buffer, vmbuffer, params->blk_known_av,
1019+
params->reason, do_prune, do_freeze,
9771020
&prstate, &new_vmbits, &do_set_pd_vis);
9781021

9791022
/* We should only set the VM if PD_ALL_VISIBLE is set or will be */
@@ -2250,7 +2293,7 @@ heap_log_freeze_plan(HeapTupleFreeze *tuples, int ntuples,
22502293

22512294
/*
22522295
* Calculate the conflict horizon for the whole XLOG_HEAP2_PRUNE_VACUUM_SCAN
2253-
* record.
2296+
* or XLOG_HEAP2_PRUNE_ON_ACCESS record.
22542297
*/
22552298
static TransactionId
22562299
get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
@@ -2319,8 +2362,8 @@ get_conflict_xid(bool do_prune, bool do_freeze, bool do_set_vm,
23192362
* - Reaping: During vacuum phase III, items that are already LP_DEAD are
23202363
* marked as unused.
23212364
*
2322-
* - VM updates: After vacuum phases I and III, the heap page may be marked
2323-
* all-visible and all-frozen.
2365+
* - VM updates: After vacuum phases I and III and on-access, the heap page
2366+
* may be marked all-visible and all-frozen.
23242367
*
23252368
* These changes all happen together, so we use a single WAL record for them
23262369
* all.

src/backend/access/index/indexam.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,32 @@ index_beginscan(Relation heapRelation,
289289
return scan;
290290
}
291291

292+
/*
293+
* Similar to index_beginscan(), but allows the caller to indicate whether the
294+
* query modifies the underlying base relation. This is used when the caller
295+
* wants to attempt marking pages in the base relation as all-visible in the
296+
* visibility map during on-access pruning.
297+
*/
298+
IndexScanDesc
299+
index_beginscan_vmset(Relation heapRelation,
300+
Relation indexRelation,
301+
Snapshot snapshot,
302+
IndexScanInstrumentation *instrument,
303+
int nkeys, int norderbys, bool modifies_base_rel)
304+
{
305+
IndexScanDesc scan;
306+
307+
scan = index_beginscan(heapRelation,
308+
indexRelation,
309+
snapshot,
310+
instrument,
311+
nkeys, norderbys);
312+
313+
scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
314+
315+
return scan;
316+
}
317+
292318
/*
293319
* index_beginscan_bitmap - start a scan of an index with amgetbitmap
294320
*
@@ -620,6 +646,26 @@ index_beginscan_parallel(Relation heaprel, Relation indexrel,
620646
return scan;
621647
}
622648

649+
/*
650+
* Parallel version of index_beginscan_vmset()
651+
*/
652+
IndexScanDesc
653+
index_beginscan_parallel_vmset(Relation heaprel, Relation indexrel,
654+
IndexScanInstrumentation *instrument,
655+
int nkeys, int norderbys,
656+
ParallelIndexScanDesc pscan,
657+
bool modifies_base_rel)
658+
{
659+
IndexScanDesc scan;
660+
661+
scan = index_beginscan_parallel(heaprel, indexrel,
662+
instrument,
663+
nkeys, norderbys,
664+
pscan);
665+
scan->xs_heapfetch->modifies_base_rel = modifies_base_rel;
666+
return scan;
667+
}
668+
623669
/* ----------------
624670
* index_getnext_tid - get the next TID from a scan
625671
*

src/backend/access/table/tableam.c

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@
4949
char *default_table_access_method = DEFAULT_TABLE_ACCESS_METHOD;
5050
bool synchronize_seqscans = true;
5151

52+
/* Helper for table_beginscan_parallel() and table_beginscan_parallel_vmset() */
53+
static TableScanDesc table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
54+
uint32 flags);
55+
5256

5357
/* ----------------------------------------------------------------------------
5458
* Slot functions.
@@ -162,12 +166,14 @@ table_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan,
162166
}
163167
}
164168

165-
TableScanDesc
166-
table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
169+
/*
170+
* Common helper for table_beginscan_parallel() and table_beginscan_parallel_vmset()
171+
*/
172+
static TableScanDesc
173+
table_beginscan_parallel_common(Relation relation, ParallelTableScanDesc pscan,
174+
uint32 flags)
167175
{
168176
Snapshot snapshot;
169-
uint32 flags = SO_TYPE_SEQSCAN |
170-
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
171177

172178
Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator));
173179

@@ -188,6 +194,31 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
188194
pscan, flags);
189195
}
190196

197+
TableScanDesc
198+
table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan)
199+
{
200+
uint32 flags = SO_TYPE_SEQSCAN |
201+
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
202+
203+
return table_beginscan_parallel_common(relation, pscan, flags);
204+
}
205+
206+
/*
207+
* Parallel version of table_beginscan_vmset()
208+
*/
209+
TableScanDesc
210+
table_beginscan_parallel_vmset(Relation relation, ParallelTableScanDesc pscan,
211+
bool modifies_rel)
212+
{
213+
uint32 flags = SO_TYPE_SEQSCAN |
214+
SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;
215+
216+
if (!modifies_rel)
217+
flags |= SO_ALLOW_VM_SET;
218+
219+
return table_beginscan_parallel_common(relation, pscan, flags);
220+
}
221+
191222

192223
/* ----------------------------------------------------------------------------
193224
* Index scan related functions.

src/backend/executor/execMain.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -916,6 +916,10 @@ InitPlan(QueryDesc *queryDesc, int eflags)
916916
break;
917917
}
918918

919+
/* If it has a rowmark, the relation is modified */
920+
estate->es_modified_relids = bms_add_member(estate->es_modified_relids,
921+
rc->rti);
922+
919923
/* Check that relation is a legal target for marking */
920924
if (relation)
921925
CheckValidRowMarkRel(relation, rc->markType);

0 commit comments

Comments
 (0)