Skip to content

Commit ba7c386

Browse files
author
Commitfest Bot
committed
[CF 5327] v10 - Use read_stream in index vacuum
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5327
The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch.
Patch(es): https://www.postgresql.org/message-id/CAAKRu_YeMT67LfyN7siwiACP9Q7YnCY9zdzxbv1Rhkeg94mYSQ@mail.gmail.com
Author(s): Andrey Borodin
2 parents 00b52c3 + 4e1bb8e commit ba7c386

File tree

3 files changed

+167
-25
lines changed

3 files changed

+167
-25
lines changed

src/backend/access/nbtree/nbtree.c

Lines changed: 70 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "storage/lmgr.h"
3333
#include "utils/fmgrprotos.h"
3434
#include "utils/index_selfuncs.h"
35+
#include "utils/injection_point.h"
3536
#include "utils/memutils.h"
3637

3738

@@ -86,7 +87,7 @@ typedef struct BTParallelScanDescData *BTParallelScanDesc;
8687
static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
8788
IndexBulkDeleteCallback callback, void *callback_state,
8889
BTCycleId cycleid);
89-
static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno);
90+
static BlockNumber btvacuumpage(BTVacState *vstate, Buffer buf);
9091
static BTVacuumPosting btreevacuumposting(BTVacState *vstate,
9192
IndexTuple posting,
9293
OffsetNumber updatedoffset,
@@ -991,8 +992,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
991992
Relation rel = info->index;
992993
BTVacState vstate;
993994
BlockNumber num_pages;
994-
BlockNumber scanblkno;
995995
bool needLock;
996+
BlockRangeReadStreamPrivate p;
997+
ReadStream *stream = NULL;
996998

997999
/*
9981000
* Reset fields that track information about the entire index now. This
@@ -1061,9 +1063,18 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
10611063
*/
10621064
needLock = !RELATION_IS_LOCAL(rel);
10631065

1064-
scanblkno = BTREE_METAPAGE + 1;
1066+
p.current_blocknum = BTREE_METAPAGE + 1;
1067+
stream = read_stream_begin_relation(READ_STREAM_FULL,
1068+
info->strategy,
1069+
rel,
1070+
MAIN_FORKNUM,
1071+
block_range_read_stream_cb,
1072+
&p,
1073+
0);
10651074
for (;;)
10661075
{
1076+
Buffer buf;
1077+
10671078
/* Get the current relation length */
10681079
if (needLock)
10691080
LockRelationForExtension(rel, ExclusiveLock);
@@ -1076,18 +1087,44 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
10761087
num_pages);
10771088

10781089
/* Quit if we've scanned the whole relation */
1079-
if (scanblkno >= num_pages)
1090+
if (p.current_blocknum >= num_pages)
10801091
break;
1081-
/* Iterate over pages, then loop back to recheck length */
1082-
for (; scanblkno < num_pages; scanblkno++)
1092+
1093+
1094+
p.last_exclusive = num_pages;
1095+
1096+
/* Iterate over pages, then loop back to recheck relation length */
1097+
while (true)
10831098
{
1084-
btvacuumpage(&vstate, scanblkno);
1099+
BlockNumber current_block;
1100+
1101+
/* call vacuum_delay_point while not holding any buffer lock */
1102+
vacuum_delay_point(false);
1103+
1104+
buf = read_stream_next_buffer(stream, NULL);
1105+
1106+
if (!BufferIsValid(buf))
1107+
break;
1108+
1109+
current_block = btvacuumpage(&vstate, buf);
1110+
10851111
if (info->report_progress)
10861112
pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
1087-
scanblkno);
1113+
current_block);
10881114
}
1115+
1116+
Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
1117+
1118+
/*
1119+
* We have to reset the read stream to use it again. After returning
1120+
* InvalidBuffer, the read stream API won't invoke our callback again
1121+
* until the stream has been reset.
1122+
*/
1123+
read_stream_reset(stream);
10891124
}
10901125

1126+
read_stream_end(stream);
1127+
10911128
/* Set statistics num_pages field to final size of index */
10921129
stats->num_pages = num_pages;
10931130

@@ -1111,14 +1148,16 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
11111148
* btvacuumpage --- VACUUM one page
11121149
*
11131150
* This processes a single page for btvacuumscan(). In some cases we must
1114-
* backtrack to re-examine and VACUUM pages that were the scanblkno during
1151+
* backtrack to re-examine and VACUUM pages that were the scanned page (buf) during
11151152
* a previous call here. This is how we handle page splits (that happened
11161153
* after our cycleid was acquired) whose right half page happened to reuse
11171154
* a block that we might have processed at some point before it was
11181155
* recycled (i.e. before the page split).
1156+
*
1157+
* Returns the block number of the page passed in via buf (the scanned page), not of any page visited while backtracking.
11191158
*/
1120-
static void
1121-
btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
1159+
static BlockNumber
1160+
btvacuumpage(BTVacState *vstate, Buffer buf)
11221161
{
11231162
IndexVacuumInfo *info = vstate->info;
11241163
IndexBulkDeleteResult *stats = vstate->stats;
@@ -1129,7 +1168,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11291168
bool attempt_pagedel;
11301169
BlockNumber blkno,
11311170
backtrack_to;
1132-
Buffer buf;
1171+
BlockNumber scanblkno = BufferGetBlockNumber(buf);
11331172
Page page;
11341173
BTPageOpaque opaque;
11351174

@@ -1140,17 +1179,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11401179
attempt_pagedel = false;
11411180
backtrack_to = P_NONE;
11421181

1143-
/* call vacuum_delay_point while not holding any buffer lock */
1144-
vacuum_delay_point(false);
1145-
1146-
/*
1147-
* We can't use _bt_getbuf() here because it always applies
1148-
* _bt_checkpage(), which will barf on an all-zero page. We want to
1149-
* recycle all-zero pages, not fail. Also, we want to use a nondefault
1150-
* buffer access strategy.
1151-
*/
1152-
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1153-
info->strategy);
1182+
INJECTION_POINT("nbtree-vacuum-page");
11541183
_bt_lockbuf(rel, buf, BT_READ);
11551184
page = BufferGetPage(buf);
11561185
opaque = NULL;
@@ -1186,7 +1215,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
11861215
errmsg_internal("right sibling %u of scanblkno %u unexpectedly in an inconsistent state in index \"%s\"",
11871216
blkno, scanblkno, RelationGetRelationName(rel))));
11881217
_bt_relbuf(rel, buf);
1189-
return;
1218+
return scanblkno;
11901219
}
11911220

11921221
/*
@@ -1206,7 +1235,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
12061235
{
12071236
/* Done with current scanblkno (and all lower split pages) */
12081237
_bt_relbuf(rel, buf);
1209-
return;
1238+
return scanblkno;
12101239
}
12111240
}
12121241

@@ -1437,8 +1466,24 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
14371466
if (backtrack_to != P_NONE)
14381467
{
14391468
blkno = backtrack_to;
1469+
1470+
/* check for vacuum delay while not holding any buffer lock */
1471+
vacuum_delay_point(false);
1472+
1473+
INJECTION_POINT("nbtree-vacuum-page-backtrack");
1474+
1475+
/*
1476+
* We can't use _bt_getbuf() here because it always applies
1477+
* _bt_checkpage(), which will barf on an all-zero page. We want to
1478+
* recycle all-zero pages, not fail. Also, we want to use a
1479+
* nondefault buffer access strategy.
1480+
*/
1481+
buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
1482+
info->strategy);
14401483
goto backtrack;
14411484
}
1485+
1486+
return scanblkno;
14421487
}
14431488

14441489
/*

src/test/modules/test_misc/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ tests += {
1616
't/005_timeouts.pl',
1717
't/006_signal_autovacuum.pl',
1818
't/007_catcache_inval.pl',
19+
't/008_vacuum_btree.pl',
1920
],
2021
},
2122
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
# Copyright (c) 2024, PostgreSQL Global Development Group
2+
3+
# Test btree vacuum
4+
5+
use strict;
6+
use warnings;
7+
use PostgreSQL::Test::Cluster;
8+
use Test::More;
9+
10+
if ($ENV{enable_injection_points} ne 'yes')
11+
{
12+
plan skip_all => 'Injection points not supported by this build';
13+
}
14+
15+
my $node = PostgreSQL::Test::Cluster->new('node');
16+
$node->init;
17+
18+
# Turn autovacuum off for the cluster. This could be done at the table level
19+
# instead. However, since this file exercises vacuum, turn off autovacuum
20+
# globally. This also allows use of non-local injection points in vacuum code.
21+
$node->append_conf('postgresql.conf', 'autovacuum = off');
22+
$node->start;
23+
24+
# Check if extension injection_points is available
25+
if (!$node->check_extension('injection_points'))
26+
{
27+
plan skip_all => 'Extension injection_points not installed';
28+
}
29+
30+
$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
31+
32+
my $psql_session = $node->background_psql('postgres');
33+
34+
# Create a table with an index filled with dead index entries
35+
$node->safe_psql('postgres', q[
36+
create table test_backtrack (col1 int);
37+
insert into test_backtrack select generate_series(1,800);
38+
create index on test_backtrack(col1);
39+
delete from test_backtrack;
40+
]
41+
);
42+
43+
# Attach to two injection points. The first one will allow us to stop between
44+
# vacuuming each index page. The second is our validation that we did backtrack.
45+
$psql_session->query_safe(
46+
qq[
47+
SELECT injection_points_set_local();
48+
SELECT injection_points_attach('nbtree-vacuum-page', 'wait');
49+
SELECT injection_points_attach('nbtree-vacuum-page-backtrack', 'wait');
50+
]);
51+
52+
# Start a vacuum of the table and index.
53+
$psql_session->query_until(
54+
qr/starting_bg_psql/,
55+
q(\echo starting_bg_psql
56+
vacuum (index_cleanup on, parallel 0) test_backtrack;
57+
));
58+
59+
# The index vacuum should be waiting on our first injection point and is yet to
60+
# process any pages.
61+
$node->wait_for_event('client backend', 'nbtree-vacuum-page');
62+
63+
# Wake up vacuum so it can process the first index leaf page.
64+
$node->safe_psql('postgres', "SELECT injection_points_wakeup('nbtree-vacuum-page');");
65+
66+
# The first index leaf page is now vacuumed and vacuum should be waiting again
67+
# on the first injection point.
68+
$node->wait_for_event('client backend', 'nbtree-vacuum-page');
69+
70+
# Insert data while vacuum is waiting to process the next leaf page. The
71+
# inserted data will force a page split in which some tuples from unprocessed
72+
# leaf pages will be moved to the first already vacuumed leaf page.
73+
$node->safe_psql('postgres',
74+
"insert into test_backtrack select generate_series(1,300);");
75+
76+
# Now we want the vacuum to continue. We don't want to wait on our first break
77+
# point again.
78+
# We need to make sure we are waiting before detaching and issuing a wakeup.
79+
# Otherwise there could be a race and the backend may not get woken up.
80+
$node->wait_for_event('client backend', 'nbtree-vacuum-page');
81+
$node->safe_psql('postgres', "SELECT injection_points_detach('nbtree-vacuum-page');");
82+
$node->safe_psql('postgres', "SELECT injection_points_wakeup('nbtree-vacuum-page');");
83+
84+
# Wait on our second break point. Vacuum should have been forced to backtrack
85+
# and vacuum the first leaf page again to ensure it removed all dead index
86+
# entries.
87+
$node->wait_for_event('client backend', 'nbtree-vacuum-page-backtrack');
88+
89+
# Once we wait on our second break point, we're done. Time to tell the backend
90+
# to detach and wake it up.
91+
$node->safe_psql('postgres', "SELECT injection_points_detach('nbtree-vacuum-page-backtrack');");
92+
$node->safe_psql('postgres', "SELECT injection_points_wakeup('nbtree-vacuum-page-backtrack');");
93+
94+
ok($psql_session->quit);
95+
96+
done_testing();

0 commit comments

Comments
 (0)