From 0ac0c32236034021cc797444ae6464e499bc776d Mon Sep 17 00:00:00 2001 From: Cary Huang Date: Fri, 29 Aug 2025 10:10:51 -0700 Subject: [PATCH 1/2] v10 parallel tid range scan --- src/backend/access/heap/heapam.c | 10 ++ src/backend/access/table/tableam.c | 46 ++++++++- src/backend/executor/execParallel.c | 21 ++++ src/backend/executor/nodeTidrangescan.c | 114 ++++++++++++++++++++- src/backend/optimizer/path/costsize.c | 36 ++++--- src/backend/optimizer/path/tidpath.c | 20 +++- src/backend/optimizer/util/pathnode.c | 7 +- src/include/access/relscan.h | 2 + src/include/access/tableam.h | 12 +++ src/include/executor/nodeTidrangescan.h | 7 ++ src/include/nodes/execnodes.h | 2 + src/include/optimizer/pathnode.h | 3 +- src/test/regress/expected/tidrangescan.out | 106 +++++++++++++++++++ src/test/regress/sql/tidrangescan.sql | 45 ++++++++ 14 files changed, 410 insertions(+), 21 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 4b0c49f4bb0b..de0a3a8b2191 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -490,6 +490,16 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk scan->rs_startblock = startBlk; scan->rs_numblocks = numBlks; + + /* set the limits in the ParallelBlockTableScanDesc, when present as leader */ + if (scan->rs_base.rs_parallel != NULL && !IsParallelWorker()) + { + ParallelBlockTableScanDesc bpscan; + + bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel; + bpscan->phs_startblock = startBlk; + bpscan->phs_numblock = numBlks; + } } /* diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index 5e41404937eb..baef7459b6b9 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -188,6 +188,42 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) pscan, flags); } +TableScanDesc +table_beginscan_parallel_tidrange(Relation relation, ParallelTableScanDesc pscan, + ItemPointerData * mintid, ItemPointerData * maxtid) +{ + Snapshot snapshot; + uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; + TableScanDesc sscan; + + Assert(RelFileLocatorEquals(relation->rd_locator, pscan->phs_locator)); + + /* disable syncscan in parallel tid range scan. */ + pscan->phs_syncscan = false; + + if (!pscan->phs_snapshot_any) + { + /* Snapshot was serialized -- restore it */ + snapshot = RestoreSnapshot((char *) pscan + pscan->phs_snapshot_off); + RegisterSnapshot(snapshot); + flags |= SO_TEMP_SNAPSHOT; + } + else + { + /* SnapshotAny passed by caller (not serialized) */ + snapshot = SnapshotAny; + } + + sscan = relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, + pscan, flags); + + /* Set the TID range if needed */ + if (mintid && maxtid) + relation->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); + + return sscan; +} + /* ---------------------------------------------------------------------------- * Index scan related functions. @@ -398,6 +434,7 @@ table_block_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) bpscan->phs_nblocks > NBuffers / 4; SpinLockInit(&bpscan->phs_mutex); bpscan->phs_startblock = InvalidBlockNumber; + bpscan->phs_numblock = InvalidBlockNumber; pg_atomic_init_u64(&bpscan->phs_nallocated, 0); return sizeof(ParallelBlockTableScanDescData); @@ -577,8 +614,15 @@ table_block_parallelscan_nextpage(Relation rel, pbscanwork->phsw_chunk_remaining = pbscanwork->phsw_chunk_size - 1; } + /* + * Check if we've allocated every block in the relation, or if we've + * reached the limit imposed by pbscan->phs_numblock (if set). + */ if (nallocated >= pbscan->phs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ + page = InvalidBlockNumber; /* all blocks have been allocated */ + else if (pbscan->phs_numblock != InvalidBlockNumber && + nallocated >= pbscan->phs_numblock) + page = InvalidBlockNumber; /* upper scan limit reached */ else page = (nallocated + pbscan->phs_startblock) % pbscan->phs_nblocks; diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index f098a5557cf0..7b1eb2e82c78 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -41,6 +41,7 @@ #include "executor/nodeSort.h" #include "executor/nodeSubplan.h" #include "executor/tqueue.h" +#include "executor/nodeTidrangescan.h" #include "jit/jit.h" #include "nodes/nodeFuncs.h" #include "pgstat.h" @@ -266,6 +267,11 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e) ExecForeignScanEstimate((ForeignScanState *) planstate, e->pcxt); break; + case T_TidRangeScanState: + if (planstate->plan->parallel_aware) + ExecTidRangeScanEstimate((TidRangeScanState *) planstate, + e->pcxt); + break; case T_AppendState: if (planstate->plan->parallel_aware) ExecAppendEstimate((AppendState *) planstate, @@ -493,6 +499,11 @@ ExecParallelInitializeDSM(PlanState *planstate, ExecForeignScanInitializeDSM((ForeignScanState *) planstate, d->pcxt); break; + case T_TidRangeScanState: + if (planstate->plan->parallel_aware) + ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate, + d->pcxt); + break; case T_AppendState: if (planstate->plan->parallel_aware) ExecAppendInitializeDSM((AppendState *) planstate, @@ -994,6 +1005,11 @@ ExecParallelReInitializeDSM(PlanState *planstate, ExecForeignScanReInitializeDSM((ForeignScanState *) planstate, pcxt); break; + case T_TidRangeScanState: + if (planstate->plan->parallel_aware) + ExecTidRangeScanReInitializeDSM((TidRangeScanState *) planstate, + pcxt); + break; case T_AppendState: if (planstate->plan->parallel_aware) ExecAppendReInitializeDSM((AppendState *) planstate, pcxt); @@ -1362,6 +1378,11 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt) ExecForeignScanInitializeWorker((ForeignScanState *) planstate, pwcxt); break; + case T_TidRangeScanState: + if (planstate->plan->parallel_aware) + ExecTidRangeScanInitializeWorker((TidRangeScanState *) planstate, + pwcxt); + break; case T_AppendState: if (planstate->plan->parallel_aware) ExecAppendInitializeWorker((AppendState *) planstate, pwcxt); diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 1bce8d6cbfe6..39088755e905 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -250,9 +250,13 @@ TidRangeNext(TidRangeScanState *node) } else { - /* rescan with the updated TID range */ - table_rescan_tidrange(scandesc, &node->trss_mintid, - &node->trss_maxtid); + /* rescan with the updated TID range only in non-parallel mode */ + if (scandesc->rs_parallel == NULL) + { + /* rescan with the updated TID range */ + table_rescan_tidrange(scandesc, &node->trss_mintid, + &node->trss_maxtid); + } } node->trss_inScan = true; @@ -415,3 +419,107 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) */ return tidrangestate; } +/* ---------------------------------------------------------------- + * Parallel Scan Support + * ---------------------------------------------------------------- + */ + +/* ---------------------------------------------------------------- + * ExecTidRangeScanEstimate + * + * Compute the amount of space we'll need in the parallel + * query DSM, and inform pcxt->estimator about our needs. + * ---------------------------------------------------------------- + */ +void +ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + + node->trss_pscanlen = + table_parallelscan_estimate(node->ss.ss_currentRelation, + estate->es_snapshot); + shm_toc_estimate_chunk(&pcxt->estimator, node->trss_pscanlen); + shm_toc_estimate_keys(&pcxt->estimator, 1); +} + +/* ---------------------------------------------------------------- + * ExecTidRangeScanInitializeDSM + * + * Set up a parallel TID scan descriptor. + * ---------------------------------------------------------------- + */ +void +ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) +{ + EState *estate = node->ss.ps.state; + ParallelTableScanDesc pscan; + + pscan = shm_toc_allocate(pcxt->toc, node->trss_pscanlen); + table_parallelscan_initialize(node->ss.ss_currentRelation, + pscan, + estate->es_snapshot); + shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); + + /* + * Initialize parallel scan descriptor with given TID range if it can be + * evaluated successfully. + */ + if (TidRangeEval(node)) + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, + &node->trss_mintid, &node->trss_maxtid); + else + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, + NULL, NULL); +} + +/* ---------------------------------------------------------------- + * ExecTidRangeScanReInitializeDSM + * + * Reset shared state before beginning a fresh scan. + * ---------------------------------------------------------------- + */ +void +ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, + ParallelContext *pcxt) +{ + ParallelTableScanDesc pscan; + + pscan = node->ss.ss_currentScanDesc->rs_parallel; + table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan); + + /* Set the new TID range if it can be evaluated successfully */ + if (TidRangeEval(node)) + node->ss.ss_currentRelation->rd_tableam->scan_set_tidrange( + node->ss.ss_currentScanDesc, &node->trss_mintid, + &node->trss_maxtid); + else + node->ss.ss_currentRelation->rd_tableam->scan_set_tidrange( + node->ss.ss_currentScanDesc, NULL, NULL); +} + +/* ---------------------------------------------------------------- + * ExecTidRangeScanInitializeWorker + * + * Copy relevant information from TOC into planstate. + * ---------------------------------------------------------------- + */ +void +ExecTidRangeScanInitializeWorker(TidRangeScanState *node, + ParallelWorkerContext *pwcxt) +{ + ParallelTableScanDesc pscan; + + pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); + + if (TidRangeEval(node)) + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, + &node->trss_mintid, &node->trss_maxtid); + else + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, + NULL, NULL); +} diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 8335cf5b5c5a..01976226d192 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1340,8 +1340,9 @@ cost_tidrangescan(Path *path, PlannerInfo *root, { Selectivity selectivity; double pages; - Cost startup_cost = 0; - Cost run_cost = 0; + Cost startup_cost; + Cost cpu_run_cost; + Cost disk_run_cost; QualCost qpqual_cost; Cost cpu_per_tuple; QualCost tid_qual_cost; @@ -1370,11 +1371,7 @@ cost_tidrangescan(Path *path, PlannerInfo *root, /* * The first page in a range requires a random seek, but each subsequent - * page is just a normal sequential page read. NOTE: it's desirable for - * TID Range Scans to cost more than the equivalent Sequential Scans, - * because Seq Scans have some performance advantages such as scan - * synchronization and parallelizability, and we'd prefer one of them to - * be picked unless a TID Range Scan really is better. + * page is just a normal sequential page read. */ ntuples = selectivity * baserel->tuples; nseqpages = pages - 1.0; @@ -1391,7 +1388,7 @@ cost_tidrangescan(Path *path, PlannerInfo *root, &spc_seq_page_cost); /* disk costs; 1 random page and the remainder as seq pages */ - run_cost += spc_random_page_cost + spc_seq_page_cost * nseqpages; + disk_run_cost = spc_random_page_cost + spc_seq_page_cost * nseqpages; /* Add scanning CPU costs */ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost); @@ -1403,20 +1400,35 @@ cost_tidrangescan(Path *path, PlannerInfo *root, * can't be removed, this is a mistake and we're going to underestimate * the CPU cost a bit.) */ - startup_cost += qpqual_cost.startup + tid_qual_cost.per_tuple; + startup_cost = qpqual_cost.startup + tid_qual_cost.per_tuple; cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple - tid_qual_cost.per_tuple; - run_cost += cpu_per_tuple * ntuples; + cpu_run_cost = cpu_per_tuple * ntuples; /* tlist eval costs are paid per output row, not per tuple scanned */ startup_cost += path->pathtarget->cost.startup; - run_cost += path->pathtarget->cost.per_tuple * path->rows; + cpu_run_cost += path->pathtarget->cost.per_tuple * path->rows; + + /* Adjust costing for parallelism, if used. */ + if (path->parallel_workers > 0) + { + double parallel_divisor = get_parallel_divisor(path); + + /* The CPU cost is divided among all the workers. */ + cpu_run_cost /= parallel_divisor; + + /* + * In the case of a parallel plan, the row count needs to represent + * the number of tuples processed per worker. + */ + path->rows = clamp_row_est(path->rows / parallel_divisor); + } /* we should not generate this path type when enable_tidscan=false */ Assert(enable_tidscan); path->disabled_nodes = 0; path->startup_cost = startup_cost; - path->total_cost = startup_cost + run_cost; + path->total_cost = startup_cost + cpu_run_cost + disk_run_cost; } /* diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index 2bfb338b81ce..e48c85833e77 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -47,6 +47,7 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" +#include "optimizer/cost.h" /* @@ -553,7 +554,24 @@ create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) add_path(rel, (Path *) create_tidrangescan_path(root, rel, tidrangequals, - required_outer)); + required_outer, + 0)); + + /* If appropriate, consider parallel tid range scan. */ + if (rel->consider_parallel && required_outer == NULL) + { + int parallel_workers; + + parallel_workers = compute_parallel_worker(rel, rel->pages, -1, + max_parallel_workers_per_gather); + + if (parallel_workers > 0) + add_partial_path(rel, (Path *) create_tidrangescan_path(root, + rel, + tidrangequals, + required_outer, + parallel_workers)); + } } /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e4fd6950fad1..fd4bd5f93f0d 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1262,7 +1262,8 @@ create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, */ TidRangePath * create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, - List *tidrangequals, Relids required_outer) + List *tidrangequals, Relids required_outer, + int parallel_workers) { TidRangePath *pathnode = makeNode(TidRangePath); @@ -1271,9 +1272,9 @@ create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, pathnode->path.pathtarget = rel->reltarget; pathnode->path.param_info = get_baserel_parampathinfo(root, rel, required_outer); - pathnode->path.parallel_aware = false; + pathnode->path.parallel_aware = (parallel_workers > 0); pathnode->path.parallel_safe = rel->consider_parallel; - pathnode->path.parallel_workers = 0; + pathnode->path.parallel_workers = parallel_workers; pathnode->path.pathkeys = NIL; /* always unordered */ pathnode->tidrangequals = tidrangequals; diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index b5e0fb386c0a..3da43557a13b 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -96,6 +96,8 @@ typedef struct ParallelBlockTableScanDescData BlockNumber phs_nblocks; /* # blocks in relation at start of scan */ slock_t phs_mutex; /* mutual exclusion for setting startblock */ BlockNumber phs_startblock; /* starting block number */ + BlockNumber phs_numblock; /* # blocks to scan, or InvalidBlockNumber if + * no limit */ pg_atomic_uint64 phs_nallocated; /* number of blocks allocated to * workers so far. */ } ParallelBlockTableScanDescData; diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index e16bf0256928..8e97fc5f0be6 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -1130,6 +1130,18 @@ extern void table_parallelscan_initialize(Relation rel, extern TableScanDesc table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan); +/* + * Begin a parallel tidrange scan. `pscan` needs to have been initialized with + * table_parallelscan_initialize(), for the same relation. The initialization + * does not need to have happened in this backend. + * + * Caller must hold a suitable lock on the relation. + */ +extern TableScanDesc table_beginscan_parallel_tidrange(Relation relation, + ParallelTableScanDesc pscan, + ItemPointerData * mintid, + ItemPointerData * maxtid); + /* * Restart a parallel scan. Call this in the leader process. Caller is * responsible for making sure that all workers have finished the scan diff --git a/src/include/executor/nodeTidrangescan.h b/src/include/executor/nodeTidrangescan.h index a831f1202cae..2b5465b3ce4e 100644 --- a/src/include/executor/nodeTidrangescan.h +++ b/src/include/executor/nodeTidrangescan.h @@ -14,6 +14,7 @@ #ifndef NODETIDRANGESCAN_H #define NODETIDRANGESCAN_H +#include "access/parallel.h" #include "nodes/execnodes.h" extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node, @@ -21,4 +22,10 @@ extern TidRangeScanState *ExecInitTidRangeScan(TidRangeScan *node, extern void ExecEndTidRangeScan(TidRangeScanState *node); extern void ExecReScanTidRangeScan(TidRangeScanState *node); +/* parallel scan support */ +extern void ExecTidRangeScanEstimate(TidRangeScanState *node, ParallelContext *pcxt); +extern void ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt); +extern void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt); +extern void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, ParallelWorkerContext *pwcxt); + #endif /* NODETIDRANGESCAN_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 18ae8f0d4bb8..64ff6996431e 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1930,6 +1930,7 @@ typedef struct TidScanState * trss_mintid the lowest TID in the scan range * trss_maxtid the highest TID in the scan range * trss_inScan is a scan currently in progress? + * trss_pscanlen size of parallel heap scan descriptor * ---------------- */ typedef struct TidRangeScanState @@ -1939,6 +1940,7 @@ typedef struct TidRangeScanState ItemPointerData trss_mintid; ItemPointerData trss_maxtid; bool trss_inScan; + Size trss_pscanlen; } TidRangeScanState; /* ---------------- diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 955e90568583..6b010f0b1a5a 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -67,7 +67,8 @@ extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, extern TidRangePath *create_tidrangescan_path(PlannerInfo *root, RelOptInfo *rel, List *tidrangequals, - Relids required_outer); + Relids required_outer, + int parallel_workers); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, List *pathkeys, Relids required_outer, diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out index 721f3b94e042..3c5fc9e102a3 100644 --- a/src/test/regress/expected/tidrangescan.out +++ b/src/test/regress/expected/tidrangescan.out @@ -297,4 +297,110 @@ FETCH LAST c; COMMIT; DROP TABLE tidrangescan; +-- tests for parallel tidrangescans +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +SET min_parallel_table_scan_size TO 0; +SET max_parallel_workers_per_gather TO 4; +CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); +-- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 +INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); +-- ensure there are 40 pages for parallel test +SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; + min | max +-------+-------- + (0,1) | (39,5) +(1 row) + +-- parallel range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; + QUERY PLAN +-------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Tid Range Scan on parallel_tidrangescan + TID Cond: (ctid < '(30,1)'::tid) +(6 rows) + +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; + count +------- + 150 +(1 row) + +-- parallel range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; + QUERY PLAN +-------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Tid Range Scan on parallel_tidrangescan + TID Cond: (ctid > '(10,0)'::tid) +(6 rows) + +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; + count +------- + 150 +(1 row) + +-- parallel range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; + QUERY PLAN +----------------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 4 + -> Partial Aggregate + -> Parallel Tid Range Scan on parallel_tidrangescan + TID Cond: ((ctid > '(10,0)'::tid) AND (ctid < '(30,1)'::tid)) +(6 rows) + +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; + count +------- + 100 +(1 row) + +-- parallel rescans +EXPLAIN (COSTS OFF) +SELECT t.ctid,t2.c FROM parallel_tidrangescan t, +LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + -> Gather + Workers Planned: 4 + -> Parallel Tid Range Scan on parallel_tidrangescan t + TID Cond: (ctid < '(1,0)'::tid) + -> Aggregate + -> Tid Range Scan on parallel_tidrangescan t2 + TID Cond: (ctid <= t.ctid) +(8 rows) + +SELECT t.ctid,t2.c FROM parallel_tidrangescan t, +LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + ctid | c +-------+--- + (0,1) | 1 + (0,2) | 2 + (0,3) | 3 + (0,4) | 4 + (0,5) | 5 +(5 rows) + +DROP TABLE parallel_tidrangescan; +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; RESET enable_seqscan; diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql index ac09ebb62626..0f1e43c6d053 100644 --- a/src/test/regress/sql/tidrangescan.sql +++ b/src/test/regress/sql/tidrangescan.sql @@ -98,4 +98,49 @@ COMMIT; DROP TABLE tidrangescan; +-- tests for parallel tidrangescans +SET parallel_setup_cost TO 0; +SET parallel_tuple_cost TO 0; +SET min_parallel_table_scan_size TO 0; +SET max_parallel_workers_per_gather TO 4; + +CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); + +-- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 +INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); + +-- ensure there are 40 pages for parallel test +SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; + +-- parallel range scans with upper bound +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; + +-- parallel range scans with lower bound +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; + +-- parallel range scans with both bounds +EXPLAIN (COSTS OFF) +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; +SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; + +-- parallel rescans +EXPLAIN (COSTS OFF) +SELECT t.ctid,t2.c FROM parallel_tidrangescan t, +LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + +SELECT t.ctid,t2.c FROM parallel_tidrangescan t, +LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 +WHERE t.ctid < '(1,0)'; + +DROP TABLE parallel_tidrangescan; + +RESET parallel_setup_cost; +RESET parallel_tuple_cost; +RESET min_parallel_table_scan_size; +RESET max_parallel_workers_per_gather; RESET enable_seqscan; From c65957818172d57266b41ce9a03cc947e55abc8e Mon Sep 17 00:00:00 2001 From: David Rowley Date: Fri, 7 Nov 2025 18:03:09 +1300 Subject: [PATCH 2/2] fixup! v10 parallel tid range scan --- src/backend/access/heap/heapam.c | 14 +-- src/backend/access/table/tableam.c | 134 ++++++++++++--------- src/backend/executor/execParallel.c | 6 +- src/backend/executor/nodeTidrangescan.c | 52 ++------ src/backend/optimizer/path/costsize.c | 6 +- src/backend/optimizer/path/tidpath.c | 6 +- src/include/access/tableam.h | 8 +- src/test/regress/expected/tidrangescan.out | 29 ++--- src/test/regress/sql/tidrangescan.sql | 31 +++-- 9 files changed, 136 insertions(+), 150 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index de0a3a8b2191..0a820bab87a4 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -258,7 +258,9 @@ heap_scan_stream_read_next_parallel(ReadStream *stream, /* parallel scan */ table_block_parallelscan_startblock_init(scan->rs_base.rs_rd, scan->rs_parallelworkerdata, - (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel); + (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel, + scan->rs_startblock, + scan->rs_numblocks); /* may return InvalidBlockNumber if there are no more blocks */ scan->rs_prefetch_block = table_block_parallelscan_nextpage(scan->rs_base.rs_rd, @@ -490,16 +492,6 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk scan->rs_startblock = startBlk; scan->rs_numblocks = numBlks; - - /* set the limits in the ParallelBlockTableScanDesc, when present as leader */ - if (scan->rs_base.rs_parallel != NULL && !IsParallelWorker()) - { - ParallelBlockTableScanDesc bpscan; - - bpscan = (ParallelBlockTableScanDesc) scan->rs_base.rs_parallel; - bpscan->phs_startblock = startBlk; - bpscan->phs_numblock = numBlks; - } } /* diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index baef7459b6b9..9c3347ba12b0 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -189,8 +189,8 @@ table_beginscan_parallel(Relation relation, ParallelTableScanDesc pscan) } TableScanDesc -table_beginscan_parallel_tidrange(Relation relation, ParallelTableScanDesc pscan, - ItemPointerData * mintid, ItemPointerData * maxtid) +table_beginscan_parallel_tidrange(Relation relation, + ParallelTableScanDesc pscan) { Snapshot snapshot; uint32 flags = SO_TYPE_TIDRANGESCAN | SO_ALLOW_PAGEMODE; @@ -216,11 +216,6 @@ table_beginscan_parallel_tidrange(Relation relation, ParallelTableScanDesc pscan sscan = relation->rd_tableam->scan_begin(relation, snapshot, 0, NULL, pscan, flags); - - /* Set the TID range if needed */ - if (mintid && maxtid) - relation->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid); - return sscan; } @@ -453,14 +448,22 @@ table_block_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) * * Determine where the parallel seq scan should start. This function may be * called many times, once by each parallel worker. We must be careful only - * to set the startblock once. + * to set the phs_startblock and phs_numblock fields once. + * + * Callers may optionally specify a non-InvalidBlockNumber value for + * 'startblock' to force the scan to start at the given page. Likewise, + * 'numblocks' can be specified as a non-InvalidBlockNumber to limit the + * number of blocks to scan to that many blocks. */ void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanWorker pbscanwork, - ParallelBlockTableScanDesc pbscan) + ParallelBlockTableScanDesc pbscan, + BlockNumber startblock, + BlockNumber numblocks) { BlockNumber sync_startpage = InvalidBlockNumber; + BlockNumber scan_nblocks; /* Reset the state we use for controlling allocation size. */ memset(pbscanwork, 0, sizeof(*pbscanwork)); @@ -468,42 +471,36 @@ table_block_parallelscan_startblock_init(Relation rel, StaticAssertStmt(MaxBlockNumber <= 0xFFFFFFFE, "pg_nextpower2_32 may be too small for non-standard BlockNumber width"); - /* - * We determine the chunk size based on the size of the relation. First we - * split the relation into PARALLEL_SEQSCAN_NCHUNKS chunks but we then - * take the next highest power of 2 number of the chunk size. This means - * we split the relation into somewhere between PARALLEL_SEQSCAN_NCHUNKS - * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks. - */ - pbscanwork->phsw_chunk_size = pg_nextpower2_32(Max(pbscan->phs_nblocks / - PARALLEL_SEQSCAN_NCHUNKS, 1)); - - /* - * Ensure we don't go over the maximum chunk size with larger tables. This - * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger - * tables. Too large a chunk size has been shown to be detrimental to - * synchronous scan performance. - */ - pbscanwork->phsw_chunk_size = Min(pbscanwork->phsw_chunk_size, - PARALLEL_SEQSCAN_MAX_CHUNK_SIZE); - retry: /* Grab the spinlock. */ SpinLockAcquire(&pbscan->phs_mutex); /* - * If the scan's startblock has not yet been initialized, we must do so - * now. If this is not a synchronized scan, we just start at block 0, but - * if it is a synchronized scan, we must get the starting position from - * the synchronized scan machinery. We can't hold the spinlock while - * doing that, though, so release the spinlock, get the information we - * need, and retry. If nobody else has initialized the scan in the - * meantime, we'll fill in the value we fetched on the second time - * through. + * When the caller specified a limit on the number of blocks to scan, set + * that in the ParallelBlockTableScanDesc, if it's not been done by + * another worker already. + */ + if (numblocks != InvalidBlockNumber && + pbscan->phs_numblock == InvalidBlockNumber) + { + pbscan->phs_numblock = numblocks; + } + + /* + * If the scan's phs_startblock has not yet been initialized, we must do + * so now. If a startblock was specified, start there, otherwise if this + * is not a synchronized scan, we just start at block 0, but if it is a + * synchronized scan, we must get the starting position from the + * synchronized scan machinery. We can't hold the spinlock while doing + * that, though, so release the spinlock, get the information we need, and + * retry. If nobody else has initialized the scan in the meantime, we'll + * fill in the value we fetched on the second time through. */ if (pbscan->phs_startblock == InvalidBlockNumber) { - if (!pbscan->base.phs_syncscan) + if (startblock != InvalidBlockNumber) + pbscan->phs_startblock = startblock; + else if (!pbscan->base.phs_syncscan) pbscan->phs_startblock = 0; else if (sync_startpage != InvalidBlockNumber) pbscan->phs_startblock = sync_startpage; @@ -515,6 +512,34 @@ table_block_parallelscan_startblock_init(Relation rel, } } SpinLockRelease(&pbscan->phs_mutex); + + /* + * Figure out how many blocks we're going to scan; either all of them, or + * just phs_numblock's worth, if a limit has been imposed. + */ + if (pbscan->phs_numblock == InvalidBlockNumber) + scan_nblocks = pbscan->phs_nblocks; + else + scan_nblocks = pbscan->phs_numblock; + + /* + * We determine the chunk size based on scan_nblocks. First we split + * scan_nblocks into PARALLEL_SEQSCAN_NCHUNKS chunks then we calculate the + * next highest power of 2 number of the result. This means we split the + * blocks we're scanning into somewhere between PARALLEL_SEQSCAN_NCHUNKS + * and PARALLEL_SEQSCAN_NCHUNKS / 2 chunks. + */ + pbscanwork->phsw_chunk_size = pg_nextpower2_32(Max(scan_nblocks / + PARALLEL_SEQSCAN_NCHUNKS, 1)); + + /* + * Ensure we don't go over the maximum chunk size with larger tables. This + * means we may get much more than PARALLEL_SEQSCAN_NCHUNKS for larger + * tables. Too large a chunk size has been shown to be detrimental to + * synchronous scan performance. + */ + pbscanwork->phsw_chunk_size = Min(pbscanwork->phsw_chunk_size, + PARALLEL_SEQSCAN_MAX_CHUNK_SIZE); } /* @@ -530,6 +555,7 @@ table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanWorker pbscanwork, ParallelBlockTableScanDesc pbscan) { + BlockNumber scan_nblocks; BlockNumber page; uint64 nallocated; @@ -550,7 +576,7 @@ table_block_parallelscan_nextpage(Relation rel, * * Here we name these ranges of blocks "chunks". The initial size of * these chunks is determined in table_block_parallelscan_startblock_init - * based on the size of the relation. Towards the end of the scan, we + * based on the number of blocks to scan. Towards the end of the scan, we * start making reductions in the size of the chunks in order to attempt * to divide the remaining work over all the workers as evenly as * possible. @@ -567,17 +593,23 @@ table_block_parallelscan_nextpage(Relation rel, * phs_nallocated counter will exceed rs_nblocks, because workers will * still increment the value, when they try to allocate the next block but * all blocks have been allocated already. The counter must be 64 bits - * wide because of that, to avoid wrapping around when rs_nblocks is close - * to 2^32. + * wide because of that, to avoid wrapping around when scan_nblocks is + * close to 2^32. * * The actual block to return is calculated by adding the counter to the - * starting block number, modulo nblocks. + * starting block number, modulo phs_nblocks. */ + /* First, figure out how many blocks we're planning on scanning */ + if (pbscan->phs_numblock == InvalidBlockNumber) + scan_nblocks = pbscan->phs_nblocks; + else + scan_nblocks = pbscan->phs_numblock; + /* - * First check if we have any remaining blocks in a previous chunk for - * this worker. We must consume all of the blocks from that before we - * allocate a new chunk to the worker. + * Now check if we have any remaining blocks in a previous chunk for this + * worker. We must consume all of the blocks from that before we allocate + * a new chunk to the worker. */ if (pbscanwork->phsw_chunk_remaining > 0) { @@ -599,7 +631,7 @@ table_block_parallelscan_nextpage(Relation rel, * chunk size set to 1. */ if (pbscanwork->phsw_chunk_size > 1 && - pbscanwork->phsw_nallocated > pbscan->phs_nblocks - + pbscanwork->phsw_nallocated > scan_nblocks - (pbscanwork->phsw_chunk_size * PARALLEL_SEQSCAN_RAMPDOWN_CHUNKS)) pbscanwork->phsw_chunk_size >>= 1; @@ -614,15 +646,9 @@ table_block_parallelscan_nextpage(Relation rel, pbscanwork->phsw_chunk_remaining = pbscanwork->phsw_chunk_size - 1; } - /* - * Check if we've allocated every block in the relation, or if we've - * reached the limit imposed by pbscan->phs_numblock (if set). - */ - if (nallocated >= pbscan->phs_nblocks) - page = InvalidBlockNumber; /* all blocks have been allocated */ - else if (pbscan->phs_numblock != InvalidBlockNumber && - nallocated >= pbscan->phs_numblock) - page = InvalidBlockNumber; /* upper scan limit reached */ + /* Check if we've run out of blocks to scan */ + if (nallocated >= scan_nblocks) + page = InvalidBlockNumber; /* all blocks have been allocated */ else page = (nallocated + pbscan->phs_startblock) % pbscan->phs_nblocks; diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index 7b1eb2e82c78..0125464d942b 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -40,8 +40,8 @@ #include "executor/nodeSeqscan.h" #include "executor/nodeSort.h" #include "executor/nodeSubplan.h" -#include "executor/tqueue.h" #include "executor/nodeTidrangescan.h" +#include "executor/tqueue.h" #include "jit/jit.h" #include "nodes/nodeFuncs.h" #include "pgstat.h" @@ -502,7 +502,7 @@ ExecParallelInitializeDSM(PlanState *planstate, case T_TidRangeScanState: if (planstate->plan->parallel_aware) ExecTidRangeScanInitializeDSM((TidRangeScanState *) planstate, - d->pcxt); + d->pcxt); break; case T_AppendState: if (planstate->plan->parallel_aware) @@ -1008,7 +1008,7 @@ ExecParallelReInitializeDSM(PlanState *planstate, case T_TidRangeScanState: if (planstate->plan->parallel_aware) ExecTidRangeScanReInitializeDSM((TidRangeScanState *) planstate, - pcxt); + pcxt); break; case T_AppendState: if (planstate->plan->parallel_aware) diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index 39088755e905..03ce8525f896 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -250,13 +250,9 @@ TidRangeNext(TidRangeScanState *node) } else { - /* rescan with the updated TID range only in non-parallel mode */ - if (scandesc->rs_parallel == NULL) - { - /* rescan with the updated TID range */ - table_rescan_tidrange(scandesc, &node->trss_mintid, - &node->trss_maxtid); - } + /* rescan with the updated TID range */ + table_rescan_tidrange(scandesc, &node->trss_mintid, + &node->trss_maxtid); } node->trss_inScan = true; @@ -419,6 +415,7 @@ ExecInitTidRangeScan(TidRangeScan *node, EState *estate, int eflags) */ return tidrangestate; } + /* ---------------------------------------------------------------- * Parallel Scan Support * ---------------------------------------------------------------- @@ -460,19 +457,9 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) pscan, estate->es_snapshot); shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pscan); - - /* - * Initialize parallel scan descriptor with given TID range if it can be - * evaluated successfully. - */ - if (TidRangeEval(node)) - node->ss.ss_currentScanDesc = - table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, - &node->trss_mintid, &node->trss_maxtid); - else - node->ss.ss_currentScanDesc = - table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, - NULL, NULL); + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, + pscan); } /* ---------------------------------------------------------------- @@ -483,21 +470,12 @@ ExecTidRangeScanInitializeDSM(TidRangeScanState *node, ParallelContext *pcxt) */ void ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, - ParallelContext *pcxt) + ParallelContext *pcxt) { ParallelTableScanDesc pscan; pscan = node->ss.ss_currentScanDesc->rs_parallel; table_parallelscan_reinitialize(node->ss.ss_currentRelation, pscan); - - /* Set the new TID range if it can be evaluated successfully */ - if (TidRangeEval(node)) - node->ss.ss_currentRelation->rd_tableam->scan_set_tidrange( - node->ss.ss_currentScanDesc, &node->trss_mintid, - &node->trss_maxtid); - else - node->ss.ss_currentRelation->rd_tableam->scan_set_tidrange( - node->ss.ss_currentScanDesc, NULL, NULL); } /* ---------------------------------------------------------------- @@ -508,18 +486,12 @@ ExecTidRangeScanReInitializeDSM(TidRangeScanState *node, */ void ExecTidRangeScanInitializeWorker(TidRangeScanState *node, - ParallelWorkerContext *pwcxt) + ParallelWorkerContext *pwcxt) { ParallelTableScanDesc pscan; pscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false); - - if (TidRangeEval(node)) - node->ss.ss_currentScanDesc = - table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, - &node->trss_mintid, &node->trss_maxtid); - else - node->ss.ss_currentScanDesc = - table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, pscan, - NULL, NULL); + node->ss.ss_currentScanDesc = + table_beginscan_parallel_tidrange(node->ss.ss_currentRelation, + pscan); } diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 01976226d192..5a7283bd2f53 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1371,7 +1371,11 @@ cost_tidrangescan(Path *path, PlannerInfo *root, /* * The first page in a range requires a random seek, but each subsequent - * page is just a normal sequential page read. + * page is just a normal sequential page read. NOTE: it's desirable for + * TID Range Scans to cost more than the equivalent Sequential Scans, + * because Seq Scans have some performance advantages such as scan + * synchronization, and we'd prefer one of them to be picked unless a TID + * Range Scan really is better. */ ntuples = selectivity * baserel->tuples; nseqpages = pages - 1.0; diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c index e48c85833e77..3ddbc10bbdf1 100644 --- a/src/backend/optimizer/path/tidpath.c +++ b/src/backend/optimizer/path/tidpath.c @@ -47,7 +47,6 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/restrictinfo.h" -#include "optimizer/cost.h" /* @@ -491,9 +490,8 @@ ec_member_matches_ctid(PlannerInfo *root, RelOptInfo *rel, /* * create_tidscan_paths - * Create paths corresponding to direct TID scans of the given rel. - * - * Candidate paths are added to the rel's pathlist (using add_path). + * Create paths corresponding to direct TID scans of the given rel and add + * them to the corresponding path list via add_path or add_partial_path. */ bool create_tidscan_paths(PlannerInfo *root, RelOptInfo *rel) diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8e97fc5f0be6..5ef8de3f141d 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -1138,9 +1138,7 @@ extern TableScanDesc table_beginscan_parallel(Relation relation, * Caller must hold a suitable lock on the relation. */ extern TableScanDesc table_beginscan_parallel_tidrange(Relation relation, - ParallelTableScanDesc pscan, - ItemPointerData * mintid, - ItemPointerData * maxtid); + ParallelTableScanDesc pscan); /* * Restart a parallel scan. Call this in the leader process. Caller is @@ -2040,7 +2038,9 @@ extern BlockNumber table_block_parallelscan_nextpage(Relation rel, ParallelBlockTableScanDesc pbscan); extern void table_block_parallelscan_startblock_init(Relation rel, ParallelBlockTableScanWorker pbscanwork, - ParallelBlockTableScanDesc pbscan); + ParallelBlockTableScanDesc pbscan, + BlockNumber startblock, + BlockNumber numblocks); /* ---------------------------------------------------------------------------- diff --git a/src/test/regress/expected/tidrangescan.out b/src/test/regress/expected/tidrangescan.out index 3c5fc9e102a3..ce75c96e7c83 100644 --- a/src/test/regress/expected/tidrangescan.out +++ b/src/test/regress/expected/tidrangescan.out @@ -297,22 +297,23 @@ FETCH LAST c; COMMIT; DROP TABLE tidrangescan; --- tests for parallel tidrangescans -SET parallel_setup_cost TO 0; -SET parallel_tuple_cost TO 0; -SET min_parallel_table_scan_size TO 0; -SET max_parallel_workers_per_gather TO 4; +-- Tests for parallel tidrangescans +BEGIN; +SET LOCAL parallel_setup_cost TO 0; +SET LOCAL parallel_tuple_cost TO 0; +SET LOCAL min_parallel_table_scan_size TO 0; +SET LOCAL max_parallel_workers_per_gather TO 4; CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); --- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 +-- Insert enough tuples such that each page gets 5 tuples with fillfactor = 10 INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); --- ensure there are 40 pages for parallel test +-- Ensure there are 40 pages for parallel test SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; min | max -------+-------- (0,1) | (39,5) (1 row) --- parallel range scans with upper bound +-- Parallel range scans with upper bound EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; QUERY PLAN @@ -331,7 +332,7 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; 150 (1 row) --- parallel range scans with lower bound +-- Parallel range scans with lower bound EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; QUERY PLAN @@ -350,7 +351,7 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; 150 (1 row) --- parallel range scans with both bounds +-- Parallel range scans with both bounds EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; QUERY PLAN @@ -369,7 +370,7 @@ SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30 100 (1 row) --- parallel rescans +-- Parallel rescans EXPLAIN (COSTS OFF) SELECT t.ctid,t2.c FROM parallel_tidrangescan t, LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 @@ -398,9 +399,5 @@ WHERE t.ctid < '(1,0)'; (0,5) | 5 (5 rows) -DROP TABLE parallel_tidrangescan; -RESET parallel_setup_cost; -RESET parallel_tuple_cost; -RESET min_parallel_table_scan_size; -RESET max_parallel_workers_per_gather; +ROLLBACK; RESET enable_seqscan; diff --git a/src/test/regress/sql/tidrangescan.sql b/src/test/regress/sql/tidrangescan.sql index 0f1e43c6d053..c9a63b10dddd 100644 --- a/src/test/regress/sql/tidrangescan.sql +++ b/src/test/regress/sql/tidrangescan.sql @@ -98,36 +98,38 @@ COMMIT; DROP TABLE tidrangescan; --- tests for parallel tidrangescans -SET parallel_setup_cost TO 0; -SET parallel_tuple_cost TO 0; -SET min_parallel_table_scan_size TO 0; -SET max_parallel_workers_per_gather TO 4; +-- Tests for parallel tidrangescans +BEGIN; + +SET LOCAL parallel_setup_cost TO 0; +SET LOCAL parallel_tuple_cost TO 0; +SET LOCAL min_parallel_table_scan_size TO 0; +SET LOCAL max_parallel_workers_per_gather TO 4; CREATE TABLE parallel_tidrangescan(id integer, data text) WITH (fillfactor = 10); --- insert enough tuples such that each page gets 5 tuples with fillfactor = 10 +-- Insert enough tuples such that each page gets 5 tuples with fillfactor = 10 INSERT INTO parallel_tidrangescan SELECT i, repeat('x', 100) FROM generate_series(1,200) AS s(i); --- ensure there are 40 pages for parallel test +-- Ensure there are 40 pages for parallel test SELECT min(ctid), max(ctid) FROM parallel_tidrangescan; --- parallel range scans with upper bound +-- Parallel range scans with upper bound EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; SELECT count(*) FROM parallel_tidrangescan WHERE ctid < '(30,1)'; --- parallel range scans with lower bound +-- Parallel range scans with lower bound EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)'; --- parallel range scans with both bounds +-- Parallel range scans with both bounds EXPLAIN (COSTS OFF) SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; SELECT count(*) FROM parallel_tidrangescan WHERE ctid > '(10,0)' AND ctid < '(30,1)'; --- parallel rescans +-- Parallel rescans EXPLAIN (COSTS OFF) SELECT t.ctid,t2.c FROM parallel_tidrangescan t, LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 @@ -137,10 +139,5 @@ SELECT t.ctid,t2.c FROM parallel_tidrangescan t, LATERAL (SELECT count(*) c FROM parallel_tidrangescan t2 WHERE t2.ctid <= t.ctid) t2 WHERE t.ctid < '(1,0)'; -DROP TABLE parallel_tidrangescan; - -RESET parallel_setup_cost; -RESET parallel_tuple_cost; -RESET min_parallel_table_scan_size; -RESET max_parallel_workers_per_gather; +ROLLBACK; RESET enable_seqscan;