Skip to content

Commit 03d40e4

Browse files
committed
Teach UNION planner to remove dummy inputs
This adjusts UNION planning so that the planner produces more optimal plans when one or more of the UNION's subqueries have been proven to be empty (a dummy rel). If any of the inputs are empty, then that input can be removed from the Append / MergeAppend. Previously, a const-false "Result" node would appear to represent this. Removing empty inputs has a few extra benefits when only 1 union child remains as it means the Append or MergeAppend can be removed in setrefs.c, making the plan slightly faster to execute. Also, we can provide better n_distinct estimates by looking at the sole remaining input rel's statistics. Author: David Rowley <[email protected]> Reviewed-by: Tom Lane <[email protected]> Discussion: https://postgr.es/m/CAApHDvri53PPF76c3M94_QNWbJfXjyCnjXuj_2=LYM-0m8WZtw@mail.gmail.com
1 parent 5092aae commit 03d40e4

File tree

3 files changed

+125
-9
lines changed

3 files changed

+125
-9
lines changed

src/backend/optimizer/prep/prepunion.c

Lines changed: 47 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,13 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
523523
bool is_sorted;
524524
int presorted_keys;
525525

526+
/* If the input rel is dummy, propagate that to this query level */
527+
if (is_dummy_rel(final_rel))
528+
{
529+
mark_dummy_rel(rel);
530+
continue;
531+
}
532+
526533
/*
527534
* Include the cheapest path as-is so that the set operation can be
528535
* cheaply implemented using a method which does not require the input
@@ -763,6 +770,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
763770
RelOptInfo *rel = lfirst(lc);
764771
Path *ordered_path;
765772

773+
/* Skip any UNION children that are proven not to yield any rows */
774+
if (is_dummy_rel(rel))
775+
continue;
776+
766777
cheapest_pathlist = lappend(cheapest_pathlist,
767778
rel->cheapest_total_path);
768779

@@ -812,6 +823,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
812823
result_rel->consider_parallel = consider_parallel;
813824
result_rel->consider_startup = (root->tuple_fraction > 0);
814825

826+
/* If all UNION children were dummy rels, make the resulting rel dummy */
827+
if (cheapest_pathlist == NIL)
828+
{
829+
result_rel->reltarget = create_pathtarget(root, list_nth(tlist_list, 0));
830+
mark_dummy_rel(result_rel);
831+
832+
return result_rel;
833+
}
834+
815835
/*
816836
* Append the child results together using the cheapest paths from each
817837
* union child.
@@ -876,15 +896,33 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
876896
bool can_sort = grouping_is_sortable(groupList);
877897
bool can_hash = grouping_is_hashable(groupList);
878898

879-
/*
880-
* XXX for the moment, take the number of distinct groups as equal to
881-
* the total input size, i.e., the worst case. This is too
882-
* conservative, but it's not clear how to get a decent estimate of
883-
* the true size. One should note as well the propensity of novices
884-
* to write UNION rather than UNION ALL even when they don't expect
885-
* any duplicates...
886-
*/
887-
dNumGroups = apath->rows;
899+
if (list_length(cheapest_pathlist) == 1)
900+
{
901+
Path *path = linitial(cheapest_pathlist);
902+
903+
/*
904+
* In the case where only one union child remains due to the
905+
* detection of one or more dummy union children, obtain an
906+
* estimate on the surviving child directly.
907+
*/
908+
dNumGroups = estimate_num_groups(root,
909+
path->pathtarget->exprs,
910+
path->rows,
911+
NULL,
912+
NULL);
913+
}
914+
else
915+
{
916+
/*
917+
* Otherwise, for the moment, take the number of distinct groups
918+
* as equal to the total input size, i.e., the worst case. This
919+
* is too conservative, but it's not clear how to get a decent
920+
* estimate of the true size. One should note as well the
921+
* propensity of novices to write UNION rather than UNION ALL even
922+
* when they don't expect any duplicates...
923+
*/
924+
dNumGroups = apath->rows;
925+
}
888926

889927
if (can_hash)
890928
{

src/test/regress/expected/union.out

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,6 +1216,57 @@ select event_id
12161216

12171217
drop table events_child, events, other_events;
12181218
reset enable_indexonlyscan;
1219+
--
1220+
-- Test handling of UNION with provably empty inputs
1221+
--
1222+
-- Ensure the empty UNION input is pruned and de-duplication is done for the
1223+
-- remaining relation.
1224+
EXPLAIN (COSTS OFF, VERBOSE)
1225+
SELECT two FROM tenk1 WHERE 1=2
1226+
UNION
1227+
SELECT four FROM tenk1
1228+
ORDER BY 1;
1229+
QUERY PLAN
1230+
--------------------------------------
1231+
Sort
1232+
Output: tenk1.four
1233+
Sort Key: tenk1.four
1234+
-> HashAggregate
1235+
Output: tenk1.four
1236+
Group Key: tenk1.four
1237+
-> Seq Scan on public.tenk1
1238+
Output: tenk1.four
1239+
(8 rows)
1240+
1241+
-- Validate that the results of the above are correct
1242+
SELECT two FROM tenk1 WHERE 1=2
1243+
UNION
1244+
SELECT four FROM tenk1
1245+
ORDER BY 1;
1246+
two
1247+
-----
1248+
0
1249+
1
1250+
2
1251+
3
1252+
(4 rows)
1253+
1254+
-- All UNION inputs are proven empty. Ensure the planner provides a
1255+
-- const-false Result node
1256+
EXPLAIN (COSTS OFF, VERBOSE)
1257+
SELECT two FROM tenk1 WHERE 1=2
1258+
UNION
1259+
SELECT four FROM tenk1 WHERE 1=2
1260+
UNION
1261+
SELECT ten FROM tenk1 WHERE 1=2;
1262+
QUERY PLAN
1263+
--------------------------------
1264+
Result
1265+
Output: unnamed_subquery.two
1266+
Replaces: Aggregate
1267+
One-Time Filter: false
1268+
(4 rows)
1269+
12191270
-- Test constraint exclusion of UNION ALL subqueries
12201271
explain (costs off)
12211272
SELECT * FROM

src/test/regress/sql/union.sql

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,33 @@ drop table events_child, events, other_events;
459459

460460
reset enable_indexonlyscan;
461461

462+
--
463+
-- Test handling of UNION with provably empty inputs
464+
--
465+
466+
-- Ensure the empty UNION input is pruned and de-duplication is done for the
467+
-- remaining relation.
468+
EXPLAIN (COSTS OFF, VERBOSE)
469+
SELECT two FROM tenk1 WHERE 1=2
470+
UNION
471+
SELECT four FROM tenk1
472+
ORDER BY 1;
473+
474+
-- Validate that the results of the above are correct
475+
SELECT two FROM tenk1 WHERE 1=2
476+
UNION
477+
SELECT four FROM tenk1
478+
ORDER BY 1;
479+
480+
-- All UNION inputs are proven empty. Ensure the planner provides a
481+
-- const-false Result node
482+
EXPLAIN (COSTS OFF, VERBOSE)
483+
SELECT two FROM tenk1 WHERE 1=2
484+
UNION
485+
SELECT four FROM tenk1 WHERE 1=2
486+
UNION
487+
SELECT ten FROM tenk1 WHERE 1=2;
488+
462489
-- Test constraint exclusion of UNION ALL subqueries
463490
explain (costs off)
464491
SELECT * FROM

0 commit comments

Comments
 (0)