From 6f06b14b9f0649eb6f6e813ed95b09cc114fd019 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Sat, 8 Nov 2025 18:16:30 +0100 Subject: [PATCH 1/2] Simplify planning of starjoin queries --- patch/create-1.sql | 18 + patch/create-2.sql | 11 + patch/create-3.sql | 32 + patch/run.sh | 23 + patch/select-1-inner.sql | 10 + patch/select-1-outer.sql | 10 + patch/select-2.sql | 9 + patch/select-3-inner.sql | 15 + patch/select-3-outer.sql | 15 + src/backend/optimizer/path/joinrels.c | 3 +- src/backend/optimizer/plan/analyzejoins.c | 546 ++++++++++++++++++ src/backend/optimizer/plan/planmain.c | 7 + src/backend/utils/misc/guc_parameters.dat | 7 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/optimizer/paths.h | 1 + src/include/optimizer/planmain.h | 2 + src/test/regress/expected/sysviews.out | 3 +- 17 files changed, 710 insertions(+), 3 deletions(-) create mode 100644 patch/create-1.sql create mode 100644 patch/create-2.sql create mode 100644 patch/create-3.sql create mode 100644 patch/run.sh create mode 100644 patch/select-1-inner.sql create mode 100644 patch/select-1-outer.sql create mode 100644 patch/select-2.sql create mode 100644 patch/select-3-inner.sql create mode 100644 patch/select-3-outer.sql diff --git a/patch/create-1.sql b/patch/create-1.sql new file mode 100644 index 000000000000..8df04fd0677d --- /dev/null +++ b/patch/create-1.sql @@ -0,0 +1,18 @@ +create table dim1 (id int primary key, val1 text); +create table dim2 (id int primary key, val2 text); +create table dim3 (id int primary key, val3 text); +create table dim4 (id int primary key, val4 text); +create table dim5 (id int primary key, val5 text); +create table dim6 (id int primary key, val6 text); +create table dim7 (id int primary key, val7 text); + +create table t (id serial primary key, + id1 int references dim1(id), + id2 int references dim2(id), + id3 int references dim3(id), + id4 int references dim4(id), + id5 int references dim5(id), + id6 int references dim6(id), + id7 int references dim7(id)); + +vacuum analyze; diff --git a/patch/create-2.sql b/patch/create-2.sql new file mode 100644 index 000000000000..cdf612dde8f6 --- /dev/null +++ b/patch/create-2.sql @@ -0,0 +1,11 @@ +create table t (id serial primary key, a text); + +create table dim1 (id1 int primary key references t(id), val1 text); +create table dim2 (id2 int primary key references t(id), val2 text); +create table dim3 (id3 int primary key references t(id), val3 text); +create table dim4 (id4 int primary key references t(id), val4 text); +create table dim5 (id5 int primary key references t(id), val5 text); +create table dim6 (id6 int primary key references t(id), val6 text); +create table dim7 (id7 int primary key references t(id), val7 text); + +vacuum analyze; diff --git a/patch/create-3.sql b/patch/create-3.sql new file mode 100644 index 000000000000..bd086c137cef --- /dev/null +++ b/patch/create-3.sql @@ -0,0 +1,32 @@ +create table dim1_1 (id int primary key, val2 text); +create table dim1_2 (id int primary key, val3 text); +create table dim1 (id int primary key, + id1_1 int references dim1_1(id), + id1_2 int references dim1_2(id), + val1 text); +create table dim2_1 (id int primary key, val5 text); +create table dim2_2 (id int primary key, val6 text); +create table dim2 (id int primary key, + id2_1 int references dim2_1(id), + id2_2 int references dim2_2(id), + val4 text); +create table dim3_1 (id int primary key, val5 text); +create table dim3_2 (id int primary key, val6 text); +create table dim3 (id int primary key, + id3_1 int references dim3_1(id), + id3_2 int references dim3_2(id), + val4 text); +create table dim4_1 (id int primary key, val5 text); +create table dim4_2 (id int primary key, val6 text); +create table dim4 (id int primary key, + id4_1 int references dim4_1(id), + id4_2 int references dim4_2(id), + val4 text); + +create table t (id serial primary key, + id1 int references dim1(id), + id2 int references dim2(id), + id3 int references dim3(id), + id4 int references dim4(id)); + +vacuum analyze; diff --git a/patch/run.sh b/patch/run.sh new file mode 100644 index 000000000000..c67cd848a4ac --- /dev/null +++ b/patch/run.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +for t in 1 3; do + + dropdb --if-exists test + createdb test + psql test < create-$t.sql > /dev/null 2>&1 + + for m in inner outer; do + + for o in off on; do + + sed "s/OPT/$o/" select-$t$m.sql > script.sql + + tps=$(pgbench -n -f script.sql -T 10 test | grep 'tps = ' | awk '{print $3}') + + echo $t $m $o $tps + + done + + done + +done diff --git a/patch/select-1-inner.sql b/patch/select-1-inner.sql new file mode 100644 index 000000000000..d3bd477eea37 --- /dev/null +++ b/patch/select-1-inner.sql @@ -0,0 +1,10 @@ +--set join_collapse_limit = 1; +set enable_starjoin_join_search = OPT; +select * from t + join dim1 on (dim1.id = id1) + join dim2 on (dim2.id = id2) + join dim3 on (dim3.id = id3) + join dim4 on (dim4.id = id4) + join dim5 on (dim5.id = id5) + join dim6 on (dim6.id = id6) + join dim7 on (dim7.id = id7); diff --git a/patch/select-1-outer.sql b/patch/select-1-outer.sql new file mode 100644 index 000000000000..249cde4b7682 --- /dev/null +++ b/patch/select-1-outer.sql @@ -0,0 +1,10 @@ +--set join_collapse_limit = 1; +set enable_starjoin_join_search = OPT; +select * from t + left join dim1 on (dim1.id = id1) + left join dim2 on (dim2.id = id2) + left join dim3 on (dim3.id = id3) + left join dim4 on (dim4.id = id4) + left join dim5 on (dim5.id = id5) + left join dim6 on (dim6.id = id6) + left join dim7 on (dim7.id = id7); diff --git a/patch/select-2.sql b/patch/select-2.sql new file mode 100644 index 000000000000..4e1d2a7b0e72 --- /dev/null +++ b/patch/select-2.sql @@ -0,0 +1,9 @@ +-- set join_collapse_limit = 1; +select * from t + left join dim1 on (id = id1) + left join dim2 on (id = id2) + left join dim3 on (id = id3) + left join dim4 on (id = id4) + left join dim5 on (id = id5) + left join dim6 on (id = id6) + left join dim7 on (id = id7); diff --git a/patch/select-3-inner.sql b/patch/select-3-inner.sql new file mode 100644 index 000000000000..f661c0c2fb53 --- /dev/null +++ b/patch/select-3-inner.sql @@ -0,0 +1,15 @@ +--set join_collapse_limit = 1; +set enable_starjoin_join_search = OPT; +select * from t + join dim1 on (dim1.id = id1) + join dim2 on (dim2.id = id2) + join dim3 on (dim3.id = id3) + join dim4 on (dim4.id = id4) + join dim1_1 on (id1_1 = dim1_1.id) + join dim1_2 on (id1_2 = dim1_2.id) + join dim2_1 on (id2_1 = dim2_1.id) + join dim2_2 on (id2_2 = dim2_2.id) + join dim3_1 on (id3_1 = dim3_1.id) + join dim3_2 on (id3_2 = dim3_2.id) + join dim4_1 on (id4_1 = dim4_1.id) + join dim4_2 on (id4_2 = dim4_2.id); diff --git a/patch/select-3-outer.sql b/patch/select-3-outer.sql new file mode 100644 index 000000000000..5ca549f02acf --- /dev/null +++ b/patch/select-3-outer.sql @@ -0,0 +1,15 @@ +--set join_collapse_limit = 1; +set enable_starjoin_join_search = OPT; +select * from t + left join dim1 on (dim1.id = id1) + left join dim2 on (dim2.id = id2) + left join dim3 on (dim3.id = id3) + left join dim4 on (dim4.id = id4) + left join dim1_1 on (id1_1 = dim1_1.id) + left join dim1_2 on (id1_2 = dim1_2.id) + left join dim2_1 on (id2_1 = dim2_1.id) + left join dim2_2 on (id2_2 = dim2_2.id) + left join dim3_1 on (id3_1 = dim3_1.id) + left join dim3_2 on (id3_2 = dim3_2.id) + left join dim4_1 on (id4_1 = dim4_1.id) + left join dim4_2 on (id4_2 = dim4_2.id); diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 5d1fc3899dae..17332350da59 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -32,7 +32,6 @@ static void make_rels_by_clause_joins(PlannerInfo *root, static void make_rels_by_clauseless_joins(PlannerInfo *root, RelOptInfo *old_rel, List *other_rels); -static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel); static bool restriction_is_constant_false(List *restrictlist, RelOptInfo *joinrel, @@ -1363,7 +1362,7 @@ have_join_order_restriction(PlannerInfo *root, * say "true" incorrectly. (Therefore, we don't bother with the relatively * expensive has_legal_joinclause test.) */ -static bool +bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel) { ListCell *l; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index e592e1ac3d12..79a7f0c8608e 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -51,6 +51,7 @@ typedef struct } SelfJoinCandidate; bool enable_self_join_elimination; +bool enable_starjoin_join_search; /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); @@ -2511,3 +2512,548 @@ remove_useless_self_joins(PlannerInfo *root, List *joinlist) return joinlist; } + +/* + * starjoin_foreign_key_matched_by_clauses + * Determines if the foreign key is matched by join clauses. + * + * Each foreign key attribute must be matched by a join clause. Either by + * a simple RestrictInfo, or by an equivalence class (if the relation has + * eclass joins). + * + * Returns true if the foreign key is matched, false otherwise. + * + * XXX Do we need to check that the FK side of the join (i.e. the fact table) + * has the columns referenced as NOT NULL? Otherwise we could have a FK join + * that reduces the cardinality, which is one of the arguments why it's fine + * to move the join (that it doesn't change the cardinality). But if the join + * is LEFT JOIN, this should be fine too - but do we get here with LEFT JOINs? + */ +static bool +starjoin_foreign_key_matched_by_clauses(PlannerInfo *root, RelOptInfo *rel, + ForeignKeyOptInfo *fkinfo) +{ + /* make sure each FK attribute has at least one matching clause */ + for (int i = 0; i < fkinfo->nkeys; i++) + { + /* matching rinfo clause or equivalance class */ + if ((fkinfo->rinfos[i] != NULL) || (fkinfo->eclass[i] != NULL)) + { + /* + * XXX Do we need to inspect the rinfo/eclass further? I don't + * think that's really necessary, we've already inspected it when + * building ForeignKeyOptInfo. So existence should be enough. + */ + continue; + } + + /* found an attribute without a join clause, ignore this FK */ + return false; + } + + /* all FK attributes had a matching join clause */ + return true; +} + +/* + * starjoin_clauses_matched_by_foreign_key + * Are all join clauses (rinfo or eclass) matched by the foreign key? + * + * Check if all join clauses for the current relation match the foreign key. + * Returns true if that's the case, false otherwise. + * + * XXX Could we do the check in both directions at once? Essentially, merge + * this with starjoin_foreign_key_matched_by_clauses, in a way that would + * make it cheaper than two separate functions? I don't think so. + */ +static bool +starjoin_clauses_matched_by_foreign_key(PlannerInfo *root, RelOptInfo *rel, + ForeignKeyOptInfo *fkinfo) +{ + ListCell *lc; + int j; + + /* Inspect all simple (RestrictInfo) clauses. */ + foreach(lc, rel->joininfo) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + bool matched = false; /* matched to the foreign key */ + + /* Is there a FK attribute referencing this rinfo? */ + for (int i = 0; i < fkinfo->nkeys; i++) + { + if (list_member_ptr(fkinfo->rinfos[i], rinfo)) + { + matched = true; + break; + } + } + + /* found a clause not matched for FK */ + if (!matched) + return false; + } + + /* If the rel does not have eclass joins, we're done. */ + if (!rel->has_eclass_joins) + return true; + + /* + * There should be joins with clauses in equivalence classes. Walk all the + * eclasses, and try to match them to the foreign key. + */ + j = -1; + while ((j = bms_next_member(rel->eclass_indexes, j)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, j); + bool matched = false; /* matched to the foreign key */ + + /* + * We're interested in joins, and const or single-member EC won't + * generate join clauses. So skip them now, before walking the FK. + * (The latter test covers the volatile case too.) + * + * XXX Taken from generate_implied_equalities_for_column(). + */ + if (ec->ec_has_const || list_length(ec->ec_members) <= 1) + continue; + + /* + * A join EC needs to have multiple relids, so ignore cases with only + * a single relid. + * + * XXX Could there be 0 relids? I don't think so. Or do we need to + * check if (num != 2) instead? + * + * XXX If this simple check is not enough and we need to inspect ut we + * need to inspect the members, we should probably leave this after + * matching to FK attributes, if it doesn't match. The FK check is + * likely cheaper + */ + if (bms_num_members(ec->ec_relids) == 1) + continue; + + /* Is there a FK attribute referencing this EC? */ + for (int i = 0; i < fkinfo->nkeys; i++) + { + if (fkinfo->eclass[i] == ec) + { + matched = true; + break; + } + } + + /* found a join eclass not matched by the FK */ + if (!matched) + return false; + } + + /* all the join clauses seem to match (both rinfo and eclass) */ + return true; +} + +/* + * starjoin_match_to_foreign_key + * Determine if the relation is joined through a FK constraint. + * + * The relation needs to be joined through a FK constraint, with it being on + * the PK side of the join. The FK must be matched completely (no columns + * missing in join clauses), and there must be no other join clauses. + * + * We already have a list of relevant foreign keys, and we use that info + * for selectivity estimation in get_foreign_key_join_selectivity(). And + * we're actually doing something quite similar here. + * + * XXX Do we need to worry about the join type, e.g. inner/outer joins, + * semi/anti? get_foreign_key_join_selectivity() does care about it, and + * ignores some of those cases. Maybe we should too? + */ +static bool +starjoin_match_to_foreign_key(PlannerInfo *root, RelOptInfo *rel) +{ + ListCell *lc; + + /* + * Check if there's a FK matching the join clauses. + * + * The match needs to be perfect from both sides. All join clauses need to + * match the foreign key, and the whole foreign key needs to have a + * matching clause. There must not be any extra join clauses. + */ + foreach(lc, root->fkey_list) + { + ForeignKeyOptInfo *fkinfo = (ForeignKeyOptInfo *) lfirst(lc); + + /* + * The foreign key is not relevant unless it references the rel on the + * PK side. + * + * XXX If we want to support the "inverse" join (with smaller tables + * referencing the main table) in the future, we'll probably need to + * allow con_relid too. + */ + if (fkinfo->ref_relid != rel->relid) + continue; + + /* + * First, check that each FK attribute has a matching join clause. + */ + if (!starjoin_foreign_key_matched_by_clauses(root, rel, fkinfo)) + continue; + + /* + * Each FK attribute has a join clause matching it. What about the + * opposite direction? Do all join clauses match this FK? We need to + * check both joininfo and equivalence classes (if the rel has + * has_eclass_joins=true). + */ + if (!starjoin_clauses_matched_by_foreign_key(root, rel, fkinfo)) + continue; + + /* matched in both directions */ + return true; + } + + return false; +} + +/* + * starjoin_is_dimension + * Determine if a range table entry is a dimension in a starjoin. + * + * To be considered a dimension for the simplified join order search, the + * relation must be joined in a way that does not affect the cardinality + * of the join. And must be possible to postpone the join. + * + * We ensure that by checking a couple things: + * + * 1) The join clause(s) match a single FK. There has to be a single FK, + * with each key covered by a join clause. There must be no extra join + * clauses (which means the rel can be joined to a single other rel). + * + * 2) The FK side (in the fact table) should be marked as NOT NULL. + * + * 3) There must be no additional restrictions (WHERE conditions etc.). + * + * 4) The relation must not have a join order restriction, i.e. it has to + * be possible to "postpone" the join. + * + * These three rules guarantee the join does not alter the cardinality, as + * each row in the fact table has exactly one match in the dimension. + * + * XXX For inner joins this works fine. For outer we may need to be smarter + * as outer joins imply ordering restrictions, violating (4) and so don't + * allow the simplified planning. I believe it should be possible to still + * postpone the join, if we're dealing with the last relation in the list + * (because then we're not really changing the order). Or maybe it needs + * to check varnullingrels? In any case, we need to keep this cheap, + * cheaper than the "full" join planning. + * + * XXX Could we relax (3) in some way, to allow filters on dimensions? The + * joins are independent (there are no clauses between dimensions), so the + * per-dimension selectivity could measures how much it "shrinks" the join. + * We could order the dimensions so that the most selective (close to 0.0) + * are joined first, to reduce the join cardinality soon. This could be + * extended with the "cost" of evaluating the join clause, to make the cost + * model more correct. + * + * In snowflake joins this would be more complicated, because dimensions + * can join to other (child) dimensions, but even there the clauses don't + * go between branches. So even there it should be possible. + * + * XXX With LEFT joins we could even allow baserestrictinfo on the rel, as + * that won't affect the cardinality (if the filter happens before the + * join, of course). But such queries are likely rather rare, because why + * would you filter before LEFT join? + * + * XXX Right now this uses has_join_restriction(), but maybe that's a bit + * too heavy-handed and have_join_order_restriction would be better? But + * that might be too early? Also, have_join_order_restriction is already + * exposed in paths.h, not static. Could have_relevant_joinclause help? + * + * XXX There's also join_is_legal() to check legality of joins, with + * LEFT/RIGHT joins, and IN/EXISTS clauses (see "Join Tree Construction" + * in README, circa line 188). It also looks-up the SpecialJoinInfo for + * the join. But it's probably too early for join_is_legal(), we don't + * have RelOptInfos for joins yet, just base relations. + * + * XXX Is it just the "skipping" that can break the order restrictions? + * Adding something to the list of dimensions keeps the order (at least + * with respect to the rels after it). + */ +static bool +starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) +{ + Index rti = rtr->rtindex; + RangeTblEntry *rte = root->simple_rte_array[rti]; + RelOptInfo *rel = root->simple_rel_array[rti]; + + /* only plain relations can be dimensions (we need FK/PK join) */ + if ((rte->rtekind != RTE_RELATION) || + (rel->reloptkind != RELOPT_BASEREL)) + return false; + + /* + * Does it have any conditions/restrictions that may affect the number of + * rows matched? If yes, don't treat as dimension. + * + * Dimensions in a starjoin may have restrictions, but that means it'll + * change cardinality of the joins (reduce it), so it may be better to + * join it early. We leave it to the regular join order planning. The + * expectation is that most dimensions won't have extra restrictions. + * + * XXX Should we check some other fields, like lateral references, and so + * on? Or is that unnecessary? What about eclasses? + */ + if (rel->baserestrictinfo != NIL) + return false; + + /* + * Ignore relations with join restrictions. This requires the complete + * join order search, this cheap heuristics is not enough. + * + * XXX This blocks the simplified planning for LEFT (or OUTER) joins, + * because outer joins imply restrictions. + */ + if (has_join_restriction(root, rel)) + return false; + + /* + * See if the join clause matches a foreign key. It should match a single + * relation on the other side, and the dimension should be on the PK side. + */ + if (!starjoin_match_to_foreign_key(root, rel)) + return false; + + /* + * XXX Maybe some additional checks here ... + */ + + return true; +} + +/* + * starjoin_adjust_joins + * Adjust the jointree for starjoins, to simplify the join order search. + * + * Try to simplify the join search problem for starjoin-like joins, with + * joins over FK relationships. The dimensions can be joined in almost any + * order, which is about the worst case for the standard join order search, + * and can be close to factorial complexity. But all the different orders + * are equivalent, so all this work is wasted. So the simplified planning + * identifies dimensions, and joins them all at the end of each group (as + * determined by the join_collapse_limit earlier). + * + * Note: The definition of a dimension is a bit vague. See the comment at + * starjoin_is_dimension() for our definition. + * + * The join search for starjoin queries is surprisingly expensive, because + * there are very few join order restrictions. Consider a starjoin query + * + * SELECT * FROM f + * JOIN d1 ON (f.id1 = d1.id) + * JOIN d2 ON (f.id2 = d2.id) + * ... + * JOIN d9 ON (f.id9 = d9.id) + * + * There are no clauses between the dimension tables (d#), which means those + * tables can be joined in almost arbitrary order. This means the standard + * join_order_search() would explore a N! possible join orders. It is not + * that bad in practice, as we split the problem by from_collapse_limit into + * a sequence of smaller problems, but even for the default of 8 relations + * it's quite expensive. This can be easily demonstrated by setting + * join_collapse_limit=1 for starjoin queries. + * + * We can significantly simplify the join order search for this type of + * queries, without too much risk of picking a much worse plans. It is + * however a trade off between how expensive we allow this to be, and how + * good the decisions will be. This can help only starjoins with multiple + * dimension tables, and we don't want to harm planning of other queries, + * so the basic "query shape" detection needs to be very cheap. + * + * If a perfect check is impossible or too expensive, it's better to end + * up with a cheap false negative (i.e. and not use the optimization), + * rather than risk regressions in other cases. Otherwise we might just + * as well use the regular join order search. + * + * The simplified join order search relies leverages the fact that joins + * of dimensions do not change the cardinality of the join, which means + * the relative order of those joins does not matter. All orders perform + * the same - we can pick an arbitrary of those orders, and "hardcode" + * it in the join tree before passing it to join_order_search(). + * + * The join order is "hardcoded" by modifying the jointree, undoing some + * of the work performed by deconstruct_jointree earlier. That decomposed + * the original jointree into smaller "problems" depending on join type, + * join_collapse_limit and other details. We inspect those smaller lists, + * and selectively split them into smaller problems to force a join order. + * This may effectively undo some of the merging, which tries to construct + * problems with up to join_collapse_limit relations. + * + * For example, imagine a join problem with 8 rels - one fact table and + * then 7 dimensions, which we can represent a joinlist with 8 elements. + * + * (D7, D6, D5, D4, D3, D2, D1, F) + * + * Assuming all those joins meet the requirements (have a matching FK, + * don't affect the join cardinality, ...), then we can split this into + * + * (D7, (D6, (D5, (D4, (D3, (D2, (D1, F))))))) + * + * which is a nested joinlist, with only two elements on each level. That + * means there's no need for expensive join order search, there's only + * one way to join the relations (two, if we consider the relations may + * switch sides). + * + * The joinlist may already be nested, with multiple smaller join problems, + * similar to the example. Those are processed independently. We don't try + * to merge problems to build join_collapse_limit problems again. This is + * partially to keep it cheap/simple, but also so not change behavior for + * cases when people use join_collapse_limit to force a particular shape. + * + * Note: Ne never move relations between the "smaller problems", so this + * restricts what fraction of the join space we explore. So reducing the + * join_collapse_limit would improve the starjoin planning, but it may + * produce worse plans for other queries. + * + * The query may involve joins to additional (non-dimension) tables, and + * those may alter cardinality in either direction. In principle, it'd be + * best to first perform all the joins that reduce join size, then join all + * the dimensions, and finally perform joins that may increase the join + * size. Imagine a joinlist: + * + * (D1, D2, A, B, F) + * + * with fact F, dimensions D1 and D2, and non-dimensions A and B. If A + * increases cardinality, and B does not (or even reduces it), we could + * use this join tree: + * + * (A, (D2, (D1, (B, F)))) + * + * For now we simply leave the dimension joins at the end, assuming + * that the earlier joins did not inflate the join too much. + * + * XXX Joins with cardinality increase don't seem very common, at least in + * regular starjoin queries. But maybe we could simply check if there are + * any such joins and disable this optimization? Is there a cheap way to + * identify when a join increases cardinality? I suppose we would need to + * calculate selectivity for join clauses? That might be too expensive, + * which goes against keeping this join search optimization very cheap. + * + * XXX A possible improvement would be to allow snowflake joins, i.e. + * queries with "recursive" dimensions. That would require a more complex + * logic, as a dimension would be allowed to join to other rels, as long + * as those are dimensions too. We'd need to be careful about preventing + * cycles, and about the order in which we join them. + * + * XXX Maybe we could invent a "join item" representing the dimensions, + * and pass it to join_order_search()? It'd expand the item into individual + * joins, and do the regular cardinality estimations etc. But it would + * only consider a single join order of the dimension. It would be trivial + * to disable the optimization if needed, I think - don't collapse the + * dimensions into the "group" join item. It would require changes to + * the generic join search, to be aware of the new item type. + */ +List * +starjoin_adjust_joins(PlannerInfo *root, List *joinlist) +{ + ListCell *lc; + List *newlist = NIL; + List *dimensions = NIL; + int nlist = list_length(joinlist); + + /* Do nothing if starjoin optimization not enabled. */ + if (!enable_starjoin_join_search) + return joinlist; + + /* + * Do nothing if starjoin optimization not applicable. + * + * XXX It might seems we can skip this for lists with <= 2 items, but that + * does not work, the elements may be nested list, and we need to descend + * into those. So do what remove_useless_self_joins() does, and only bail + * out for the simplest single-relation case (i.e. no joins). + */ + if (joinlist == NIL || + (nlist == 1 && !IsA(linitial(joinlist), List))) + return joinlist; + + /* + * Process the current join problem - split the elements into dimensions + * and non-dimensions. If there are dimensions, add them back at the end, + * as small single-rel joins. + * + * The list may contain various types of elements. It may contain a list, + * which means it's an independent join search problem and we need to + * process it recursively. Or it may be RangeTblRef, in which case we need + * to check if it's a dimension. Other types of elements are just added + * back to the list as-is. + * + * XXX I think we need to be careful to keep the order of the list (for + * the non-dimension entries). The join_search_one_level() relies on that + * when handling join order restrictions. + * + * XXX It might be better to only create a new list if needed, i.e. once + * we find the first dimension. So that non-starjoin queries don't pay for + * something they don't need. A mutable iterator might be a way, but I'm + * not sure how expensive this really is. + */ + foreach(lc, joinlist) + { + Node *item = (Node *) lfirst(lc); + + /* a separate join search problem, handle it recursively */ + if (IsA(item, List)) + { + newlist = lappend(newlist, + starjoin_adjust_joins(root, (List *) item)); + continue; + } + + /* + * If it's not a List, it has to be a RangeTblRef - jinlists can't + * contain any other elements (see make_rel_from_joinlist). + */ + Assert(IsA(item, RangeTblRef)); + + /* + * An entry representing a baserel. If it's a dimension, save it in a + * separate list, and we'll add it at the "top" of the join at the + * end. Otherwise add it to the list just like other elements. + * + * We do this only when the joinlist has at least 3 items. For fewer + * rels the optimization does not matter, there's only a single join + * order anyway. That only skips the optimization on this level - we + * still do the recursion, and that might hit a larger join problem. + * + * XXX We might have a new GUC to customize the cutoff limit, but for + * now it seems good enough to do it whenever applicable. If we find + * it's not worth it for less than N rels, we can add it later. + */ + if ((nlist >= 3) && + starjoin_is_dimension(root, (RangeTblRef *) item)) + { + dimensions = lappend(dimensions, item); + continue; + } + + /* not a dimension, add it to the list directly */ + newlist = lappend(newlist, item); + } + + /* + * If we found some dimensions, add them to the join tree one by one. The + * exact order does not matter, so we add them in the order we found them + * in the original list. + * + * We need to add them by creating smaller 2-element lists, with the rest + * of the list being a sub-problem and then adding the dimension table. + * This is how we force the desired join order. + */ + foreach(lc, dimensions) + { + newlist = list_make2(newlist, lfirst(lc)); + } + + return newlist; +} diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index eefc486a5666..58278dbc94f0 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -291,6 +291,13 @@ query_planner(PlannerInfo *root, */ distribute_row_identity_vars(root); + /* + * Try to simplify the join search problem for starjoin-like joins. This + * step relies on info about FK relationships, so we can't do it till + * after match_foreign_keys_to_quals(). + */ + joinlist = starjoin_adjust_joins(root, joinlist); + /* * Ready to do the primary planning. */ diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 1128167c0251..d5b976b2e395 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -968,6 +968,13 @@ boot_val => 'true', }, +{ name => 'enable_starjoin_join_search', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD', + short_desc => 'Enables simplified join order planning for starjoins.', + flags => 'GUC_EXPLAIN', + variable => 'enable_starjoin_join_search', + boot_val => 'true', +}, + { name => 'enable_tidscan', type => 'bool', context => 'PGC_USERSET', group => 'QUERY_TUNING_METHOD', short_desc => 'Enables the planner\'s use of TID scan plans.', flags => 'GUC_EXPLAIN', diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 6268c1752984..1aa4a9bc1e47 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -424,6 +424,7 @@ #enable_presorted_aggregate = on #enable_seqscan = on #enable_sort = on +#enable_starjoin_join_search = on #enable_tidscan = on #enable_group_by_reordering = on #enable_distinct_reordering = on diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index f6a62df0b43d..8d3c44854cd3 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -108,6 +108,7 @@ extern RelOptInfo *make_join_rel(PlannerInfo *root, extern Relids add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, SpecialJoinInfo *sjinfo, List **pushed_down_joins); +extern bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel); extern bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern void mark_dummy_rel(RelOptInfo *rel); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index 00addf159925..c30ac3a5754a 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -21,6 +21,7 @@ #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1 extern PGDLLIMPORT double cursor_tuple_fraction; extern PGDLLIMPORT bool enable_self_join_elimination; +extern PGDLLIMPORT bool enable_starjoin_join_search; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); @@ -120,6 +121,7 @@ extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids, JoinType jointype, List *restrictlist, bool force_cache, List **extra_clauses); extern List *remove_useless_self_joins(PlannerInfo *root, List *joinlist); +extern List *starjoin_adjust_joins(PlannerInfo *root, List *joinlist); /* * prototypes for plan/setrefs.c diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 3b37fafa65b9..e9762da99375 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -172,8 +172,9 @@ select name, setting from pg_settings where name like 'enable%'; enable_self_join_elimination | on enable_seqscan | on enable_sort | on + enable_starjoin_join_search | on enable_tidscan | on -(25 rows) +(26 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. From 180aae05fb0d2822fe6ede5156103762f3bc545d Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 11 Nov 2025 23:12:39 +0100 Subject: [PATCH 2/2] Allow dimensions with some join restrictions --- src/backend/optimizer/plan/analyzejoins.c | 154 +++++++++++++++++----- 1 file changed, 119 insertions(+), 35 deletions(-) diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index 79a7f0c8608e..bc19c2b537cd 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -2782,7 +2782,8 @@ starjoin_match_to_foreign_key(PlannerInfo *root, RelOptInfo *rel) * with respect to the rels after it). */ static bool -starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) +starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr, + bool allow_restrictions) { Index rti = rtr->rtindex; RangeTblEntry *rte = root->simple_rte_array[rti]; @@ -2815,7 +2816,7 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) * XXX This blocks the simplified planning for LEFT (or OUTER) joins, * because outer joins imply restrictions. */ - if (has_join_restriction(root, rel)) + if (!allow_restrictions && has_join_restriction(root, rel)) return false; /* @@ -2953,6 +2954,28 @@ starjoin_is_dimension(PlannerInfo *root, RangeTblRef *rtr) * to disable the optimization if needed, I think - don't collapse the * dimensions into the "group" join item. It would require changes to * the generic join search, to be aware of the new item type. + * + * The search for dimensions may perform multiple passes over the list, to + * allow treating some rels with restrictions as dimensions. Relations + * without restrictions can be moved to an arbitrary place in the join + * tree. We leverage that by moving it to the list of dimensions, which + * may skip over various other relations. + * + * Relations with join order do not allow these arbitrary moves. But we can + * allow treating them dimensions in some cases. A join restriction does not + * imply we can't move the relation at all, otherwise we wouldn't be allowed + * to move any relations when there's a single relation with a restriction. + * It means we can't change the relative order of restricted relations. + * + * This means we can treat a relation with a restriction as a dimension, + * as long as it's the last in the current joinlist (after some relations + * were already moved to list of dimensions). + * + * To do this we walk the joinlist multiple times, and in each iteration + * we try to identify as many dimensions as possible. We walk the list in + * reverse, and we add dimensions to the beginning of the list. This way + * we preserve the original syntactic join order. If we find no dimensions + * in a loop, we're done. */ List * starjoin_adjust_joins(PlannerInfo *root, List *joinlist) @@ -2961,6 +2984,8 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) List *newlist = NIL; List *dimensions = NIL; int nlist = list_length(joinlist); + int nitems; + Node **items; /* Do nothing if starjoin optimization not enabled. */ if (!enable_starjoin_join_search) @@ -2978,6 +3003,15 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) (nlist == 1 && !IsA(linitial(joinlist), List))) return joinlist; + /* expand the list into an array, to make backwards processing easier */ + items = palloc_array(Node *, nlist); + + nitems = 0; + foreach(lc, joinlist) + { + items[nitems++] = (Node *) lfirst(lc); + } + /* * Process the current join problem - split the elements into dimensions * and non-dimensions. If there are dimensions, add them back at the end, @@ -2989,6 +3023,9 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) * to check if it's a dimension. Other types of elements are just added * back to the list as-is. * + * Walk the list backwards, to preserve syntactic join order. This allows + * tracking "last" relation. If we find no dimension, we're done. + * * XXX I think we need to be careful to keep the order of the list (for * the non-dimension entries). The join_search_one_level() relies on that * when handling join order restrictions. @@ -2998,47 +3035,94 @@ starjoin_adjust_joins(PlannerInfo *root, List *joinlist) * something they don't need. A mutable iterator might be a way, but I'm * not sure how expensive this really is. */ - foreach(lc, joinlist) + for (;;) { - Node *item = (Node *) lfirst(lc); + bool found = false; /* found at least one dimension */ + bool last = true; /* is this the current last rel */ - /* a separate join search problem, handle it recursively */ - if (IsA(item, List)) + for (int i = (nitems - 1); i >= 0; i--) { - newlist = lappend(newlist, - starjoin_adjust_joins(root, (List *) item)); - continue; + Node *item = items[i]; + + /* skip empty items (already moved to dimensions) */ + if (item == NULL) + continue; + + /* do nothing about join subproblems, leave them in place */ + if (IsA(item, List)) + { + /* XXX do we need to disable "false" for join subtree? */ + last = false; + continue; + } + + /* + * If it's not a List, it has to be a RangeTblRef - jinlists can't + * contain any other elements (see make_rel_from_joinlist). + */ + Assert(IsA(item, RangeTblRef)); + + /* + * Is it a dimension? + * + * An entry representing a baserel. If it's a dimension, save it + * in a separate list, and we'll add it at the "top" of the join + * at the end. Otherwise add it to the list just like other + * elements. + * + * We do this only when the joinlist has at least 3 items. For + * fewer rels the optimization does not matter, there's only a + * single join order anyway. That only skips the optimization on + * this level - we still do the recursion, and that might hit a + * larger join problem. + * + * XXX If we decide to treat the rel as a dimension, don't update + * the "last" flag. The next relation will be the last one. + * + * XXX We might have a new GUC to customize the cutoff limit, but + * for now it seems good enough to do it whenever applicable. If + * we find it's not worth it for less than N rels, we can add it + * later. + */ + if ((nlist >= 3) && + starjoin_is_dimension(root, (RangeTblRef *) item, last)) + { + /* add it to the beginning of the list */ + dimensions = lcons(item, dimensions); + items[i] = NULL; + found = true; + continue; + } + + /* + * Not a dimension. Leave it in the array, but remember the next + * item (backwards) is no longer the last one. + * + * XXX Maybe we don't need to reset "last" if the item does not + * have join restrictions? + */ + last = false; } - /* - * If it's not a List, it has to be a RangeTblRef - jinlists can't - * contain any other elements (see make_rel_from_joinlist). - */ - Assert(IsA(item, RangeTblRef)); + /* terminate when a loop finds no dimension */ + if (!found) + break; + } - /* - * An entry representing a baserel. If it's a dimension, save it in a - * separate list, and we'll add it at the "top" of the join at the - * end. Otherwise add it to the list just like other elements. - * - * We do this only when the joinlist has at least 3 items. For fewer - * rels the optimization does not matter, there's only a single join - * order anyway. That only skips the optimization on this level - we - * still do the recursion, and that might hit a larger join problem. - * - * XXX We might have a new GUC to customize the cutoff limit, but for - * now it seems good enough to do it whenever applicable. If we find - * it's not worth it for less than N rels, we can add it later. - */ - if ((nlist >= 3) && - starjoin_is_dimension(root, (RangeTblRef *) item)) - { - dimensions = lappend(dimensions, item); + /* + * Add items remaining in the input array to the newlist. We need to do + * this every time, even without dimensions, because we need to recurse to + * the nested join problems. + */ + for (int i = 0; i < nitems; i++) + { + if (items[i] == NULL) continue; - } - /* not a dimension, add it to the list directly */ - newlist = lappend(newlist, item); + if (IsA(items[i], List)) + items[i] = (Node *) starjoin_adjust_joins(root, (List *) items[i]); + + newlist = lappend(newlist, items[i]); } /*