Skip to content

Commit 8aa8bdf

Browse files
Alena RybakinaCommitfest Bot
authored andcommitted
Enables pull-up of EXISTS subqueries that contain INNER joins, unlocking join reordering and earlier filtering. OUTER joins with outer references are safely excluded to preserve null-preserving semantics.
To achieve this, introduce a mutator that performs a single conservative pass over the subquery jointree and abandons the transformation if the subquery contains volatile quals or OUTER joins with outer references, since hoisting their quals would break null-preserving behavior. OUTER joins without such references are left intact. Add IS NOT NULL guards on hoisted outer Vars so that rows with NULL values, which could never satisfy the hoisted join condition, are filtered out early. Replace the affected subquery quals with true. Author: Alena Rybakina Reviewers: Ranier Vilela <[email protected]>, Peter Petrov <[email protected]>, Ilia Evdokimov <[email protected]>
1 parent e1ac846 commit 8aa8bdf

File tree

4 files changed

+1487
-46
lines changed

4 files changed

+1487
-46
lines changed

src/backend/optimizer/plan/subselect.c

Lines changed: 249 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,6 +1440,237 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14401440
return result;
14411441
}
14421442

1443+
typedef struct HoistJoinQualsContext
1444+
{
1445+
List *outer_clauses; /* collect hoisted clauses */
1446+
Relids observed_nulltest_vars;
1447+
} HoistJoinQualsContext;
1448+
1449+
/*
 * preprocess_quals
 *	  Simplify a qualification and hand it back as an implicit-AND list.
 *
 * The input is constant-folded, canonicalized (as a non-CHECK qual) and then
 * flattened with make_ands_implicit().  The result is declared as Node * but
 * is really a List of conjuncts, which is how the callers consume it.
 *
 * eval_const_expressions() is deliberately called with a NULL PlannerInfo.
 * The quals being simplified belong to a subquery whose rangetable has not
 * been merged into the parent yet, so a non-NULL root could trigger
 * root-dependent transformations (for instance folding a NullTest on a Var)
 * that look things up in the wrong rangetable and risk an out-of-bounds RTE
 * access.  Passing NULL is explicitly permitted by that function's contract.
 */
static Node *
preprocess_quals(Node *node)
{
	Expr	   *simplified;

	simplified = (Expr *) eval_const_expressions(NULL, node);
	simplified = canonicalize_qual(simplified, false);

	return (Node *) make_ands_implicit(simplified);
}
1470+
1471+
static NullTest *
1472+
make_nulltest(Var *var, NullTestType type)
1473+
{
1474+
NullTest *nulltest = makeNode(NullTest);
1475+
nulltest->arg = (Expr *) var;
1476+
nulltest->nulltesttype = type;
1477+
nulltest->argisrow = false;
1478+
nulltest->location = -1;
1479+
1480+
return nulltest;
1481+
}
1482+
1483+
static bool
1484+
simplicity_check_walker(Node *node, void *ctx)
1485+
{
1486+
if (node == NULL)
1487+
{
1488+
return false;
1489+
}
1490+
else if(IsA(node, Var))
1491+
return true;
1492+
else if(IsA(node, Query))
1493+
return query_tree_walker((Query *) node,
1494+
simplicity_check_walker,
1495+
(void*) ctx,
1496+
QTW_EXAMINE_RTES_BEFORE);
1497+
1498+
return expression_tree_walker(node, simplicity_check_walker,
1499+
(void *) ctx);
1500+
}
1501+
1502+
static List *
1503+
generate_not_null_exprs(List *list_expr, Relids *observed_vars)
1504+
{
1505+
ListCell *lc;
1506+
List *result = NIL;
1507+
1508+
foreach(lc, list_expr)
1509+
{
1510+
Node *node = (Node *) lfirst(lc);
1511+
1512+
if (IsA(node, OpExpr))
1513+
{
1514+
Node *larg = get_leftop(node);
1515+
Node *rarg = get_rightop(node);
1516+
1517+
if (IsA(larg, RelabelType))
1518+
larg = (Node *) ((RelabelType *) larg)->arg;
1519+
1520+
if (IsA(rarg, RelabelType))
1521+
rarg = (Node *) ((RelabelType *) rarg)->arg;
1522+
1523+
if(IsA(larg, Var))
1524+
{
1525+
Var *var = (Var *) larg;
1526+
if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
1527+
{
1528+
NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
1529+
result = lappend(result, nulltest);
1530+
*observed_vars = bms_add_member(*observed_vars, var->varno);
1531+
continue;
1532+
}
1533+
}
1534+
1535+
if(IsA(rarg, Var))
1536+
{
1537+
Var *var = (Var *) rarg;
1538+
if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
1539+
{
1540+
NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
1541+
result = lappend(result, nulltest);
1542+
*observed_vars = bms_add_member(*observed_vars, var->varno);
1543+
continue;
1544+
}
1545+
}
1546+
}
1547+
}
1548+
1549+
return result;
1550+
}
1551+
1552+
static Node *
1553+
hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
1554+
{
1555+
if (jtnode == NULL)
1556+
return NULL;
1557+
1558+
if (IsA(jtnode, RangeTblRef))
1559+
return jtnode; /* nothing to change */
1560+
1561+
if (IsA(jtnode, JoinExpr))
1562+
{
1563+
JoinExpr *j = (JoinExpr *) jtnode;
1564+
JoinExpr *newj = makeNode(JoinExpr);
1565+
ListCell *lc;
1566+
List *join_clauses = NIL;
1567+
Node *qual;
1568+
memcpy(newj, j, sizeof(JoinExpr));
1569+
1570+
/* Recurse into join inputs */
1571+
newj->larg = (Node *) hoist_parent_quals_jointree_mutator(j->larg, context);
1572+
newj->rarg = (Node *) hoist_parent_quals_jointree_mutator(j->rarg, context);
1573+
1574+
if(contain_volatile_functions(newj->quals) ||
1575+
newj->larg == NULL ||
1576+
newj->rarg == NULL)
1577+
return NULL;
1578+
1579+
qual = newj->quals;
1580+
qual = preprocess_quals(qual);
1581+
1582+
foreach(lc, (List *) qual)
1583+
{
1584+
Node *node = (Node *) lfirst(lc);
1585+
1586+
if (IsA(node, OpExpr))
1587+
{
1588+
if(simplicity_check_walker(get_leftop(node), NULL) &&
1589+
simplicity_check_walker(get_rightop(node), NULL))
1590+
{
1591+
join_clauses = lappend(join_clauses, node);
1592+
continue;
1593+
}
1594+
}
1595+
context->outer_clauses = lappend(context->outer_clauses, node);
1596+
}
1597+
1598+
/* Only touch INNER JOINs */
1599+
if ((j->jointype != JOIN_LEFT &&
1600+
j->jointype != JOIN_RIGHT &&
1601+
j->jointype != JOIN_FULL)) /* subquery vars */
1602+
{
1603+
List *null_tests;
1604+
1605+
if (join_clauses == NIL) /* subquery vars */
1606+
{
1607+
newj->quals = (Node *) makeBoolConst(true, false);
1608+
}
1609+
else if(join_clauses != NIL && contain_vars_of_level((Node *) join_clauses, 1))
1610+
{
1611+
null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
1612+
context->outer_clauses = list_concat(context->outer_clauses, null_tests);
1613+
context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
1614+
newj->quals = (Node *) makeBoolConst(true, false);
1615+
}
1616+
else
1617+
{
1618+
newj->quals = (Node *) make_ands_explicit(join_clauses);
1619+
}
1620+
}
1621+
else
1622+
{
1623+
if (contain_vars_of_level(j->quals, 1))
1624+
return NULL;
1625+
}
1626+
1627+
return (Node *) newj;
1628+
}
1629+
1630+
if (IsA(jtnode, FromExpr))
1631+
{
1632+
FromExpr *f = (FromExpr *) jtnode;
1633+
FromExpr *newf = makeNode(FromExpr);
1634+
ListCell *lc;
1635+
List *fromlist = NIL;
1636+
1637+
/* Recurse into fromlist */
1638+
memcpy(newf, f, sizeof(FromExpr));
1639+
1640+
/*
1641+
* Process children, if any of their jointree contains Vars of the
1642+
* parent query or quals of their JoinExpr contains volatile functions
1643+
* then exit
1644+
*/
1645+
foreach(lc, newf->fromlist)
1646+
{
1647+
Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
1648+
1649+
if (fnode == NULL)
1650+
return NULL;
1651+
fromlist = lappend(fromlist, fnode);
1652+
}
1653+
1654+
newf->fromlist = fromlist;
1655+
1656+
if(contain_volatile_functions(newf->quals))
1657+
return NULL;
1658+
1659+
if(newf->quals)
1660+
{
1661+
Node *qual = newf->quals;
1662+
/* Quals (WHERE clause) may still contain sublinks etc */
1663+
qual = preprocess_quals(qual);
1664+
context->outer_clauses = list_concat(context->outer_clauses, (List *) qual);
1665+
newf->quals = NULL;
1666+
}
1667+
1668+
return (Node *) newf;
1669+
}
1670+
1671+
return jtnode; /* quiet compiler */
1672+
}
1673+
14431674
/*
14441675
* convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
14451676
*
@@ -1454,12 +1685,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14541685
JoinExpr *result;
14551686
Query *parse = root->parse;
14561687
Query *subselect = (Query *) sublink->subselect;
1457-
Node *whereClause;
14581688
PlannerInfo subroot;
14591689
int rtoffset;
14601690
int varno;
14611691
Relids clause_varnos;
14621692
Relids upper_varnos;
1693+
List *newWhere = NIL;
1694+
HoistJoinQualsContext hjq_context = {NIL, NULL};
14631695

14641696
Assert(sublink->subLinkType == EXISTS_SUBLINK);
14651697

@@ -1489,34 +1721,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14891721
if (!simplify_EXISTS_query(root, subselect))
14901722
return NULL;
14911723

1492-
/*
1493-
* Separate out the WHERE clause. (We could theoretically also remove
1494-
* top-level plain JOIN/ON clauses, but it's probably not worth the
1495-
* trouble.)
1496-
*/
1497-
whereClause = subselect->jointree->quals;
1498-
subselect->jointree->quals = NULL;
1499-
1500-
/*
1501-
* The rest of the sub-select must not refer to any Vars of the parent
1502-
* query. (Vars of higher levels should be okay, though.)
1503-
*/
1504-
if (contain_vars_of_level((Node *) subselect, 1))
1505-
return NULL;
1506-
1507-
/*
1508-
* On the other hand, the WHERE clause must contain some Vars of the
1509-
* parent query, else it's not gonna be a join.
1510-
*/
1511-
if (!contain_vars_of_level(whereClause, 1))
1512-
return NULL;
1513-
1514-
/*
1515-
* We don't risk optimizing if the WHERE clause is volatile, either.
1516-
*/
1517-
if (contain_volatile_functions(whereClause))
1518-
return NULL;
1519-
15201724
/*
15211725
* Scan the rangetable for relation RTEs and retrieve the necessary
15221726
* catalog information for each relation. Using this information, clear
@@ -1537,13 +1741,17 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15371741
subroot.type = T_PlannerInfo;
15381742
subroot.glob = root->glob;
15391743
subroot.parse = subselect;
1540-
subselect->jointree->quals = whereClause;
15411744
subselect = preprocess_relation_rtes(&subroot);
15421745

1543-
/*
1544-
* Now separate out the WHERE clause again.
1545-
*/
1546-
whereClause = subselect->jointree->quals;
1746+
subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
1747+
1748+
if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
1749+
return NULL;
1750+
1751+
newWhere = hjq_context.outer_clauses;
1752+
1753+
bms_free(hjq_context.observed_nulltest_vars);
1754+
15471755
subselect->jointree->quals = NULL;
15481756

15491757
/*
@@ -1568,23 +1776,23 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15681776
*/
15691777
rtoffset = list_length(parse->rtable);
15701778
OffsetVarNodes((Node *) subselect, rtoffset, 0);
1571-
OffsetVarNodes(whereClause, rtoffset, 0);
1779+
OffsetVarNodes((Node *) newWhere, rtoffset, 0);
15721780

15731781
/*
15741782
* Upper-level vars in subquery will now be one level closer to their
15751783
* parent than before; in particular, anything that had been level 1
15761784
* becomes level zero.
15771785
*/
15781786
IncrementVarSublevelsUp((Node *) subselect, -1, 1);
1579-
IncrementVarSublevelsUp(whereClause, -1, 1);
1787+
IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
15801788

15811789
/*
15821790
* Now that the WHERE clause is adjusted to match the parent query
15831791
* environment, we can easily identify all the level-zero rels it uses.
15841792
* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
15851793
* not.
15861794
*/
1587-
clause_varnos = pull_varnos(root, whereClause);
1795+
clause_varnos = pull_varnos(root, (Node *) newWhere);
15881796
upper_varnos = NULL;
15891797
varno = -1;
15901798
while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
@@ -1593,7 +1801,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15931801
upper_varnos = bms_add_member(upper_varnos, varno);
15941802
}
15951803
bms_free(clause_varnos);
1596-
Assert(!bms_is_empty(upper_varnos));
15971804

15981805
/*
15991806
* Now that we've got the set of upper-level varnos, we can make the last
@@ -1607,7 +1814,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
16071814
* adds subquery's RTEPermissionInfos into the upper query.
16081815
*/
16091816
CombineRangeTables(&parse->rtable, &parse->rteperminfos,
1610-
subselect->rtable, subselect->rteperminfos);
1817+
subselect->rtable, subselect->rteperminfos);
16111818

16121819
/*
16131820
* And finally, build the JoinExpr node.
@@ -1616,16 +1823,18 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
16161823
result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
16171824
result->isNatural = false;
16181825
result->larg = NULL; /* caller must fill this in */
1826+
16191827
/* flatten out the FromExpr node if it's useless */
16201828
if (list_length(subselect->jointree->fromlist) == 1)
16211829
result->rarg = (Node *) linitial(subselect->jointree->fromlist);
16221830
else
16231831
result->rarg = (Node *) subselect->jointree;
1832+
16241833
result->usingClause = NIL;
16251834
result->join_using_alias = NULL;
1626-
result->quals = whereClause;
16271835
result->alias = NULL;
16281836
result->rtindex = 0; /* we don't need an RTE for it */
1837+
result->quals = (Node *) make_ands_explicit(newWhere);
16291838

16301839
return result;
16311840
}

0 commit comments

Comments
 (0)