Skip to content

Commit c523939

Browse files
author
Commitfest Bot
committed
[CF 5487] v6 - Pull-up subquery if INNER JOIN-ON contains refs to upper-query
This branch was automatically generated by a robot using patches from an email thread registered at: https://commitfest.postgresql.org/patch/5487 The branch will be overwritten each time a new patch version is posted to the thread, and also periodically to check for bitrot caused by changes on the master branch. Patch(es): https://www.postgresql.org/message-id/[email protected] Author(s): Alena Rybakina
2 parents e1ac846 + 8aa8bdf commit c523939

File tree

4 files changed

+1487
-46
lines changed

4 files changed

+1487
-46
lines changed

src/backend/optimizer/plan/subselect.c

Lines changed: 249 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1440,6 +1440,237 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14401440
return result;
14411441
}
14421442

1443+
typedef struct HoistJoinQualsContext
1444+
{
1445+
List *outer_clauses; /* collect hoisted clauses */
1446+
Relids observed_nulltest_vars;
1447+
} HoistJoinQualsContext;
1448+
1449+
/*
 * preprocess_quals
 *	  Constant-fold and canonicalize a qual expression, then return it as an
 *	  implicit-AND list (the result is really a List *, cast to Node *).
 *
 * eval_const_expressions() is deliberately called with a NULL PlannerInfo:
 * the quals belong to a subquery whose rangetable has not yet been merged
 * into the parent's, so any simplification that consults a planner root
 * would be looking at the wrong rangetable.
 */
static Node *
preprocess_quals(Node *node)
{
	Expr	   *folded;
	Expr	   *canonical;
	List	   *conjuncts;

	folded = (Expr *) eval_const_expressions(NULL, node);
	canonical = canonicalize_qual(folded, false);
	conjuncts = make_ands_implicit(canonical);

	return (Node *) conjuncts;
}
1470+
1471+
static NullTest *
1472+
make_nulltest(Var *var, NullTestType type)
1473+
{
1474+
NullTest *nulltest = makeNode(NullTest);
1475+
nulltest->arg = (Expr *) var;
1476+
nulltest->nulltesttype = type;
1477+
nulltest->argisrow = false;
1478+
nulltest->location = -1;
1479+
1480+
return nulltest;
1481+
}
1482+
1483+
static bool
1484+
simplicity_check_walker(Node *node, void *ctx)
1485+
{
1486+
if (node == NULL)
1487+
{
1488+
return false;
1489+
}
1490+
else if(IsA(node, Var))
1491+
return true;
1492+
else if(IsA(node, Query))
1493+
return query_tree_walker((Query *) node,
1494+
simplicity_check_walker,
1495+
(void*) ctx,
1496+
QTW_EXAMINE_RTES_BEFORE);
1497+
1498+
return expression_tree_walker(node, simplicity_check_walker,
1499+
(void *) ctx);
1500+
}
1501+
1502+
static List *
1503+
generate_not_null_exprs(List *list_expr, Relids *observed_vars)
1504+
{
1505+
ListCell *lc;
1506+
List *result = NIL;
1507+
1508+
foreach(lc, list_expr)
1509+
{
1510+
Node *node = (Node *) lfirst(lc);
1511+
1512+
if (IsA(node, OpExpr))
1513+
{
1514+
Node *larg = get_leftop(node);
1515+
Node *rarg = get_rightop(node);
1516+
1517+
if (IsA(larg, RelabelType))
1518+
larg = (Node *) ((RelabelType *) larg)->arg;
1519+
1520+
if (IsA(rarg, RelabelType))
1521+
rarg = (Node *) ((RelabelType *) rarg)->arg;
1522+
1523+
if(IsA(larg, Var))
1524+
{
1525+
Var *var = (Var *) larg;
1526+
if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
1527+
{
1528+
NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
1529+
result = lappend(result, nulltest);
1530+
*observed_vars = bms_add_member(*observed_vars, var->varno);
1531+
continue;
1532+
}
1533+
}
1534+
1535+
if(IsA(rarg, Var))
1536+
{
1537+
Var *var = (Var *) rarg;
1538+
if (!bms_is_member(var->varno, *observed_vars) && var->varlevelsup == 1)
1539+
{
1540+
NullTest *nulltest = make_nulltest(var, IS_NOT_NULL);
1541+
result = lappend(result, nulltest);
1542+
*observed_vars = bms_add_member(*observed_vars, var->varno);
1543+
continue;
1544+
}
1545+
}
1546+
}
1547+
}
1548+
1549+
return result;
1550+
}
1551+
1552+
static Node *
1553+
hoist_parent_quals_jointree_mutator(Node *jtnode, HoistJoinQualsContext *context)
1554+
{
1555+
if (jtnode == NULL)
1556+
return NULL;
1557+
1558+
if (IsA(jtnode, RangeTblRef))
1559+
return jtnode; /* nothing to change */
1560+
1561+
if (IsA(jtnode, JoinExpr))
1562+
{
1563+
JoinExpr *j = (JoinExpr *) jtnode;
1564+
JoinExpr *newj = makeNode(JoinExpr);
1565+
ListCell *lc;
1566+
List *join_clauses = NIL;
1567+
Node *qual;
1568+
memcpy(newj, j, sizeof(JoinExpr));
1569+
1570+
/* Recurse into join inputs */
1571+
newj->larg = (Node *) hoist_parent_quals_jointree_mutator(j->larg, context);
1572+
newj->rarg = (Node *) hoist_parent_quals_jointree_mutator(j->rarg, context);
1573+
1574+
if(contain_volatile_functions(newj->quals) ||
1575+
newj->larg == NULL ||
1576+
newj->rarg == NULL)
1577+
return NULL;
1578+
1579+
qual = newj->quals;
1580+
qual = preprocess_quals(qual);
1581+
1582+
foreach(lc, (List *) qual)
1583+
{
1584+
Node *node = (Node *) lfirst(lc);
1585+
1586+
if (IsA(node, OpExpr))
1587+
{
1588+
if(simplicity_check_walker(get_leftop(node), NULL) &&
1589+
simplicity_check_walker(get_rightop(node), NULL))
1590+
{
1591+
join_clauses = lappend(join_clauses, node);
1592+
continue;
1593+
}
1594+
}
1595+
context->outer_clauses = lappend(context->outer_clauses, node);
1596+
}
1597+
1598+
/* Only touch INNER JOINs */
1599+
if ((j->jointype != JOIN_LEFT &&
1600+
j->jointype != JOIN_RIGHT &&
1601+
j->jointype != JOIN_FULL)) /* subquery vars */
1602+
{
1603+
List *null_tests;
1604+
1605+
if (join_clauses == NIL) /* subquery vars */
1606+
{
1607+
newj->quals = (Node *) makeBoolConst(true, false);
1608+
}
1609+
else if(join_clauses != NIL && contain_vars_of_level((Node *) join_clauses, 1))
1610+
{
1611+
null_tests = generate_not_null_exprs(join_clauses, &context->observed_nulltest_vars);
1612+
context->outer_clauses = list_concat(context->outer_clauses, null_tests);
1613+
context->outer_clauses = list_concat(context->outer_clauses, join_clauses);
1614+
newj->quals = (Node *) makeBoolConst(true, false);
1615+
}
1616+
else
1617+
{
1618+
newj->quals = (Node *) make_ands_explicit(join_clauses);
1619+
}
1620+
}
1621+
else
1622+
{
1623+
if (contain_vars_of_level(j->quals, 1))
1624+
return NULL;
1625+
}
1626+
1627+
return (Node *) newj;
1628+
}
1629+
1630+
if (IsA(jtnode, FromExpr))
1631+
{
1632+
FromExpr *f = (FromExpr *) jtnode;
1633+
FromExpr *newf = makeNode(FromExpr);
1634+
ListCell *lc;
1635+
List *fromlist = NIL;
1636+
1637+
/* Recurse into fromlist */
1638+
memcpy(newf, f, sizeof(FromExpr));
1639+
1640+
/*
1641+
* Process children, if any of their jointree contains Vars of the
1642+
* parent query or quals of their JoinExpr contains volatile functions
1643+
* then exit
1644+
*/
1645+
foreach(lc, newf->fromlist)
1646+
{
1647+
Node *fnode = hoist_parent_quals_jointree_mutator(lfirst(lc), context);
1648+
1649+
if (fnode == NULL)
1650+
return NULL;
1651+
fromlist = lappend(fromlist, fnode);
1652+
}
1653+
1654+
newf->fromlist = fromlist;
1655+
1656+
if(contain_volatile_functions(newf->quals))
1657+
return NULL;
1658+
1659+
if(newf->quals)
1660+
{
1661+
Node *qual = newf->quals;
1662+
/* Quals (WHERE clause) may still contain sublinks etc */
1663+
qual = preprocess_quals(qual);
1664+
context->outer_clauses = list_concat(context->outer_clauses, (List *) qual);
1665+
newf->quals = NULL;
1666+
}
1667+
1668+
return (Node *) newf;
1669+
}
1670+
1671+
return jtnode; /* quiet compiler */
1672+
}
1673+
14431674
/*
14441675
* convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join
14451676
*
@@ -1454,12 +1685,13 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14541685
JoinExpr *result;
14551686
Query *parse = root->parse;
14561687
Query *subselect = (Query *) sublink->subselect;
1457-
Node *whereClause;
14581688
PlannerInfo subroot;
14591689
int rtoffset;
14601690
int varno;
14611691
Relids clause_varnos;
14621692
Relids upper_varnos;
1693+
List *newWhere = NIL;
1694+
HoistJoinQualsContext hjq_context = {NIL, NULL};
14631695

14641696
Assert(sublink->subLinkType == EXISTS_SUBLINK);
14651697

@@ -1489,34 +1721,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
14891721
if (!simplify_EXISTS_query(root, subselect))
14901722
return NULL;
14911723

1492-
/*
1493-
* Separate out the WHERE clause. (We could theoretically also remove
1494-
* top-level plain JOIN/ON clauses, but it's probably not worth the
1495-
* trouble.)
1496-
*/
1497-
whereClause = subselect->jointree->quals;
1498-
subselect->jointree->quals = NULL;
1499-
1500-
/*
1501-
* The rest of the sub-select must not refer to any Vars of the parent
1502-
* query. (Vars of higher levels should be okay, though.)
1503-
*/
1504-
if (contain_vars_of_level((Node *) subselect, 1))
1505-
return NULL;
1506-
1507-
/*
1508-
* On the other hand, the WHERE clause must contain some Vars of the
1509-
* parent query, else it's not gonna be a join.
1510-
*/
1511-
if (!contain_vars_of_level(whereClause, 1))
1512-
return NULL;
1513-
1514-
/*
1515-
* We don't risk optimizing if the WHERE clause is volatile, either.
1516-
*/
1517-
if (contain_volatile_functions(whereClause))
1518-
return NULL;
1519-
15201724
/*
15211725
* Scan the rangetable for relation RTEs and retrieve the necessary
15221726
* catalog information for each relation. Using this information, clear
@@ -1537,13 +1741,17 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15371741
subroot.type = T_PlannerInfo;
15381742
subroot.glob = root->glob;
15391743
subroot.parse = subselect;
1540-
subselect->jointree->quals = whereClause;
15411744
subselect = preprocess_relation_rtes(&subroot);
15421745

1543-
/*
1544-
* Now separate out the WHERE clause again.
1545-
*/
1546-
whereClause = subselect->jointree->quals;
1746+
subselect->jointree = (FromExpr * ) hoist_parent_quals_jointree_mutator((Node *) subselect->jointree, &hjq_context);
1747+
1748+
if(subselect->jointree == NULL || hjq_context.outer_clauses == NIL)
1749+
return NULL;
1750+
1751+
newWhere = hjq_context.outer_clauses;
1752+
1753+
bms_free(hjq_context.observed_nulltest_vars);
1754+
15471755
subselect->jointree->quals = NULL;
15481756

15491757
/*
@@ -1568,23 +1776,23 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15681776
*/
15691777
rtoffset = list_length(parse->rtable);
15701778
OffsetVarNodes((Node *) subselect, rtoffset, 0);
1571-
OffsetVarNodes(whereClause, rtoffset, 0);
1779+
OffsetVarNodes((Node *) newWhere, rtoffset, 0);
15721780

15731781
/*
15741782
* Upper-level vars in subquery will now be one level closer to their
15751783
* parent than before; in particular, anything that had been level 1
15761784
* becomes level zero.
15771785
*/
15781786
IncrementVarSublevelsUp((Node *) subselect, -1, 1);
1579-
IncrementVarSublevelsUp(whereClause, -1, 1);
1787+
IncrementVarSublevelsUp((Node *) newWhere, -1, 1);
15801788

15811789
/*
15821790
* Now that the WHERE clause is adjusted to match the parent query
15831791
* environment, we can easily identify all the level-zero rels it uses.
15841792
* The ones <= rtoffset belong to the upper query; the ones > rtoffset do
15851793
* not.
15861794
*/
1587-
clause_varnos = pull_varnos(root, whereClause);
1795+
clause_varnos = pull_varnos(root, (Node *) newWhere);
15881796
upper_varnos = NULL;
15891797
varno = -1;
15901798
while ((varno = bms_next_member(clause_varnos, varno)) >= 0)
@@ -1593,7 +1801,6 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
15931801
upper_varnos = bms_add_member(upper_varnos, varno);
15941802
}
15951803
bms_free(clause_varnos);
1596-
Assert(!bms_is_empty(upper_varnos));
15971804

15981805
/*
15991806
* Now that we've got the set of upper-level varnos, we can make the last
@@ -1607,7 +1814,7 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
16071814
* adds subquery's RTEPermissionInfos into the upper query.
16081815
*/
16091816
CombineRangeTables(&parse->rtable, &parse->rteperminfos,
1610-
subselect->rtable, subselect->rteperminfos);
1817+
subselect->rtable, subselect->rteperminfos);
16111818

16121819
/*
16131820
* And finally, build the JoinExpr node.
@@ -1616,16 +1823,18 @@ convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
16161823
result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
16171824
result->isNatural = false;
16181825
result->larg = NULL; /* caller must fill this in */
1826+
16191827
/* flatten out the FromExpr node if it's useless */
16201828
if (list_length(subselect->jointree->fromlist) == 1)
16211829
result->rarg = (Node *) linitial(subselect->jointree->fromlist);
16221830
else
16231831
result->rarg = (Node *) subselect->jointree;
1832+
16241833
result->usingClause = NIL;
16251834
result->join_using_alias = NULL;
1626-
result->quals = whereClause;
16271835
result->alias = NULL;
16281836
result->rtindex = 0; /* we don't need an RTE for it */
1837+
result->quals = (Node *) make_ands_explicit(newWhere);
16291838

16301839
return result;
16311840
}

0 commit comments

Comments
 (0)