Skip to content

Commit 1eccb93

Browse files
committed
Re-allow using statistics for bool-valued functions in WHERE.
Commit a391ff3, which added the ability for a function's support function to provide a custom selectivity estimate for "WHERE f(...)", unintentionally removed the possibility of applying expression statistics after finding there's no applicable support function. That happened because we no longer fell through to boolvarsel() as before. Refactor to do so again, putting the 0.3333333 default back into boolvarsel() where it had been (cf. commit 39df0f1).

I surely wouldn't have made this error if 39df0f1 had included a test case, so add one now. At the time we did not have the "extended statistics" infrastructure, but we do now, and it is also unable to work in this scenario because of this error. So make use of that for the test case.

This is very clearly a bug fix, but I'm afraid to put it into released branches because of the likelihood of altering plan choices, which we avoid doing in minor releases. So, master only.

Reported-by: Frédéric Yhuel <[email protected]>
Author: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/[email protected]
1 parent 18cdf59 commit 1eccb93

File tree

5 files changed

+53
-13
lines changed

5 files changed

+53
-13
lines changed

src/backend/optimizer/path/clausesel.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,10 @@ clause_selectivity_ext(PlannerInfo *root,
874874
varRelid,
875875
jointype,
876876
sjinfo);
877+
878+
/* If no support, fall back on boolvarsel */
879+
if (s1 < 0)
880+
s1 = boolvarsel(root, clause, varRelid);
877881
}
878882
else if (IsA(clause, ScalarArrayOpExpr))
879883
{

src/backend/optimizer/util/plancat.c

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2143,9 +2143,8 @@ join_selectivity(PlannerInfo *root,
21432143
/*
21442144
* function_selectivity
21452145
*
2146-
* Returns the selectivity of a specified boolean function clause.
2147-
* This code executes registered procedures stored in the
2148-
* pg_proc relation, by calling the function manager.
2146+
* Attempt to estimate the selectivity of a specified boolean function clause
2147+
* by asking its support function. If the function lacks support, return -1.
21492148
*
21502149
* See clause_selectivity() for the meaning of the additional parameters.
21512150
*/
@@ -2163,15 +2162,8 @@ function_selectivity(PlannerInfo *root,
21632162
SupportRequestSelectivity req;
21642163
SupportRequestSelectivity *sresult;
21652164

2166-
/*
2167-
* If no support function is provided, use our historical default
2168-
* estimate, 0.3333333. This seems a pretty unprincipled choice, but
2169-
* Postgres has been using that estimate for function calls since 1992.
2170-
* The hoariness of this behavior suggests that we should not be in too
2171-
* much hurry to use another value.
2172-
*/
21732165
if (!prosupport)
2174-
return (Selectivity) 0.3333333;
2166+
return (Selectivity) -1; /* no support function */
21752167

21762168
req.type = T_SupportRequestSelectivity;
21772169
req.root = root;
@@ -2188,9 +2180,8 @@ function_selectivity(PlannerInfo *root,
21882180
DatumGetPointer(OidFunctionCall1(prosupport,
21892181
PointerGetDatum(&req)));
21902182

2191-
/* If support function fails, use default */
21922183
if (sresult != &req)
2193-
return (Selectivity) 0.3333333;
2184+
return (Selectivity) -1; /* function did not honor request */
21942185

21952186
if (req.selectivity < 0.0 || req.selectivity > 1.0)
21962187
elog(ERROR, "invalid function selectivity: %f", req.selectivity);

src/backend/utils/adt/selfuncs.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1528,6 +1528,17 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
15281528
selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
15291529
BoolGetDatum(true), false, true, false);
15301530
}
1531+
else if (is_funcclause(arg))
1532+
{
1533+
/*
1534+
* If we have no stats and it's a function call, estimate 0.3333333.
1535+
* This seems a pretty unprincipled choice, but Postgres has been
1536+
* using that estimate for function calls since 1992. The hoariness
1537+
* of this behavior suggests that we should not be in too much hurry
1538+
* to use another value.
1539+
*/
1540+
selec = 0.3333333;
1541+
}
15311542
else
15321543
{
15331544
/* Otherwise, the default estimate is 0.5 */

src/test/regress/expected/stats_ext.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3537,4 +3537,24 @@ SELECT FROM sb_1 LEFT JOIN sb_2
35373537

35383538
RESET enable_nestloop;
35393539
RESET enable_mergejoin;
3540+
-- Check that we can use statistics on a bool-valued function.
3541+
CREATE FUNCTION extstat_small(x numeric) RETURNS bool
3542+
STRICT IMMUTABLE LANGUAGE plpgsql
3543+
AS $$ BEGIN RETURN x < 1; END $$;
3544+
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
3545+
estimated | actual
3546+
-----------+--------
3547+
3333 | 196
3548+
(1 row)
3549+
3550+
CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2;
3551+
ANALYZE sb_2;
3552+
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
3553+
estimated | actual
3554+
-----------+--------
3555+
196 | 196
3556+
(1 row)
3557+
3558+
-- Tidy up
35403559
DROP TABLE sb_1, sb_2 CASCADE;
3560+
DROP FUNCTION extstat_small(x numeric);

src/test/regress/sql/stats_ext.sql

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1811,4 +1811,18 @@ SELECT FROM sb_1 LEFT JOIN sb_2
18111811
RESET enable_nestloop;
18121812
RESET enable_mergejoin;
18131813

1814+
-- Check that we can use statistics on a bool-valued function.
1815+
CREATE FUNCTION extstat_small(x numeric) RETURNS bool
1816+
STRICT IMMUTABLE LANGUAGE plpgsql
1817+
AS $$ BEGIN RETURN x < 1; END $$;
1818+
1819+
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
1820+
1821+
CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2;
1822+
ANALYZE sb_2;
1823+
1824+
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
1825+
1826+
-- Tidy up
18141827
DROP TABLE sb_1, sb_2 CASCADE;
1828+
DROP FUNCTION extstat_small(x numeric);

0 commit comments

Comments
 (0)