Skip to content

Commit 499edb0

Browse files
committed
Track more precisely query locations for nested statements
Previously, a Query generated through the transform phase would have an unset stmt_location, the field tracking the starting point of a query within its query string. Extensions relying on the statement location to extract the relevant part of the source text would fall back to using the whole string instead, leading to confusing results, for example in pg_stat_statements, for statements that contain nested queries, like: - EXPLAIN, with the top-level and nested queries sharing the same query string, and the non-top-level entry carrying the query ID of the nested query. - Multi-statements, with only partial portions of queries being normalized. - COPY TO with a query, SELECT or DMLs. This patch improves things by keeping track of statement locations and propagating them to the Query during transform, allowing PGSS to show only the relevant part of the query string for nested queries. This leads to less bloat for non-top-level entries, as queries can now be grouped within the same (toplevel, queryid) pairs in pg_stat_statements. The result is a stricter one-to-one mapping between query IDs and their query strings. The regression tests introduced in 45e0ba3 produce differences reflecting the new logic. Author: Anthonin Bonnefoy Reviewed-by: Michael Paquier, Jian He Discussion: https://postgr.es/m/CAO6_XqqM6S9bQ2qd=75W+yKATwoazxSNhv5sjW06fjGAtHbTUA@mail.gmail.com
1 parent 4b096c6 commit 499edb0

File tree

9 files changed

+280
-104
lines changed

9 files changed

+280
-104
lines changed

contrib/pg_stat_statements/expected/level_tracking.out

+81-84
Large diffs are not rendered by default.

contrib/pg_stat_statements/expected/planning.out

+5-5
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ SELECT 42;
5858
(1 row)
5959

6060
SELECT plans, calls, rows, query FROM pg_stat_statements
61-
WHERE query NOT LIKE 'PREPARE%' ORDER BY query COLLATE "C";
61+
WHERE query NOT LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
6262
plans | calls | rows | query
6363
-------+-------+------+----------------------------------------------------------
6464
0 | 1 | 0 | ALTER TABLE stats_plan_test ADD COLUMN x int
@@ -72,10 +72,10 @@ SELECT plans, calls, rows, query FROM pg_stat_statements
7272
-- for the prepared statement we expect at least one replan, but cache
7373
-- invalidations could force more
7474
SELECT plans >= 2 AND plans <= calls AS plans_ok, calls, rows, query FROM pg_stat_statements
75-
WHERE query LIKE 'PREPARE%' ORDER BY query COLLATE "C";
76-
plans_ok | calls | rows | query
77-
----------+-------+------+-------------------------------------------------------
78-
t | 4 | 4 | PREPARE prep1 AS SELECT COUNT(*) FROM stats_plan_test
75+
WHERE query LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
76+
plans_ok | calls | rows | query
77+
----------+-------+------+--------------------------------------
78+
t | 4 | 4 | SELECT COUNT(*) FROM stats_plan_test
7979
(1 row)
8080

8181
-- Cleanup

contrib/pg_stat_statements/expected/select.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@ DEALLOCATE pgss_test;
128128
SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
129129
calls | rows | query
130130
-------+------+------------------------------------------------------------------------------
131-
1 | 1 | PREPARE pgss_test (int) AS SELECT $1, $2 LIMIT $3
132131
4 | 4 | SELECT $1 +
133132
| | -- but this one will appear +
134133
| | AS "text"
@@ -138,6 +137,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
138137
2 | 2 | SELECT $1 AS "int"
139138
1 | 2 | SELECT $1 AS i UNION SELECT $2 ORDER BY i
140139
1 | 1 | SELECT $1 || $2
140+
1 | 1 | SELECT $1, $2 LIMIT $3
141141
0 | 0 | SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"
142142
1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
143143
1 | 2 | WITH t(f) AS ( +

contrib/pg_stat_statements/expected/utility.out

+1-1
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
540540
-------+------+----------------------------------------------------
541541
2 | 0 | DEALLOCATE $1
542542
2 | 0 | DEALLOCATE ALL
543-
2 | 2 | PREPARE stat_select AS SELECT $1 AS a
543+
2 | 2 | SELECT $1 AS a
544544
1 | 1 | SELECT $1 as a
545545
1 | 1 | SELECT pg_stat_statements_reset() IS NOT NULL AS t
546546
(5 rows)

contrib/pg_stat_statements/sql/planning.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ SELECT 42;
2020
SELECT 42;
2121
SELECT 42;
2222
SELECT plans, calls, rows, query FROM pg_stat_statements
23-
WHERE query NOT LIKE 'PREPARE%' ORDER BY query COLLATE "C";
23+
WHERE query NOT LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
2424
-- for the prepared statement we expect at least one replan, but cache
2525
-- invalidations could force more
2626
SELECT plans >= 2 AND plans <= calls AS plans_ok, calls, rows, query FROM pg_stat_statements
27-
WHERE query LIKE 'PREPARE%' ORDER BY query COLLATE "C";
27+
WHERE query LIKE 'SELECT COUNT%' ORDER BY query COLLATE "C";
2828

2929
-- Cleanup
3030
DROP TABLE stats_plan_test;

src/backend/parser/analyze.c

+90-5
Original file line numberDiff line numberDiff line change
@@ -238,24 +238,108 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
238238
return query;
239239
}
240240

241+
/*
242+
* setQueryLocationAndLength
243+
* Set query's location and length from statement and ParseState
244+
*
245+
* Some statements, like PreparableStmt, can be located within parentheses.
246+
* For example "(SELECT 1)" or "COPY (UPDATE ...) to x;". For those, we
247+
* cannot use the whole string from the statement's location or the SQL
248+
* string would yield incorrectly. The parser will set stmt_len, reflecting
249+
* the size of the statement within the parentheses. Thus, when stmt_len is
250+
* available, we need to use it for the Query's stmt_len.
251+
*
252+
* For other cases, the parser can't provide the length of individual
253+
* statements. However, we have the statement's location plus the length
254+
* (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
255+
* stored in pstate. Thus, the statement's length is the RawStmt's length
256+
* minus how much we've advanced in the RawStmt's string.
257+
*/
258+
static void
259+
setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
260+
{
261+
ParseLoc stmt_len = 0;
262+
263+
/*
264+
* If there is no information about the top RawStmt's length, leave it at
265+
* 0 to use the whole string.
266+
*/
267+
if (pstate->p_stmt_len == 0)
268+
return;
269+
270+
switch (nodeTag(parseTree))
271+
{
272+
case T_InsertStmt:
273+
qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
274+
stmt_len = ((InsertStmt *) parseTree)->stmt_len;
275+
break;
276+
277+
case T_DeleteStmt:
278+
qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
279+
stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
280+
break;
281+
282+
case T_UpdateStmt:
283+
qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
284+
stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
285+
break;
286+
287+
case T_MergeStmt:
288+
qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
289+
stmt_len = ((MergeStmt *) parseTree)->stmt_len;
290+
break;
291+
292+
case T_SelectStmt:
293+
qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
294+
stmt_len = ((SelectStmt *) parseTree)->stmt_len;
295+
break;
296+
297+
case T_PLAssignStmt:
298+
qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
299+
break;
300+
301+
default:
302+
qry->stmt_location = pstate->p_stmt_location;
303+
break;
304+
}
305+
306+
if (stmt_len > 0)
307+
{
308+
/* Statement's length is known, use it */
309+
qry->stmt_len = stmt_len;
310+
}
311+
else
312+
{
313+
/*
314+
* Compute the statement's length from the statement's location and
315+
* the RawStmt's length and location.
316+
*/
317+
qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
318+
}
319+
320+
/* The calculated statement length should be calculated as positive. */
321+
Assert(qry->stmt_len >= 0);
322+
}
323+
241324
/*
242325
* transformTopLevelStmt -
243326
* transform a Parse tree into a Query tree.
244327
*
245-
* This function is just responsible for transferring statement location data
246-
* from the RawStmt into the finished Query.
328+
* This function is just responsible for storing location data
329+
* from the RawStmt into the ParseState.
247330
*/
248331
Query *
249332
transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
250333
{
251334
Query *result;
252335

336+
/* Store RawStmt's length and location in pstate */
337+
pstate->p_stmt_len = parseTree->stmt_len;
338+
pstate->p_stmt_location = parseTree->stmt_location;
339+
253340
/* We're at top level, so allow SELECT INTO */
254341
result = transformOptionalSelectInto(pstate, parseTree->stmt);
255342

256-
result->stmt_location = parseTree->stmt_location;
257-
result->stmt_len = parseTree->stmt_len;
258-
259343
return result;
260344
}
261345

@@ -424,6 +508,7 @@ transformStmt(ParseState *pstate, Node *parseTree)
424508
/* Mark as original query until we learn differently */
425509
result->querySource = QSRC_ORIGINAL;
426510
result->canSetTag = true;
511+
setQueryLocationAndLength(pstate, result, parseTree);
427512

428513
return result;
429514
}

src/backend/parser/gram.y

+74-6
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
153153
const char *msg);
154154
static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
155155
static void updateRawStmtEnd(RawStmt *rs, int end_location);
156+
static void updatePreparableStmtEnd(Node *n, int end_location);
156157
static Node *makeColumnRef(char *colname, List *indirection,
157158
int location, core_yyscan_t yyscanner);
158159
static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -176,7 +177,7 @@ static void insertSelectOptions(SelectStmt *stmt,
176177
SelectLimit *limitClause,
177178
WithClause *withClause,
178179
core_yyscan_t yyscanner);
179-
static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
180+
static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
180181
static Node *doNegate(Node *n, int location);
181182
static void doNegateFloat(Float *v);
182183
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
@@ -3383,6 +3384,7 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list
33833384
{
33843385
CopyStmt *n = makeNode(CopyStmt);
33853386

3387+
updatePreparableStmtEnd($3, @4);
33863388
n->relation = NULL;
33873389
n->query = $3;
33883390
n->attlist = NIL;
@@ -12150,6 +12152,7 @@ InsertStmt:
1215012152
$5->onConflictClause = $6;
1215112153
$5->returningList = $7;
1215212154
$5->withClause = $1;
12155+
$5->stmt_location = @$;
1215312156
$$ = (Node *) $5;
1215412157
}
1215512158
;
@@ -12303,6 +12306,7 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
1230312306
n->whereClause = $6;
1230412307
n->returningList = $7;
1230512308
n->withClause = $1;
12309+
n->stmt_location = @$;
1230612310
$$ = (Node *) n;
1230712311
}
1230812312
;
@@ -12377,6 +12381,7 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
1237712381
n->whereClause = $7;
1237812382
n->returningList = $8;
1237912383
n->withClause = $1;
12384+
n->stmt_location = @$;
1238012385
$$ = (Node *) n;
1238112386
}
1238212387
;
@@ -12454,6 +12459,7 @@ MergeStmt:
1245412459
m->joinCondition = $8;
1245512460
m->mergeWhenClauses = $9;
1245612461
m->returningList = $10;
12462+
m->stmt_location = @$;
1245712463

1245812464
$$ = (Node *) m;
1245912465
}
@@ -12694,7 +12700,20 @@ SelectStmt: select_no_parens %prec UMINUS
1269412700
;
1269512701

1269612702
select_with_parens:
12697-
'(' select_no_parens ')' { $$ = $2; }
12703+
'(' select_no_parens ')'
12704+
{
12705+
SelectStmt *n = (SelectStmt *) $2;
12706+
12707+
/*
12708+
* As SelectStmt's location starts at the SELECT keyword,
12709+
* we need to track the length of the SelectStmt within
12710+
* parentheses to be able to extract the relevant part
12711+
* of the query. Without this, the RawStmt's length would
12712+
* be used and would include the closing parenthesis.
12713+
*/
12714+
n->stmt_len = @3 - @2;
12715+
$$ = $2;
12716+
}
1269812717
| '(' select_with_parens ')' { $$ = $2; }
1269912718
;
1270012719

@@ -12816,6 +12835,7 @@ simple_select:
1281612835
n->groupDistinct = ($7)->distinct;
1281712836
n->havingClause = $8;
1281812837
n->windowClause = $9;
12838+
n->stmt_location = @1;
1281912839
$$ = (Node *) n;
1282012840
}
1282112841
| SELECT distinct_clause target_list
@@ -12833,6 +12853,7 @@ simple_select:
1283312853
n->groupDistinct = ($7)->distinct;
1283412854
n->havingClause = $8;
1283512855
n->windowClause = $9;
12856+
n->stmt_location = @1;
1283612857
$$ = (Node *) n;
1283712858
}
1283812859
| values_clause { $$ = $1; }
@@ -12853,19 +12874,20 @@ simple_select:
1285312874

1285412875
n->targetList = list_make1(rt);
1285512876
n->fromClause = list_make1($2);
12877+
n->stmt_location = @1;
1285612878
$$ = (Node *) n;
1285712879
}
1285812880
| select_clause UNION set_quantifier select_clause
1285912881
{
12860-
$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
12882+
$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
1286112883
}
1286212884
| select_clause INTERSECT set_quantifier select_clause
1286312885
{
12864-
$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
12886+
$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
1286512887
}
1286612888
| select_clause EXCEPT set_quantifier select_clause
1286712889
{
12868-
$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
12890+
$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
1286912891
}
1287012892
;
1287112893

@@ -13423,6 +13445,7 @@ values_clause:
1342313445
{
1342413446
SelectStmt *n = makeNode(SelectStmt);
1342513447

13448+
n->stmt_location = @1;
1342613449
n->valuesLists = list_make1($3);
1342713450
$$ = (Node *) n;
1342813451
}
@@ -18565,6 +18588,47 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
1856518588
rs->stmt_len = end_location - rs->stmt_location;
1856618589
}
1856718590

18591+
/*
18592+
* Adjust a PreparableStmt to reflect that it doesn't run to the end of the
18593+
* string.
18594+
*/
18595+
static void
18596+
updatePreparableStmtEnd(Node *n, int end_location)
18597+
{
18598+
if (IsA(n, SelectStmt))
18599+
{
18600+
SelectStmt *stmt = (SelectStmt *)n;
18601+
18602+
stmt->stmt_len = end_location - stmt->stmt_location;
18603+
}
18604+
else if (IsA(n, InsertStmt))
18605+
{
18606+
InsertStmt *stmt = (InsertStmt *)n;
18607+
18608+
stmt->stmt_len = end_location - stmt->stmt_location;
18609+
}
18610+
else if (IsA(n, UpdateStmt))
18611+
{
18612+
UpdateStmt *stmt = (UpdateStmt *)n;
18613+
18614+
stmt->stmt_len = end_location - stmt->stmt_location;
18615+
}
18616+
else if (IsA(n, DeleteStmt))
18617+
{
18618+
DeleteStmt *stmt = (DeleteStmt *)n;
18619+
18620+
stmt->stmt_len = end_location - stmt->stmt_location;
18621+
}
18622+
else if (IsA(n, MergeStmt))
18623+
{
18624+
MergeStmt *stmt = (MergeStmt *)n;
18625+
18626+
stmt->stmt_len = end_location - stmt->stmt_location;
18627+
}
18628+
else
18629+
elog(ERROR, "unexpected node type %d", (int) n->type);
18630+
}
18631+
1856818632
static Node *
1856918633
makeColumnRef(char *colname, List *indirection,
1857018634
int location, core_yyscan_t yyscanner)
@@ -18943,18 +19007,22 @@ insertSelectOptions(SelectStmt *stmt,
1894319007
errmsg("multiple WITH clauses not allowed"),
1894419008
parser_errposition(exprLocation((Node *) withClause))));
1894519009
stmt->withClause = withClause;
19010+
19011+
/* Update SelectStmt's location to the start of the WITH clause */
19012+
stmt->stmt_location = withClause->location;
1894619013
}
1894719014
}
1894819015

1894919016
static Node *
18950-
makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
19017+
makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
1895119018
{
1895219019
SelectStmt *n = makeNode(SelectStmt);
1895319020

1895419021
n->op = op;
1895519022
n->all = all;
1895619023
n->larg = (SelectStmt *) larg;
1895719024
n->rarg = (SelectStmt *) rarg;
19025+
n->stmt_location = location;
1895819026
return (Node *) n;
1895919027
}
1896019028

0 commit comments

Comments
 (0)