Skip to content

Commit a839ad8

Browse files
michaelpqCommitfest Bot
authored andcommitted
Refactor output format of pg_ndistinct.
The existing format of pg_ndistinct uses a single-object JSON structure where each key is itself a comma-separated list of attnums. While this is a very compact format, it's confusing to read and is difficult to manipulate values within the object. This wasn't a concern until statistics import functions were introduced, enabling users to inject hypothetical statistics into an object to observe their effect on the query planner. The new format is an array of objects, each object must have the keys "attributes", which must contain an array of attnums, and "ndistinct", which must be an integer. This is a quirk because the underlying internal storage is a double, but the value stored was always an integer. The change in format is described from the changes to src/test/regress/expected/stats_ext.out.
1 parent 180e7ab commit a839ad8

File tree

5 files changed

+220
-36
lines changed

5 files changed

+220
-36
lines changed

doc/src/sgml/perform.sgml

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1576,12 +1576,42 @@ CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes;
15761576

15771577
ANALYZE zipcodes;
15781578

1579-
SELECT stxkeys AS k, stxdndistinct AS nd
1579+
SELECT stxkeys AS k, jsonb_pretty(stxdndistinct::text::jsonb) AS nd
15801580
FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid)
15811581
WHERE stxname = 'stts2';
1582-
-[ RECORD 1 ]------------------------------------------------------&zwsp;--
1582+
-[ RECORD 1 ]-------------------
15831583
k | 1 2 5
1584-
nd | {"1, 2": 33178, "1, 5": 33178, "2, 5": 27435, "1, 2, 5": 33178}
1584+
nd | [ +
1585+
| { +
1586+
| "ndistinct": 33178,+
1587+
| "attributes": [ +
1588+
| 1, +
1589+
| 2 +
1590+
| ] +
1591+
| }, +
1592+
| { +
1593+
| "ndistinct": 33178,+
1594+
| "attributes": [ +
1595+
| 1, +
1596+
| 5 +
1597+
| ] +
1598+
| }, +
1599+
| { +
1600+
| "ndistinct": 27435,+
1601+
| "attributes": [ +
1602+
| 2, +
1603+
| 5 +
1604+
| ] +
1605+
| }, +
1606+
| { +
1607+
| "ndistinct": 33178,+
1608+
| "attributes": [ +
1609+
| 1, +
1610+
| 2, +
1611+
| 5 +
1612+
| ] +
1613+
| } +
1614+
| ]
15851615
(1 row)
15861616
</programlisting>
15871617
This indicates that there are three combinations of columns that

src/backend/utils/adt/pg_ndistinct.c

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "lib/stringinfo.h"
1818
#include "statistics/extended_stats_internal.h"
19+
#include "statistics/statistics_format.h"
1920
#include "utils/fmgrprotos.h"
2021

2122

@@ -51,26 +52,29 @@ pg_ndistinct_out(PG_FUNCTION_ARGS)
5152
StringInfoData str;
5253

5354
initStringInfo(&str);
54-
appendStringInfoChar(&str, '{');
55+
appendStringInfoChar(&str, '[');
5556

5657
for (i = 0; i < ndist->nitems; i++)
5758
{
58-
int j;
5959
MVNDistinctItem item = ndist->items[i];
6060

6161
if (i > 0)
6262
appendStringInfoString(&str, ", ");
6363

64-
for (j = 0; j < item.nattributes; j++)
65-
{
66-
AttrNumber attnum = item.attributes[j];
64+
if (item.nattributes <= 0)
65+
elog(ERROR, "invalid zero-length attribute array in MVNDistinct");
6766

68-
appendStringInfo(&str, "%s%d", (j == 0) ? "\"" : ", ", attnum);
69-
}
70-
appendStringInfo(&str, "\": %d", (int) item.ndistinct);
67+
appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d",
68+
item.attributes[0]);
69+
70+
for (int j = 1; j < item.nattributes; j++)
71+
appendStringInfo(&str, ", %d", item.attributes[j]);
72+
73+
appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}",
74+
(int) item.ndistinct);
7175
}
7276

73-
appendStringInfoChar(&str, '}');
77+
appendStringInfoChar(&str, ']');
7478

7579
PG_RETURN_CSTRING(str.data);
7680
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* statistics_format.h
4+
* Data related to the format of extended statistics, usable by both
5+
* frontend and backend code.
6+
*
7+
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8+
* Portions Copyright (c) 1994, Regents of the University of California
9+
*
10+
* src/include/statistics/statistics_format.h
11+
*
12+
*-------------------------------------------------------------------------
13+
*/
14+
#ifndef STATISTICS_FORMAT_H
15+
#define STATISTICS_FORMAT_H
16+
17+
/* ----------
18+
* pg_ndistinct in human-readable format is a JSON array made of elements with
19+
* a predefined set of keys, like:
20+
*
21+
* [{"ndistinct": 11, "attributes": [3,4]},
22+
* {"ndistinct": 11, "attributes": [3,6]},
23+
* {"ndistinct": 11, "attributes": [4,6]},
24+
* {"ndistinct": 11, "attributes": [3,4,6]}]
25+
* ----------
26+
*/
27+
#define PG_NDISTINCT_KEY_ATTRIBUTES "attributes"
28+
#define PG_NDISTINCT_KEY_NDISTINCT "ndistinct"
29+
30+
#endif /* STATISTICS_FORMAT_H */

src/test/regress/expected/stats_ext.out

Lines changed: 138 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ Statistics objects:
196196
"public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0
197197

198198
ANALYZE ab1;
199-
SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
199+
SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
200200
FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
201201
WHERE s.stxname = 'ab1_a_b_stats';
202202
stxname | stxdndistinct | stxddependencies | stxdmcv | stxdinherit
@@ -476,13 +476,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
476476
-- correct command
477477
CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
478478
ANALYZE ndistinct;
479-
SELECT s.stxkind, d.stxdndistinct
479+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
480480
FROM pg_statistic_ext s, pg_statistic_ext_data d
481481
WHERE s.stxrelid = 'ndistinct'::regclass
482482
AND d.stxoid = s.oid;
483-
stxkind | stxdndistinct
484-
---------+-----------------------------------------------------
485-
{d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
483+
stxkind | stxdndistinct
484+
---------+--------------------------
485+
{d,f,m} | [ +
486+
| { +
487+
| "ndistinct": 11,+
488+
| "attributes": [ +
489+
| 3, +
490+
| 4 +
491+
| ] +
492+
| }, +
493+
| { +
494+
| "ndistinct": 11,+
495+
| "attributes": [ +
496+
| 3, +
497+
| 6 +
498+
| ] +
499+
| }, +
500+
| { +
501+
| "ndistinct": 11,+
502+
| "attributes": [ +
503+
| 4, +
504+
| 6 +
505+
| ] +
506+
| }, +
507+
| { +
508+
| "ndistinct": 11,+
509+
| "attributes": [ +
510+
| 3, +
511+
| 4, +
512+
| 6 +
513+
| ] +
514+
| } +
515+
| ]
486516
(1 row)
487517

488518
-- minor improvement, make sure the ctid does not break the matching
@@ -558,13 +588,43 @@ INSERT INTO ndistinct (a, b, c, filler1)
558588
mod(i,23) || ' dollars and zero cents'
559589
FROM generate_series(1,1000) s(i);
560590
ANALYZE ndistinct;
561-
SELECT s.stxkind, d.stxdndistinct
591+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
562592
FROM pg_statistic_ext s, pg_statistic_ext_data d
563593
WHERE s.stxrelid = 'ndistinct'::regclass
564594
AND d.stxoid = s.oid;
565-
stxkind | stxdndistinct
566-
---------+----------------------------------------------------------
567-
{d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000}
595+
stxkind | stxdndistinct
596+
---------+----------------------------
597+
{d,f,m} | [ +
598+
| { +
599+
| "ndistinct": 221, +
600+
| "attributes": [ +
601+
| 3, +
602+
| 4 +
603+
| ] +
604+
| }, +
605+
| { +
606+
| "ndistinct": 247, +
607+
| "attributes": [ +
608+
| 3, +
609+
| 6 +
610+
| ] +
611+
| }, +
612+
| { +
613+
| "ndistinct": 323, +
614+
| "attributes": [ +
615+
| 4, +
616+
| 6 +
617+
| ] +
618+
| }, +
619+
| { +
620+
| "ndistinct": 1000,+
621+
| "attributes": [ +
622+
| 3, +
623+
| 4, +
624+
| 6 +
625+
| ] +
626+
| } +
627+
| ]
568628
(1 row)
569629

570630
-- correct estimates
@@ -623,7 +683,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
623683
(1 row)
624684

625685
DROP STATISTICS s10;
626-
SELECT s.stxkind, d.stxdndistinct
686+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
627687
FROM pg_statistic_ext s, pg_statistic_ext_data d
628688
WHERE s.stxrelid = 'ndistinct'::regclass
629689
AND d.stxoid = s.oid;
@@ -707,13 +767,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
707767

708768
CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
709769
ANALYZE ndistinct;
710-
SELECT s.stxkind, d.stxdndistinct
770+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
711771
FROM pg_statistic_ext s, pg_statistic_ext_data d
712772
WHERE s.stxrelid = 'ndistinct'::regclass
713773
AND d.stxoid = s.oid;
714-
stxkind | stxdndistinct
715-
---------+-------------------------------------------------------------------
716-
{d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000}
774+
stxkind | stxdndistinct
775+
---------+----------------------------
776+
{d,e} | [ +
777+
| { +
778+
| "ndistinct": 221, +
779+
| "attributes": [ +
780+
| -1, +
781+
| -2 +
782+
| ] +
783+
| }, +
784+
| { +
785+
| "ndistinct": 247, +
786+
| "attributes": [ +
787+
| -1, +
788+
| -3 +
789+
| ] +
790+
| }, +
791+
| { +
792+
| "ndistinct": 323, +
793+
| "attributes": [ +
794+
| -2, +
795+
| -3 +
796+
| ] +
797+
| }, +
798+
| { +
799+
| "ndistinct": 1000,+
800+
| "attributes": [ +
801+
| -1, +
802+
| -2, +
803+
| -3 +
804+
| ] +
805+
| } +
806+
| ]
717807
(1 row)
718808

719809
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)');
@@ -756,13 +846,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b
756846

757847
CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
758848
ANALYZE ndistinct;
759-
SELECT s.stxkind, d.stxdndistinct
849+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
760850
FROM pg_statistic_ext s, pg_statistic_ext_data d
761851
WHERE s.stxrelid = 'ndistinct'::regclass
762852
AND d.stxoid = s.oid;
763-
stxkind | stxdndistinct
764-
---------+-------------------------------------------------------------
765-
{d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000}
853+
stxkind | stxdndistinct
854+
---------+----------------------------
855+
{d,e} | [ +
856+
| { +
857+
| "ndistinct": 221, +
858+
| "attributes": [ +
859+
| 3, +
860+
| 4 +
861+
| ] +
862+
| }, +
863+
| { +
864+
| "ndistinct": 247, +
865+
| "attributes": [ +
866+
| 3, +
867+
| -1 +
868+
| ] +
869+
| }, +
870+
| { +
871+
| "ndistinct": 323, +
872+
| "attributes": [ +
873+
| 4, +
874+
| -1 +
875+
| ] +
876+
| }, +
877+
| { +
878+
| "ndistinct": 1000,+
879+
| "attributes": [ +
880+
| 3, +
881+
| 4, +
882+
| -1 +
883+
| ] +
884+
| } +
885+
| ]
766886
(1 row)
767887

768888
SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b');

src/test/regress/sql/stats_ext.sql

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1;
125125
ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0;
126126
\d ab1
127127
ANALYZE ab1;
128-
SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit
128+
SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit
129129
FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid)
130130
WHERE s.stxname = 'ab1_a_b_stats';
131131
ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1;
@@ -297,7 +297,7 @@ CREATE STATISTICS s10 ON a, b, c FROM ndistinct;
297297

298298
ANALYZE ndistinct;
299299

300-
SELECT s.stxkind, d.stxdndistinct
300+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
301301
FROM pg_statistic_ext s, pg_statistic_ext_data d
302302
WHERE s.stxrelid = 'ndistinct'::regclass
303303
AND d.stxoid = s.oid;
@@ -338,7 +338,7 @@ INSERT INTO ndistinct (a, b, c, filler1)
338338

339339
ANALYZE ndistinct;
340340

341-
SELECT s.stxkind, d.stxdndistinct
341+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
342342
FROM pg_statistic_ext s, pg_statistic_ext_data d
343343
WHERE s.stxrelid = 'ndistinct'::regclass
344344
AND d.stxoid = s.oid;
@@ -364,7 +364,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, (
364364

365365
DROP STATISTICS s10;
366366

367-
SELECT s.stxkind, d.stxdndistinct
367+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
368368
FROM pg_statistic_ext s, pg_statistic_ext_data d
369369
WHERE s.stxrelid = 'ndistinct'::regclass
370370
AND d.stxoid = s.oid;
@@ -399,7 +399,7 @@ CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct;
399399

400400
ANALYZE ndistinct;
401401

402-
SELECT s.stxkind, d.stxdndistinct
402+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
403403
FROM pg_statistic_ext s, pg_statistic_ext_data d
404404
WHERE s.stxrelid = 'ndistinct'::regclass
405405
AND d.stxoid = s.oid;
@@ -423,7 +423,7 @@ CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct;
423423

424424
ANALYZE ndistinct;
425425

426-
SELECT s.stxkind, d.stxdndistinct
426+
SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct
427427
FROM pg_statistic_ext s, pg_statistic_ext_data d
428428
WHERE s.stxrelid = 'ndistinct'::regclass
429429
AND d.stxoid = s.oid;

0 commit comments

Comments
 (0)