From a839ad8a2bdb53cf76dd31c80597701daa9c040e Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 11 Nov 2025 14:05:49 +0900 Subject: [PATCH 1/7] Refactor output format of pg_ndistinct. The existing format of pg_ndistinct uses a single-object JSON structure where each key is itself a comma-separated list of attnums. While this is a very compact format, it's confusing to read and is difficult to manipulate values within the object. This wasn't a concern until statistics import functions were introduced, enabling users to inject hypothetical statistics into an object to observe their effect on the query planner. The new format is an array of objects, each object must have the keys "attributes", which must contain an array of attnums, and "ndistinct", which must be an integer. This is a quirk because the underlying internal storage is a double, but the value stored was always an integer. The change in format is described from the changes to src/test/regress/expected/stats_ext.out. --- doc/src/sgml/perform.sgml | 36 ++++- src/backend/utils/adt/pg_ndistinct.c | 22 +-- src/include/statistics/statistics_format.h | 30 ++++ src/test/regress/expected/stats_ext.out | 156 ++++++++++++++++++--- src/test/regress/sql/stats_ext.sql | 12 +- 5 files changed, 220 insertions(+), 36 deletions(-) create mode 100644 src/include/statistics/statistics_format.h diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index 106583fb2965..b2dc2d27a770 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1576,12 +1576,42 @@ CREATE STATISTICS stts2 (ndistinct) ON city, state, zip FROM zipcodes; ANALYZE zipcodes; -SELECT stxkeys AS k, stxdndistinct AS nd +SELECT stxkeys AS k, jsonb_pretty(stxdndistinct::text::jsonb) AS nd FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid) WHERE stxname = 'stts2'; --[ RECORD 1 ]------------------------------------------------------&zwsp;-- +-[ RECORD 1 ]------------------- k | 1 2 5 -nd | {"1, 2": 33178, "1, 
5": 33178, "2, 5": 27435, "1, 2, 5": 33178} +nd | [ + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 2 + + | ] + + | }, + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 5 + + | ] + + | }, + + | { + + | "ndistinct": 27435,+ + | "attributes": [ + + | 2, + + | 5 + + | ] + + | }, + + | { + + | "ndistinct": 33178,+ + | "attributes": [ + + | 1, + + | 2, + + | 5 + + | ] + + | } + + | ] (1 row) This indicates that there are three combinations of columns that diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c index 667ada9c3b45..97efc290ef5e 100644 --- a/src/backend/utils/adt/pg_ndistinct.c +++ b/src/backend/utils/adt/pg_ndistinct.c @@ -16,6 +16,7 @@ #include "lib/stringinfo.h" #include "statistics/extended_stats_internal.h" +#include "statistics/statistics_format.h" #include "utils/fmgrprotos.h" @@ -51,26 +52,29 @@ pg_ndistinct_out(PG_FUNCTION_ARGS) StringInfoData str; initStringInfo(&str); - appendStringInfoChar(&str, '{'); + appendStringInfoChar(&str, '['); for (i = 0; i < ndist->nitems; i++) { - int j; MVNDistinctItem item = ndist->items[i]; if (i > 0) appendStringInfoString(&str, ", "); - for (j = 0; j < item.nattributes; j++) - { - AttrNumber attnum = item.attributes[j]; + if (item.nattributes <= 0) + elog(ERROR, "invalid zero-length attribute array in MVNDistinct"); - appendStringInfo(&str, "%s%d", (j == 0) ? 
"\"" : ", ", attnum); - } - appendStringInfo(&str, "\": %d", (int) item.ndistinct); + appendStringInfo(&str, "{\"" PG_NDISTINCT_KEY_ATTRIBUTES "\": [%d", + item.attributes[0]); + + for (int j = 1; j < item.nattributes; j++) + appendStringInfo(&str, ", %d", item.attributes[j]); + + appendStringInfo(&str, "], \"" PG_NDISTINCT_KEY_NDISTINCT "\": %d}", + (int) item.ndistinct); } - appendStringInfoChar(&str, '}'); + appendStringInfoChar(&str, ']'); PG_RETURN_CSTRING(str.data); } diff --git a/src/include/statistics/statistics_format.h b/src/include/statistics/statistics_format.h new file mode 100644 index 000000000000..ba97c0880be1 --- /dev/null +++ b/src/include/statistics/statistics_format.h @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * statistics_format.h + * Data related to the format of extended statistics, usable by both + * frontend and backend code. + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/statistics/statistics_format.h + * + *------------------------------------------------------------------------- + */ +#ifndef STATISTICS_FORMAT_H +#define STATISTICS_FORMAT_H + +/* ---------- + * pg_ndistinct in human-readable format is a JSON array made of elements with + * a predefined set of keys, like: + * + * [{"ndistinct": 11, "attributes": [3,4]}, + * {"ndistinct": 11, "attributes": [3,6]}, + * {"ndistinct": 11, "attributes": [4,6]}, + * {"ndistinct": 11, "attributes": [3,4,6]}] + * ---------- + */ +#define PG_NDISTINCT_KEY_ATTRIBUTES "attributes" +#define PG_NDISTINCT_KEY_NDISTINCT "ndistinct" + +#endif /* STATISTICS_FORMAT_H */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index 495a1b350189..e9379afe39e2 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -196,7 +196,7 @@ Statistics 
objects: "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0 ANALYZE ab1; -SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit +SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) WHERE s.stxname = 'ab1_a_b_stats'; stxname | stxdndistinct | stxddependencies | stxdmcv | stxdinherit @@ -476,13 +476,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, ( -- correct command CREATE STATISTICS s10 ON a, b, c FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+----------------------------------------------------- - {d,f,m} | {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11} + stxkind | stxdndistinct +---------+-------------------------- + {d,f,m} | [ + + | { + + | "ndistinct": 11,+ + | "attributes": [ + + | 3, + + | 4 + + | ] + + | }, + + | { + + | "ndistinct": 11,+ + | "attributes": [ + + | 3, + + | 6 + + | ] + + | }, + + | { + + | "ndistinct": 11,+ + | "attributes": [ + + | 4, + + | 6 + + | ] + + | }, + + | { + + | "ndistinct": 11,+ + | "attributes": [ + + | 3, + + | 4, + + | 6 + + | ] + + | } + + | ] (1 row) -- minor improvement, make sure the ctid does not break the matching @@ -558,13 +588,43 @@ INSERT INTO ndistinct (a, b, c, filler1) mod(i,23) || ' dollars and zero cents' FROM generate_series(1,1000) s(i); ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct 
----------+---------------------------------------------------------- - {d,f,m} | {"3, 4": 221, "3, 6": 247, "4, 6": 323, "3, 4, 6": 1000} + stxkind | stxdndistinct +---------+---------------------------- + {d,f,m} | [ + + | { + + | "ndistinct": 221, + + | "attributes": [ + + | 3, + + | 4 + + | ] + + | }, + + | { + + | "ndistinct": 247, + + | "attributes": [ + + | 3, + + | 6 + + | ] + + | }, + + | { + + | "ndistinct": 323, + + | "attributes": [ + + | 4, + + | 6 + + | ] + + | }, + + | { + + | "ndistinct": 1000,+ + | "attributes": [ + + | 3, + + | 4, + + | 6 + + | ] + + | } + + | ] (1 row) -- correct estimates @@ -623,7 +683,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, ( (1 row) DROP STATISTICS s10; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; @@ -707,13 +767,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, ( CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+------------------------------------------------------------------- - {d,e} | {"-1, -2": 221, "-1, -3": 247, "-2, -3": 323, "-1, -2, -3": 1000} + stxkind | stxdndistinct +---------+---------------------------- + {d,e} | [ + + | { + + | "ndistinct": 221, + + | "attributes": [ + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 247, + + | "attributes": [ + + | -1, + + | -3 + + | ] + + | }, + + | { + + | "ndistinct": 323, + + | "attributes": [ + + | -2, + + | -3 + + | ] + + | }, + + | { + + | "ndistinct": 1000,+ + | "attributes": [ + + | -1, + + | -2, + 
+ | -3 + + | ] + + | } + + | ] (1 row) SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY (a+1), (b+100)'); @@ -756,13 +846,43 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; - stxkind | stxdndistinct ----------+------------------------------------------------------------- - {d,e} | {"3, 4": 221, "3, -1": 247, "4, -1": 323, "3, 4, -1": 1000} + stxkind | stxdndistinct +---------+---------------------------- + {d,e} | [ + + | { + + | "ndistinct": 221, + + | "attributes": [ + + | 3, + + | 4 + + | ] + + | }, + + | { + + | "ndistinct": 247, + + | "attributes": [ + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 323, + + | "attributes": [ + + | 4, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 1000,+ + | "attributes": [ + + | 3, + + | 4, + + | -1 + + | ] + + | } + + | ] (1 row) SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, b'); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index fc6f152a072f..fc4aee6d8399 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -125,7 +125,7 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1; ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; \d ab1 ANALYZE ab1; -SELECT stxname, stxdndistinct, stxddependencies, stxdmcv, stxdinherit +SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) WHERE s.stxname = 'ab1_a_b_stats'; ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; @@ -297,7 +297,7 @@ CREATE STATISTICS s10 ON a, b, 
c FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; @@ -338,7 +338,7 @@ INSERT INTO ndistinct (a, b, c, filler1) ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; @@ -364,7 +364,7 @@ SELECT * FROM check_estimated_rows('SELECT COUNT(*) FROM ndistinct GROUP BY a, ( DROP STATISTICS s10; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; @@ -399,7 +399,7 @@ CREATE STATISTICS s10 (ndistinct) ON (a+1), (b+100), (2*c) FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; @@ -423,7 +423,7 @@ CREATE STATISTICS s10 (ndistinct) ON a, b, (2*c) FROM ndistinct; ANALYZE ndistinct; -SELECT s.stxkind, d.stxdndistinct +SELECT s.stxkind, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct FROM pg_statistic_ext s, pg_statistic_ext_data d WHERE s.stxrelid = 'ndistinct'::regclass AND d.stxoid = s.oid; From 8a4d7c05d0c4da24f0a32c712a6c9b76a66bbb39 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 11 Nov 2025 14:15:27 +0900 Subject: [PATCH 2/7] Refactor output format of pg_dependencies. The existing format of pg_dependencies uses a single-object JSON structure where each key is itself a comma-separated list of attnums. 
While this is a very compact format, it's confusing to read and is difficult to manipulate values within the object. This wasn't a concern until statistics import functions were introduced, enabling users to inject hypothetical statistics into an object to observe their effect on the query planner. The new format is an array of objects, each object must have the keys "attributes", which must contain an array of attnums, "dependency", which must be an integer, and "degree", which must be a float. The change in format is adequately described from the changes to src/test/regress/expected/stats_ext.out so description here is redundant. --- doc/src/sgml/perform.sgml | 6 +- src/backend/utils/adt/pg_dependencies.c | 33 ++++---- src/include/statistics/statistics_format.h | 20 ++++- src/test/regress/expected/stats_ext.out | 95 ++++++++++++++++++++-- src/test/regress/sql/stats_ext.sql | 7 +- 5 files changed, 128 insertions(+), 33 deletions(-) diff --git a/doc/src/sgml/perform.sgml b/doc/src/sgml/perform.sgml index b2dc2d27a770..014a542daf5a 100644 --- a/doc/src/sgml/perform.sgml +++ b/doc/src/sgml/perform.sgml @@ -1488,9 +1488,9 @@ ANALYZE zipcodes; SELECT stxname, stxkeys, stxddependencies FROM pg_statistic_ext join pg_statistic_ext_data on (oid = stxoid) WHERE stxname = 'stts'; - stxname | stxkeys | stxddependencies ----------+---------+------------------------------------------ - stts | 1 5 | {"1 => 5": 1.000000, "5 => 1": 0.423130} + stxname | stxkeys | stxddependencies +---------+---------+---------------------------------------------------------------------------------------------------------------------- + stts | 1 5 | [{"attributes": [1], "dependency": 5, "degree": 1.000000}, {"attributes": [5], "dependency": 1, "degree": 0.423130}] (1 row) Here it can be seen that column 1 (zip code) fully determines column diff --git a/src/backend/utils/adt/pg_dependencies.c b/src/backend/utils/adt/pg_dependencies.c index a0a9440fd5c0..87181aa00e9a 100644 --- 
a/src/backend/utils/adt/pg_dependencies.c +++ b/src/backend/utils/adt/pg_dependencies.c @@ -16,6 +16,7 @@ #include "lib/stringinfo.h" #include "statistics/extended_stats_internal.h" +#include "statistics/statistics_format.h" #include "utils/fmgrprotos.h" /* @@ -46,34 +47,34 @@ pg_dependencies_out(PG_FUNCTION_ARGS) { bytea *data = PG_GETARG_BYTEA_PP(0); MVDependencies *dependencies = statext_dependencies_deserialize(data); - int i, - j; StringInfoData str; initStringInfo(&str); - appendStringInfoChar(&str, '{'); + appendStringInfoChar(&str, '['); - for (i = 0; i < dependencies->ndeps; i++) + for (int i = 0; i < dependencies->ndeps; i++) { MVDependency *dependency = dependencies->deps[i]; if (i > 0) appendStringInfoString(&str, ", "); - appendStringInfoChar(&str, '"'); - for (j = 0; j < dependency->nattributes; j++) - { - if (j == dependency->nattributes - 1) - appendStringInfoString(&str, " => "); - else if (j > 0) - appendStringInfoString(&str, ", "); - - appendStringInfo(&str, "%d", dependency->attributes[j]); - } - appendStringInfo(&str, "\": %f", dependency->degree); + if (dependency->nattributes <= 1) + elog(ERROR, "invalid zero-length nattributes array in MVDependencies"); + + appendStringInfo(&str, "{\"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\": [%d", + dependency->attributes[0]); + + for (int j = 1; j < dependency->nattributes - 1; j++) + appendStringInfo(&str, ", %d", dependency->attributes[j]); + + appendStringInfo(&str, "], \"" PG_DEPENDENCIES_KEY_DEPENDENCY "\": %d, " + "\"" PG_DEPENDENCIES_KEY_DEGREE "\": %f}", + dependency->attributes[dependency->nattributes - 1], + dependency->degree); } - appendStringInfoChar(&str, '}'); + appendStringInfoChar(&str, ']'); PG_RETURN_CSTRING(str.data); } diff --git a/src/include/statistics/statistics_format.h b/src/include/statistics/statistics_format.h index ba97c0880be1..40655b9ec3b1 100644 --- a/src/include/statistics/statistics_format.h +++ b/src/include/statistics/statistics_format.h @@ -20,11 +20,27 @@ * * 
[{"ndistinct": 11, "attributes": [3,4]}, * {"ndistinct": 11, "attributes": [3,6]}, - * {"ndistinct": 11, "attributes": [4,6]}, - * {"ndistinct": 11, "attributes": [3,4,6]}] + * ... ] + * * ---------- */ #define PG_NDISTINCT_KEY_ATTRIBUTES "attributes" #define PG_NDISTINCT_KEY_NDISTINCT "ndistinct" + +/* ---------- + * pg_dependencies in human-readable format is a JSON array made of elements + * with a predefined set of keys, like: + * + * [{"degree": 1.000000, "attributes": [3], "dependency": 4}, + * {"degree": 1.000000, "attributes": [3], "dependency": 6}, + * ... ] + * + * ---------- + */ + +#define PG_DEPENDENCIES_KEY_ATTRIBUTES "attributes" +#define PG_DEPENDENCIES_KEY_DEPENDENCY "dependency" +#define PG_DEPENDENCIES_KEY_DEGREE "degree" + #endif /* STATISTICS_FORMAT_H */ diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index e9379afe39e2..5a4077f8ed56 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -196,7 +196,8 @@ Statistics objects: "public.ab1_a_b_stats" ON a, b FROM ab1; STATISTICS 0 ANALYZE ab1; -SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit +SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, + jsonb_pretty(d.stxddependencies::text::jsonb) AS stxddependencies, stxdmcv, stxdinherit FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) WHERE s.stxname = 'ab1_a_b_stats'; stxname | stxdndistinct | stxddependencies | stxdmcv | stxdinherit @@ -1433,10 +1434,48 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_dependencies; ANALYZE functional_dependencies; -- print the detected dependencies -SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; - dependencies 
------------------------------------------------------------------------------------------------------------- - {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000, "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000} +SELECT jsonb_pretty(dependencies::text::jsonb) AS dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; + dependencies +----------------------------- + [ + + { + + "degree": 1.000000,+ + "attributes": [ + + 3 + + ], + + "dependency": 4 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + 3 + + ], + + "dependency": 6 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + 4 + + ], + + "dependency": 6 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + 3, + + 4 + + ], + + "dependency": 6 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + 3, + + 6 + + ], + + "dependency": 4 + + } + + ] (1 row) SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); @@ -1775,10 +1814,48 @@ SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FROM functional_dependencies; ANALYZE functional_dependencies; -- print the detected dependencies -SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; - dependencies ------------------------------------------------------------------------------------------------------------------------- - {"-1 => -2": 1.000000, "-1 => -3": 1.000000, "-2 => -3": 1.000000, "-1, -2 => -3": 1.000000, "-1, -3 => -2": 1.000000} +SELECT jsonb_pretty(dependencies::text::jsonb) AS dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; + dependencies +----------------------------- + [ + + { + + "degree": 1.000000,+ + "attributes": [ + + -1 + + ], + + "dependency": -2 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + -1 + + ], + + "dependency": -3 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + -2 
+ + ], + + "dependency": -3 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + -1, + + -2 + + ], + + "dependency": -3 + + }, + + { + + "degree": 1.000000,+ + "attributes": [ + + -1, + + -3 + + ], + + "dependency": -2 + + } + + ] (1 row) SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'''); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index fc4aee6d8399..94e2139c5042 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -125,7 +125,8 @@ ALTER TABLE ab1 ALTER a SET STATISTICS -1; ALTER STATISTICS ab1_a_b_stats SET STATISTICS 0; \d ab1 ANALYZE ab1; -SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, stxddependencies, stxdmcv, stxdinherit +SELECT stxname, jsonb_pretty(d.stxdndistinct::text::jsonb) AS stxdndistinct, + jsonb_pretty(d.stxddependencies::text::jsonb) AS stxddependencies, stxdmcv, stxdinherit FROM pg_statistic_ext s LEFT JOIN pg_statistic_ext_data d ON (d.stxoid = s.oid) WHERE s.stxname = 'ab1_a_b_stats'; ALTER STATISTICS ab1_a_b_stats SET STATISTICS -1; @@ -708,7 +709,7 @@ CREATE STATISTICS func_deps_stat (dependencies) ON a, b, c FROM functional_depen ANALYZE functional_dependencies; -- print the detected dependencies -SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; +SELECT jsonb_pretty(dependencies::text::jsonb) AS dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE a = 1 AND b = ''1'''); @@ -844,7 +845,7 @@ CREATE STATISTICS func_deps_stat (dependencies) ON (a * 2), upper(b), (c + 1) FR ANALYZE functional_dependencies; -- print the detected dependencies -SELECT dependencies FROM pg_stats_ext WHERE statistics_name = 'func_deps_stat'; +SELECT jsonb_pretty(dependencies::text::jsonb) AS dependencies FROM pg_stats_ext WHERE statistics_name = 
'func_deps_stat'; SELECT * FROM check_estimated_rows('SELECT * FROM functional_dependencies WHERE (a * 2) = 2 AND upper(b) = ''1'''); From 29c6b893f647f3c0fa89b43da5202d2ff3d75941 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 11 Nov 2025 16:47:00 +0900 Subject: [PATCH 3/7] Add working input function for pg_ndistinct. This will consume the format that was established when the output function for pg_ndistinct was recently changed. This will be needed for importing extended statistics. --- src/backend/utils/adt/pg_ndistinct.c | 595 ++++++++++++++++++++- src/test/regress/expected/pg_ndistinct.out | 109 ++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/pg_ndistinct.sql | 34 ++ 4 files changed, 732 insertions(+), 8 deletions(-) create mode 100644 src/test/regress/expected/pg_ndistinct.out create mode 100644 src/test/regress/sql/pg_ndistinct.sql diff --git a/src/backend/utils/adt/pg_ndistinct.c b/src/backend/utils/adt/pg_ndistinct.c index 97efc290ef5e..96eaa09b4ed8 100644 --- a/src/backend/utils/adt/pg_ndistinct.c +++ b/src/backend/utils/adt/pg_ndistinct.c @@ -14,34 +14,615 @@ #include "postgres.h" +#include "common/int.h" +#include "common/jsonapi.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics_format.h" +#include "utils/builtins.h" #include "utils/fmgrprotos.h" +typedef enum +{ + NDIST_EXPECT_START = 0, + NDIST_EXPECT_ITEM, + NDIST_EXPECT_KEY, + NDIST_EXPECT_ATTNUM_LIST, + NDIST_EXPECT_ATTNUM, + NDIST_EXPECT_NDISTINCT, + NDIST_EXPECT_COMPLETE +} NDistinctSemanticState; + +typedef struct +{ + const char *str; + NDistinctSemanticState state; + + List *distinct_items; /* Accumulated complete MVNDistinctItems */ + Node *escontext; + + bool found_attributes; /* Item has an attributes key */ + bool found_ndistinct; /* Item has ndistinct key */ + List *attnum_list; /* Accumulated attributes attnums */ + int64 ndistinct; +} 
NDistinctParseState; + +/* + * Invoked at the start of each MVNDistinctItem. + * + * The entire JSON document should be one array of MVNDistinctItem objects. + * + * If we're anywhere else in the document, it's an error. + */ +static JsonParseErrorType +ndistinct_object_start(void *state) +{ + NDistinctParseState *parse = state; + + switch(parse->state) + { + case NDIST_EXPECT_ITEM: + /* Now we expect to see attributes/ndistinct keys */ + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Expected Item object."))); + } + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Routine to allow qsorting of AttrNumbers + */ +static int +attnum_compare(const void *aptr, const void *bptr) +{ + AttrNumber a = *(const AttrNumber *) aptr; + AttrNumber b = *(const AttrNumber *) bptr; + + return pg_cmp_s16(a, b); +} + + +/* + * Invoked at the end of an object. + * + * Check to ensure that it was a complete MVNDistinctItem + * + */ +static JsonParseErrorType +ndistinct_object_end(void *state) +{ + NDistinctParseState *parse = state; + + int natts = 0; + AttrNumber *attrsort; + + MVNDistinctItem *item; + + if (!parse->found_attributes) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" key."))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_ndistinct) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_NDISTINCT_KEY_NDISTINCT "\" key."))); + return JSON_SEM_ACTION_FAILED; + } + + /* + * We need at least 2 attnums for a ndistinct item, anything less is + * malformed.
+ */ + natts = parse->attnum_list->length; + if (natts < 2) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("The \"" PG_NDISTINCT_KEY_ATTRIBUTES + "\" key must contain an array of at least two attnums."))); + + return JSON_SEM_ACTION_FAILED; + } + attrsort = palloc0(natts * sizeof(AttrNumber)); + + /* Create the MVNDistinctItem */ + item = palloc(sizeof(MVNDistinctItem)); + item->nattributes = natts; + item->attributes = palloc0(natts * sizeof(AttrNumber)); + item->ndistinct = (double) parse->ndistinct; + + /* fill out both attnum list and sortable list */ + for (int i = 0; i < natts; i++) + { + attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value; + item->attributes[i] = attrsort[i]; + } + + /* Check attrsort for uniqueness */ + qsort(attrsort, natts, sizeof(AttrNumber), attnum_compare); + for (int i = 1; i < natts; i++) + { + if (attrsort[i] == attrsort[i - 1]) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("attnum list duplicate value found: %d.", attrsort[i]))); + + return JSON_SEM_ACTION_FAILED; + } + } + pfree(attrsort); + + parse->distinct_items = lappend(parse->distinct_items, (void *) item); + + /* reset item state vars */ + list_free(parse->attnum_list); + parse->attnum_list = NIL; + parse->ndistinct = 0; + parse->found_attributes = false; + parse->found_ndistinct = false; + + /* Now we are looking for the next MVNDistinctItem */ + parse->state = NDIST_EXPECT_ITEM; + return JSON_SUCCESS; +} + + +/* + * ndistinct input format has two types of arrays, the outer MVNDistinctItem + * array, and the attnum list array within each MVNDistinctItem.
+ */ +static JsonParseErrorType +ndistinct_array_start(void *state) +{ + NDistinctParseState *parse = state; + + switch (parse->state) + { + case NDIST_EXPECT_ATTNUM_LIST: + parse->state = NDIST_EXPECT_ATTNUM; + break; + + case NDIST_EXPECT_START: + parse->state = NDIST_EXPECT_ITEM; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Array found in unexpected place."))); + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; +} + + +static JsonParseErrorType +ndistinct_array_end(void *state) +{ + NDistinctParseState *parse = state; + + switch (parse->state) + { + case NDIST_EXPECT_ATTNUM: + if (parse->attnum_list != NIL) + { + /* The attnum list is complete, look for more MVNDistinctItem keys */ + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("The \"" PG_NDISTINCT_KEY_ATTRIBUTES + "\" key must be an non-empty array."))); + return JSON_SEM_ACTION_FAILED; + break; + + case NDIST_EXPECT_ITEM: + if (parse->distinct_items != NIL) + { + /* Item list is complete, we're done. 
*/ + parse->state = NDIST_EXPECT_COMPLETE; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item array cannot be empty."))); + + return JSON_SEM_ACTION_FAILED; + break; + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Array found in unexpected place."))); + } + return JSON_SEM_ACTION_FAILED; +} + + +/* + * The valid keys for the MVNDistinctItem object are: + * - attributes + * - ndistinct + */ +static JsonParseErrorType +ndistinct_object_field_start(void *state, char *fname, bool isnull) +{ + NDistinctParseState *parse = state; + + if (strcmp(fname, PG_NDISTINCT_KEY_ATTRIBUTES) == 0) + { + parse->found_attributes = true; + parse->state = NDIST_EXPECT_ATTNUM_LIST; + return JSON_SUCCESS; + } + + if (strcmp(fname, PG_NDISTINCT_KEY_NDISTINCT) == 0) + { + parse->found_ndistinct = true; + parse->state = NDIST_EXPECT_NDISTINCT; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Invalid key \"%s\". 
Only allowed keys are \"" + PG_NDISTINCT_KEY_ATTRIBUTES "\" and \"" + PG_NDISTINCT_KEY_NDISTINCT "\".", fname))); + return JSON_SEM_ACTION_FAILED; +} + +/* + * Invoked at the start of each array element; null elements are rejected. + */ +static JsonParseErrorType +ndistinct_array_element_start(void *state, bool isnull) +{ + NDistinctParseState *parse = state; + + switch(parse->state) + { + case NDIST_EXPECT_ATTNUM: + if (!isnull) + return JSON_SUCCESS; + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Attnum list elements cannot be null."))); + + break; + + case NDIST_EXPECT_ITEM: + if (!isnull) + return JSON_SUCCESS; + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Item list elements cannot be null."))); + + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Unexpected array element."))); + } + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Handle scalar events from the ndistinct input parser. + * + */ +static JsonParseErrorType +ndistinct_scalar(void *state, char *token, JsonTokenType tokentype) +{ + NDistinctParseState *parse = state; + AttrNumber attnum; + + switch(parse->state) + { + case NDIST_EXPECT_ATTNUM: + attnum = pg_strtoint16_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum); + return JSON_SUCCESS; + break; + + case NDIST_EXPECT_NDISTINCT: + /* + * While the structure dictates that ndistinct is a double precision + * floating point, in practice it has always been an integer, and it + * is output as such. Therefore, we follow usage precedent over the + * actual storage structure, and read it in as an integer.
+ */ + parse->ndistinct = pg_strtoint64_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->state = NDIST_EXPECT_KEY; + return JSON_SUCCESS; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", parse->str), + errdetail("Unexpected scalar."))); + } + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Return true if two MVNDistinctItem values list the same attnums in the + * same order (the comparison is positional, not set-based). + */ +static +bool has_duplicate_attributes(const MVNDistinctItem *a, + const MVNDistinctItem *b) +{ + int i; + + if (a->nattributes != b->nattributes) + return false; + + for (i = 0; i < a->nattributes; i++) + { + if (a->attributes[i] != b->attributes[i]) + return false; + } + + return true; +} + +/* + * Return true if an attnum appears as one of the attnums in a given + * MVNDistinctItem. + */ +static +bool item_has_attnum(const MVNDistinctItem *item, AttrNumber attnum) +{ + for (int i = 0; i < item->nattributes; i++) + { + if (attnum == item->attributes[i]) + return true; + } + return false; +} + +/* + * Return true if every attnum of item also appears in refitem, i.e. the + * attributes of item are a (not necessarily proper) subset of refitem's. + */ +static +bool item_is_attnum_subset(const MVNDistinctItem *item, + const MVNDistinctItem *refitem) +{ + for (int i = 0; i < item->nattributes; i++) + { + if (!item_has_attnum(refitem,item->attributes[i])) + return false; + } + return true; +} + +/* + * Generate a string representing an array of attnums. + * + * Freeing the allocated string is the responsibility of the caller.
+ */ +static +const char *item_attnum_list(const MVNDistinctItem *item) +{ + StringInfoData str; + + initStringInfo(&str); + + appendStringInfo(&str, "%d", item->attributes[0]); + + for (int i = 1; i < item->nattributes; i++) + appendStringInfo(&str, ", %d", item->attributes[i]); + + return str.data; +} /* * pg_ndistinct_in * input routine for type pg_ndistinct * - * pg_ndistinct is real enough to be a table column, but it has no - * operations of its own, and disallows input (just like pg_node_tree). + * example input: + * [{"attributes": [6, -1], "ndistinct": 14}, + * {"attributes": [6, -2], "ndistinct": 9143}, + * {"attributes": [-1,-2], "ndistinct": 13454}, + * {"attributes": [6, -1, -2], "ndistinct": 14549}] */ Datum pg_ndistinct_in(PG_FUNCTION_ARGS) { - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot accept a value of type %s", "pg_ndistinct"))); + char *str = PG_GETARG_CSTRING(0); - PG_RETURN_VOID(); /* keep compiler quiet */ + NDistinctParseState parse_state; + JsonParseErrorType result; + JsonLexContext *lex; + JsonSemAction sem_action; + + int item_most_attrs = 0; + int item_most_attrs_idx = 0; + + /* initialize semantic state */ + parse_state.str = str; + parse_state.state = NDIST_EXPECT_START; + parse_state.distinct_items = NIL; + parse_state.escontext = fcinfo->context; + parse_state.found_attributes = false; + parse_state.found_ndistinct = false; + parse_state.attnum_list = NIL; + parse_state.ndistinct = 0; + + /* set callbacks */ + sem_action.semstate = (void *) &parse_state; + sem_action.object_start = ndistinct_object_start; + sem_action.object_end = ndistinct_object_end; + sem_action.array_start = ndistinct_array_start; + sem_action.array_end = ndistinct_array_end; + sem_action.object_field_start = ndistinct_object_field_start; + sem_action.object_field_end = NULL; + sem_action.array_element_start = ndistinct_array_element_start; + sem_action.array_element_end = NULL; + sem_action.scalar = ndistinct_scalar; + + lex = 
makeJsonLexContextCstringLen(NULL, str, strlen(str), + PG_UTF8, true); + result = pg_parse_json(lex, &sem_action); + freeJsonLexContext(lex); + + if (result == JSON_SUCCESS && + parse_state.distinct_items != NIL) + { + MVNDistinct *ndistinct; + int nitems = parse_state.distinct_items->length; + bytea *bytes; + + + ndistinct = palloc(offsetof(MVNDistinct, items) + + nitems * sizeof(MVNDistinctItem)); + + ndistinct->magic = STATS_NDISTINCT_MAGIC; + ndistinct->type = STATS_NDISTINCT_TYPE_BASIC; + ndistinct->nitems = nitems; + + for (int i = 0; i < nitems; i++) + { + MVNDistinctItem *item = parse_state.distinct_items->elements[i].ptr_value; + + /* + * Ensure that this item does not duplicate the attributes of any + * pre-existing item. + */ + for (int j = 0; j < i; j++) + { + if (has_duplicate_attributes(item, &ndistinct->items[j])) + { + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", str), + errdetail("Duplicate \"" PG_NDISTINCT_KEY_ATTRIBUTES "\" array : [%s]", + item_attnum_list(item)))); + PG_RETURN_NULL(); + } + } + + ndistinct->items[i].ndistinct = item->ndistinct; + ndistinct->items[i].nattributes = item->nattributes; + ndistinct->items[i].attributes = item->attributes; + + /* + * Keep track of the first longest attribute list. All other attribute + * lists must be a subset of this list. + */ + if (item->nattributes > item_most_attrs) + { + item_most_attrs = item->nattributes; + item_most_attrs_idx = i; + } + + /* + * Free the MVNDistinctItem, but not the attributes we're still + * using. + */ + pfree(item); + } + + /* + * Verify that all attnum sets are a proper subset of the first longest + * attnum set. 
+ */ + for (int i = 0; i < nitems; i++) + { + if (i == item_most_attrs_idx) + continue; + + if (!item_is_attnum_subset(&ndistinct->items[i], + &ndistinct->items[item_most_attrs_idx])) + { + const MVNDistinctItem *item = &ndistinct->items[i]; + const MVNDistinctItem *refitem = &ndistinct->items[item_most_attrs_idx]; + const char *item_list = item_attnum_list(item); + const char *refitem_list = item_attnum_list(refitem); + + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", str), + errdetail("\"" PG_NDISTINCT_KEY_ATTRIBUTES "\" array: [%s]" + "must be a subset of array: [%s]", + item_list, refitem_list))); + PG_RETURN_NULL(); + } + } + + bytes = statext_ndistinct_serialize(ndistinct); + + list_free(parse_state.distinct_items); + for (int i = 0; i < nitems; i++) + pfree(ndistinct->items[i].attributes); + pfree(ndistinct); + + PG_RETURN_BYTEA_P(bytes); + } + else if (result == JSON_SEM_ACTION_FAILED) + PG_RETURN_NULL(); /* escontext already set */ + + /* Anything else is a generic JSON parse error */ + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_ndistinct: \"%s\"", str), + errdetail("Must be valid JSON."))); + PG_RETURN_NULL(); } /* * pg_ndistinct_out * output routine for type pg_ndistinct * - * Produces a human-readable representation of the value. + * Produces a human-readable representation of the value, in the format: + * [{"attributes": [attnum,. ..], "ndistinct": int}, ...] 
+ * */ Datum pg_ndistinct_out(PG_FUNCTION_ARGS) diff --git a/src/test/regress/expected/pg_ndistinct.out b/src/test/regress/expected/pg_ndistinct.out new file mode 100644 index 000000000000..d99e84a2bceb --- /dev/null +++ b/src/test/regress/expected/pg_ndistinct.out @@ -0,0 +1,109 @@ +-- Tests for type pg_distinct +-- Invalid inputs +SELECT '[]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[]" +LINE 1: SELECT '[]'::pg_ndistinct; + ^ +DETAIL: Item array cannot be empty. +SELECT '[null]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[null]" +LINE 1: SELECT '[null]'::pg_ndistinct; + ^ +DETAIL: Item list elements cannot be null. +-- Invalid keys +SELECT '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes_invalid" : [2,3], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]'::... + ^ +DETAIL: Invalid key "attributes_invalid". Only allowed keys are "attributes" and "ndistinct". +SELECT '[{"attributes" : [2,3], "invalid" : 3, "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "invalid" : 3, "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "invalid" : 3, "ndistinct" :... + ^ +DETAIL: Invalid key "invalid". Only allowed keys are "attributes" and "ndistinct". +-- Missing key +SELECT '[{"attributes" : [2,3]}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3]}]" +LINE 1: SELECT '[{"attributes" : [2,3]}]'::pg_ndistinct; + ^ +DETAIL: Item must contain "ndistinct" key. +SELECT '[{"ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"ndistinct" : 4}]" +LINE 1: SELECT '[{"ndistinct" : 4}]'::pg_ndistinct; + ^ +DETAIL: Item must contain "attributes" key. +-- Valid keys, invalid values +SELECT '[{"attributes" : null, "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : null, "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : null, "ndistinct" : 4}]'::pg_ndisti... 
+ ^ +DETAIL: Unexpected scalar. +SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,null], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_nd... + ^ +DETAIL: Attnum list elements cannot be null. +SELECT '[{"attributes" : [2,3], "ndistinct" : null}]'::pg_ndistinct; +ERROR: invalid input syntax for type bigint: "null" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : null}]'::pg_nd... + ^ +SELECT '[{"attributes" : [2,"a"], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: invalid input syntax for type smallint: "a" +LINE 1: SELECT '[{"attributes" : [2,"a"], "ndistinct" : 4}]'::pg_ndi... + ^ +SELECT '[{"attributes" : [2,3], "ndistinct" : "a"}]'::pg_ndistinct; +ERROR: invalid input syntax for type bigint: "a" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : "a"}]'::pg_ndi... + ^ +SELECT '[{"attributes" : [2,3], "ndistinct" : []}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : []}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : []}]'::pg_ndis... + ^ +DETAIL: Array found in unexpected place. +SELECT '[{"attributes" : [2,3], "ndistinct" : [null]}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : [null]}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : [null]}]'::pg_... + ^ +DETAIL: Array found in unexpected place. +SELECT '[{"attributes" : [2,3], "ndistinct" : [1,null]}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : [1,null]}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : [1,null]}]'::p... + ^ +DETAIL: Array found in unexpected place. +SELECT '[{"attributes" : 1, "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : 1, "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : 1, "ndistinct" : 4}]'::pg_ndistinct... + ^ +DETAIL: Unexpected scalar. 
+SELECT '[{"attributes" : "a", "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : "a", "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : "a", "ndistinct" : 4}]'::pg_ndistin... + ^ +DETAIL: Unexpected scalar. +-- Duplicated attributes +SELECT '[{"attributes" : [2,2], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,2], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,2], "ndistinct" : 4}]'::pg_ndist... + ^ +DETAIL: attnum list duplicate value found: 2. +-- Valid inputs +-- Duplicated attribute lists. +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,3], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + ^ +DETAIL: Duplicate "attributes" array : [2, 3] +-- Partially-covered attribute lists. +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; +ERROR: malformed pg_ndistinct: "[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + ^ +DETAIL: "attributes" array: [2, 3]must be a subset of array: [1, 3, -1, -2] diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f56482fb9f12..f3f0b5f2f317 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: 
geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import pg_ndistinct # ---------- # Load huge amounts of data diff --git a/src/test/regress/sql/pg_ndistinct.sql b/src/test/regress/sql/pg_ndistinct.sql new file mode 100644 index 000000000000..ca89fed6fe27 --- /dev/null +++ b/src/test/regress/sql/pg_ndistinct.sql @@ -0,0 +1,34 @@ +-- Tests for type pg_distinct + +-- Invalid inputs +SELECT '[]'::pg_ndistinct; +SELECT '[null]'::pg_ndistinct; +-- Invalid keys +SELECT '[{"attributes_invalid" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "invalid" : 3, "ndistinct" : 4}]'::pg_ndistinct; +-- Missing key +SELECT '[{"attributes" : [2,3]}]'::pg_ndistinct; +SELECT '[{"ndistinct" : 4}]'::pg_ndistinct; +-- Valid keys, invalid values +SELECT '[{"attributes" : null, "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,null], "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "ndistinct" : null}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,"a"], "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "ndistinct" : "a"}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "ndistinct" : []}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "ndistinct" : [null]}]'::pg_ndistinct; +SELECT '[{"attributes" : [2,3], "ndistinct" : [1,null]}]'::pg_ndistinct; +SELECT '[{"attributes" : 1, "ndistinct" : 4}]'::pg_ndistinct; +SELECT '[{"attributes" : "a", "ndistinct" : 4}]'::pg_ndistinct; +-- Duplicated attributes +SELECT '[{"attributes" : [2,2], "ndistinct" : 4}]'::pg_ndistinct; + +-- Valid inputs +-- Duplicated attribute lists. +SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,3], "ndistinct" : 4}]'::pg_ndistinct; +-- Partially-covered attribute lists. 
+SELECT '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [1,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct; From ead36b17639f8a1e44070a763f468233d3e66209 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 11 Nov 2025 16:55:47 +0900 Subject: [PATCH 4/7] Add working input function for pg_dependencies. This will consume the format that was established when the output function for pg_dependencies was recently changed. This will be needed for importing extended statistics. --- src/backend/utils/adt/pg_dependencies.c | 620 +++++++++++++++++- src/test/regress/expected/pg_dependencies.out | 128 ++++ src/test/regress/parallel_schedule | 2 +- src/test/regress/sql/pg_dependencies.sql | 39 ++ 4 files changed, 778 insertions(+), 11 deletions(-) create mode 100644 src/test/regress/expected/pg_dependencies.out create mode 100644 src/test/regress/sql/pg_dependencies.sql diff --git a/src/backend/utils/adt/pg_dependencies.c b/src/backend/utils/adt/pg_dependencies.c index 87181aa00e9a..f40394fc359b 100644 --- a/src/backend/utils/adt/pg_dependencies.c +++ b/src/backend/utils/adt/pg_dependencies.c @@ -14,29 +14,629 @@ #include "postgres.h" +#include "common/int.h" +#include "common/jsonapi.h" #include "lib/stringinfo.h" +#include "mb/pg_wchar.h" +#include "nodes/miscnodes.h" #include "statistics/extended_stats_internal.h" #include "statistics/statistics_format.h" +#include "utils/builtins.h" +#include "utils/float.h" #include "utils/fmgrprotos.h" +typedef enum +{ + DEPS_EXPECT_START = 0, + DEPS_EXPECT_ITEM, + DEPS_EXPECT_KEY, + DEPS_EXPECT_ATTNUM_LIST, + DEPS_EXPECT_ATTNUM, + DEPS_EXPECT_DEPENDENCY, + DEPS_EXPECT_DEGREE, + DEPS_PARSE_COMPLETE +} DepsParseSemanticState; + +typedef struct +{ + const char *str; + DepsParseSemanticState state; + + List *dependency_list; + Node *escontext; + + bool found_attributes; /* Item has an attributes key */ + bool found_dependency; /* Item has an 
dependency key */ + bool found_degree; /* Item has a degree key */ + List *attnum_list; /* Accumulated attributes attnums */ + AttrNumber dependency; + double degree; +} DependenciesParseState; + +/* + * Invoked at the start of each MVDependency object. + * + * The entire JSON document should be one array of MVDependency objects. + * + * If we're anywhere else in the document, it's an error. + */ +static JsonParseErrorType +dependencies_object_start(void *state) +{ + DependenciesParseState *parse = state; + + if (parse->state != DEPS_EXPECT_ITEM) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Expected Item object."))); + return JSON_SEM_ACTION_FAILED; + } + + /* Now we expect to see attributes/dependency/degree keys */ + parse->state = DEPS_EXPECT_KEY; + return JSON_SUCCESS; +} + +static int +attnum_compare(const void *aptr, const void *bptr) +{ + AttrNumber a = *(const AttrNumber *) aptr; + AttrNumber b = *(const AttrNumber *) bptr; + + return pg_cmp_s16(a, b); +} + +static JsonParseErrorType +dependencies_object_end(void *state) +{ + DependenciesParseState *parse = state; + + MVDependency *dep; + AttrNumber *attrsort; + + int natts = 0; + + if (!parse->found_attributes) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\" key."))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_dependency) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_DEPENDENCIES_KEY_DEPENDENCY "\" key."))); + return JSON_SEM_ACTION_FAILED; + } + + if (!parse->found_degree) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), +
errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item must contain \"" PG_DEPENDENCIES_KEY_DEGREE "\" key."))); + return JSON_SEM_ACTION_FAILED; + } + + /* + * We need at least 1 attnum for a dependencies item, anything less is + * malformed. Use list_length() so that an empty (NIL) list is handled. + */ + natts = list_length(parse->attnum_list); + if (natts < 1) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("The \"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\" key must contain an array of at least one attnum."))); + + return JSON_SEM_ACTION_FAILED; + } + + /* + * Allocate the dependency with room for the attnums in the list plus + * the dependency attnum, which is stored as the last element. + */ + dep = palloc0(offsetof(MVDependency, attributes) + ((natts + 1) * sizeof(AttrNumber))); + dep->nattributes = natts + 1; + + dep->attributes[natts] = parse->dependency; + dep->degree = parse->degree; + + /* Scratch copy of every attnum, sorted below to detect duplicates */ + attrsort = palloc0(dep->nattributes * sizeof(AttrNumber)); + attrsort[natts] = parse->dependency; + + for (int i = 0; i < natts; i++) + { + attrsort[i] = (AttrNumber) parse->attnum_list->elements[i].int_value; + dep->attributes[i] = attrsort[i]; + } + + /* Check attrsort for uniqueness */ + qsort(attrsort, natts + 1, sizeof(AttrNumber), attnum_compare); + for (int i = 1; i < dep->nattributes; i++) + if (attrsort[i] == attrsort[i - 1]) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("\"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\" list duplicate value found: %d.", attrsort[i]))); + + return JSON_SEM_ACTION_FAILED; + } + pfree(attrsort); + + parse->dependency_list = lappend(parse->dependency_list, (void *) dep); + + /* reset dep item state vars */ + list_free(parse->attnum_list); + parse->attnum_list = NIL; + parse->dependency = 0; + parse->degree = 0.0; + parse->found_attributes = false; +
parse->found_dependency = false; + parse->found_degree = false; + + /* Now we are looking for the next MVDependency */ + parse->state = DEPS_EXPECT_ITEM; + return JSON_SUCCESS; +} + +/* + * An array is only valid as the top-level list of items or as the attnum + * list of an item; an array found anywhere else is an error. + */ +static JsonParseErrorType +dependencies_array_start(void *state) +{ + DependenciesParseState *parse = state; + + switch (parse->state) + { + case DEPS_EXPECT_ATTNUM_LIST: + parse->state = DEPS_EXPECT_ATTNUM; + break; + case DEPS_EXPECT_START: + parse->state = DEPS_EXPECT_ITEM; + break; + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Array found in unexpected place."))); + return JSON_SEM_ACTION_FAILED; + } + + return JSON_SUCCESS; +} + +/* + * End of either an attnum list or the top-level array; neither may be empty. + */ +static JsonParseErrorType +dependencies_array_end(void *state) +{ + DependenciesParseState *parse = state; + + switch (parse->state) + { + case DEPS_EXPECT_ATTNUM: + if (parse->attnum_list == NIL) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("The \"" PG_DEPENDENCIES_KEY_ATTRIBUTES + "\" key must be an non-empty array."))); + return JSON_SEM_ACTION_FAILED; + } + + parse->state = DEPS_EXPECT_KEY; + break; + + case DEPS_EXPECT_ITEM: + if (parse->dependency_list == NIL) + { + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("The dependency list must be an non-empty array."))); + return JSON_SEM_ACTION_FAILED; + } + parse->state = DEPS_PARSE_COMPLETE; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Array
found in unexpected place."))); + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; +} + +/* + * The valid keys for the MVDependency object are: + * - attributes + * - dependency + * - degree + */ +static JsonParseErrorType +dependencies_object_field_start(void *state, char *fname, bool isnull) +{ + DependenciesParseState *parse = state; + + if (strcmp(fname, PG_DEPENDENCIES_KEY_ATTRIBUTES) == 0) + { + parse->found_attributes = true; + parse->state = DEPS_EXPECT_ATTNUM_LIST; + return JSON_SUCCESS; + } + + if (strcmp(fname, PG_DEPENDENCIES_KEY_DEPENDENCY) == 0) + { + parse->found_dependency = true; + parse->state = DEPS_EXPECT_DEPENDENCY; + return JSON_SUCCESS; + } + + if (strcmp(fname, PG_DEPENDENCIES_KEY_DEGREE) == 0) + { + parse->found_degree = true; + parse->state = DEPS_EXPECT_DEGREE; + return JSON_SUCCESS; + } + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Invalid key \"%s\". Only allowed keys are \"" + PG_DEPENDENCIES_KEY_ATTRIBUTES "\", \"" + PG_DEPENDENCIES_KEY_DEPENDENCY "\" and \"" + PG_DEPENDENCIES_KEY_DEGREE "\".", fname))); + return JSON_SEM_ACTION_FAILED; +} + +/* + * Array elements are expected in the item array and in attnum lists only, + * and must not be null; any other array element is an error.
+ */ +static JsonParseErrorType +dependencies_array_element_start(void *state, bool isnull) +{ + DependenciesParseState *parse = state; + + switch(parse->state) + { + case DEPS_EXPECT_ATTNUM: + if (!isnull) + return JSON_SUCCESS; + + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Attnum list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + break; + + case DEPS_EXPECT_ITEM: + if (!isnull) + return JSON_SUCCESS; + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Item list elements cannot be null."))); + + return JSON_SEM_ACTION_FAILED; + break; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Unexpected array element."))); + } + + return JSON_SEM_ACTION_FAILED; +} + +/* + * Handle scalar events from the dependencies input parser. + * + * Scalars are expected as attnum list elements and as the values of the + * dependency and degree keys; a scalar anywhere else is an error.
+ */ +static JsonParseErrorType +dependencies_scalar(void *state, char *token, JsonTokenType tokentype) +{ + DependenciesParseState *parse = state; + AttrNumber attnum; + + switch(parse->state) + { + case DEPS_EXPECT_ATTNUM: + attnum = pg_strtoint16_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->attnum_list = lappend_int(parse->attnum_list, (int) attnum); + return JSON_SUCCESS; + break; + + case DEPS_EXPECT_DEPENDENCY: + parse->dependency = (AttrNumber) pg_strtoint16_safe(token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->state = DEPS_EXPECT_KEY; + return JSON_SUCCESS; + + case DEPS_EXPECT_DEGREE: + parse->degree = float8in_internal(token, NULL, "double", + token, parse->escontext); + + if (SOFT_ERROR_OCCURRED(parse->escontext)) + return JSON_SEM_ACTION_FAILED; + + parse->state = DEPS_EXPECT_KEY; + return JSON_SUCCESS; + + default: + ereturn(parse->escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", parse->str), + errdetail("Unexpected scalar."))); + } + + return JSON_SEM_ACTION_FAILED; +} + +/* Helpers used by pg_dependencies_in() to validate the parsed items */ +/* + * Return true if two MVDependency values list the same attnums in the + * same order (the comparison is positional, not set-based). + */ +static +bool has_duplicate_attributes(const MVDependency *a, const MVDependency *b) +{ + int i; + + if (a->nattributes != b->nattributes) + return false; + + for (i = 0; i < a->nattributes; i++) + { + if (a->attributes[i] != b->attributes[i]) + return false; + } + + return true; +} + +/* + * Return true if an attnum appears as one of the attnums in a given + * MVDependency.
+ */ +static +bool dep_has_attnum(const MVDependency *item, AttrNumber attnum) +{ + for (int i = 0; i < item->nattributes; i++) + { + if (attnum == item->attributes[i]) + return true; + } + return false; +} + +/* + * Return true if every attnum of item also appears in refitem, i.e. the + * attributes of item are a (not necessarily proper) subset of refitem's. + */ +static +bool dep_is_attnum_subset(const MVDependency *item, + const MVDependency *refitem) +{ + for (int i = 0; i < item->nattributes; i++) + { + if (!dep_has_attnum(refitem, item->attributes[i])) + return false; + } + return true; +} + +/* + * Generate a string representing an array of attnums. Internally, the + * dependency attribute is the last element, so we leave that off. + * + * + * Freeing the allocated string is the responsibility of the caller. + */ +static +const char *dep_attnum_list(const MVDependency *item) +{ + StringInfoData str; + + initStringInfo(&str); + + appendStringInfo(&str, "%d", item->attributes[0]); + + for (int i = 1; i < item->nattributes - 1; i++) + appendStringInfo(&str, ", %d", item->attributes[i]); + + return str.data; +} + +/* + * Return the dependency, which is the last attribute element. + */ +static +AttrNumber dep_attnum_dependency(const MVDependency *item) +{ + return item->attributes[item->nattributes - 1]; +} + + + + +/* End of validation helpers; the input function proper follows. */ /* * pg_dependencies_in - input routine for type pg_dependencies.
* - * pg_dependencies is real enough to be a table column, but it has no operations - * of its own, and disallows input too + * This format is valid JSON, with the expected format: + * [{"attributes": [1,2], "dependency": -1, "degree": 1.0000}, + * {"attributes": [1,-1], "dependency": 2, "degree": 0.0000}, + * {"attributes": [2,-1], "dependency": 1, "degree": 1.0000}] + * */ Datum pg_dependencies_in(PG_FUNCTION_ARGS) { - /* - * pg_node_list stores the data in binary form and parsing text input is - * not needed, so disallow this. - */ - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot accept a value of type %s", "pg_dependencies"))); + char *str = PG_GETARG_CSTRING(0); - PG_RETURN_VOID(); /* keep compiler quiet */ + DependenciesParseState parse_state; + JsonParseErrorType result; + JsonLexContext *lex; + JsonSemAction sem_action; + + /* initialize the semantic state */ + parse_state.str = str; + parse_state.state = DEPS_EXPECT_START; + parse_state.dependency_list = NIL; + parse_state.attnum_list = NIL; + parse_state.dependency = 0; + parse_state.degree = 0.0; + parse_state.found_attributes = false; + parse_state.found_dependency = false; + parse_state.found_degree = false; + parse_state.escontext = fcinfo->context; + + /* set callbacks */ + sem_action.semstate = (void *) &parse_state; + sem_action.object_start = dependencies_object_start; + sem_action.object_end = dependencies_object_end; + sem_action.array_start = dependencies_array_start; + sem_action.array_end = dependencies_array_end; + sem_action.array_element_start = dependencies_array_element_start; + sem_action.array_element_end = NULL; + sem_action.object_field_start = dependencies_object_field_start; + sem_action.object_field_end = NULL; + sem_action.scalar = dependencies_scalar; + + lex = makeJsonLexContextCstringLen(NULL, str, strlen(str), PG_UTF8, true); + + result = pg_parse_json(lex, &sem_action); + freeJsonLexContext(lex); + + if (result == JSON_SUCCESS) + { + List *list = 
parse_state.dependency_list; + int ndeps = list_length(list); + MVDependencies *mvdeps; + bytea *bytes; + + int dep_most_attrs = 0; + int dep_most_attrs_idx = 0; + + mvdeps = palloc0(offsetof(MVDependencies, deps) + ndeps * sizeof(MVDependency *)); + mvdeps->magic = STATS_DEPS_MAGIC; + mvdeps->type = STATS_DEPS_TYPE_BASIC; + mvdeps->ndeps = ndeps; + + /* move the MVDependency pointers from the list into the MVDependencies */ + for (int i = 0; i < ndeps; i++) + { + mvdeps->deps[i] = list->elements[i].ptr_value; + + /* + * Ensure that this item does not duplicate the attributes of any + * pre-existing item. + */ + for (int j = 0; j < i; j++) + { + if (has_duplicate_attributes(mvdeps->deps[i], mvdeps->deps[j])) + { + MVDependency *dep = mvdeps->deps[i]; + + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", str), + errdetail("Duplicate \"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\" array: [%s]" + " with \"" PG_DEPENDENCIES_KEY_DEPENDENCY "\": %d.", + dep_attnum_list(dep), dep_attnum_dependency(dep)))); + PG_RETURN_NULL(); + } + } + + /* + * Keep track of the first longest attribute list. All other attribute + * lists must be a subset of this list. + */ + if (mvdeps->deps[i]->nattributes > dep_most_attrs) + { + dep_most_attrs = mvdeps->deps[i]->nattributes; + dep_most_attrs_idx = i; + } + } + + /* + * Verify that all attnum sets are a subset of the first longest + * attnum set.
+ */ + for (int i = 0; i < ndeps; i++) + { + if (i == dep_most_attrs_idx) + continue; + + if (!dep_is_attnum_subset(mvdeps->deps[i], + mvdeps->deps[dep_most_attrs_idx])) + { + MVDependency *dep = mvdeps->deps[i]; + MVDependency *refdep = mvdeps->deps[dep_most_attrs_idx]; + const char *dep_list = dep_attnum_list(dep); + const char *refdep_list = dep_attnum_list(refdep); + + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", str), + errdetail("\"" PG_DEPENDENCIES_KEY_ATTRIBUTES "\" array: [%s]" + " with dependency %d must be a subset of array: [%s]" + " with dependency %d.", + dep_list, dep_attnum_dependency(dep), + refdep_list, dep_attnum_dependency(refdep)))); + PG_RETURN_NULL(); + } + } + bytes = statext_dependencies_serialize(mvdeps); + + list_free(list); + for (int i = 0; i < ndeps; i++) + pfree(mvdeps->deps[i]); + pfree(mvdeps); + + PG_RETURN_BYTEA_P(bytes); + } + else if (result == JSON_SEM_ACTION_FAILED) + PG_RETURN_NULL(); + + /* Anything else is a generic JSON parse error */ + ereturn(parse_state.escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("malformed pg_dependencies: \"%s\"", str), + errdetail("Must be valid JSON."))); + + PG_RETURN_NULL(); /* keep compiler quiet */ } /* diff --git a/src/test/regress/expected/pg_dependencies.out b/src/test/regress/expected/pg_dependencies.out new file mode 100644 index 000000000000..9aa2df242782 --- /dev/null +++ b/src/test/regress/expected/pg_dependencies.out @@ -0,0 +1,128 @@ +-- Tests for type pg_distinct +-- Invalid inputs +SELECT '[]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[]" +LINE 1: SELECT '[]'::pg_dependencies; + ^ +DETAIL: The dependency list must be an non-empty array. +SELECT '[null]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[null]" +LINE 1: SELECT '[null]'::pg_dependencies; + ^ +DETAIL: Item list elements cannot be null. 
+-- Invalid keys +SELECT '[{"attributes_invalid" : [2,3], "dependency" : 4}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes_invalid" : [2,3], "dependency" : 4}]" +LINE 1: SELECT '[{"attributes_invalid" : [2,3], "dependency" : 4}]':... + ^ +DETAIL: Invalid key "attributes_invalid". Only allowed keys are "attributes", "dependency" and "degree". +SELECT '[{"attributes" : [2,3], "invalid" : 3, "dependency" : 4}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "invalid" : 3, "dependency" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "invalid" : 3, "dependency" ... + ^ +DETAIL: Invalid key "invalid". Only allowed keys are "attributes", "dependency" and "degree". +-- Missing keys +SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_depe... + ^ +DETAIL: Item must contain "degree" key. +SELECT '[{"attributes" : [2,3], "degree" : 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "degree" : 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "degree" : 1.000}]'::pg_depe... + ^ +DETAIL: Item must contain "dependency" key. +SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : 4}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_depe... + ^ +DETAIL: Item must contain "degree" key. +-- Valid keys, invalid values +SELECT '[{"attributes" : null, "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : null, "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : null, "dependency" : 4, "degree": 1... + ^ +DETAIL: Unexpected scalar. 
+SELECT '[{"attributes" : [2,null], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,null], "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,null], "dependency" : 4, "degree... + ^ +DETAIL: Attnum list elements cannot be null. +SELECT '[{"attributes" : [2,3], "dependency" : null, "degree": 1.000}]'::pg_dependencies; +ERROR: invalid input syntax for type smallint: "null" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : null, "degree... + ^ +SELECT '[{"attributes" : [2,"a"], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: invalid input syntax for type smallint: "a" +LINE 1: SELECT '[{"attributes" : [2,"a"], "dependency" : 4, "degree"... + ^ +SELECT '[{"attributes" : [2,3], "dependency" : "a", "degree": 1.000}]'::pg_dependencies; +ERROR: invalid input syntax for type smallint: "a" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : "a", "degree"... + ^ +SELECT '[{"attributes" : [2,3], "dependency" : [], "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : [], "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : [], "degree":... + ^ +DETAIL: Array found in unexpected place. +SELECT '[{"attributes" : [2,3], "dependency" : [null], "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : [null], "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : [null], "degr... + ^ +DETAIL: Array found in unexpected place. +SELECT '[{"attributes" : [2,3], "dependency" : [1,null], "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : [1,null], "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : [1,null], "de... + ^ +DETAIL: Array found in unexpected place. 
+SELECT '[{"attributes" : 1, "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : 1, "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : 1, "dependency" : 4, "degree": 1.00... + ^ +DETAIL: Unexpected scalar. +SELECT '[{"attributes" : "a", "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : "a", "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : "a", "dependency" : 4, "degree": 1.... + ^ +DETAIL: Unexpected scalar. +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": NaN}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : 4, "degree": NaN}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": ... + ^ +DETAIL: Must be valid JSON. +-- Duplicated attributes +SELECT '[{"attributes" : [2,2], "dependency" : 4, "degree": 0.500}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,2], "dependency" : 4, "degree": 0.500}]" +LINE 1: SELECT '[{"attributes" : [2,2], "dependency" : 4, "degree": ... + ^ +DETAIL: "attributes" list duplicate value found: 2. +-- Duplicated attribute lists. +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3], "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": ... + ^ +DETAIL: Duplicate "attributes" array: [2, 3] with "dependency": 4. +-- Partially-covered attribute lists. 
+SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [1,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1,-2], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +ERROR: malformed pg_dependencies: "[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [1,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1,-2], "dependency" : 4, "degree": 1.000}]" +LINE 1: SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": ... + ^ +DETAIL: "attributes" array: [1, -1] with dependency 4 must be a subset of array: [2, 3, -1, -2] with dependency 4. +-- Valid inputs +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 0.250}, + {"attributes" : [2,-1], "dependency" : 4, "degree": 0.500}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 0.750}, + {"attributes" : [2,3,-1,-2], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; + pg_dependencies +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + [{"attributes": [2, 3], "dependency": 4, "degree": 0.250000}, {"attributes": [2, -1], "dependency": 4, "degree": 0.500000}, {"attributes": [2, 3, -1], "dependency": 4, "degree": 0.750000}, {"attributes": [2, 3, -1, -2], "dependency": 4, "degree": 1.000000}] +(1 row) + diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index f3f0b5f2f317..cc6d799bceaf 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -28,7 +28,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology 
depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import pg_ndistinct +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database stats_import pg_ndistinct pg_dependencies # ---------- # Load huge amounts of data diff --git a/src/test/regress/sql/pg_dependencies.sql b/src/test/regress/sql/pg_dependencies.sql new file mode 100644 index 000000000000..116f6c924cd9 --- /dev/null +++ b/src/test/regress/sql/pg_dependencies.sql @@ -0,0 +1,39 @@ +-- Tests for type pg_distinct + +-- Invalid inputs +SELECT '[]'::pg_dependencies; +SELECT '[null]'::pg_dependencies; +-- Invalid keys +SELECT '[{"attributes_invalid" : [2,3], "dependency" : 4}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "invalid" : 3, "dependency" : 4}]'::pg_dependencies; +-- Missing keys +SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "degree" : 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : 4}]'::pg_dependencies; +-- Valid keys, invalid values +SELECT '[{"attributes" : null, "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,null], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : null, "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,"a"], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : "a", "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : [], "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : [null], "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : [1,null], "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : 1, "dependency" : 4, 
"degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : "a", "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": NaN}]'::pg_dependencies; +-- Duplicated attributes +SELECT '[{"attributes" : [2,2], "dependency" : 4, "degree": 0.500}]'::pg_dependencies; +-- Duplicated attribute lists. +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +-- Partially-covered attribute lists. +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 1.000}, + {"attributes" : [1,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 1.000}, + {"attributes" : [2,3,-1,-2], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; +-- Valid inputs +SELECT '[{"attributes" : [2,3], "dependency" : 4, "degree": 0.250}, + {"attributes" : [2,-1], "dependency" : 4, "degree": 0.500}, + {"attributes" : [2,3,-1], "dependency" : 4, "degree": 0.750}, + {"attributes" : [2,3,-1,-2], "dependency" : 4, "degree": 1.000}]'::pg_dependencies; From c8ead5ecaf09d3e9e75130e4b9c9397e2e4eae09 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Tue, 4 Nov 2025 23:50:01 -0500 Subject: [PATCH 5/7] Expose attribute statistics functions for use in extended_stats. Many of the operations of attribute stats have analogous operations in extended stats. * get_attr_stat_type() renamed to statatt_get_type() * init_empty_stats_tuple() renamed to statatt_init_empty_tuple() * text_to_stavalues() * get_elem_stat_type() renamed to statatt_get_elem_type() Also, add comments explaining the function argument index enums, and the arrays that are indexed by those enums. 
--- src/backend/statistics/attribute_stats.c | 126 +++++++++++------------ src/include/statistics/statistics.h | 17 +++ 2 files changed, 77 insertions(+), 66 deletions(-) diff --git a/src/backend/statistics/attribute_stats.c b/src/backend/statistics/attribute_stats.c index ef4d768feab7..d0c67a4128e0 100644 --- a/src/backend/statistics/attribute_stats.c +++ b/src/backend/statistics/attribute_stats.c @@ -64,6 +64,10 @@ enum attribute_stats_argnum NUM_ATTRIBUTE_STATS_ARGS }; +/* + * The argument names and typoids of the arguments for + * attribute_statistics_update. + */ static struct StatsArgInfo attarginfo[] = { [ATTRELSCHEMA_ARG] = {"schemaname", TEXTOID}, @@ -101,6 +105,10 @@ enum clear_attribute_stats_argnum C_NUM_ATTRIBUTE_STATS_ARGS }; +/* + * The argument names and typoids of the arguments for + * pg_clear_attribute_stats. + */ static struct StatsArgInfo cleararginfo[] = { [C_ATTRELSCHEMA_ARG] = {"relation", TEXTOID}, @@ -112,23 +120,9 @@ static struct StatsArgInfo cleararginfo[] = static bool attribute_statistics_update(FunctionCallInfo fcinfo); static Node *get_attr_expr(Relation rel, int attnum); -static void get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr); -static bool get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr); -static Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, - Oid typid, int32 typmod, bool *ok); -static void set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull); static void upsert_pg_statistic(Relation starel, HeapTuple oldtup, const Datum *values, const bool *nulls, const bool *replaces); static bool delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit); -static void init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - 
Datum *values, bool *nulls, bool *replaces); /* * Insert or Update Attribute Statistics @@ -298,16 +292,16 @@ attribute_statistics_update(FunctionCallInfo fcinfo) } /* derive information from attribute */ - get_attr_stat_type(reloid, attnum, - &atttypid, &atttypmod, - &atttyptype, &atttypcoll, - &eq_opr, <_opr); + statatt_get_type(reloid, attnum, + &atttypid, &atttypmod, + &atttyptype, &atttypcoll, + &eq_opr, <_opr); /* if needed, derive element type */ if (do_mcelem || do_dechist) { - if (!get_elem_stat_type(atttypid, atttyptype, - &elemtypid, &elem_eq_opr)) + if (!statatt_get_elem_type(atttypid, atttyptype, + &elemtypid, &elem_eq_opr)) { ereport(WARNING, (errmsg("could not determine element type of column \"%s\"", attname), @@ -361,7 +355,7 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (HeapTupleIsValid(statup)) heap_deform_tuple(statup, RelationGetDescr(starel), values, nulls); else - init_empty_stats_tuple(reloid, attnum, inherited, values, nulls, + statatt_init_empty_tuple(reloid, attnum, inherited, values, nulls, replaces); /* if specified, set to argument values */ @@ -394,10 +388,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_MCV, - eq_opr, atttypcoll, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_MCV, + eq_opr, atttypcoll, + stanumbers, false, stavalues, false); } else result = false; @@ -417,10 +411,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_HISTOGRAM, - lt_opr, atttypcoll, - 0, true, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_HISTOGRAM, + lt_opr, atttypcoll, + 0, true, stavalues, false); } else result = false; @@ -433,10 +427,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) ArrayType *arry = construct_array_builtin(elems, 1, FLOAT4OID); Datum stanumbers = 
PointerGetDatum(arry); - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_CORRELATION, - lt_opr, atttypcoll, - stanumbers, false, 0, true); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_CORRELATION, + lt_opr, atttypcoll, + stanumbers, false, 0, true); } /* STATISTIC_KIND_MCELEM */ @@ -454,10 +448,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_MCELEM, - elem_eq_opr, atttypcoll, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_MCELEM, + elem_eq_opr, atttypcoll, + stanumbers, false, stavalues, false); } else result = false; @@ -468,10 +462,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) { Datum stanumbers = PG_GETARG_DATUM(ELEM_COUNT_HISTOGRAM_ARG); - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_DECHIST, - elem_eq_opr, atttypcoll, - stanumbers, false, 0, true); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_DECHIST, + elem_eq_opr, atttypcoll, + stanumbers, false, 0, true); } /* @@ -494,10 +488,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_BOUNDS_HISTOGRAM, - InvalidOid, InvalidOid, - 0, true, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_BOUNDS_HISTOGRAM, + InvalidOid, InvalidOid, + 0, true, stavalues, false); } else result = false; @@ -521,10 +515,10 @@ attribute_statistics_update(FunctionCallInfo fcinfo) if (converted) { - set_stats_slot(values, nulls, replaces, - STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, - Float8LessOperator, InvalidOid, - stanumbers, false, stavalues, false); + statatt_set_slot(values, nulls, replaces, + STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, + Float8LessOperator, InvalidOid, + stanumbers, false, stavalues, false); } else result = false; @@ -584,11 +578,11 @@ get_attr_expr(Relation rel, int attnum) /* * Derive type information from 
the attribute. */ -static void -get_attr_stat_type(Oid reloid, AttrNumber attnum, - Oid *atttypid, int32 *atttypmod, - char *atttyptype, Oid *atttypcoll, - Oid *eq_opr, Oid *lt_opr) +void +statatt_get_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr) { Relation rel = relation_open(reloid, AccessShareLock); Form_pg_attribute attr; @@ -666,9 +660,9 @@ get_attr_stat_type(Oid reloid, AttrNumber attnum, /* * Derive element type information from the attribute type. */ -static bool -get_elem_stat_type(Oid atttypid, char atttyptype, - Oid *elemtypid, Oid *elem_eq_opr) +bool +statatt_get_elem_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr) { TypeCacheEntry *elemtypcache; @@ -706,7 +700,7 @@ get_elem_stat_type(Oid atttypid, char atttyptype, * to false. If the resulting array contains NULLs, raise a WARNING and set ok * to false. Otherwise, set ok to true. */ -static Datum +Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, int32 typmod, bool *ok) { @@ -759,11 +753,11 @@ text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, Oid typid, * Find and update the slot with the given stakind, or use the first empty * slot. */ -static void -set_stats_slot(Datum *values, bool *nulls, bool *replaces, - int16 stakind, Oid staop, Oid stacoll, - Datum stanumbers, bool stanumbers_isnull, - Datum stavalues, bool stavalues_isnull) +void +statatt_set_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull) { int slotidx; int first_empty = -1; @@ -883,9 +877,9 @@ delete_pg_statistic(Oid reloid, AttrNumber attnum, bool stainherit) /* * Initialize values and nulls for a new stats tuple. 
*/ -static void -init_empty_stats_tuple(Oid reloid, int16 attnum, bool inherited, - Datum *values, bool *nulls, bool *replaces) +void +statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces) { memset(nulls, true, sizeof(bool) * Natts_pg_statistic); memset(replaces, true, sizeof(bool) * Natts_pg_statistic); diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h index 7dd0f9755454..0df66b352a10 100644 --- a/src/include/statistics/statistics.h +++ b/src/include/statistics/statistics.h @@ -127,4 +127,21 @@ extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind, int nclauses); extern HeapTuple statext_expressions_load(Oid stxoid, bool inh, int idx); +extern void statatt_get_type(Oid reloid, AttrNumber attnum, + Oid *atttypid, int32 *atttypmod, + char *atttyptype, Oid *atttypcoll, + Oid *eq_opr, Oid *lt_opr); +extern void statatt_init_empty_tuple(Oid reloid, int16 attnum, bool inherited, + Datum *values, bool *nulls, bool *replaces); + +extern void statatt_set_slot(Datum *values, bool *nulls, bool *replaces, + int16 stakind, Oid staop, Oid stacoll, + Datum stanumbers, bool stanumbers_isnull, + Datum stavalues, bool stavalues_isnull); + +extern Datum text_to_stavalues(const char *staname, FmgrInfo *array_in, Datum d, + Oid typid, int32 typmod, bool *ok); +extern bool statatt_get_elem_type(Oid atttypid, char atttyptype, + Oid *elemtypid, Oid *elem_eq_opr); + #endif /* STATISTICS_H */ From 77c4e372eb27640fe451f455a082bfdac71bdae3 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Tue, 11 Nov 2025 16:58:26 +0900 Subject: [PATCH 6/7] Add extended statistics support functions. Add pg_restore_extended_stats() and pg_clear_extended_stats(). These functions closely mirror their relation and attribute counterparts, but for extended statistics (i.e. CREATE STATISTICS) objects. 
--- doc/src/sgml/func/func-admin.sgml | 98 ++ src/backend/statistics/dependencies.c | 61 + src/backend/statistics/extended_stats.c | 1141 ++++++++++++++++- src/backend/statistics/mcv.c | 144 +++ src/backend/statistics/mvdistinct.c | 62 + src/include/catalog/pg_proc.dat | 18 + .../statistics/extended_stats_internal.h | 17 + src/test/regress/expected/stats_import.out | 1123 ++++++++++++++++ src/test/regress/sql/stats_import.sql | 364 ++++++ 9 files changed, 3027 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml index 1b465bc8ba71..574d4a35a64f 100644 --- a/doc/src/sgml/func/func-admin.sgml +++ b/doc/src/sgml/func/func-admin.sgml @@ -2167,6 +2167,104 @@ SELECT pg_restore_attribute_stats( + + + + pg_restore_extended_stats + + pg_restore_extended_stats ( + VARIADIC kwargs "any" ) + boolean + + + Creates or updates statistics for statistics objects. Ordinarily, + these statistics are collected automatically or updated as a part of + or , so + it's not necessary to call this function. However, it is useful + after a restore to enable the optimizer to choose better plans if + ANALYZE has not been run yet. 
+ + + The tracked statistics may change from version to version, so + arguments are passed as pairs of argname + and argvalue in the form: + + SELECT pg_restore_extended_stats( + 'arg1name', 'arg1value'::arg1type, + 'arg2name', 'arg2value'::arg2type, + 'arg3name', 'arg3value'::arg3type); + + + + For example, to set the n_distinct, + dependencies, and exprs + values for the statistics object myschema.mystatsobj: + + SELECT pg_restore_extended_stats( + 'statistics_schemaname', 'myschema'::name, + 'statistics_name', 'mystatsobj'::name, + 'inherited', false, + 'n_distinct', '{"2, 3": 4, "2, -1": 4, "2, -2": 4, "3, -1": 4, "3, -2": 4}'::pg_ndistinct, + 'dependencies', '{"2 => 1": 1.000000, "2 => -1": 1.000000, "2 => -2": 1.000000}'::pg_dependencies, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[]); + + + + The required arguments are statistics_schemaname with a value + of type name, which specifies the statistics object's schema; + statistics_name with a value of type name, which specifies + the name of the statistics object; and inherited, which + specifies whether the statistics include values from child tables. + Other arguments are the names and values of statistics corresponding + to columns in pg_stats_ext + . To accept statistics for any expressions in the extended statistics object, the + parameter exprs with a type text[] is available; the array + must be two-dimensional, with an outer array whose length equals the number of expressions in + the object, and inner arrays containing one element for each of the statistical columns in pg_stats_ext_exprs, some + of which are themselves arrays. + + + Additionally, this function accepts argument name + version of type integer, which + specifies the server version from which the statistics originated. + This is anticipated to be helpful in porting statistics from older + versions of PostgreSQL. 
+ + + Minor errors are reported as a WARNING and + ignored, and remaining statistics will still be restored. If all + specified statistics are successfully restored, returns + true, otherwise false. + + + The caller must have the MAINTAIN privilege on the + table or be the owner of the database. + + + + + + + + pg_clear_extended_stats + + pg_clear_extended_stats ( + statistics_schemaname name, + statistics_name name, + inherited boolean ) + void + + + Clears statistics for the given statistics object, as + though the object was newly created. + + + The caller must have the MAINTAIN privilege on + the table or be the owner of the database. + + + diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c index 6f63b4f3ffbf..31a9f1cfc7c9 100644 --- a/src/backend/statistics/dependencies.c +++ b/src/backend/statistics/dependencies.c @@ -1065,6 +1065,55 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses, return s1; } +/* + * Validate an MVDependencies against the extended statistics object definition. + * + * Every MVDependencies must be checked to ensure that the attnums in the + * attributes list correspond to attnums/expressions defined by the + * extended statistics object. + * + * Positive attnums are attributes which must be found in the stxkeys, + * while negative attnums correspond to an expr number, so the attnum + * can't be below (0 - numexprs). 
+ */ +bool +pg_dependencies_validate_deps(MVDependencies *dependencies, int2vector *stxkeys, int numexprs, int elevel) +{ + int attnum_expr_lowbound = 0 - numexprs; + + for (int i = 0; i < dependencies->ndeps; i++) + { + MVDependency *dep = dependencies->deps[i]; + + for (int j = 0; j < dep->nattributes; j++) + { + AttrNumber attnum = dep->attributes[j]; + bool ok = false; + + if (attnum > 0) + { + for (int k = 0; k < stxkeys->dim1; k++) + if (attnum == stxkeys->values[k]) + { + ok = true; + break; + } + } + else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) + ok = true; + + if (!ok) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("pg_dependencies: invalid attnum for this statistics object: %d", attnum))); + return false; + } + } + } + return true; +} + /* * dependency_is_compatible_expression * Determines if the expression is compatible with functional dependencies @@ -1248,6 +1297,18 @@ dependency_is_compatible_expression(Node *clause, Index relid, List *statlist, N return false; } +/* + * Free allocations of an MVDependencies + */ +void +free_pg_dependencies(MVDependencies *dependencies) +{ + for (int i = 0; i < dependencies->ndeps; i++) + pfree(dependencies->deps[i]); + + pfree(dependencies); +} + /* * dependencies_clauselist_selectivity * Return the estimated selectivity of (a subset of) the given clauses diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c index 3c3d2d315c6f..23ab3cf87e18 100644 --- a/src/backend/statistics/extended_stats.c +++ b/src/backend/statistics/extended_stats.c @@ -18,21 +18,28 @@ #include "access/detoast.h" #include "access/genam.h" +#include "access/heapam.h" +#include "access/htup.h" #include "access/htup_details.h" #include "access/table.h" #include "catalog/indexing.h" +#include "catalog/pg_collation.h" #include "catalog/pg_statistic_ext.h" #include "catalog/pg_statistic_ext_data.h" +#include "catalog/pg_type_d.h" +#include "catalog/namespace.h" 
#include "commands/defrem.h" #include "commands/progress.h" #include "executor/executor.h" #include "miscadmin.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "parser/parsetree.h" #include "pgstat.h" #include "postmaster/autovacuum.h" #include "statistics/extended_stats_internal.h" +#include "statistics/stat_utils.h" #include "statistics/statistics.h" #include "utils/acl.h" #include "utils/array.h" @@ -72,6 +79,84 @@ typedef struct StatExtEntry List *exprs; /* expressions */ } StatExtEntry; +/* + * An index of the args for extended_statistics_update(). + */ +enum extended_stats_argnum +{ + STATSCHEMA_ARG = 0, + STATNAME_ARG, + INHERITED_ARG, + NDISTINCT_ARG, + DEPENDENCIES_ARG, + MOST_COMMON_VALS_ARG, + MOST_COMMON_VAL_NULLS_ARG, + MOST_COMMON_FREQS_ARG, + MOST_COMMON_BASE_FREQS_ARG, + EXPRESSIONS_ARG, + NUM_EXTENDED_STATS_ARGS +}; + +/* + * The argument names and typoids of the arguments for + * extended_statistics_update(). + */ +static struct StatsArgInfo extarginfo[] = +{ + [STATSCHEMA_ARG] = {"statistics_schemaname", TEXTOID}, + [STATNAME_ARG] = {"statistics_name", TEXTOID}, + [INHERITED_ARG] = {"inherited", BOOLOID}, + [NDISTINCT_ARG] = {"n_distinct", PG_NDISTINCTOID}, + [DEPENDENCIES_ARG] = {"dependencies", PG_DEPENDENCIESOID}, + [MOST_COMMON_VALS_ARG] = {"most_common_vals", TEXTARRAYOID}, + [MOST_COMMON_VAL_NULLS_ARG] = {"most_common_val_nulls", BOOLARRAYOID}, + [MOST_COMMON_FREQS_ARG] = {"most_common_freqs", FLOAT8ARRAYOID}, + [MOST_COMMON_BASE_FREQS_ARG] = {"most_common_base_freqs", FLOAT8ARRAYOID}, + [EXPRESSIONS_ARG] = {"exprs", TEXTARRAYOID}, + [NUM_EXTENDED_STATS_ARGS] = {0} +}; + +/* + * An index of the elements of the stxdexprs datum, which repeat for each + * expression in the extended statistics object. + * + * NOTE: the RANGE_LENGTH & RANGE_BOUNDS stats are not yet reflected in any + * version of pg_stat_ext_exprs. 
+ */ +enum extended_stats_exprs_element +{ + NULL_FRAC_ELEM = 0, + AVG_WIDTH_ELEM, + N_DISTINCT_ELEM, + MOST_COMMON_VALS_ELEM, + MOST_COMMON_FREQS_ELEM, + HISTOGRAM_BOUNDS_ELEM, + CORRELATION_ELEM, + MOST_COMMON_ELEMS_ELEM, + MOST_COMMON_ELEM_FREQS_ELEM, + ELEM_COUNT_HISTOGRAM_ELEM, + NUM_ATTRIBUTE_STATS_ELEMS +}; + +/* + * The argument names and typoids of the repeating arguments for stxdexprs. + */ +static struct StatsArgInfo extexprarginfo[] = +{ + [NULL_FRAC_ELEM] = {"null_frac", FLOAT4OID}, + [AVG_WIDTH_ELEM] = {"avg_width", INT4OID}, + [N_DISTINCT_ELEM] = {"n_distinct", FLOAT4OID}, + [MOST_COMMON_VALS_ELEM] = {"most_common_vals", TEXTOID}, + [MOST_COMMON_FREQS_ELEM] = {"most_common_freqs", FLOAT4ARRAYOID}, + [HISTOGRAM_BOUNDS_ELEM] = {"histogram_bounds", TEXTOID}, + [CORRELATION_ELEM] = {"correlation", FLOAT4OID}, + [MOST_COMMON_ELEMS_ELEM] = {"most_common_elems", TEXTOID}, + [MOST_COMMON_ELEM_FREQS_ELEM] = {"most_common_elem_freqs", FLOAT4ARRAYOID}, + [ELEM_COUNT_HISTOGRAM_ELEM] = {"elem_count_histogram", FLOAT4ARRAYOID}, + [NUM_ATTRIBUTE_STATS_ELEMS] = {0} +}; + +static bool extended_statistics_update(FunctionCallInfo fcinfo); static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid); static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs, @@ -99,6 +184,28 @@ static StatsBuildData *make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows, VacAttrStats **stats, int stattarget); +static HeapTuple get_pg_statistic_ext(Relation pg_stext, Oid nspoid, + const char *stxname); +static bool delete_pg_statistic_ext_data(Oid stxoid, bool inherited); + +typedef struct +{ + bool ndistinct; + bool dependencies; + bool mcv; + bool expressions; +} stakindFlags; + +static void expand_stxkind(HeapTuple tup, stakindFlags * enabled); +static void upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces); +static bool check_mcvlist_array(ArrayType *arr, int argindex, + int required_ndims, int mcv_length); 
+static Datum import_expressions(Relation pgsd, int numexprs,
+								Oid *atttypids, int32 *atttypmods,
+								Oid *atttypcolls, ArrayType *exprs_arr);
+static bool text_to_float4(Datum input, Datum *output);
+static bool text_to_int4(Datum input, Datum *output);
+
 
 /*
  * Compute requested extended stats, using the rows sampled for the plain
@@ -2612,3 +2719,1035 @@ make_build_data(Relation rel, StatExtEntry *stat, int numrows, HeapTuple *rows,
 
 	return result;
 }
+
+/*
+ * Look up a statistics object by namespace OID and name.
+ *
+ * pg_stext is the already-opened pg_statistic_ext relation.  The row is
+ * located through the name index, then fetched again by OID from the
+ * syscache.  Returns a copy of the pg_statistic_ext tuple that the caller
+ * must release with heap_freetuple(), or NULL if no such object exists.
+ */
+static HeapTuple
+get_pg_statistic_ext(Relation pg_stext, Oid nspoid, const char *stxname)
+{
+	ScanKeyData key[2];
+	SysScanDesc scan;
+	HeapTuple	tup;
+	Oid			stxoid = InvalidOid;
+
+	ScanKeyInit(&key[0],
+				Anum_pg_statistic_ext_stxname,
+				BTEqualStrategyNumber,
+				F_NAMEEQ,
+				CStringGetDatum(stxname));
+	ScanKeyInit(&key[1],
+				Anum_pg_statistic_ext_stxnamespace,
+				BTEqualStrategyNumber,
+				F_OIDEQ,
+				ObjectIdGetDatum(nspoid));
+
+	/*
+	 * Try to find matching pg_statistic_ext row.
+	 */
+	scan = systable_beginscan(pg_stext,
+							  StatisticExtNameIndexId,
+							  true,
+							  NULL,
+							  2,
+							  key);
+
+	/* Unique index, either we get a tuple or we don't. */
+	tup = systable_getnext(scan);
+
+	/* only the OID is kept: the scan tuple is not used after the scan ends */
+	if (HeapTupleIsValid(tup))
+		stxoid = ((Form_pg_statistic_ext) GETSTRUCT(tup))->oid;
+
+	systable_endscan(scan);
+
+	if (!OidIsValid(stxoid))
+		return NULL;
+
+	return SearchSysCacheCopy1(STATEXTOID, ObjectIdGetDatum(stxoid));
+}
+
+/*
+ * Decode the stxkind column so that we know which stats types to expect.
+ */
+static void
+expand_stxkind(HeapTuple tup, stakindFlags * enabled)
+{
+	Datum		kind_datum = SysCacheGetAttrNotNull(STATEXTOID,
+													tup,
+													Anum_pg_statistic_ext_stxkind);
+	ArrayType  *kind_arr = DatumGetArrayTypeP(kind_datum);
+	char	   *kind_chars;
+	int			nkinds;
+
+	if (ARR_NDIM(kind_arr) != 1 || ARR_HASNULL(kind_arr) || ARR_ELEMTYPE(kind_arr) != CHAROID)
+		elog(ERROR, "stxkind is not a 1-D char array");
+
+	kind_chars = (char *) ARR_DATA_PTR(kind_arr);
+	nkinds = ARR_DIMS(kind_arr)[0];
+
+	/* Raise the flag for every kind listed; unrecognized letters are ignored. */
+	for (int k = 0; k < nkinds; k++)
+	{
+		switch (kind_chars[k])
+		{
+			case STATS_EXT_NDISTINCT:
+				enabled->ndistinct = true;
+				break;
+			case STATS_EXT_DEPENDENCIES:
+				enabled->dependencies = true;
+				break;
+			case STATS_EXT_MCV:
+				enabled->mcv = true;
+				break;
+			case STATS_EXT_EXPRESSIONS:
+				enabled->expressions = true;
+				break;
+			default:
+				break;
+		}
+	}
+}
+
+/*
+ * Write one pg_statistic_ext_data row: update the row identified by the
+ * stxoid/stxdinherit entries of values[] if it exists (only the columns
+ * flagged in replaces[] change), otherwise insert a new row built from
+ * values[] and nulls[].
+ */
+static void
+upsert_pg_statistic_ext_data(Datum *values, bool *nulls, bool *replaces)
+{
+	Relation	sd_rel = table_open(StatisticExtDataRelationId, RowExclusiveLock);
+	HeapTuple	existing;
+	HeapTuple	fresh;
+
+	existing = SearchSysCache2(STATEXTDATASTXOID,
+							   values[Anum_pg_statistic_ext_data_stxoid - 1],
+							   values[Anum_pg_statistic_ext_data_stxdinherit - 1]);
+
+	if (!HeapTupleIsValid(existing))
+	{
+		/* no row yet for this object/inherit pair */
+		fresh = heap_form_tuple(RelationGetDescr(sd_rel), values, nulls);
+		CatalogTupleInsert(sd_rel, fresh);
+	}
+	else
+	{
+		fresh = heap_modify_tuple(existing,
+								  RelationGetDescr(sd_rel),
+								  values,
+								  nulls,
+								  replaces);
+		CatalogTupleUpdate(sd_rel, &fresh->t_self, fresh);
+		ReleaseSysCache(existing);
+	}
+
+	heap_freetuple(fresh);
+
+	/* make the change visible to later commands in this transaction */
+	CommandCounterIncrement();
+
+	table_close(sd_rel, RowExclusiveLock);
+}
+
+/*
+ * Insert or Update Extended Statistics
+ *
+ * Major errors, such as the table not existing, the statistics object not
+ * existing, or a permissions failure are always reported at ERROR. Other
+ * errors, such as a conversion failure on one statistic kind, are reported
+ * as WARNINGs, and other statistic kinds may still be updated.
+ */
+static bool
+extended_statistics_update(FunctionCallInfo fcinfo)
+{
+	Oid			nspoid;
+	char	   *nspname;
+	char	   *stxname;
+	bool		inherited;
+	Relation	pg_stext;
+	HeapTuple	tup = NULL;
+
+	stakindFlags enabled;		/* kinds the statistics object defines */
+	stakindFlags has;			/* kinds the caller supplied */
+
+	Form_pg_statistic_ext stxform;
+
+	Datum		values[Natts_pg_statistic_ext_data];
+	bool		nulls[Natts_pg_statistic_ext_data];
+	bool		replaces[Natts_pg_statistic_ext_data];
+
+	bool		success = true;
+
+	Datum		exprdatum;
+	bool		isnull;
+	List	   *exprs = NIL;
+	int			numattnums = 0;
+	int			numexprs = 0;
+	int			numattrs = 0;
+
+	/* arrays of type info, if we need them */
+	Oid		   *atttypids = NULL;
+	int32	   *atttypmods = NULL;
+	Oid		   *atttypcolls = NULL;
+	Relation	rel;
+	Oid			locked_table = InvalidOid;
+
+	memset(nulls, false, sizeof(nulls));
+	memset(values, 0, sizeof(values));
+	memset(replaces, 0, sizeof(replaces));
+	memset(&enabled, 0, sizeof(enabled));
+
+	/* The MCV list only counts as supplied when all four pieces are given. */
+	has.mcv = (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) &&
+			   !PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) &&
+			   !PG_ARGISNULL(MOST_COMMON_FREQS_ARG) &&
+			   !PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG));
+	has.ndistinct = !PG_ARGISNULL(NDISTINCT_ARG);
+	has.dependencies = !PG_ARGISNULL(DEPENDENCIES_ARG);
+	has.expressions = !PG_ARGISNULL(EXPRESSIONS_ARG);
+
+	if (RecoveryInProgress())
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is in progress"),
+				 errhint("Statistics cannot be modified during recovery.")));
+		return false;
+	}
+
+	stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG);
+	nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG));
+	stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG);
+	stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG));
+	stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG);
+	/* the argument is declared BOOLOID in extarginfo, so read it as a bool */
+	inherited = PG_GETARG_BOOL(INHERITED_ARG);
+
+	nspoid = get_namespace_oid(nspname, true);
+	if (nspoid == InvalidOid)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("Namespace \"%s\" not found.", nspname)));
+		return false;
+	}
+
+	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
+	tup = get_pg_statistic_ext(pg_stext, nspoid, stxname);
+
+	if (!HeapTupleIsValid(tup))
+	{
+		table_close(pg_stext, RowExclusiveLock);
+		ereport(WARNING,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("Extended Statistics Object \"%s\".\"%s\" not found.",
+						get_namespace_name(nspoid), stxname)));
+		return false;
+	}
+
+	stxform = (Form_pg_statistic_ext) GETSTRUCT(tup);
+	expand_stxkind(tup, &enabled);
+	numattnums = stxform->stxkeys.dim1;
+
+	/* decode expression (if any) */
+	exprdatum = SysCacheGetAttr(STATEXTOID,
+								tup,
+								Anum_pg_statistic_ext_stxexprs,
+								&isnull);
+
+	if (!isnull)
+	{
+		char	   *s;
+
+		s = TextDatumGetCString(exprdatum);
+		exprs = (List *) stringToNode(s);
+		pfree(s);
+
+		/*
+		 * Run the expressions through eval_const_expressions. This is not
+		 * just an optimization, but is necessary, because the planner
+		 * will be comparing them to similarly-processed qual clauses, and
+		 * may fail to detect valid matches without this.  We must not use
+		 * canonicalize_qual, however, since these aren't qual
+		 * expressions.
+		 */
+		exprs = (List *) eval_const_expressions(NULL, (Node *) exprs);
+
+		/* May as well fix opfuncids too */
+		fix_opfuncids((Node *) exprs);
+	}
+	numexprs = list_length(exprs);
+	numattrs = numattnums + numexprs;
+
+	/* Roundabout way of getting a RangeVar on the underlying table */
+	rel = relation_open(stxform->stxrelid, AccessShareLock);
+
+	/*
+	 * No need to fetch reloid, we already have it.  The RangeVar must name
+	 * the table's own schema: a statistics object can be created in a
+	 * different schema than the table it describes, so nspname (the schema
+	 * of the statistics object) is not a safe substitute.
+	 */
+	RangeVarGetRelidExtended(makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+										  RelationGetRelationName(rel), -1),
+							 ShareUpdateExclusiveLock, 0,
+							 RangeVarCallbackForStats, &locked_table);
+
+	relation_close(rel, AccessShareLock);
+
+	if (has.mcv)
+	{
+		if (!enabled.mcv)
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" were all "
+							"specified for extended statistics object that does not expect MCV ",
+							extarginfo[MOST_COMMON_VALS_ARG].argname,
+							extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname,
+							extarginfo[MOST_COMMON_FREQS_ARG].argname,
+							extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname)));
+			has.mcv = false;
+			success = false;
+		}
+	}
+	else
+	{
+		/* The MCV args must all be NULL */
+		if (!PG_ARGISNULL(MOST_COMMON_VALS_ARG) ||
+			!PG_ARGISNULL(MOST_COMMON_VAL_NULLS_ARG) ||
+			!PG_ARGISNULL(MOST_COMMON_FREQS_ARG) ||
+			!PG_ARGISNULL(MOST_COMMON_BASE_FREQS_ARG))
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("MCV parameters \"%s\", \"%s\", \"%s\", and \"%s\" must be all specified if any are specified",
+							extarginfo[MOST_COMMON_VALS_ARG].argname,
+							extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname,
+							extarginfo[MOST_COMMON_FREQS_ARG].argname,
+							extarginfo[MOST_COMMON_BASE_FREQS_ARG].argname)));
+			success = false;
+		}
+	}
+
+	if (has.ndistinct && !enabled.ndistinct)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameters \"%s\" was specified for extended statistics object "
+						"that does not expect \"%s\"",
+						extarginfo[NDISTINCT_ARG].argname,
+						extarginfo[NDISTINCT_ARG].argname)));
+		has.ndistinct = false;
+		success = false;
+	}
+
+	if (has.dependencies && !enabled.dependencies)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameters \"%s\" was specified for extended statistics object "
+						"that does not expect \"%s\"",
+						extarginfo[DEPENDENCIES_ARG].argname,
+						extarginfo[DEPENDENCIES_ARG].argname)));
+		has.dependencies = false;
+		success = false;
+	}
+
+	if (has.expressions && !enabled.expressions)
+	{
+		/* name the expressions argument, not the dependencies one */
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameters \"%s\" was specified for extended statistics object "
+						"that does not expect \"%s\"",
+						extarginfo[EXPRESSIONS_ARG].argname,
+						extarginfo[EXPRESSIONS_ARG].argname)));
+		has.expressions = false;
+		success = false;
+	}
+
+	/*
+	 * Either of these statistic types needs the type OID, typmod and
+	 * collation of every column and expression of the statistics object.
+	 *
+	 * It is not possible to use the existing lookup_var_attr_stats() and
+	 * examine_attribute() because these functions will skip attributes for
+	 * which attstattarget is 0, and we may have stats to import for those
+	 * attributes.
+	 */
+	if (has.mcv || has.expressions)
+	{
+		atttypids = palloc0(numattrs * sizeof(Oid));
+		atttypmods = palloc0(numattrs * sizeof(int32));
+		atttypcolls = palloc0(numattrs * sizeof(Oid));
+
+		/* plain columns come first ... */
+		for (int i = 0; i < numattnums; i++)
+		{
+			AttrNumber	attnum = stxform->stxkeys.values[i];
+
+			Oid			lt_opr;
+			Oid			eq_opr;
+			char		typetype;
+
+			/*
+			 * fetch attribute entries the same as are done for attribute
+			 * stats
+			 */
+			statatt_get_type(stxform->stxrelid,
+							 attnum,
+							 &atttypids[i],
+							 &atttypmods[i],
+							 &typetype,
+							 &atttypcolls[i],
+							 &lt_opr,
+							 &eq_opr);
+		}
+
+		/* ... followed by the expressions */
+		for (int i = numattnums; i < numattrs; i++)
+		{
+			Node	   *expr = list_nth(exprs, i - numattnums);
+
+			atttypids[i] = exprType(expr);
+			atttypmods[i] = exprTypmod(expr);
+			atttypcolls[i] = exprCollation(expr);
+
+			/*
+			 * Duplicate logic from get_attr_stat_type
+			 */
+
+			/*
+			 * If it's a multirange, step down to the range type, as is done
+			 * by multirange_typanalyze().
+			 */
+			if (type_is_multirange(atttypids[i]))
+				atttypids[i] = get_multirange_range(atttypids[i]);
+
+			/*
+			 * Special case: collation for tsvector is DEFAULT_COLLATION_OID.
+			 * See compute_tsvector_stats().
+			 */
+			if (atttypids[i] == TSVECTOROID)
+				atttypcolls[i] = DEFAULT_COLLATION_OID;
+
+		}
+	}
+
+	/* Primary Key: cannot be NULL or replaced. */
+	values[Anum_pg_statistic_ext_data_stxoid - 1] = ObjectIdGetDatum(stxform->oid);
+	values[Anum_pg_statistic_ext_data_stxdinherit - 1] = BoolGetDatum(inherited);
+
+	if (has.ndistinct)
+	{
+		Datum		ndistinct_datum = PG_GETARG_DATUM(NDISTINCT_ARG);
+		bytea	   *data = DatumGetByteaPP(ndistinct_datum);
+		MVNDistinct *ndistinct = statext_ndistinct_deserialize(data);
+
+		if (pg_ndistinct_validate_items(ndistinct, &stxform->stxkeys, numexprs, WARNING))
+		{
+			values[Anum_pg_statistic_ext_data_stxdndistinct - 1] = ndistinct_datum;
+			replaces[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+		}
+		else
+		{
+			nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+			success = false;
+		}
+
+		free_pg_ndistinct(ndistinct);
+	}
+	else
+		nulls[Anum_pg_statistic_ext_data_stxdndistinct - 1] = true;
+
+	if (has.dependencies)
+	{
+		Datum		dependencies_datum = PG_GETARG_DATUM(DEPENDENCIES_ARG);
+		bytea	   *data = DatumGetByteaPP(dependencies_datum);
+		MVDependencies *dependencies = statext_dependencies_deserialize(data);
+
+		if (pg_dependencies_validate_deps(dependencies, &stxform->stxkeys, numexprs, WARNING))
+		{
+			values[Anum_pg_statistic_ext_data_stxddependencies - 1] = dependencies_datum;
+			replaces[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
+		}
+		else
+		{
+			nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
+			success = false;
+		}
+
+		free_pg_dependencies(dependencies);
+	}
+	else
+		nulls[Anum_pg_statistic_ext_data_stxddependencies - 1] = true;
+
+	if (has.mcv)
+	{
+		ArrayType  *mcv_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VALS_ARG);
+		ArrayType  *nulls_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_VAL_NULLS_ARG);
+		ArrayType  *freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_FREQS_ARG);
+		ArrayType  *base_freqs_arr = PG_GETARG_ARRAYTYPE_P(MOST_COMMON_BASE_FREQS_ARG);
+		Datum		datum = (Datum) 0;
+		bool		mcv_ok = false;
+
+		/*
+		 * The mcv_arr is an array of arrays of text, and we use it as the
+		 * reference array for checking the lengths of the other 3 arrays.
+		 *
+		 * A failed check skips only the MCV list: the other statistic kinds
+		 * are still written and every resource is released at the end of
+		 * the function.
+		 */
+		if (ARR_NDIM(mcv_arr) != 2)
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Parameters \"%s\" must be a text array of 2 dimensions.",
+							extarginfo[MOST_COMMON_VALS_ARG].argname)));
+		else if (ARR_DIMS(mcv_arr)[1] != numattrs)
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Parameter \"%s\" must have an inner dimension of %d elements.",
+							extarginfo[MOST_COMMON_VALS_ARG].argname, numattrs)));
+		else
+		{
+			int			nitems = ARR_DIMS(mcv_arr)[0];
+
+			/* fixed length arrays that cannot contain NULLs */
+			if (check_mcvlist_array(nulls_arr, MOST_COMMON_VAL_NULLS_ARG,
+									2, nitems) &&
+				check_mcvlist_array(freqs_arr, MOST_COMMON_FREQS_ARG,
+									1, nitems) &&
+				check_mcvlist_array(base_freqs_arr, MOST_COMMON_BASE_FREQS_ARG,
+									1, nitems))
+			{
+				/* import_mcvlist() indexes the nulls array like mcv_arr */
+				if (ARR_DIMS(nulls_arr)[1] != numattrs)
+					ereport(WARNING,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("Parameter \"%s\" must have an inner dimension of %d elements.",
+									extarginfo[MOST_COMMON_VAL_NULLS_ARG].argname, numattrs)));
+				else
+				{
+					Datum	   *mcv_elems;
+					bool	   *mcv_nulls;
+					int			check_nummcv;
+
+					deconstruct_array_builtin(mcv_arr, TEXTOID, &mcv_elems,
+											  &mcv_nulls, &check_nummcv);
+
+					Assert(check_nummcv == (nitems * numattrs));
+
+					datum = import_mcvlist(tup, WARNING, numattrs,
+										   atttypids, atttypmods, atttypcolls,
+										   nitems, mcv_elems, mcv_nulls,
+										   (bool *) ARR_DATA_PTR(nulls_arr),
+										   (float8 *) ARR_DATA_PTR(freqs_arr),
+										   (float8 *) ARR_DATA_PTR(base_freqs_arr));
+
+					/* (Datum) 0 means import_mcvlist() reported a failure */
+					mcv_ok = (datum != (Datum) 0);
+				}
+			}
+		}
+
+		if (mcv_ok)
+		{
+			values[Anum_pg_statistic_ext_data_stxdmcv - 1] = datum;
+			replaces[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+		}
+		else
+		{
+			nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+			success = false;
+		}
+	}
+	else
+		nulls[Anum_pg_statistic_ext_data_stxdmcv - 1] = true;
+
+	if (has.expressions)
+	{
+		Datum		datum;
+		Relation	pgsd;
+
+		pgsd = table_open(StatisticRelationId, RowExclusiveLock);
+
+		datum = import_expressions(pgsd, numexprs,
+								   &atttypids[numattnums], &atttypmods[numattnums],
+								   &atttypcolls[numattnums],
+								   PG_GETARG_ARRAYTYPE_P(EXPRESSIONS_ARG));
+
+		table_close(pgsd, RowExclusiveLock);
+
+		/* (Datum) 0 means import_expressions() reported a failure */
+		if (datum != (Datum) 0)
+		{
+			values[Anum_pg_statistic_ext_data_stxdexpr - 1] = datum;
+			replaces[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
+		}
+		else
+		{
+			nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
+			success = false;
+		}
+	}
+	else
+		nulls[Anum_pg_statistic_ext_data_stxdexpr - 1] = true;
+
+	upsert_pg_statistic_ext_data(values, nulls, replaces);
+
+	heap_freetuple(tup);
+	table_close(pg_stext, RowExclusiveLock);
+
+	if (atttypids != NULL)
+		pfree(atttypids);
+	if (atttypmods != NULL)
+		pfree(atttypmods);
+	if (atttypcolls != NULL)
+		pfree(atttypcolls);
+	return success;
+}
+
+/*
+ * Verify that one of the companion MCV arrays lines up with the
+ * most_common_vals array: it must have required_ndims dimensions, hold no
+ * NULL elements, and have mcv_length entries in its first dimension.
+ *
+ * argindex selects the argument name used in the WARNING that is raised
+ * for the first failed check.  Returns true only if every check passes.
+ */
+static bool
+check_mcvlist_array(ArrayType *arr, int argindex, int required_ndims,
+					int mcv_length)
+{
+	const char *argname = extarginfo[argindex].argname;
+	bool		valid = false;
+
+	/* the checks run in order; the first one that fails decides the message */
+	if (ARR_NDIM(arr) != required_ndims)
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameter \"%s\" must be an array of %d dimensions.",
+						argname, required_ndims)));
+	else if (array_contains_nulls(arr))
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Array \"%s\" cannot contain NULLs.",
+						argname)));
+	else if (ARR_DIMS(arr)[0] != mcv_length)
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameters \"%s\" must have the same number of elements as \"%s\"",
+						argname,
+						extarginfo[MOST_COMMON_VALS_ARG].argname)));
+	else
+		valid = true;
+
+	return valid;
+}
+
+/*
+ * Create the stxdexprs datum using the user input in an array of array of
+ * text, referenced against the datatypes for the expressions.
+ */
+static Datum
+import_expressions(Relation pgsd, int numexprs,
+				   Oid *atttypids, int32 *atttypmods,
+				   Oid *atttypcolls, ArrayType *exprs_arr)
+{
+	Datum	   *exprs_elems;
+	bool	   *exprs_nulls;
+	int			check_numexprs;
+	int			offset = 0;		/* index of the current expression's first element */
+
+	FmgrInfo	array_in_fn;
+
+	Oid			pgstypoid = get_rel_type_id(StatisticRelationId);
+
+	ArrayBuildState *astate = NULL;
+
+
+	/*
+	 * Every failure below raises a WARNING and returns (Datum) 0, which the
+	 * caller must treat as "no expression statistics".
+	 */
+	if (ARR_NDIM(exprs_arr) != 2)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameter \"%s\" must be a text array of 2 dimensions.",
+						extarginfo[EXPRESSIONS_ARG].argname)));
+		return (Datum) 0;
+	}
+
+	if (ARR_DIMS(exprs_arr)[0] != numexprs)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameter \"%s\" must have an outer dimension of %d elements.",
+						extarginfo[EXPRESSIONS_ARG].argname, numexprs)));
+		return (Datum) 0;
+	}
+	if (ARR_DIMS(exprs_arr)[1] != NUM_ATTRIBUTE_STATS_ELEMS)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("Parameter \"%s\" must have an inner dimension of %d elements.",
+						extarginfo[EXPRESSIONS_ARG].argname,
+						NUM_ATTRIBUTE_STATS_ELEMS)));
+		return (Datum) 0;
+	}
+
+	fmgr_info(F_ARRAY_IN, &array_in_fn);
+
+	deconstruct_array_builtin(exprs_arr, TEXTOID, &exprs_elems,
+							  &exprs_nulls, &check_numexprs);
+
+	for (int i = 0; i < numexprs; i++)
+	{
+		Oid			typid = atttypids[i];
+		int32		typmod = atttypmods[i];
+		Oid			stacoll = atttypcolls[i];
+		TypeCacheEntry *typcache;
+
+		Oid			elemtypid = InvalidOid;
+		Oid			elem_eq_opr = InvalidOid;
+
+		bool		ok;
+
+		Datum		values[Natts_pg_statistic];
+		bool		nulls[Natts_pg_statistic];
+		bool		replaces[Natts_pg_statistic];
+
+		HeapTuple	pgstup;
+		Datum		pgstdat;
+
+		/* finds the right operators even if atttypid is a domain */
+		typcache = lookup_type_cache(typid, TYPECACHE_LT_OPR | TYPECACHE_EQ_OPR);
+
+		statatt_init_empty_tuple(InvalidOid, InvalidAttrNumber, false,
+								 values, nulls, replaces);
+
+		if (!exprs_nulls[offset + NULL_FRAC_ELEM])
+		{
+			ok = text_to_float4(exprs_elems[offset + NULL_FRAC_ELEM],
+								&values[Anum_pg_statistic_stanullfrac - 1]);
+
+			if (!ok)
+			{
+				char	   *s = TextDatumGetCString(exprs_elems[offset + NULL_FRAC_ELEM]);
+
+				ereport(WARNING,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("Expression %s element \"%s\" does not match expected input type.",
+								extexprarginfo[NULL_FRAC_ELEM].argname, s)));
+				pfree(s);
+				return (Datum) 0;
+			}
+		}
+
+		if (!exprs_nulls[offset + AVG_WIDTH_ELEM])
+		{
+			ok = text_to_int4(exprs_elems[offset + AVG_WIDTH_ELEM],
+							  &values[Anum_pg_statistic_stawidth - 1]);
+
+			if (!ok)
+			{
+				/* report the avg_width text that failed to convert */
+				char	   *s = TextDatumGetCString(exprs_elems[offset + AVG_WIDTH_ELEM]);
+
+				ereport(WARNING,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("Expression %s element \"%s\" does not match expected input type.",
+								extexprarginfo[AVG_WIDTH_ELEM].argname, s)));
+				pfree(s);
+				return (Datum) 0;
+			}
+		}
+
+		if (!exprs_nulls[offset + N_DISTINCT_ELEM])
+		{
+			ok = text_to_float4(exprs_elems[offset + N_DISTINCT_ELEM],
+								&values[Anum_pg_statistic_stadistinct - 1]);
+
+			if (!ok)
+			{
+				/* report the n_distinct text that failed to convert */
+				char	   *s = TextDatumGetCString(exprs_elems[offset + N_DISTINCT_ELEM]);
+
+				ereport(WARNING,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("Expression %s element \"%s\" does not match expected input type.",
+								extexprarginfo[N_DISTINCT_ELEM].argname, s)));
+				pfree(s);
+				return (Datum) 0;
+			}
+		}
+
+		/*
+		 * The STAKIND statistics are the same as the ones found in attribute
+		 * stats.  However, these are all derived from text columns, whereas
+		 * the ones derived for attribute stats are a mix of datatypes. This
+		 * limits the opportunities for code sharing between the two.
+		 */
+
+		/* STATISTIC_KIND_MCV */
+		if (exprs_nulls[offset + MOST_COMMON_VALS_ELEM] !=
+			exprs_nulls[offset + MOST_COMMON_FREQS_ELEM])
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Expression %s and %s must both be NOT NULL or both NULL.",
+							extexprarginfo[MOST_COMMON_VALS_ELEM].argname,
+							extexprarginfo[MOST_COMMON_FREQS_ELEM].argname)));
+			return (Datum) 0;
+		}
+
+		if (!exprs_nulls[offset + MOST_COMMON_VALS_ELEM])
+		{
+			Datum		stavalues;
+			Datum		stanumbers;
+
+			stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_VALS_ELEM].argname,
+										  &array_in_fn, exprs_elems[offset + MOST_COMMON_VALS_ELEM],
+										  typid, typmod, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			/* name the frequencies element so a bad value is attributed to it */
+			stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_FREQS_ELEM].argname,
+										   &array_in_fn, exprs_elems[offset + MOST_COMMON_FREQS_ELEM],
+										   FLOAT4OID, -1, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_MCV,
+							 typcache->eq_opr, stacoll,
+							 stanumbers, false, stavalues, false);
+		}
+
+		/* STATISTIC_KIND_HISTOGRAM */
+		if (!exprs_nulls[offset + HISTOGRAM_BOUNDS_ELEM])
+		{
+			Datum		stavalues;
+
+			stavalues = text_to_stavalues(extexprarginfo[HISTOGRAM_BOUNDS_ELEM].argname,
+										  &array_in_fn, exprs_elems[offset + HISTOGRAM_BOUNDS_ELEM],
+										  typid, typmod, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_HISTOGRAM,
+							 typcache->lt_opr, stacoll,
+							 0, true, stavalues, false);
+		}
+
+		/* STATISTIC_KIND_CORRELATION */
+		if (!exprs_nulls[offset + CORRELATION_ELEM])
+		{
+			Datum		corr[] = {(Datum) 0};
+			ArrayType  *arry;
+			Datum		stanumbers;
+
+			ok = text_to_float4(exprs_elems[offset + CORRELATION_ELEM], &corr[0]);
+
+			if (!ok)
+			{
+				char	   *s = TextDatumGetCString(exprs_elems[offset + CORRELATION_ELEM]);
+
+				ereport(WARNING,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("Expression %s element \"%s\" does not match expected input type.",
+								extexprarginfo[CORRELATION_ELEM].argname, s)));
+				pfree(s);
+				return (Datum) 0;
+			}
+
+			/* the correlation is stored as a one-element float4 array */
+			arry = construct_array_builtin(corr, 1, FLOAT4OID);
+
+			stanumbers = PointerGetDatum(arry);
+
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_CORRELATION,
+							 typcache->lt_opr, stacoll,
+							 stanumbers, false, 0, true);
+		}
+
+		/* STATISTIC_KIND_MCELEM */
+		if (exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] !=
+			exprs_nulls[offset + MOST_COMMON_ELEM_FREQS_ELEM])
+		{
+			ereport(WARNING,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Expression %s and %s must both be NOT NULL or both NULL.",
+							extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname,
+							extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname)));
+			return (Datum) 0;
+		}
+
+		/*
+		 * We only need to fetch element type and eq operator if we have a
+		 * stat of type MCELEM or DECHIST.
+		 */
+		if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM] ||
+			!exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM])
+		{
+			if (!statatt_get_elem_type(typid, typcache->typtype,
+									   &elemtypid, &elem_eq_opr))
+			{
+				ereport(WARNING,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 (errmsg("unable to determine element type of expression"))));
+				return (Datum) 0;
+			}
+		}
+
+		if (!exprs_nulls[offset + MOST_COMMON_ELEMS_ELEM])
+		{
+			Datum		stavalues;
+			Datum		stanumbers;
+
+			stavalues = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEMS_ELEM].argname,
+										  &array_in_fn,
+										  exprs_elems[offset + MOST_COMMON_ELEMS_ELEM],
+										  elemtypid, typmod, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			stanumbers = text_to_stavalues(extexprarginfo[MOST_COMMON_ELEM_FREQS_ELEM].argname,
+										   &array_in_fn,
+										   exprs_elems[offset + MOST_COMMON_ELEM_FREQS_ELEM],
+										   FLOAT4OID, -1, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			statatt_set_slot(values, nulls, replaces,
+							 STATISTIC_KIND_MCELEM,
+							 elem_eq_opr, stacoll,
+							 stanumbers, false, stavalues, false);
+		}
+
+		if (!exprs_nulls[offset + ELEM_COUNT_HISTOGRAM_ELEM])
+		{
+			Datum		stanumbers;
+
+			stanumbers = text_to_stavalues(extexprarginfo[ELEM_COUNT_HISTOGRAM_ELEM].argname,
+										   &array_in_fn,
+										   exprs_elems[offset + ELEM_COUNT_HISTOGRAM_ELEM],
+										   FLOAT4OID, -1, &ok);
+
+			if (!ok)
+				return (Datum) 0;
+
+			statatt_set_slot(values, nulls, replaces, STATISTIC_KIND_DECHIST,
+							 elem_eq_opr, stacoll,
+							 stanumbers, false, 0, true);
+		}
+
+		/*
+		 * Currently there are no extended stats exports of the statistic
+		 * kinds STATISTIC_KIND_BOUNDS_HISTOGRAM or
+		 * STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM so these cannot be imported.
+		 * These may be added in the future.
+		 */
+
+		/* append this expression's pg_statistic-shaped row to the result */
+		pgstup = heap_form_tuple(RelationGetDescr(pgsd), values, nulls);
+		pgstdat = heap_copy_tuple_as_datum(pgstup, RelationGetDescr(pgsd));
+		astate = accumArrayResult(astate, pgstdat, false, pgstypoid,
+								  CurrentMemoryContext);
+
+		offset += NUM_ATTRIBUTE_STATS_ELEMS;
+	}
+
+	pfree(exprs_elems);
+	pfree(exprs_nulls);
+
+	return makeArrayResult(astate, CurrentMemoryContext);
+}
+
+/*
+ * Convert a text datum to a float4 datum using float4in, with soft error
+ * handling.  Returns false, without raising an error, if the text is not
+ * valid float4 input.
+ */
+static bool
+text_to_float4(Datum input, Datum *output)
+{
+	ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+	char	   *s;
+	bool		ok;
+
+	s = TextDatumGetCString(input);
+	ok = DirectInputFunctionCallSafe(float4in, s, InvalidOid, -1,
+									 (Node *) &escontext, output);
+
+	pfree(s);
+	return ok;
+}
+
+
+/*
+ * Convert a text datum to an int4 datum using int4in, with soft error
+ * handling.  Returns false, without raising an error, if the text is not
+ * valid int4 input.
+ */
+static bool
+text_to_int4(Datum input, Datum *output)
+{
+	ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+	char	   *s;
+	bool		ok;
+
+	s = TextDatumGetCString(input);
+	ok = DirectInputFunctionCallSafe(int4in, s, InvalidOid, -1,
+									 (Node *) &escontext, output);
+
+	pfree(s);
+	return ok;
+}
+
+static bool
+delete_pg_statistic_ext_data(Oid stxoid, bool inherited)
+{
+	Relation	sed = table_open(StatisticExtDataRelationId, RowExclusiveLock);
+	HeapTuple	oldtup;
+	bool		result = false;
+
+	/* Is there a pg_statistic_ext_data tuple for this statistics object?
*/
+	oldtup = SearchSysCache2(STATEXTDATASTXOID,
+							 ObjectIdGetDatum(stxoid),
+							 BoolGetDatum(inherited));
+
+	if (HeapTupleIsValid(oldtup))
+	{
+		CatalogTupleDelete(sed, &oldtup->t_self);
+		ReleaseSysCache(oldtup);
+		result = true;
+	}
+
+	table_close(sed, RowExclusiveLock);
+
+	CommandCounterIncrement();
+
+	return result;
+}
+
+/*
+ * Import extended statistics supplied as name/value argument pairs.
+ *
+ * The pairs are mapped onto the positional layout described by
+ * extarginfo[] and handed to extended_statistics_update().  Returns false
+ * if either step reports a problem, true otherwise.
+ */
+Datum
+pg_restore_extended_stats(PG_FUNCTION_ARGS)
+{
+	LOCAL_FCINFO(positional_fcinfo, NUM_EXTENDED_STATS_ARGS);
+	bool		result = true;
+
+	InitFunctionCallInfoData(*positional_fcinfo, NULL, NUM_EXTENDED_STATS_ARGS,
+							 InvalidOid, NULL, NULL);
+
+	/* the update is attempted even when some argument pairs were rejected */
+	if (!stats_fill_fcinfo_from_arg_pairs(fcinfo, positional_fcinfo, extarginfo))
+		result = false;
+
+	if (!extended_statistics_update(positional_fcinfo))
+		result = false;
+
+	PG_RETURN_BOOL(result);
+}
+
+/*
+ * Delete statistics for the given statistics object.
+ *
+ * A missing schema or statistics object is reported as a WARNING and
+ * nothing is deleted, mirroring the restore path.
+ */
+Datum
+pg_clear_extended_stats(PG_FUNCTION_ARGS)
+{
+	char	   *nspname;
+	Oid			nspoid;
+	char	   *stxname;
+	bool		inherited;
+	Relation	pg_stext;
+	HeapTuple	tup;
+	Relation	rel;
+	Oid			locked_table = InvalidOid;
+
+	Form_pg_statistic_ext stxform;
+
+	stats_check_required_arg(fcinfo, extarginfo, STATSCHEMA_ARG);
+	nspname = TextDatumGetCString(PG_GETARG_DATUM(STATSCHEMA_ARG));
+	stats_check_required_arg(fcinfo, extarginfo, STATNAME_ARG);
+	stxname = TextDatumGetCString(PG_GETARG_DATUM(STATNAME_ARG));
+	stats_check_required_arg(fcinfo, extarginfo, INHERITED_ARG);
+	/* the argument is declared BOOLOID in extarginfo, so read it as a bool */
+	inherited = PG_GETARG_BOOL(INHERITED_ARG);
+
+	if (RecoveryInProgress())
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is in progress"),
+				 errhint("Statistics cannot be modified during recovery.")));
+		PG_RETURN_VOID();
+	}
+
+	nspoid = get_namespace_oid(nspname, true);
+	if (nspoid == InvalidOid)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("Namespace \"%s\" not found.", nspname)));
+		PG_RETURN_VOID();
+	}
+
+	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
+	tup = get_pg_statistic_ext(pg_stext, nspoid, stxname);
+
+	if (!HeapTupleIsValid(tup))
+	{
+		table_close(pg_stext, RowExclusiveLock);
+		ereport(WARNING,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("Extended Statistics Object \"%s\".\"%s\" not found.",
+						nspname, stxname)));
+		PG_RETURN_VOID();
+	}
+
+	stxform = (Form_pg_statistic_ext) GETSTRUCT(tup);
+
+	/* Roundabout way of getting a RangeVar on the underlying table */
+	rel = relation_open(stxform->stxrelid, AccessShareLock);
+
+	/*
+	 * No need to fetch reloid, we already have it.  The RangeVar must name
+	 * the table's own schema: a statistics object can be created in a
+	 * different schema than the table it describes, so nspname (the schema
+	 * of the statistics object) is not a safe substitute.
+	 */
+	RangeVarGetRelidExtended(makeRangeVar(get_namespace_name(RelationGetNamespace(rel)),
+										  RelationGetRelationName(rel), -1),
+							 ShareUpdateExclusiveLock, 0,
+							 RangeVarCallbackForStats, &locked_table);
+
+	relation_close(rel, AccessShareLock);
+
+	delete_pg_statistic_ext_data(stxform->oid, inherited);
+	heap_freetuple(tup);
+	table_close(pg_stext, RowExclusiveLock);
+
+	PG_RETURN_VOID();
+}
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index f59fb8215437..a917079ceb0b 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -2173,3 +2173,147 @@ mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
 
 	return s;
 }
+
+/*
+ * The MCV is an array of records, but this is expected as 4 separate arrays.
+ * It is not possible to have a generic input function for pg_mcv_list
+ * because the most_common_values is a composite type with element types
+ * defined by the specific statistics object.
+ */
+Datum
+import_mcvlist(HeapTuple tup, int elevel, int numattrs, Oid *atttypids,
+			   int32 *atttypmods, Oid *atttypcolls, int nitems,
+			   Datum *mcv_elems, bool *mcv_nulls,
+			   bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs)
+{
+	MCVList    *mcvlist;
+	bytea	   *bytes;
+
+	HeapTuple  *vatuples;
+	VacAttrStats **vastats;
+
+	/*
+	 * Allocate the MCV list structure, set the global parameters.
+ */ + mcvlist = (MCVList *) palloc0(offsetof(MCVList, items) + + (sizeof(MCVItem) * nitems)); + + mcvlist->magic = STATS_MCV_MAGIC; + mcvlist->type = STATS_MCV_TYPE_BASIC; + mcvlist->ndimensions = numattrs; + mcvlist->nitems = nitems; + + /* Set the values for the 1-D arrays and allocate space for the 2-D arrays */ + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + item->frequency = freqs[i]; + item->base_frequency = base_freqs[i]; + item->values = (Datum *) palloc0(sizeof(Datum) * numattrs); + item->isnull = (bool *) palloc0(sizeof(bool) * numattrs); + } + + /* Walk through each dimension */ + for (int j = 0; j < numattrs; j++) + { + FmgrInfo finfo; + Oid ioparam; + Oid infunc; + int index = j; + + getTypeInputInfo(atttypids[j], &infunc, &ioparam); + fmgr_info(infunc, &finfo); + + /* store info about data type OIDs */ + mcvlist->types[j] = atttypids[j]; + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + /* These should be in agreement, but just to be safe check both */ + if (mcv_elem_nulls[index] || mcv_nulls[index]) + { + item->values[j] = (Datum) 0; + item->isnull[j] = true; + } + else + { + char *s = TextDatumGetCString(mcv_elems[index]); + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!InputFunctionCallSafe(&finfo, s, ioparam, atttypmods[j], + (Node *) &escontext, &item->values[j])) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("MCV elemement \"%s\" does not match expected input type.", s))); + return (Datum) 0; + } + + pfree(s); + } + + index += numattrs; + } + } + + /* + * The function statext_mcv_serialize() requires an array of pointers to + * VacAttrStats records, but only a few fields within those records have + * to be filled out. 
+ */ + vastats = (VacAttrStats **) palloc0(numattrs * sizeof(VacAttrStats *)); + vatuples = (HeapTuple *) palloc0(numattrs * sizeof(HeapTuple)); + + for (int i = 0; i < numattrs; i++) + { + Oid typid = atttypids[i]; + HeapTuple typtuple; + + typtuple = SearchSysCacheCopy1(TYPEOID, ObjectIdGetDatum(typid)); + + if (!HeapTupleIsValid(typtuple)) + elog(ERROR, "cache lookup failed for type %u", typid); + + vatuples[i] = typtuple; + + vastats[i] = palloc0(sizeof(VacAttrStats)); + + vastats[i]->attrtype = (Form_pg_type) GETSTRUCT(typtuple); + vastats[i]->attrtypid = typid; + vastats[i]->attrcollid = atttypcolls[i]; + } + + bytes = statext_mcv_serialize(mcvlist, vastats); + + for (int i = 0; i < numattrs; i++) + { + pfree(vatuples[i]); + pfree(vastats[i]); + } + pfree((void *) vatuples); + pfree((void *) vastats); + + if (bytes == NULL) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Unable to import mcv list"))); + return (Datum) 0; + } + + for (int i = 0; i < nitems; i++) + { + MCVItem *item = &mcvlist->items[i]; + + pfree(item->values); + pfree(item->isnull); + } + pfree(mcvlist); + pfree(mcv_elems); + pfree(mcv_nulls); + + return PointerGetDatum(bytes); +} diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c index fe452f53ae4b..839bcc9af929 100644 --- a/src/backend/statistics/mvdistinct.c +++ b/src/backend/statistics/mvdistinct.c @@ -599,6 +599,68 @@ generate_combinations_recurse(CombinationGenerator *state, } } +/* + * Free allocations of an MVNDistinct + */ +void +free_pg_ndistinct(MVNDistinct *ndistinct) +{ + for (int i = 0; i < ndistinct->nitems; i++) + pfree(ndistinct->items[i].attributes); + + pfree(ndistinct); +} + +/* + * Validate an MVNDistinct against the extended statistics object definition. + * + * Every MVNDistinctItem must be checked to ensure that the attnums in the + * attributes list correspond to attnums/expressions defined by the + * extended statistics object.
+ * + * Positive attnums are attributes which must be found in the stxkeys, + * while negative attnums correspond to an expr number, so the attnum + * can't be below (0 - numexprs). + */ +bool +pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, int numexprs, int elevel) +{ + int attnum_expr_lowbound = 0 - numexprs; + + for (int i = 0; i < ndistinct->nitems; i++) + { + MVNDistinctItem item = ndistinct->items[i]; + + for (int j = 0; j < item.nattributes; j++) + { + AttrNumber attnum = item.attributes[j]; + bool ok = false; + + if (attnum > 0) + { + for (int k = 0; k < stxkeys->dim1; k++) + if (attnum == stxkeys->values[k]) + { + ok = true; + break; + } + } + else if ((attnum < 0) && (attnum >= attnum_expr_lowbound)) + ok = true; + + if (!ok) + { + ereport(elevel, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("pg_ndistinct: invalid attnum for this statistics object: %d", attnum))); + return false; + } + } + } + return true; +} + + /* * generate_combinations * generate all k-combinations of N elements diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5cf9e12fcb9a..84e9f176d192 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12594,6 +12594,24 @@ proname => 'gist_translate_cmptype_common', prorettype => 'int2', proargtypes => 'int4', prosrc => 'gist_translate_cmptype_common' }, +# Extended Statistics Import +{ oid => '9947', + descr => 'restore statistics on extended statistics object', + proname => 'pg_restore_extended_stats', provolatile => 'v', proisstrict => 'f', + provariadic => 'any', + proparallel => 'u', prorettype => 'bool', + proargtypes => 'any', + proargnames => '{kwargs}', + proargmodes => '{v}', + prosrc => 'pg_restore_extended_stats' }, +{ oid => '9948', + descr => 'clear statistics on extended statistics object', + proname => 'pg_clear_extended_stats', provolatile => 'v', proisstrict => 'f', + proparallel => 'u', prorettype => 'void', + proargtypes 
=> 'text text bool', + proargnames => '{statistics_schemaname,statistics_name,inherited}', + prosrc => 'pg_clear_extended_stats' }, + # AIO related functions { oid => '6399', descr => 'information about in-progress asynchronous IOs', proname => 'pg_get_aios', prorows => '100', proretset => 't', diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index efcb7dc35461..ba7f5dcad829 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -127,4 +127,21 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root, Selectivity *overlap_basesel, Selectivity *totalsel); +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); + +extern Datum import_mcvlist(HeapTuple tup, int elevel, int numattrs, + Oid *atttypids, int32 *atttypmods, Oid *atttypcolls, + int nitems, Datum *mcv_elems, bool *mcv_nulls, + bool *mcv_elem_nulls, float8 *freqs, float8 *base_freqs); +extern bool pg_ndistinct_validate_items(MVNDistinct *ndistinct, int2vector *stxkeys, + int numexprs, int elevel); +extern void free_pg_ndistinct(MVNDistinct *ndistinct); +extern bool pg_dependencies_validate_deps(MVDependencies *dependencies, + int2vector *stxkeys, int numexprs, + int elevel); +extern void free_pg_dependencies(MVDependencies *dependencies); + #endif /* EXTENDED_STATS_INTERNAL_H */ diff --git a/src/test/regress/expected/stats_import.out b/src/test/regress/expected/stats_import.out index 98ce7dc28410..970e9bd09833 100644 --- a/src/test/regress/expected/stats_import.out +++ b/src/test/regress/expected/stats_import.out @@ -1084,11 +1084,15 @@ SELECT 3, 'tre', (3, 3.3, 'TRE', '2003-03-03', NULL)::stats_import.complex_type, UNION ALL SELECT 4, 'four', NULL, int4range(0,100), NULL; CREATE INDEX is_odd ON 
stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; -- Generate statistics on table with data ANALYZE stats_import.test; CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) WITH (autovacuum_enabled = false); CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -1342,6 +1346,1125 @@ AND attname = 'i'; (1 row) DROP TABLE stats_temp; +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2,1], "ndistinct" : 4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2,0], "ndistinct" : 
4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [-4,3,-1], "ndistinct" : 4}, + {"attributes" : [-4,3,-2], "ndistinct" : 4}, + {"attributes" : [-4,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2,-4], "ndistinct" : 4}]'::pg_ndistinct + ); +WARNING: pg_ndistinct: invalid attnum for this statistics object: -4 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + e.dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+------------------------ +n_distinct | [ + + | { + + | "ndistinct": 4,+ 
+ | "attributes": [+ + | 2, + + | 3 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 2, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 3,+ + | "attributes": [+ + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 2, + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 2, + + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 2, + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4,+ + | "attributes": [+ + | 2, + + | 3, + + | -1, + + | -2 + + | ] + + | } + + | ] +dependencies | +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 1, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 1 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [0], "dependency": -1, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: 0 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- set dependencies using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 
'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": -3, "degree": 1.000000}]'::pg_dependencies + ); +WARNING: pg_dependencies: invalid attnum for this statistics object: -3 + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": 
[2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + jsonb_pretty(e.dependencies::text::jsonb) AS dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+---------------------------- +n_distinct | [ + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 3, + + | "attributes": [ + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -1, + + | -2 + + | ] + + | } + + | ] +dependencies | [ + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3 + + | ], + + | "dependency": 2 + + | }, + + | { + + | 
"degree": 1.000000,+ + | "attributes": [ + + | 3 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 0.500000,+ + | "attributes": [ + + | -1 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000,+ + | "attributes": [ + + | -1 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 0.500000,+ + | "attributes": [ + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000,+ + | "attributes": [ + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | 3 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | 3 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | -1 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3, + + | -1 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3, + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3, + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 0.500000,+ + | 
"attributes": [ + + | -1, + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000,+ + | "attributes": [ + + | -1, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | 3, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 2, + + | -1, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000,+ + | "attributes": [ + + | 3, + + | -1, + + | -2 + + | ], + + | "dependency": 2 + + | } + + | ] +most_common_vals | +most_common_val_nulls | +most_common_freqs | +most_common_base_freqs | + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + 
pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); +WARNING: MCV parameters "most_common_vals", "most_common_val_nulls", "most_common_freqs", and "most_common_base_freqs" must be all specified if any are specified + pg_restore_extended_stats +--------------------------- + f +(1 row) + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + jsonb_pretty(e.dependencies::text::jsonb) AS dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 
'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx +-[ RECORD 1 ]----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +n_distinct | [ + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 3, + + | "attributes": [ + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -1 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | -1, + + | -2 + + | ] + + | }, + + | { + + | "ndistinct": 4, + + | "attributes": [ + + | 2, + + | 3, + + | -1, + + | -2 + + | ] + + | } + + | ] +dependencies | [ + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -1 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -1 + + | ], + + | "dependency": 
3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | 3 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | 3 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | -1 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3, + + | -1 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3, + + | -1 + + | ], + + | "dependency": -2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3, + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -1, + + | -2 + + | ], + + | "dependency": 2 + + | }, + + | { + + | "degree": 0.500000, + + | "attributes": [ + + | -1, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + | 3, + + | -2 + + | ], + + | "dependency": -1 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 2, + + 
| -1, + + | -2 + + | ], + + | "dependency": 3 + + | }, + + | { + + | "degree": 1.000000, + + | "attributes": [ + + | 3, + + | -1, + + | -2 + + | ], + + | "dependency": 2 + + | } + + | ] +most_common_vals | {{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}} +most_common_val_nulls | {{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}} +most_common_freqs | {0.25,0.25,0.25,0.25} +most_common_base_freqs | {0.00390625,0.015625,0.00390625,0.015625} + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + pg_restore_extended_stats +--------------------------- + t +(1 row) + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx +-[ RECORD 1 ]----------+------- +inherited | f +null_frac | 0 +avg_width | 4 +n_distinct | -0.75 +most_common_vals | {1} +most_common_freqs | {0.5} +histogram_bounds | {-1,0} +correlation | -0.6 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | +-[ RECORD 2 ]----------+------- +inherited | f +null_frac | 0.25 +avg_width | 4 +n_distinct | -0.5 +most_common_vals | {2} +most_common_freqs | {0.5} +histogram_bounds | +correlation | 1 +most_common_elems | +most_common_elem_freqs | +elem_count_histogram | + +SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + 
pg_clear_extended_stats +------------------------- + +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + count +------- + 0 +(1 row) + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + count +------- + 0 +(1 row) + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'test_stat_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + statistics_name | pg_restore_extended_stats +-----------------+--------------------------- + test_stat | t +(1 row) + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT 
n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | n_distinct | dependencies | most_common_vals | most_common_val_nulls | most_common_freqs | most_common_base_freqs +-----------+------------+--------------+------------------+-----------------------+-------------------+------------------------ +(0 rows) + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + 
n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + inherited | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation | most_common_elems | most_common_elem_freqs | elem_count_histogram +-----------+-----------+-----------+------------+------------------+-------------------+------------------+-------------+-------------------+------------------------+---------------------- +(0 rows) + DROP SCHEMA stats_import CASCADE; NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to type stats_import.complex_type diff --git a/src/test/regress/sql/stats_import.sql 
b/src/test/regress/sql/stats_import.sql index d140733a7502..48a03f5b8031 100644 --- a/src/test/regress/sql/stats_import.sql +++ b/src/test/regress/sql/stats_import.sql @@ -766,6 +766,9 @@ SELECT 4, 'four', NULL, int4range(0,100), NULL; CREATE INDEX is_odd ON stats_import.test(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test; + -- Generate statistics on table with data ANALYZE stats_import.test; @@ -774,6 +777,9 @@ CREATE TABLE stats_import.test_clone ( LIKE stats_import.test ) CREATE INDEX is_odd_clone ON stats_import.test_clone(((comp).a % 2 = 1)); +CREATE STATISTICS stats_import.test_stat_clone ON name, comp, lower(arange), array_length(tags,1) +FROM stats_import.test_clone; + -- -- Copy stats from test to test_clone, and is_odd to is_odd_clone -- @@ -970,4 +976,362 @@ AND tablename = 'stats_temp' AND inherited = false AND attname = 'i'; DROP TABLE stats_temp; + +-- set n_distinct using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2,1], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + 
{"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2,0], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- set n_distinct using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [-4,3,-1], "ndistinct" : 4}, + {"attributes" : [-4,3,-2], "ndistinct" : 4}, + {"attributes" : [-4,-1,-2], "ndistinct" : 4}, + {"attributes" : [3,-1,-2,-4], "ndistinct" : 4}]'::pg_ndistinct + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'n_distinct', '[{"attributes" : [2,3], "ndistinct" : 4}, + {"attributes" : [2,-1], "ndistinct" : 4}, + {"attributes" : [3,-1], "ndistinct" : 4}, + {"attributes" : [3,-2], "ndistinct" : 4}, + {"attributes" : [-1,-2], "ndistinct" : 3}, + {"attributes" : [2,3,-1], "ndistinct" : 4}, + {"attributes" : [2,3,-2], "ndistinct" : 4}, + {"attributes" : [2,-1,-2], "ndistinct" : 4}, + {"attributes" : [2,3,-1,-2], "ndistinct" : 4}]'::pg_ndistinct + ); + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + e.dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- set dependencies using at attnum (1) that is not in the statistics object +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 1, "degree": 
1.000000}]'::pg_dependencies + ); + +-- set dependencies using at attnum that is 0 +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [0], "dependency": -1, "degree": 1.000000}]'::pg_dependencies + ); + +-- set dependencies using at attnum that is outside the expression bounds(below -2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": -3, "degree": 1.000000}]'::pg_dependencies + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'dependencies', '[{"attributes": [2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2], "dependency": -2, "degree": 1.000000}, + {"attributes": [3], "dependency": 2, "degree": 1.000000}, + {"attributes": [3], "dependency": -1, "degree": 1.000000}, + {"attributes": [3], "dependency": -2, "degree": 1.000000}, + {"attributes": [-1], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1], "dependency": 3, "degree": 0.500000}, + {"attributes": [-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,3], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [2,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [3,-1], 
"dependency": 2, "degree": 1.000000}, + {"attributes": [3,-1], "dependency": -2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": 2, "degree": 1.000000}, + {"attributes": [3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [-1,-2], "dependency": 2, "degree": 0.500000}, + {"attributes": [-1,-2], "dependency": 3, "degree": 0.500000}, + {"attributes": [2,3,-2], "dependency": -1, "degree": 1.000000}, + {"attributes": [2,-1,-2], "dependency": 3, "degree": 1.000000}, + {"attributes": [3,-1,-2], "dependency": 2, "degree": 1.000000}]'::pg_dependencies + ); + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + jsonb_pretty(e.dependencies::text::jsonb) AS dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +-- if any one mcv param specified, all four must be specified (part 1) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[] + ); + +-- if any one mcv param specified, all four must be specified (part 2) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[] + ); + +-- if any one mcv param specified, all four must be specified (part 3) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double 
precision[] + ); + +-- if any one mcv param specified, all four must be specified (part 4) +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +-- ok +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'most_common_vals', '{{four,NULL,0,NULL},{one,"(1,1.1,ONE,01-01-2001,\"{\"\"xkey\"\": \"\"xval\"\"}\")",1,2},{tre,"(3,3.3,TRE,03-03-2003,)",-1,3},{two,"(2,2.2,TWO,02-02-2002,\"[true, 4, \"\"six\"\"]\")",1,2}}'::text[], + 'most_common_val_nulls', '{{f,t,f,t},{f,f,f,f},{f,f,f,f},{f,f,f,f}}'::boolean[], + 'most_common_freqs', '{0.25,0.25,0.25,0.25}'::double precision[], + 'most_common_base_freqs', '{0.00390625,0.015625,0.00390625,0.015625}'::double precision[] + ); + +SELECT + jsonb_pretty(e.n_distinct::text::jsonb) AS n_distinct, + jsonb_pretty(e.dependencies::text::jsonb) AS dependencies, + e.most_common_vals, e.most_common_val_nulls, + e.most_common_freqs, e.most_common_base_freqs +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false +\gx + +SELECT + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', 'stats_import', + 'statistics_name', 'test_stat_clone', + 'inherited', false, + 'exprs', '{{0,4,-0.75,"{1}","{0.5}","{-1,0}",-0.6,NULL,NULL,NULL},{0.25,4,-0.5,"{2}","{0.5}",NULL,1,NULL,NULL,NULL}}'::text[] + ); + +SELECT + e.inherited, e.null_frac, e.avg_width, e.n_distinct, e.most_common_vals, + e.most_common_freqs, e.histogram_bounds, e.correlation, + e.most_common_elems, e.most_common_elem_freqs, e.elem_count_histogram +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +and e.inherited = false +\gx + 
+SELECT + pg_catalog.pg_clear_extended_stats( + statistics_schemaname => 'stats_import', + statistics_name => 'test_stat_clone', + inherited => false); + +SELECT COUNT(*) +FROM pg_stats_ext AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +SELECT COUNT(*) +FROM pg_stats_ext_exprs AS e +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat_clone' +AND e.inherited = false; + +-- +-- Copy stats from test_stat to test_stat_clone +-- +SELECT + e.statistics_name, + pg_catalog.pg_restore_extended_stats( + 'statistics_schemaname', e.statistics_schemaname::text, + 'statistics_name', 'test_stat_clone', + 'inherited', e.inherited, + 'n_distinct', e.n_distinct, + 'dependencies', e.dependencies, + 'most_common_vals', e.most_common_vals, + 'most_common_val_nulls', e.most_common_val_nulls, + 'most_common_freqs', e.most_common_freqs, + 'most_common_base_freqs', e.most_common_base_freqs, + 'exprs', x.exprs + ) +FROM pg_stats_ext AS e +CROSS JOIN LATERAL ( + SELECT + array_agg( + ARRAY[ee.null_frac::text, ee.avg_width::text, + ee.n_distinct::text, ee.most_common_vals::text, + ee.most_common_freqs::text, ee.histogram_bounds::text, + ee.correlation::text, ee.most_common_elems::text, + ee.most_common_elem_freqs::text, + ee.elem_count_histogram::text]) + FROM pg_stats_ext_exprs AS ee + WHERE ee.statistics_schemaname = e.statistics_schemaname + AND ee.statistics_name = e.statistics_name + AND ee.inherited = e.inherited + ) AS x(exprs) +WHERE e.statistics_schemaname = 'stats_import' +AND e.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, 
n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.n_distinct, n.dependencies, n.most_common_vals, + n.most_common_val_nulls, n.most_common_freqs, + n.most_common_base_freqs +FROM pg_stats_ext AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + o.n_distinct, o.dependencies, o.most_common_vals, + o.most_common_val_nulls, o.most_common_freqs, + o.most_common_base_freqs +FROM pg_stats_ext AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + +SELECT o.inherited, + o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat' +EXCEPT +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone'; + +SELECT n.inherited, + n.null_frac, n.avg_width, n.n_distinct, + n.most_common_vals::text AS most_common_vals, + n.most_common_freqs, + n.histogram_bounds::text AS histogram_bounds, + n.correlation, + n.most_common_elems::text AS most_common_elems, + n.most_common_elem_freqs, n.elem_count_histogram +FROM pg_stats_ext_exprs AS n +WHERE n.statistics_schemaname = 'stats_import' +AND n.statistics_name = 'test_stat_clone' +EXCEPT +SELECT o.inherited, + 
o.null_frac, o.avg_width, o.n_distinct, + o.most_common_vals::text AS most_common_vals, + o.most_common_freqs, + o.histogram_bounds::text AS histogram_bounds, + o.correlation, + o.most_common_elems::text AS most_common_elems, + o.most_common_elem_freqs, o.elem_count_histogram +FROM pg_stats_ext_exprs AS o +WHERE o.statistics_schemaname = 'stats_import' +AND o.statistics_name = 'test_stat'; + DROP SCHEMA stats_import CASCADE; From df2ccde26774bc82e2de645a8c130f27b9ee2902 Mon Sep 17 00:00:00 2001 From: Corey Huinker Date: Wed, 5 Nov 2025 01:13:04 -0500 Subject: [PATCH 7/7] Include Extended Statistics in pg_dump. Incorporate the new pg_restore_extended_stats() function into pg_dump. This detects the existence of statistics data for extended statistics objects (i.e. pg_statistic_ext_data rows). This handles many of the changes that have happened to extended statistics data over the various versions, including: * Format change for pg_ndistinct and pg_dependencies in current development version. Earlier versions have the format translated via the pg_dump SQL statement. * Inherited extended statistics were introduced in v15. * Expressions were introduced to extended statistics in v14. * MCV extended statistics were introduced in v12. * pg_statistic_ext_data and pg_stats_ext were introduced in v12; prior to that, ndistinct and dependencies data (the only kinds of stats that existed) were stored directly in pg_statistic_ext. * Extended Statistics were introduced in v10, so no support for prior versions is necessary.
---
 src/bin/pg_dump/pg_backup.h          |   1 +
 src/bin/pg_dump/pg_backup_archiver.c |   3 +-
 src/bin/pg_dump/pg_dump.c            | 252 +++++++++++++++++++++++++++
 src/bin/pg_dump/t/002_pg_dump.pl     |  28 +++
 4 files changed, 283 insertions(+), 1 deletion(-)

diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index d9041dad7206..df708e4ced69 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -68,6 +68,7 @@ enum _dumpPreparedQueries
 	PREPQUERY_DUMPCOMPOSITETYPE,
 	PREPQUERY_DUMPDOMAIN,
 	PREPQUERY_DUMPENUMTYPE,
+	PREPQUERY_DUMPEXTSTATSSTATS,
 	PREPQUERY_DUMPFUNC,
 	PREPQUERY_DUMPOPR,
 	PREPQUERY_DUMPRANGETYPE,
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 59eaecb4ed71..1bfd296e0ee9 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -3008,7 +3008,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
 		strcmp(te->desc, "SEARCHPATH") == 0)
 		return REQ_SPECIAL;
 
-	if (strcmp(te->desc, "STATISTICS DATA") == 0)
+	if ((strcmp(te->desc, "STATISTICS DATA") == 0) ||
+		(strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0))
 	{
 		if (!ropt->dumpStatistics)
 			return 0;
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index a00918bacb40..8c5850f9e9b3 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
 static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
 static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo);
 static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo);
+static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo);
 static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo);
 static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo);
@@ -8258,6 +8259,9 @@ getExtendedStatistics(Archive *fout)
 
 		/* Decide whether we want to dump it */
 		selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
+
+		if (fout->dopt->dumpStatistics)
+			statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
 	}
 
 	PQclear(res);
@@ -11712,6 +11716,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
 			break;
 		case DO_STATSEXT:
 			dumpStatisticsExt(fout, (const StatsExtInfo *) dobj);
+			dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj);
 			break;
 		case DO_REFRESH_MATVIEW:
 			refreshMatViewData(fout, (const TableDataInfo *) dobj);
@@ -18514,6 +18519,253 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo)
 	free(qstatsextname);
 }
 
+/*
+ * dumpStatisticsExtStats
+ *	  write out to fout the stats for an extended statistics object
+ */
+static void
+dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo)
+{
+	DumpOptions *dopt = fout->dopt;
+	PQExpBuffer query;
+	PGresult   *res;
+	int			nstats;
+
+	/* Do nothing if not dumping statistics */
+	if (!dopt->dumpStatistics)
+		return;
+
+	if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS])
+	{
+		PQExpBuffer pq = createPQExpBuffer();
+
+		/*----------
+		 * Set up query for extended statistics data, by server version:
+		 * 19+: query pg_stats_ext and pg_stats_ext_exprs as-is
+		 * 15-18: translate the old ndistinct and dependencies formats
+		 * 14: as above, and inherited is always false
+		 * 12-13: as above, and there is no pg_stats_ext_exprs
+		 * 10-11: no MCV and no pg_stats_ext; join pg_statistic_ext and pg_namespace
+		 */
+
+		appendPQExpBufferStr(pq,
+							 "PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n"
+							 "SELECT ");
+
+		/*
+		 * Versions 15+ have inherited stats.
+		 *
+		 * Create this column in all versions because we need to order by it later.
+		 */
+		if (fout->remoteVersion >= 150000)
+			appendPQExpBufferStr(pq, "e.inherited, ");
+		else
+			appendPQExpBufferStr(pq, "false AS inherited, ");
+
+		/*
+		 * Versions < 19 use the old ndistinct and dependencies formats
+		 *
+		 * These transformations may look scary, but all we're doing is translating
+		 *
+		 * {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
+		 *
+		 * to
+		 *
+		 * [{"ndistinct": 11, "attributes": [3,4]},
+		 * {"ndistinct": 11, "attributes": [3,6]},
+		 * {"ndistinct": 11, "attributes": [4,6]},
+		 * {"ndistinct": 11, "attributes": [3,4,6]}]
+		 *
+		 * and
+		 * {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000,
+		 * "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000}
+		 *
+		 * to
+		 *
+		 * [{"degree": 1.000000, "attributes": [3], "dependency": 4},
+		 * {"degree": 1.000000, "attributes": [3], "dependency": 6},
+		 * {"degree": 1.000000, "attributes": [4], "dependency": 6},
+		 * {"degree": 1.000000, "attributes": [3,4], "dependency": 6},
+		 * {"degree": 1.000000, "attributes": [3,6], "dependency": 4}]
+		 */
+		if (fout->remoteVersion >= 190000)
+			appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, ");
+		else
+			appendPQExpBufferStr(pq,
+								 "( "
+								 "SELECT json_agg( "
+								 " json_build_object( "
+								 " 'attributes', "
+								 " string_to_array(kv.key, ', ')::integer[], "
+								 " 'ndistinct', "
+								 " kv.value::bigint )) "
+								 "FROM json_each_text(e.n_distinct::text::json) AS kv"
+								 ") AS n_distinct, "
+								 "( "
+								 "SELECT json_agg( "
+								 " json_build_object( "
+								 " 'attributes', "
+								 " string_to_array( "
+								 " split_part(kv.key, ' => ', 1), "
+								 " ', ')::integer[], "
+								 " 'dependency', "
+								 " split_part(kv.key, ' => ', 2)::integer, "
+								 " 'degree', "
+								 " kv.value::double precision )) "
+								 "FROM json_each_text(e.dependencies::text::json) AS kv "
+								 ") AS dependencies, ");
+
+		/* Versions < 12 do not have MCV */
+		if (fout->remoteVersion >= 120000)
+			appendPQExpBufferStr(pq,
+								 "e.most_common_vals, e.most_common_val_nulls, "
+								 "e.most_common_freqs, e.most_common_base_freqs, ");
+		else
+			appendPQExpBufferStr(pq,
+								 "NULL AS most_common_vals, NULL AS most_common_val_nulls, "
+								 "NULL AS most_common_freqs, NULL AS most_common_base_freqs, ");
+
+		/* Expressions were introduced in v14 */
+		if (fout->remoteVersion >= 140000)
+		{
+			appendPQExpBufferStr(pq,
+								 "( "
+								 "SELECT array_agg( "
+								 " ARRAY[ee.null_frac::text, ee.avg_width::text, "
+								 " ee.n_distinct::text, ee.most_common_vals::text, "
+								 " ee.most_common_freqs::text, ee.histogram_bounds::text, "
+								 " ee.correlation::text, ee.most_common_elems::text, "
+								 " ee.most_common_elem_freqs::text, "
+								 " ee.elem_count_histogram::text]) "
+								 "FROM pg_stats_ext_exprs AS ee "
+								 "WHERE ee.statistics_schemaname = $1 "
+								 "AND ee.statistics_name = $2 ");
+
+			/* Inherited expressions introduced in v15 */
+			if (fout->remoteVersion >= 150000)
+				appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited");
+
+			appendPQExpBufferStr(pq, ") AS exprs ");
+		}
+		else
+			appendPQExpBufferStr(pq, "NULL AS exprs ");
+
+		/* pg_stats_ext introduced in v12 */
+		if (fout->remoteVersion >= 120000)
+			appendPQExpBufferStr(pq,
+								 "FROM pg_catalog.pg_stats_ext AS e "
+								 "WHERE e.statistics_schemaname = $1 "
+								 "AND e.statistics_name = $2 ");
+		else
+			appendPQExpBufferStr(pq,
+								 "FROM ( "
+								 "SELECT s.stxndistinct AS n_distinct, "
+								 " s.stxdependencies AS dependencies "
+								 "FROM pg_catalog.pg_statistic_ext AS s "
+								 "JOIN pg_catalog.pg_namespace AS n "
+								 "ON n.oid = s.stxnamespace "
+								 "WHERE n.nspname = $1 "
+								 "AND s.stxname = $2 "
+								 ") AS e ");
+
+		/* we always have an inherited column, but it may be a constant */
+		appendPQExpBufferStr(pq, "ORDER BY inherited");
+
+		ExecuteSqlStatement(fout, pq->data);
+
+		fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS] = true;
+
+		destroyPQExpBuffer(pq);
+	}
+
+	query = createPQExpBuffer();
+
+	appendPQExpBufferStr(query, "EXECUTE getExtStatsStats(");
+	appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout);
+	appendPQExpBufferStr(query, "::pg_catalog.name, ");
+	appendStringLiteralAH(query, statsextinfo->dobj.name, fout);
+	appendPQExpBufferStr(query, "::pg_catalog.name)");
+
+	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+	destroyPQExpBuffer(query);
+
+	nstats = PQntuples(res);
+
+	if (nstats > 0)
+	{
+		PQExpBuffer out = createPQExpBuffer();
+
+		int			i_inherited = PQfnumber(res, "inherited");
+		int			i_ndistinct = PQfnumber(res, "n_distinct");
+		int			i_dependencies = PQfnumber(res, "dependencies");
+		int			i_mcv = PQfnumber(res, "most_common_vals");
+		int			i_mcv_nulls = PQfnumber(res, "most_common_val_nulls");
+		int			i_mcf = PQfnumber(res, "most_common_freqs");
+		int			i_mcbf = PQfnumber(res, "most_common_base_freqs");
+		int			i_exprs = PQfnumber(res, "exprs");
+
+		for (int i = 0; i < nstats; i++)
+		{
+			if (PQgetisnull(res, i, i_inherited))
+				pg_fatal("inherited cannot be NULL");
+
+			appendPQExpBufferStr(out,
+								 "SELECT * FROM pg_catalog.pg_restore_extended_stats(\n");
+			appendPQExpBuffer(out, "\t'version', '%d'::integer,\n",
+							  fout->remoteVersion);
+			appendPQExpBufferStr(out, "\t'statistics_schemaname', ");
+			appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout);
+			appendPQExpBufferStr(out, ",\n\t'statistics_name', ");
+			appendStringLiteralAH(out, statsextinfo->dobj.name, fout);
+			appendNamedArgument(out, fout, "inherited", "boolean",
+								PQgetvalue(res, i, i_inherited));
+
+			if (!PQgetisnull(res, i, i_ndistinct))
+				appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct",
+									PQgetvalue(res, i, i_ndistinct));
+
+			if (!PQgetisnull(res, i, i_dependencies))
+				appendNamedArgument(out, fout, "dependencies", "pg_dependencies",
+									PQgetvalue(res, i, i_dependencies));
+
+			if (!PQgetisnull(res, i, i_mcv))
+				appendNamedArgument(out, fout, "most_common_vals", "text[]",
+									PQgetvalue(res, i, i_mcv));
+
+			if (!PQgetisnull(res, i, i_mcv_nulls))
+				appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]",
+									PQgetvalue(res, i, i_mcv_nulls));
+
+			if (!PQgetisnull(res, i, i_mcf))
+				appendNamedArgument(out, fout, "most_common_freqs", "double precision[]",
+									PQgetvalue(res, i, i_mcf));
+
+			if (!PQgetisnull(res, i, i_mcbf))
+				appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]",
+									PQgetvalue(res, i, i_mcbf));
+
+			if (!PQgetisnull(res, i, i_exprs))
+				appendNamedArgument(out, fout, "exprs", "text[]",
+									PQgetvalue(res, i, i_exprs));
+
+			appendPQExpBufferStr(out, "\n);\n");
+		}
+
+		ArchiveEntry(fout, nilCatalogId, createDumpId(),
+					 ARCHIVE_OPTS(.tag = statsextinfo->dobj.name,
+								  .namespace = statsextinfo->dobj.namespace->dobj.name,
+								  .owner = statsextinfo->rolname,
+								  .description = "EXTENDED STATISTICS DATA",
+								  .section = SECTION_POST_DATA,
+								  .createStmt = out->data,
+								  .deps = &statsextinfo->dobj.dumpId,
+								  .nDeps = 1));
+		destroyPQExpBuffer(out);
+	}
+	PQclear(res);
+}
+
 /*
  * dumpConstraint
  *	  write out to fout a user-defined constraint
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 445a541abf63..6681265974f6 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -4772,6 +4772,34 @@
 		},
 	},
 
+	#
+	# EXTENDED stats will end up in SECTION_POST_DATA.
+	#
+	'extended_statistics_import' => {
+		create_sql => '
+			CREATE TABLE dump_test.has_ext_stats
+			AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);
+			CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;
+			ANALYZE dump_test.has_ext_stats;',
+		regexp => qr/^
+			\QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm,
+		like => {
+			%full_runs,
+			%dump_test_schema_runs,
+			no_data_no_schema => 1,
+			no_schema => 1,
+			section_post_data => 1,
+			statistics_only => 1,
+			schema_only_with_statistics => 1,
+		},
+		unlike => {
+			exclude_dump_test_schema => 1,
+			no_statistics => 1,
+			only_dump_measurement => 1,
+			schema_only => 1,
+		},
+	},
+
 	#
 	# While attribute stats (aka pg_statistic stats) only appear for tables
 	# that have been analyzed, all tables will have relation stats because