Skip to content

Commit df2ccde

Browse files
coreyhuinkerCommitfest Bot
authored andcommitted
Include Extended Statistics in pg_dump.
Incorporate the new pg_restore_extended_stats() function into pg_dump. This detects the existence of extended statistics data (i.e. pg_statistic_ext_data rows). This handles many of the changes that have happened to extended statistics data over the various versions, including: * Format change for pg_ndistinct and pg_dependencies in the current development version. Earlier versions have the format translated via the pg_dump SQL statement. * Inherited extended statistics were introduced in v15. * Expressions were introduced to extended statistics in v14. * MCV extended statistics were introduced in v13. * pg_statistic_ext_data and pg_stats_ext were introduced in v12; prior to that, ndistinct and dependencies data (the only kinds of stats that existed) were stored directly on pg_statistic_ext. * Extended statistics were introduced in v10, so no support for prior versions is necessary.
1 parent 77c4e37 commit df2ccde

File tree

4 files changed

+283
-1
lines changed

4 files changed

+283
-1
lines changed

src/bin/pg_dump/pg_backup.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ enum _dumpPreparedQueries
6868
PREPQUERY_DUMPCOMPOSITETYPE,
6969
PREPQUERY_DUMPDOMAIN,
7070
PREPQUERY_DUMPENUMTYPE,
71+
PREPQUERY_DUMPEXTSTATSSTATS,
7172
PREPQUERY_DUMPFUNC,
7273
PREPQUERY_DUMPOPR,
7374
PREPQUERY_DUMPRANGETYPE,

src/bin/pg_dump/pg_backup_archiver.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3008,7 +3008,8 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
30083008
strcmp(te->desc, "SEARCHPATH") == 0)
30093009
return REQ_SPECIAL;
30103010

3011-
if (strcmp(te->desc, "STATISTICS DATA") == 0)
3011+
if ((strcmp(te->desc, "STATISTICS DATA") == 0) ||
3012+
(strcmp(te->desc, "EXTENDED STATISTICS DATA") == 0))
30123013
{
30133014
if (!ropt->dumpStatistics)
30143015
return 0;

src/bin/pg_dump/pg_dump.c

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,7 @@ static void dumpSequenceData(Archive *fout, const TableDataInfo *tdinfo);
324324
static void dumpIndex(Archive *fout, const IndxInfo *indxinfo);
325325
static void dumpIndexAttach(Archive *fout, const IndexAttachInfo *attachinfo);
326326
static void dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo);
327+
static void dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo);
327328
static void dumpConstraint(Archive *fout, const ConstraintInfo *coninfo);
328329
static void dumpTableConstraintComment(Archive *fout, const ConstraintInfo *coninfo);
329330
static void dumpTSParser(Archive *fout, const TSParserInfo *prsinfo);
@@ -8258,6 +8259,9 @@ getExtendedStatistics(Archive *fout)
82588259

82598260
/* Decide whether we want to dump it */
82608261
selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
8262+
8263+
if (fout->dopt->dumpStatistics)
8264+
statsextinfo[i].dobj.components |= DUMP_COMPONENT_STATISTICS;
82618265
}
82628266

82638267
PQclear(res);
@@ -11712,6 +11716,7 @@ dumpDumpableObject(Archive *fout, DumpableObject *dobj)
1171211716
break;
1171311717
case DO_STATSEXT:
1171411718
dumpStatisticsExt(fout, (const StatsExtInfo *) dobj);
11719+
dumpStatisticsExtStats(fout, (const StatsExtInfo *) dobj);
1171511720
break;
1171611721
case DO_REFRESH_MATVIEW:
1171711722
refreshMatViewData(fout, (const TableDataInfo *) dobj);
@@ -18514,6 +18519,253 @@ dumpStatisticsExt(Archive *fout, const StatsExtInfo *statsextinfo)
1851418519
free(qstatsextname);
1851518520
}
1851618521

18522+
/*
18523+
* dumpStatisticsExtStats
18524+
* write out to fout the stats for an extended statistics object
18525+
*/
18526+
static void
18527+
dumpStatisticsExtStats(Archive *fout, const StatsExtInfo *statsextinfo)
18528+
{
18529+
DumpOptions *dopt = fout->dopt;
18530+
PQExpBuffer query;
18531+
PGresult *res;
18532+
int nstats;
18533+
18534+
/* Do nothing if not dumping statistics */
18535+
if (!dopt->dumpStatistics)
18536+
return;
18537+
18538+
if (!fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS])
18539+
{
18540+
PQExpBuffer pq = createPQExpBuffer();
18541+
18542+
/*
18543+
* Set up query for constraint-specific details.
18544+
*
18545+
* 19+: query pg_stats_ext and pg_stats_ext_exprs as-is 15-18: query
18546+
* pg_stats_ext translating the ndistinct and depdendencies, 14:
18547+
* inherited is always NULL 12-13: no pg_stats_ext_exprs 10-11: no
18548+
* pg_stats_ext, join pg_statistic_ext and pg_namespace
18549+
*/
18550+
18551+
appendPQExpBufferStr(pq,
18552+
"PREPARE getExtStatsStats(pg_catalog.name, pg_catalog.name) AS\n"
18553+
"SELECT ");
18554+
18555+
/*
18556+
* Versions 15+ have inherited stats.
18557+
*
18558+
* Create this column in all version because we need to order by it later.
18559+
*/
18560+
if (fout->remoteVersion >= 150000)
18561+
appendPQExpBufferStr(pq, "e.inherited, ");
18562+
else
18563+
appendPQExpBufferStr(pq, "false AS inherited, ");
18564+
18565+
/*
18566+
* Versions < 19 use the old ndistintinct and depdendencies formats
18567+
*
18568+
* These transformations may look scary, but all we're doing is translating
18569+
*
18570+
* {"3, 4": 11, "3, 6": 11, "4, 6": 11, "3, 4, 6": 11}
18571+
*
18572+
* to
18573+
*
18574+
* [{"ndistinct": 11, "attributes": [3,4]},
18575+
* {"ndistinct": 11, "attributes": [3,6]},
18576+
* {"ndistinct": 11, "attributes": [4,6]},
18577+
* {"ndistinct": 11, "attributes": [3,4,6]}]
18578+
*
18579+
* and
18580+
* {"3 => 4": 1.000000, "3 => 6": 1.000000, "4 => 6": 1.000000,
18581+
* "3, 4 => 6": 1.000000, "3, 6 => 4": 1.000000}
18582+
*
18583+
* to
18584+
*
18585+
* [{"degree": 1.000000, "attributes": [3], "dependency": 4},
18586+
* {"degree": 1.000000, "attributes": [3], "dependency": 6},
18587+
* {"degree": 1.000000, "attributes": [4], "dependency": 6},
18588+
* {"degree": 1.000000, "attributes": [3,4], "dependency": 6},
18589+
* {"degree": 1.000000, "attributes": [3,6], "dependency": 4}]
18590+
*/
18591+
if (fout->remoteVersion >= 190000)
18592+
appendPQExpBufferStr(pq, "e.n_distinct, e.dependencies, ");
18593+
else
18594+
appendPQExpBufferStr(pq,
18595+
"( "
18596+
"SELECT json_agg( "
18597+
" json_build_object( "
18598+
" 'attributes', "
18599+
" string_to_array(kv.key, ', ')::integer[], "
18600+
" 'ndistinct', "
18601+
" kv.value::bigint )) "
18602+
"FROM json_each_text(e.n_distinct::text::json) AS kv"
18603+
") AS n_distinct, "
18604+
"( "
18605+
"SELECT json_agg( "
18606+
" json_build_object( "
18607+
" 'attributes', "
18608+
" string_to_array( "
18609+
" split_part(kv.key, ' => ', 1), "
18610+
" ', ')::integer[], "
18611+
" 'dependency', "
18612+
" split_part(kv.key, ' => ', 2)::integer, "
18613+
" 'degree', "
18614+
" kv.value::double precision )) "
18615+
"FROM json_each_text(e.dependencies::text::json) AS kv "
18616+
") AS dependencies, ");
18617+
18618+
/* Versions < 12 do not have MCV */
18619+
if (fout->remoteVersion >= 130000)
18620+
appendPQExpBufferStr(pq,
18621+
"e.most_common_vals, e.most_common_val_nulls, "
18622+
"e.most_common_freqs, e.most_common_base_freqs, ");
18623+
else
18624+
appendPQExpBufferStr(pq,
18625+
"NULL AS most_common_vals, NULL AS most_common_val_nulls, "
18626+
"NULL AS most_common_freqs, NULL AS most_common_base_freqs, ");
18627+
18628+
/* Expressions were introduced in v14 */
18629+
if (fout->remoteVersion >= 140000)
18630+
{
18631+
appendPQExpBufferStr(pq,
18632+
"( "
18633+
"SELECT array_agg( "
18634+
" ARRAY[ee.null_frac::text, ee.avg_width::text, "
18635+
" ee.n_distinct::text, ee.most_common_vals::text, "
18636+
" ee.most_common_freqs::text, ee.histogram_bounds::text, "
18637+
" ee.correlation::text, ee.most_common_elems::text, "
18638+
" ee.most_common_elem_freqs::text, "
18639+
" ee.elem_count_histogram::text]) "
18640+
"FROM pg_stats_ext_exprs AS ee "
18641+
"WHERE ee.statistics_schemaname = $1 "
18642+
"AND ee.statistics_name = $2 ");
18643+
18644+
/* Inherited expressions introduced in v15 */
18645+
if (fout->remoteVersion >= 150000)
18646+
appendPQExpBufferStr(pq, "AND ee.inherited = e.inherited");
18647+
18648+
appendPQExpBufferStr(pq, ") AS exprs ");
18649+
}
18650+
else
18651+
appendPQExpBufferStr(pq, "NULL AS exprs ");
18652+
18653+
/* pg_stats_ext introduced in v12 */
18654+
if (fout->remoteVersion >= 120000)
18655+
appendPQExpBufferStr(pq,
18656+
"FROM pg_catalog.pg_stats_ext AS e "
18657+
"WHERE e.statistics_schemaname = $1 "
18658+
"AND e.statistics_name = $2 ");
18659+
else
18660+
appendPQExpBufferStr(pq,
18661+
"FROM ( "
18662+
"SELECT s.stxndistinct AS n_distinct, "
18663+
" s.stxdependencies AS dependencies "
18664+
"FROM pg_catalog.pg_statistics_ext AS s "
18665+
"JOIN pg_catalog.pg_namespace AS n "
18666+
"ON n.oid = s.stxnamespace "
18667+
"WHERE n.nspname = $1 "
18668+
"AND e.stxname = $2 "
18669+
") AS e ");
18670+
18671+
/* we always have an inherited column, but it may be a constant */
18672+
appendPQExpBufferStr(pq, "ORDER BY inherited");
18673+
18674+
ExecuteSqlStatement(fout, pq->data);
18675+
18676+
fout->is_prepared[PREPQUERY_DUMPEXTSTATSSTATS] = true;
18677+
18678+
destroyPQExpBuffer(pq);
18679+
}
18680+
18681+
query = createPQExpBuffer();
18682+
18683+
appendPQExpBufferStr(query, "EXECUTE getExtStatsStats(");
18684+
appendStringLiteralAH(query, statsextinfo->dobj.namespace->dobj.name, fout);
18685+
appendPQExpBufferStr(query, "::pg_catalog.name, ");
18686+
appendStringLiteralAH(query, statsextinfo->dobj.name, fout);
18687+
appendPQExpBufferStr(query, "::pg_catalog.name)");
18688+
18689+
res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
18690+
18691+
destroyPQExpBuffer(query);
18692+
18693+
nstats = PQntuples(res);
18694+
18695+
if (nstats > 0)
18696+
{
18697+
PQExpBuffer out = createPQExpBuffer();
18698+
18699+
int i_inherited = PQfnumber(res, "inherited");
18700+
int i_ndistinct = PQfnumber(res, "n_distinct");
18701+
int i_dependencies = PQfnumber(res, "dependencies");
18702+
int i_mcv = PQfnumber(res, "most_common_vals");
18703+
int i_mcv_nulls = PQfnumber(res, "most_common_val_nulls");
18704+
int i_mcf = PQfnumber(res, "most_common_freqs");
18705+
int i_mcbf = PQfnumber(res, "most_common_base_freqs");
18706+
int i_exprs = PQfnumber(res, "exprs");
18707+
18708+
for (int i = 0; i < nstats; i++)
18709+
{
18710+
if (PQgetisnull(res, i, i_inherited))
18711+
pg_fatal("inherited cannot be NULL");
18712+
18713+
appendPQExpBufferStr(out,
18714+
"SELECT * FROM pg_catalog.pg_restore_extended_stats(\n");
18715+
appendPQExpBuffer(out, "\t'version', '%d'::integer,\n",
18716+
fout->remoteVersion);
18717+
appendPQExpBufferStr(out, "\t'statistics_schemaname', ");
18718+
appendStringLiteralAH(out, statsextinfo->dobj.namespace->dobj.name, fout);
18719+
appendPQExpBufferStr(out, ",\n\t'statistics_name', ");
18720+
appendStringLiteralAH(out, statsextinfo->dobj.name, fout);
18721+
appendNamedArgument(out, fout, "inherited", "boolean",
18722+
PQgetvalue(res, i, i_inherited));
18723+
18724+
if (!PQgetisnull(res, i, i_ndistinct))
18725+
appendNamedArgument(out, fout, "n_distinct", "pg_ndistinct",
18726+
PQgetvalue(res, i, i_ndistinct));
18727+
18728+
if (!PQgetisnull(res, i, i_dependencies))
18729+
appendNamedArgument(out, fout, "dependencies", "pg_dependencies",
18730+
PQgetvalue(res, i, i_dependencies));
18731+
18732+
if (!PQgetisnull(res, i, i_mcv))
18733+
appendNamedArgument(out, fout, "most_common_vals", "text[]",
18734+
PQgetvalue(res, i, i_mcv));
18735+
18736+
if (!PQgetisnull(res, i, i_mcv_nulls))
18737+
appendNamedArgument(out, fout, "most_common_val_nulls", "boolean[]",
18738+
PQgetvalue(res, i, i_mcv_nulls));
18739+
18740+
if (!PQgetisnull(res, i, i_mcf))
18741+
appendNamedArgument(out, fout, "most_common_freqs", "double precision[]",
18742+
PQgetvalue(res, i, i_mcf));
18743+
18744+
if (!PQgetisnull(res, i, i_mcbf))
18745+
appendNamedArgument(out, fout, "most_common_base_freqs", "double precision[]",
18746+
PQgetvalue(res, i, i_mcbf));
18747+
18748+
if (!PQgetisnull(res, i, i_exprs))
18749+
appendNamedArgument(out, fout, "exprs", "text[]",
18750+
PQgetvalue(res, i, i_exprs));
18751+
18752+
appendPQExpBufferStr(out, "\n);\n");
18753+
}
18754+
18755+
ArchiveEntry(fout, nilCatalogId, createDumpId(),
18756+
ARCHIVE_OPTS(.tag = statsextinfo->dobj.name,
18757+
.namespace = statsextinfo->dobj.namespace->dobj.name,
18758+
.owner = statsextinfo->rolname,
18759+
.description = "EXTENDED STATISTICS DATA",
18760+
.section = SECTION_POST_DATA,
18761+
.createStmt = out->data,
18762+
.deps = &statsextinfo->dobj.dumpId,
18763+
.nDeps = 1));
18764+
destroyPQExpBuffer(out);
18765+
}
18766+
PQclear(res);
18767+
}
18768+
1851718769
/*
1851818770
* dumpConstraint
1851918771
* write out to fout a user-defined constraint

src/bin/pg_dump/t/002_pg_dump.pl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4772,6 +4772,34 @@
47724772
},
47734773
},
47744774
4775+
#
4776+
# EXTENDED stats will end up in SECTION_POST_DATA.
4777+
#
4778+
'extended_statistics_import' => {
4779+
create_sql => '
4780+
CREATE TABLE dump_test.has_ext_stats
4781+
AS SELECT g.g AS x, g.g / 2 AS y FROM generate_series(1,100) AS g(g);
4782+
CREATE STATISTICS dump_test.es1 ON x, (y % 2) FROM dump_test.has_ext_stats;
4783+
ANALYZE dump_test.has_ext_stats;',
4784+
regexp => qr/^
4785+
\QSELECT * FROM pg_catalog.pg_restore_extended_stats(\E\s+/xm,
4786+
like => {
4787+
%full_runs,
4788+
%dump_test_schema_runs,
4789+
no_data_no_schema => 1,
4790+
no_schema => 1,
4791+
section_post_data => 1,
4792+
statistics_only => 1,
4793+
schema_only_with_statistics => 1,
4794+
},
4795+
unlike => {
4796+
exclude_dump_test_schema => 1,
4797+
no_statistics => 1,
4798+
only_dump_measurement => 1,
4799+
schema_only => 1,
4800+
},
4801+
},
4802+
47754803
#
47764804
# While attribute stats (aka pg_statistic stats) only appear for tables
47774805
# that have been analyzed, all tables will have relation stats because

0 commit comments

Comments
 (0)