diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index cd28126049df..825d4b895906 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -628,6 +628,7 @@ INSERT INTO loct_empty SELECT id, 'AAA' || to_char(id, 'FM000') FROM generate_series(1, 100) id; DELETE FROM loct_empty; ANALYZE ft_empty; +INFO: Found no remote statistics for "ft_empty" EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM ft_empty ORDER BY c1; QUERY PLAN ------------------------------------------------------------------------------- @@ -4551,7 +4552,8 @@ REINDEX TABLE reind_fdw_parent; -- ok REINDEX TABLE CONCURRENTLY reind_fdw_parent; -- ok DROP TABLE reind_fdw_parent; -- =================================================================== --- conversion error +-- conversion error, will generate a WARNING for imported stats and an +-- error on locally computed stats. -- =================================================================== ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8) WHERE x1 = 1; -- ERROR @@ -4568,6 +4570,9 @@ CONTEXT: whole-row reference to foreign table "ftx" SELECT sum(c2), array_agg(c8) FROM ft1 GROUP BY c8; -- ERROR ERROR: invalid input syntax for type integer: "foo" CONTEXT: processing expression at position 2 in select list +ANALYZE ft1; -- WARNING +WARNING: invalid input syntax for type integer: "foo" +ALTER FOREIGN TABLE ft1 OPTIONS ( fetch_stats 'false' ); ANALYZE ft1; -- ERROR ERROR: invalid input syntax for type integer: "foo" CONTEXT: column "c8" of foreign table "ft1" @@ -7102,6 +7107,7 @@ INSERT INTO loct2 VALUES (1002, 'bar'); CREATE FOREIGN TABLE remt2 (c1 int, c2 text) SERVER loopback OPTIONS (table_name 'loct2'); ANALYZE loct1; ANALYZE remt2; +INFO: Found no remote statistics for "remt2" SET enable_mergejoin TO false; SET enable_hashjoin TO false; SET enable_material TO false; @@ -8784,6 +8790,7 @@ alter foreign table foo2 options (use_remote_estimate 'true'); create index i_loct1_f1 on loct1(f1); create index i_foo_f1 on foo(f1); analyze foo; +INFO: Found no remote statistics for "foo2" analyze loct1; -- inner join; expressions in the clauses appear in the equivalence class list explain (verbose, costs off) @@ -9013,7 +9020,9 @@ insert into remt1 values (2, 'bar'); insert into remt2 values (1, 'foo'); insert into remt2 values (2, 'bar'); analyze remt1; +INFO: Found no remote statistics for "remt1" analyze remt2; +INFO: Found no remote statistics for "remt2" explain (verbose, costs off) update parent set b = parent.b || remt2.b from remt2 where parent.a = remt2.a returning *; QUERY PLAN @@ -10313,6 +10322,8 @@ CREATE FOREIGN TABLE ftprt1_p1 PARTITION OF fprt1 FOR VALUES FROM (0) TO (250) CREATE FOREIGN TABLE ftprt1_p2 PARTITION OF fprt1 FOR VALUES FROM (250) TO (500) SERVER loopback OPTIONS (TABLE_NAME 'fprt1_p2'); ANALYZE fprt1; +INFO: Found no remote statistics for "ftprt1_p1" +INFO: Found no remote statistics for "ftprt1_p2" ANALYZE fprt1_p1; ANALYZE fprt1_p2; CREATE TABLE fprt2 (a int, b int, c varchar) PARTITION BY RANGE(b); @@ -10328,6 +10339,8 @@ ALTER TABLE fprt2 ATTACH PARTITION ftprt2_p1 FOR VALUES FROM (0) TO (250); CREATE FOREIGN TABLE ftprt2_p2 PARTITION OF fprt2 FOR VALUES FROM (250) TO (500) SERVER loopback OPTIONS (table_name 'fprt2_p2', use_remote_estimate 'true'); ANALYZE fprt2; +INFO: Found no remote statistics for "ftprt2_p1" +INFO: Found no remote statistics for "ftprt2_p2" ANALYZE fprt2_p1; ANALYZE fprt2_p2; -- inner join three tables @@ -10515,9 +10528,15 @@ CREATE FOREIGN TABLE fpagg_tab_p1 PARTITION OF pagg_tab FOR VALUES FROM (0) TO ( CREATE FOREIGN TABLE fpagg_tab_p2 PARTITION OF pagg_tab FOR VALUES FROM (10) TO (20) SERVER loopback OPTIONS (table_name 'pagg_tab_p2'); CREATE FOREIGN TABLE fpagg_tab_p3 PARTITION OF pagg_tab FOR VALUES FROM (20) TO (30) SERVER loopback OPTIONS (table_name 'pagg_tab_p3'); ANALYZE pagg_tab; +INFO: Found no remote statistics for "fpagg_tab_p1" +INFO: Found no remote statistics for "fpagg_tab_p2" +INFO: Found no remote statistics for "fpagg_tab_p3" ANALYZE fpagg_tab_p1; +INFO: Found no remote statistics for "fpagg_tab_p1" ANALYZE fpagg_tab_p2; +INFO: Found no remote statistics for "fpagg_tab_p2" ANALYZE fpagg_tab_p3; +INFO: Found no remote statistics for "fpagg_tab_p3" -- When GROUP BY clause matches with PARTITION KEY. -- Plan with partitionwise aggregates is disabled SET enable_partitionwise_aggregate TO false; @@ -11463,6 +11482,8 @@ CREATE FOREIGN TABLE async_p2 PARTITION OF async_pt FOR VALUES FROM (2000) TO (3 INSERT INTO async_p1 SELECT 1000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; INSERT INTO async_p2 SELECT 2000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; ANALYZE async_pt; +INFO: Found no remote statistics for "async_p1" +INFO: Found no remote statistics for "async_p2" -- simple queries CREATE TABLE result_tbl (a int, b int, c text); EXPLAIN (VERBOSE, COSTS OFF) @@ -11569,6 +11590,9 @@ CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4 SERVER loopback2 OPTIONS (table_name 'base_tbl3'); INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; ANALYZE async_pt; +INFO: Found no remote statistics for "async_p1" +INFO: Found no remote statistics for "async_p2" +INFO: Found no remote statistics for "async_p3" EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505; QUERY PLAN @@ -11610,6 +11634,8 @@ DROP TABLE base_tbl3; CREATE TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000); INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; ANALYZE async_pt; +INFO: Found no remote statistics for "async_p1" +INFO: Found no remote statistics for "async_p2" EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505; QUERY PLAN @@ -12665,6 +12691,42 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; -- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', remote_analyze 'true'); +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------+----------- +(0 rows) + +ANALYZE remote_analyze_ftable; +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------------------+----------- + remote_analyze_ftable | 3 + remote_analyze_table | 3 +(2 rows) + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; +-- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== -- Disable debug_discard_caches in order to manage remote connections diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index 04788b7e8b35..7f069373e825 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -120,6 +120,8 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) strcmp(def->defname, "async_capable") == 0 || strcmp(def->defname, "parallel_commit") == 0 || strcmp(def->defname, "parallel_abort") == 0 || + strcmp(def->defname, "fetch_stats") == 0 || + strcmp(def->defname, "remote_analyze") == 0 || strcmp(def->defname, "keep_connections") == 0) { /* these accept only boolean values */ @@ -278,6 +280,14 @@ InitPgFdwOptions(void) {"use_scram_passthrough", ForeignServerRelationId, false}, {"use_scram_passthrough", UserMappingRelationId, false}, + /* fetch_size is available on both server and table */ + {"fetch_stats", ForeignServerRelationId, false}, + {"fetch_stats", ForeignTableRelationId, false}, + + /* remote_analyze is available on both server and table */ + {"remote_analyze", ForeignServerRelationId, false}, + {"remote_analyze", ForeignTableRelationId, false}, + /* * sslcert and sslkey are in fact libpq options, but we repeat them * here to allow them to appear in both foreign server context (when diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 456b267f70b5..096cd4d0fddd 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -22,6 +22,8 @@ #include "commands/explain_format.h" #include "commands/explain_state.h" #include "executor/execAsync.h" +#include "executor/spi.h" +#include "fmgr.h" #include "foreign/fdwapi.h" #include "funcapi.h" #include "miscadmin.h" @@ -402,6 +404,7 @@ static void postgresExecForeignTruncate(List *rels, static bool postgresAnalyzeForeignTable(Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); +static FdwImportStatsResult postgresImportStatistics(Relation relation); static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid); static void postgresGetForeignJoinPaths(PlannerInfo *root, @@ -546,6 +549,114 @@ static void merge_fdw_options(PgFdwRelationInfo *fpinfo, const PgFdwRelationInfo *fpinfo_i); static int get_batch_size_option(Relation rel); +/* + * Static queries for querying remote statistics. + */ + + +/* relallfrozen introduced in v18 */ +static const char *relstats_query_18 = + "SELECT c.relpages, c.reltuples, c.relallvisible, c.relallfrozen " + "FROM pg_catalog.pg_class AS c " + "JOIN pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace " + "WHERE n.nspname = $1 AND c.relname = $2"; + +/* trust reltuples = 0 as of v14 */ +static const char *relstats_query_14 = + "SELECT c.relpages, c.reltuples, c.relallvisible, NULL AS relallfrozen " + "FROM pg_catalog.pg_class AS c " + "JOIN pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace " + "WHERE n.nspname = $1 AND c.relname = $2"; + +/* + * Before v14, a reltuples value of 0 was ambiguous: it could either mean + * the relation is empty, or it could mean that it hadn't yet been + * vacuumed or analyzed. (Newer versions use -1 for the latter case.) + * This ambiguity allegedly can cause the planner to choose inefficient + * plans after restoring to v18 or newer. To deal with this, let's just + * set reltuples to -1 in that case. + */ +static const char *relstats_query_default = + "SELECT c.relpages, " + "CASE c.reltuples WHEN 0 THEN -1 ELSE c.reltuples END AS reltuples, " + "c.relallvisible, NULL AS relallfrozen " + "FROM pg_catalog.pg_class AS c " + "JOIN pg_catalog.pg_namespace AS n ON n.oid = c.relnamespace " + "WHERE n.nspname = $1 AND c.relname = $2"; + +/* All static relstats queries have the same column order */ +enum RelStatsColumns { + RELSTATS_RELPAGES = 0, + RELSTATS_RELTUPLES, + RELSTATS_RELALLVISIBLE, + RELSTATS_RELALLFROZEN, + RELSTATS_NUM_FIELDS +}; + +/* range stats introduced in v17 */ +static const char *attstats_query_17 = + "SELECT DISTINCT ON (s.attname) attname, s.null_frac, s.avg_width, " + "s.n_distinct, s.most_common_vals, s.most_common_freqs, " + "s.histogram_bounds, s.correlation, s.most_common_elems, " + "s.most_common_elem_freqs, s.elem_count_histogram, " + "s.range_length_histogram, s.range_empty_frac, s.range_bounds_histogram " + "FROM pg_catalog.pg_stats AS s " + "WHERE s.schemaname = $1 AND s.tablename = $2 " + "ORDER BY s.attname, s.inherited DESC"; + +/* elements stats introduced in 9.2 */ +static const char *attstats_query_9_2 = + "SELECT DISTINCT ON (s.attname) attname, s.null_frac, s.avg_width, " + "s.n_distinct, s.most_common_vals, s.most_common_freqs, " + "s.histogram_bounds, s.correlation, s.most_common_elems, " + "s.most_common_elem_freqs, s.elem_count_histogram, " + "NULL AS range_length_histogram, NULL AS range_empty_frac, " + "NULL AS range_bounds_histogram " + "FROM pg_catalog.pg_stats AS s " + "WHERE s.schemaname = $1 AND s.tablename = $2 " + "ORDER BY s.attname, s.inherited DESC"; + +/* inherited introduced in 9.0 */ +static const char *attstats_query_9_0 = + "SELECT DISTINCT ON (s.attname) attname, s.null_frac, s.avg_width, " + "s.n_distinct, s.most_common_vals, s.most_common_freqs, " + "s.histogram_bounds, s.correlation, NULL AS most_common_elems, " + "NULL AS most_common_elem_freqs, NULL AS elem_count_histogram, " + "NULL AS range_length_histogram, NULL AS range_empty_frac, " + "NULL AS range_bounds_histogram " + "FROM pg_catalog.pg_stats AS s " + "WHERE s.schemaname = $1 AND s.tablename = $2 " + "ORDER BY s.attname, s.inherited DESC"; + +static const char *attstats_query_default = + "SELECT s.attname, s.null_frac, s.avg_width, " + "s.n_distinct, s.most_common_vals, s.most_common_freqs, " + "s.histogram_bounds, s.correlation, NULL AS most_common_elems, " + "NULL AS most_common_elem_freqs, NULL AS elem_count_histogram, " + "NULL AS range_length_histogram, NULL AS range_empty_frac, " + "NULL AS range_bounds_histogram " + "FROM pg_catalog.pg_stats AS s " + "WHERE s.schemaname = $1 AND s.tablename = $2 " + "ORDER BY s.attname"; + +/* All static attstats queries have the same column order */ +enum AttStatsColumns { + ATTSTATS_ATTNAME = 0, + ATTSTATS_NULL_FRAC, + ATTSTATS_AVG_WIDTH, + ATTSTATS_N_DISTINCT, + ATTSTATS_MOST_COMMON_VALS, + ATTSTATS_MOST_COMMON_FREQS, + ATTSTATS_HISTOGRAM_BOUNDS, + ATTSTATS_CORRELATION, + ATTSTATS_MOST_COMMON_ELEMS, + ATTSTATS_MOST_COMMON_ELEM_FREQS, + ATTSTATS_ELEM_COUNT_HISTOGRAM, + ATTSTATS_RANGE_LENGTH_HISTOGRAM, + ATTSTATS_RANGE_EMPTY_FRAC, + ATTSTATS_RANGE_BOUNDS_HISTOGRAM, + ATTSTATS_NUM_FIELDS +}; /* * Foreign-data wrapper handler function: return a struct with pointers @@ -595,6 +706,7 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) /* Support functions for ANALYZE */ routine->AnalyzeForeignTable = postgresAnalyzeForeignTable; + routine->ImportStatistics = postgresImportStatistics; /* Support functions for IMPORT FOREIGN SCHEMA */ routine->ImportForeignSchema = postgresImportForeignSchema; @@ -4935,6 +5047,440 @@ postgresAnalyzeForeignTable(Relation relation, return true; } +/* + * Process optional argument. + * + * Cannot be the first argument in the SQL function call. + * + * It is safe to presume that argname and argtype are quote-safe. + * + * Argument values can potentially be quite large, so free the quoted string + * after use. + */ +static void +append_optional(StringInfo str, PGresult *res, int row, int field, + const char *argname, const char *argtype) +{ + if (!PQgetisnull(res, row, field)) + { + /* Argument values can be quite large, so free after use */ + char *argval_l = quote_literal_cstr(PQgetvalue(res, row, field)); + + appendStringInfo(str, ",\n\t'%s', %s::%s", argname, argval_l, argtype); + + pfree(argval_l); + } +} + + +/* + * Generate a pg_restore_relation_stats command. + */ +static char * +restore_relation_stats_sql(PGresult *res, const char *schemaname_l, + const char *relname_l, + const int server_version_num) +{ + StringInfoData sql; + + initStringInfo(&sql); + appendStringInfo(&sql, "SELECT pg_catalog.pg_restore_relation_stats(\n" + "\t'version', %d::integer,\n" + "\t'schemaname', %s,\n" + "\t'relname', %s", + server_version_num, schemaname_l, relname_l); + + append_optional(&sql, res, 0, RELSTATS_RELPAGES, "relpages", "integer"); + append_optional(&sql, res, 0, RELSTATS_RELTUPLES, "reltuples", "real"); + append_optional(&sql, res, 0, RELSTATS_RELALLVISIBLE, "relallvisible", "integer"); + append_optional(&sql, res, 0, RELSTATS_RELALLFROZEN, "relallfrozen", "integer"); + + appendStringInfoChar(&sql, ')'); + + return sql.data; +} + +/* + * Generate a pg_restore_attribute_stats command. + */ +static char * +restore_attribute_stats_sql(PGresult *res, int row, const char *schemaname_l, + const char *relname_l, const AttrNumber attnum, + const int server_version_num) +{ + StringInfoData sql; + + initStringInfo(&sql); + appendStringInfo(&sql, "SELECT pg_catalog.pg_restore_attribute_stats(\n" + "\t'version', %d::integer,\n" + "\t'schemaname', %s,\n" + "\t'relname', %s,\n" + "\t'attnum', %d::smallint,\n" + "\t'inherited', false::boolean", + server_version_num, schemaname_l, relname_l, attnum); + + append_optional(&sql, res, row, ATTSTATS_NULL_FRAC, "null_frac", "real"); + append_optional(&sql, res, row, ATTSTATS_AVG_WIDTH, "avg_width", "integer"); + append_optional(&sql, res, row, ATTSTATS_N_DISTINCT, "n_distinct", "real"); + append_optional(&sql, res, row, ATTSTATS_MOST_COMMON_VALS, "most_common_vals", "text"); + append_optional(&sql, res, row, ATTSTATS_MOST_COMMON_FREQS, "most_common_freqs", "real[]"); + append_optional(&sql, res, row, ATTSTATS_HISTOGRAM_BOUNDS, "histogram_bounds", "text"); + append_optional(&sql, res, row, ATTSTATS_CORRELATION, "correlation", "real"); + append_optional(&sql, res, row, ATTSTATS_MOST_COMMON_ELEMS, "most_common_elems", "text"); + append_optional(&sql, res, row, ATTSTATS_MOST_COMMON_ELEM_FREQS, "most_common_elem_freqs", "real[]"); + append_optional(&sql, res, row, ATTSTATS_ELEM_COUNT_HISTOGRAM, "elem_count_histogram", "real[]"); + append_optional(&sql, res, row, ATTSTATS_RANGE_LENGTH_HISTOGRAM, "range_length_histogram", "text"); + append_optional(&sql, res, row, ATTSTATS_RANGE_EMPTY_FRAC, "range_empty_frac", "real"); + append_optional(&sql, res, row, ATTSTATS_RANGE_BOUNDS_HISTOGRAM, "range_bounds_histogram", "text"); + + appendStringInfoChar(&sql, ')'); + + return sql.data; +} + +/* + * postgresImportStatistics + * Attempt to fetch remote statistics and apply those instead of analyzing. + */ +static FdwImportStatsResult +postgresImportStatistics(Relation relation) +{ + + ForeignTable *table; + ForeignServer *server; + UserMapping *user; + PGconn *conn; + ListCell *lc; + bool fetch_stats = true; + bool remote_analyze = false; + int server_version_num; + + const char *relation_sql; + const char *attribute_sql; + const char *schemaname; + const char *relname; + const char *remote_schemaname = NULL; + const char *remote_relname = NULL; + + const char *schemaname_l; + const char *relname_l; + + char *relimport_sql; + + PGresult *res; + TupleDesc tupdesc; + const char *sql_params[2]; + int sql_param_formats[2] = {0, 0}; + + table = GetForeignTable(RelationGetRelid(relation)); + server = GetForeignServer(table->serverid); + + /* + * Server-level options can be overridden by table-level options, so check + * server-level first. + */ + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "fetch_stats") == 0) + fetch_stats = defGetBoolean(def); + else if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); + } + + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "fetch_stats") == 0) + fetch_stats = defGetBoolean(def); + else if (strcmp(def->defname, "schema_name") == 0) + remote_schemaname = defGetString(def); + else if (strcmp(def->defname, "table_name") == 0) + remote_relname = defGetString(def); + else if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); + } + + if (fetch_stats == false) + return FDW_IMPORT_STATS_DISABLED; + + user = GetUserMapping(GetUserId(), table->serverid); + conn = GetConnection(user, false, NULL); + server_version_num = PQserverVersion(conn); + + schemaname = get_namespace_name(RelationGetNamespace(relation)); + + relname = RelationGetRelationName(relation); + + schemaname_l = quote_literal_cstr(schemaname); + relname_l = quote_literal_cstr(relname); + + if (remote_schemaname == NULL) + remote_schemaname = schemaname; + if (remote_relname == NULL) + remote_relname = relname; + sql_params[0] = remote_schemaname; + sql_params[1] = remote_relname; + + if (server_version_num >= 180000) + relation_sql = relstats_query_18; + else if (server_version_num >= 140000) + relation_sql = relstats_query_14; + else + relation_sql = relstats_query_default; + + if (server_version_num >= 170000) + attribute_sql = attstats_query_17; + else if (server_version_num >= 90200) + attribute_sql = attstats_query_9_2; + else if (server_version_num >= 90000) + attribute_sql = attstats_query_9_0; + else + attribute_sql = attstats_query_default; + + /* + * pg_restore_attribute_stats + * + * We do this before relation stats because we may retry it if no stats were + * found. + */ + if (!PQsendQueryParams(conn, attribute_sql, 2, NULL, sql_params, NULL, + sql_param_formats, 0)) + { + pgfdw_report(INFO, NULL, conn, attribute_sql); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + + /* + * Get the result, and check for success. + * If the query failed or the result set is of the wrong shape, then + * fail the import and fall back to local analysis. + */ + res = pgfdw_get_result(conn); + if (PQresultStatus(res) != PGRES_TUPLES_OK + || PQnfields(res) != ATTSTATS_NUM_FIELDS) + { + pgfdw_report(INFO, res, conn, attribute_sql); + PQclear(res); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + + /* + * If we got a query response of the right shape, but there were no rows, + * then the remote is just missing statistics + */ + if (PQntuples(res) == 0) + { + StringInfoData buf; + + PQclear(res); + + /* + * If remote_analyze is not enabled, any failure to find statistics are + * considered temporary. This is not an error, but we should fall back + * to regular local analyzis if enabled. + */ + if (!remote_analyze) + { + ReleaseConnection(conn); + return FDW_IMPORT_STATS_NOTFOUND; + } + + /* + * Analyze the remote table and try again. If it's still empty, then that's + * an error indicating that we probably shouldn't do remote analysis going + * forward. + */ + initStringInfo(&buf); + + appendStringInfo(&buf, "ANALYZE %s.%s", + quote_identifier(remote_schemaname), + quote_identifier(remote_relname)); + + res = pgfdw_exec_query(conn, buf.data, NULL); + + if (res == NULL || PQresultStatus(res) != PGRES_COMMAND_OK) + { + pgfdw_report(NOTICE, res, conn, buf.data); + pfree(buf.data); + PQclear(res); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + + PQclear(res); + pfree(buf.data); + + /* retry attribute stats query */ + if (!PQsendQueryParams(conn, attribute_sql, 2, NULL, sql_params, NULL, + sql_param_formats, 0)) + { + pgfdw_report(INFO, NULL, conn, attribute_sql); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + res = pgfdw_get_result(conn); + + /* getting nothing on the second try is a failure */ + if (PQresultStatus(res) != PGRES_TUPLES_OK + || PQntuples(res) == 0 + || PQnfields(res) != ATTSTATS_NUM_FIELDS) + { + pgfdw_report(INFO, res, conn, attribute_sql); + PQclear(res); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + } + + SPI_connect(); + + /* + * Walk all local table attributes looking for name matches in the result + * set and perform a pg_restore_attribute_stats() on each match. + * + * XXX: the result set is sorted by attname, so perhaps we could do a binary + * search of the result set. Alternately we could collect the local attributes + * in a list and sort that by remote name, which would allow us to iterate via + * a merge. + * + * XXX: what should be done if match_found = false? + */ + tupdesc = RelationGetDescr(relation); + for (int i = 0; i < tupdesc->natts; i++) + { + char *remote_colname; + List *fc_options; + ListCell *fc_lc; + AttrNumber attnum; + bool match_found = false; + + /* Ignore dropped columns. */ + if (TupleDescAttr(tupdesc, i)->attisdropped) + continue; + + /* Ignore generated columns, but maybe this should fail the import? */ + if (TupleDescAttr(tupdesc, i)->attgenerated) + continue; + + attnum = TupleDescAttr(tupdesc, i)->attnum; + + /* default remote_colname is attname */ + remote_colname = NameStr(TupleDescAttr(tupdesc, i)->attname); + fc_options = GetForeignColumnOptions(RelationGetRelid(relation), i + 1); + + foreach(fc_lc, fc_options) + { + DefElem *def = (DefElem *) lfirst(fc_lc); + + if (strcmp(def->defname, "column_name") == 0) + { + remote_colname = defGetString(def); + break; + } + } + + for (int j = 0; j < PQntuples(res); j++) + { + char *attimport_sql; + + if (PQgetisnull(res, j, ATTSTATS_ATTNAME)) + continue; + + if (strcmp(PQgetvalue(res, j, ATTSTATS_ATTNAME), remote_colname) != 0) + continue; + + match_found = true; + attimport_sql = restore_attribute_stats_sql(res, j, schemaname_l, relname_l, attnum, server_version_num); + + if (SPI_execute(attimport_sql, false, 1) != SPI_OK_SELECT) + { + /* + * It takes a lot to make a restore command fail outright, so any actual + * failure is a sign that the statistics are seriously malformed, and + * we should give up on importing stats for this table. + */ + ereport(INFO, + (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), + errmsg("Attribute statistics import failed %s", attimport_sql))); + SPI_finish(); + ReleaseConnection(conn); + pfree(attimport_sql); + return FDW_IMPORT_STATS_FAILED; + } + + pfree(attimport_sql); + } + + /* TODO: should this be an error? What action could we take to remediate? */ + if (!match_found) + ereport(INFO, + (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), + errmsg("Attribute statistics found for %s.%s but no columns matched", + quote_identifier(schemaname), + quote_identifier(relname)))); + } + PQclear(res); + + /* + * pg_restore_relation_stats + */ + if (!PQsendQueryParams(conn, relation_sql, 2, NULL, sql_params, NULL, + sql_param_formats, 0)) + { + pgfdw_report(INFO, NULL, conn, relation_sql); + SPI_finish(); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + + /* + * Get the result, and check for success. + * If the query failed or the result set is of the wrong shape, then + * fail the import and fall back to local analysis. + */ + res = pgfdw_get_result(conn); + if (PQresultStatus(res) != PGRES_TUPLES_OK + || PQntuples(res) != 1 + || PQnfields(res) != RELSTATS_NUM_FIELDS) + { + /* unable to get relation stats, fall back on table sampling */ + pgfdw_report(INFO, res, conn, attribute_sql); + PQclear(res); + ReleaseConnection(conn); + return FDW_IMPORT_STATS_FAILED; + } + + relimport_sql = restore_relation_stats_sql(res, schemaname_l, relname_l, server_version_num); + + if (SPI_execute(relimport_sql, false, 0) != SPI_OK_SELECT) + { + /* + * It takes a lot to make a restore command fail outright, so any actual + * failure is a sign that the statistics are seriously malformed, and + * we should give up on importing stats for this table. + */ + ereport(INFO, + (errcode(ERRCODE_FDW_SCHEMA_NOT_FOUND), + errmsg("Relation statistics import failed %s", relimport_sql))); + SPI_finish(); + ReleaseConnection(conn); + pfree(relimport_sql); + return FDW_IMPORT_STATS_FAILED; + } + + pfree(relimport_sql); + + SPI_finish(); + + return FDW_IMPORT_STATS_OK; +} + + /* * postgresGetAnalyzeInfoForForeignTable * Count tuples in foreign table (just get pg_class.reltuples). diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 9a8f9e281353..09a98c860f16 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -1278,7 +1278,8 @@ REINDEX TABLE CONCURRENTLY reind_fdw_parent; -- ok DROP TABLE reind_fdw_parent; -- =================================================================== --- conversion error +-- conversion error, will generate a WARNING for imported stats and an +-- error on locally computed stats. -- =================================================================== ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8) WHERE x1 = 1; -- ERROR @@ -1287,6 +1288,8 @@ SELECT ftx.x1, ft2.c2, ftx.x8 FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8), ft2 SELECT ftx.x1, ft2.c2, ftx FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8), ft2 WHERE ftx.x1 = ft2.c1 AND ftx.x1 = 1; -- ERROR SELECT sum(c2), array_agg(c8) FROM ft1 GROUP BY c8; -- ERROR +ANALYZE ft1; -- WARNING +ALTER FOREIGN TABLE ft1 OPTIONS ( fetch_stats 'false' ); ANALYZE ft1; -- ERROR ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE user_enum; @@ -4379,6 +4382,38 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; +-- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); + +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', remote_analyze 'true'); + +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +ANALYZE remote_analyze_ftable; + +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; + -- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index 781a01067f7d..c395d1061d5d 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -332,7 +332,7 @@ OPTIONS (ADD password_required 'false'); - The following option controls how such an ANALYZE + The following options control how such an ANALYZE operation behaves: @@ -364,6 +364,39 @@ OPTIONS (ADD password_required 'false'); + + fetch_stats (boolean) + + + This option, which can be specified for a foreign table or a foreign + server, determines if ANALYZE on a foreign table + will first attempt to fetch and import the existing relation and + attribute statistics from the remote table, and only attempt regular + data sampling if no statistics were availble. This option is only + useful if the remote relation is one that can have regular statistics + (tables and materialized views). + The default is true. + + + + + + remote_analyze (boolean) + + + This option, which can be specified for a foreign table or a foreign + server, determines wheter an ANALYZE on a foreign + table will attempt to ANALYZE the remote table if + the first attempt to fetch remote statistics fails, and will then + make a second and final attempt to fetch remote statistics. This option + is only meaningful if the foreign table has + fetch_stats enabled at either the server or table + level. + The default is false. + + + + diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index c2e216563c63..616333d55744 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -196,13 +196,57 @@ analyze_rel(Oid relid, RangeVar *relation, { /* * For a foreign table, call the FDW's hook function to see whether it - * supports analysis. + * supports statistics import and/or analysis. */ FdwRoutine *fdwroutine; bool ok = false; fdwroutine = GetFdwRoutineForRelation(onerel, false); + if (fdwroutine->ImportStatistics != NULL) + { + FdwImportStatsResult res; + + /* + * Fetching pre-existing remote stats is not guaranteed to be a quick + * operation. + * + * XXX: Should this be it's own fetch type? If not, then there might be + * confusion when a long stats-fetch fails, followed by a regular analyze, + * which would make it look like the table was analyzed twice. + */ + pgstat_progress_start_command(PROGRESS_COMMAND_ANALYZE, + RelationGetRelid(onerel)); + + res = fdwroutine->ImportStatistics(onerel); + + pgstat_progress_end_command(); + + /* + * If we were able to import statistics, then there is no need to collect + * samples for local analysis. + */ + switch(res) + { + case FDW_IMPORT_STATS_OK: + relation_close(onerel, NoLock); + return; + break; + case FDW_IMPORT_STATS_DISABLED: + break; + case FDW_IMPORT_STATS_NOTFOUND: + ereport(INFO, + (errmsg("Found no remote statistics for \"%s\"", + RelationGetRelationName(onerel)))); + break; + case FDW_IMPORT_STATS_FAILED: + default: + ereport(INFO, + (errmsg("Fetching remote statistics from \"%s\" failed", + RelationGetRelationName(onerel)))); + } + } + if (fdwroutine->AnalyzeForeignTable != NULL) ok = fdwroutine->AnalyzeForeignTable(onerel, &acquirefunc, diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index fcd7e7027f3b..eb6284979fbd 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -19,6 +19,15 @@ /* avoid including explain_state.h here */ typedef struct ExplainState ExplainState; +/* result of ImportStatistics */ +typedef enum +{ + FDW_IMPORT_STATS_OK = 0, /* was able to import statistics */ + FDW_IMPORT_STATS_DISABLED, /* import disabled for this table */ + FDW_IMPORT_STATS_NOTFOUND, /* no remote attribute stats found */ + FDW_IMPORT_STATS_FAILED /* remote query failure of some kind */ +} FdwImportStatsResult; + /* * Callback function signatures --- see fdwhandler.sgml for more info. @@ -157,6 +166,8 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); +typedef FdwImportStatsResult (*ImportStatistics_function) (Relation relation); + typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt, Oid serverOid); @@ -255,6 +266,7 @@ typedef struct FdwRoutine /* Support functions for ANALYZE */ AnalyzeForeignTable_function AnalyzeForeignTable; + ImportStatistics_function ImportStatistics; /* Support functions for IMPORT FOREIGN SCHEMA */ ImportForeignSchema_function ImportForeignSchema;