From 441458817036cd19b60903e023a1b4d1c76654ea Mon Sep 17 00:00:00 2001 From: Tatsuro Yamada Date: Thu, 5 Jun 2025 18:50:34 +0900 Subject: [PATCH] Add new GUC parameter: enable_groupagg Previously, there was no GUC parameter to control the use of GroupAggregate, so we couldn't influence the planner's choice in certain queries. This patch adds a new parameter, "enable_groupagg", which allows users to enable or disable GroupAggregate explicitly. By disabling GroupAggregate, the planner may choose HashAggregate instead, potentially resulting in a more efficient execution plan for some queries. --- doc/src/sgml/config.sgml | 14 ++ src/backend/optimizer/path/costsize.c | 3 + src/backend/utils/misc/guc_tables.c | 10 ++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/optimizer/cost.h | 1 + src/test/regress/expected/aggregates.out | 120 ++++++++++++++++++ src/test/regress/expected/sysviews.out | 3 +- src/test/regress/sql/aggregates.sql | 81 ++++++++++++ 8 files changed, 232 insertions(+), 1 deletion(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c7acc0f182f3..cc1e4b9ba460 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5519,6 +5519,20 @@ ANY num_sync ( + enable_groupagg (boolean) + + enable_groupagg configuration parameter + + + + + Enables or disables the query planner's use of grouped + aggregation plan types. The default is on. + + + + enable_hashjoin (boolean) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1f04a2c182ca..0b66a89615d5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -150,6 +150,7 @@ bool enable_tidscan = true; bool enable_sort = true; bool enable_incremental_sort = true; bool enable_hashagg = true; +bool enable_groupagg = true; bool enable_nestloop = true; bool enable_material = true; bool enable_memoize = true; @@ -2763,6 +2764,8 @@ cost_agg(Path *path, PlannerInfo *root, /* Here we are able to deliver output on-the-fly */ startup_cost = input_startup_cost; total_cost = input_total_cost; + if (aggstrategy == AGG_SORTED && !enable_groupagg && enable_hashagg) + ++disabled_nodes; if (aggstrategy == AGG_MIXED && !enable_hashagg) ++disabled_nodes; /* calcs phrased this way to match HASHED case, see note above */ diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index d14b1678e7fe..4416c250968f 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -869,6 +869,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_groupagg", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of grouped aggregation plans."), + NULL, + GUC_EXPLAIN + }, + &enable_groupagg, + true, + NULL, NULL, NULL + }, { {"enable_material", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of materialization."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a9d8293474af..4b5379f98ce9 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -408,6 +408,7 @@ #enable_bitmapscan = on #enable_gathermerge = on #enable_hashagg = on +#enable_groupagg = on #enable_hashjoin = on #enable_incremental_sort = on #enable_indexscan = on diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index b523bcda8f3d..c002e8c71445 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -57,6 +57,7 @@ extern PGDLLIMPORT bool enable_tidscan; extern PGDLLIMPORT bool enable_sort; extern PGDLLIMPORT bool enable_incremental_sort; extern PGDLLIMPORT bool enable_hashagg; +extern PGDLLIMPORT bool enable_groupagg; extern PGDLLIMPORT bool enable_nestloop; extern PGDLLIMPORT bool enable_material; extern PGDLLIMPORT bool enable_memoize; diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1f1ce2380af6..0911cb33c0a6 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -3581,3 +3581,123 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; +-- create table to check enable_groupagg +CREATE TABLE test_groupagg( + id serial primary key, + c1 text, + c2 text, + c3 numeric); +INSERT INTO test_groupagg (c1, c2, c3) VALUES +('a', 'GGG', 100), +('a', 'GGG', 150), +('a', 'rrr', 200), +('b', 'ooo', 300), +('b', 'ooo', 250), +('b', 'uuu', 100), +('c', 'ppp', 500), +('c', 'ppp', 600), +('c', 'aaa', 550); +ANALYZE; +-- default: GroupAgg and HashAgg are mixed and selected +SET max_parallel_workers to 0; +SET max_parallel_workers_per_gather to 0; +SET enable_hashagg to default; +SET enable_groupagg to default; +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + QUERY PLAN +----------------------------------------------------------------------------- + GroupAggregate + Group Key: sub.c1 + -> Sort + Sort Key: sub.c1 + -> Subquery Scan on sub + -> HashAggregate + Group Key: test_groupagg.c1, test_groupagg.c2 + -> Seq Scan on test_groupagg + Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0' +(9 rows) + +-- Only GroupAgg is selected +set enable_hashagg to off; +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + QUERY PLAN +----------------------------------------------------------------------------------------------------- + GroupAggregate + Group Key: test_groupagg.c1 + -> GroupAggregate + Group Key: test_groupagg.c1, test_groupagg.c2 + -> Sort + Sort Key: test_groupagg.c1, test_groupagg.c2 + -> Seq Scan on test_groupagg + Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_hashagg = 'off' +(8 rows) + +-- Only HashAgg is selected +SET enable_hashagg to on; +SET enable_groupagg to off; +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Sort + Sort Key: test_groupagg.c1 + -> HashAggregate + Group Key: test_groupagg.c1 + -> HashAggregate + Group Key: test_groupagg.c1, test_groupagg.c2 + -> Seq Scan on test_groupagg + Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_groupagg = 'off' +(8 rows) + +-- Only GroupAgg is selected as a fallback +SET enable_hashagg to off; +SET enable_groupagg to off; +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------ + GroupAggregate + Group Key: test_groupagg.c1 + -> GroupAggregate + Group Key: test_groupagg.c1, test_groupagg.c2 + -> Sort + Sort Key: test_groupagg.c1, test_groupagg.c2 + -> Seq Scan on test_groupagg + Settings: max_parallel_workers = '0', max_parallel_workers_per_gather = '0', enable_hashagg = 'off', enable_groupagg = 'off' +(8 rows) + +RESET enable_hashagg; +RESET enable_groupagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE test_groupagg; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 83228cfca293..f10371d6e269 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -153,6 +153,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_distinct_reordering | on enable_gathermerge | on enable_group_by_reordering | on + enable_groupagg | on enable_hashagg | on enable_hashjoin | on enable_incremental_sort | on @@ -172,7 +173,7 @@ select name, setting from pg_settings where name like 'enable%'; enable_seqscan | on enable_sort | on enable_tidscan | on -(24 rows) +(25 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 277b4b198ccc..92493ead1f97 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -1644,3 +1644,84 @@ drop table agg_hash_1; drop table agg_hash_2; drop table agg_hash_3; drop table agg_hash_4; + +-- create table to check enable_groupagg +CREATE TABLE test_groupagg( + id serial primary key, + c1 text, + c2 text, + c3 numeric); +INSERT INTO test_groupagg (c1, c2, c3) VALUES +('a', 'GGG', 100), +('a', 'GGG', 150), +('a', 'rrr', 200), +('b', 'ooo', 300), +('b', 'ooo', 250), +('b', 'uuu', 100), +('c', 'ppp', 500), +('c', 'ppp', 600), +('c', 'aaa', 550); +ANALYZE; + +-- default: GroupAgg and HashAgg are mixed and selected +SET max_parallel_workers to 0; +SET max_parallel_workers_per_gather to 0; +SET enable_hashagg to default; +SET enable_groupagg to default; + +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + +-- Only GroupAgg is selected +set enable_hashagg to off; + +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + +-- Only HashAgg is selected +SET enable_hashagg to on; +SET enable_groupagg to off; + +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + +-- Only GroupAgg is selected as a fallback +SET enable_hashagg to off; +SET enable_groupagg to off; + +EXPLAIN(costs off, settings) +SELECT c1, AVG(total) +FROM ( + SELECT c1, c2, SUM(c3) AS total + FROM test_groupagg + GROUP BY c1, c2 +) AS sub +GROUP BY c1 +ORDER BY c1; + +RESET enable_hashagg; +RESET enable_groupagg; +RESET max_parallel_workers; +RESET max_parallel_workers_per_gather; +DROP TABLE test_groupagg;