@@ -406,6 +406,7 @@ static void build_hash_tables(AggState *aggstate);
 static void build_hash_table(AggState *aggstate, int setno, long nbuckets);
 static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
 										   bool nullcheck);
+static void hash_create_memory(AggState *aggstate);
 static long hash_choose_num_buckets(double hashentrysize,
 									long ngroups, Size memory);
 static int	hash_choose_num_partitions(double input_groups,
@@ -1509,7 +1510,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 {
 	AggStatePerHash perhash = &aggstate->perhash[setno];
 	MemoryContext metacxt = aggstate->hash_metacxt;
-	MemoryContext hashcxt = aggstate->hashcontext->ecxt_per_tuple_memory;
+	MemoryContext tablecxt = aggstate->hash_tablecxt;
 	MemoryContext tmpcxt = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	Size		additionalsize;
 
@@ -1535,7 +1536,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 											 nbuckets,
 											 additionalsize,
 											 metacxt,
-											 hashcxt,
+											 tablecxt,
 											 tmpcxt,
 											 DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
 }
@@ -1706,15 +1707,19 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
 										 tupleWidth);
 	Size		pergroupSize = numTrans * sizeof(AggStatePerGroupData);
 
-	tupleChunkSize = CHUNKHDRSZ + tupleSize;
-
-	if (pergroupSize > 0)
-		pergroupChunkSize = CHUNKHDRSZ + pergroupSize;
-	else
-		pergroupChunkSize = 0;
+	/*
+	 * Entries use the Bump allocator, so the chunk sizes are the same as the
+	 * requested sizes.
+	 */
+	tupleChunkSize = MAXALIGN(tupleSize);
+	pergroupChunkSize = pergroupSize;
 
+	/*
+	 * Transition values use AllocSet, which has a chunk header and also uses
+	 * power-of-two allocations.
+	 */
 	if (transitionSpace > 0)
-		transitionChunkSize = CHUNKHDRSZ + transitionSpace;
+		transitionChunkSize = CHUNKHDRSZ + pg_nextpower2_size_t(transitionSpace);
 	else
 		transitionChunkSize = 0;
 
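
For illustration, the effect of the revised estimate can be sketched outside the backend. The standalone program below is only an approximation: MAXALIGN, CHUNKHDRSZ, and the power-of-two rounding are reimplemented locally with assumed values, and the input sizes are made up rather than computed by hash_agg_entry_size() itself. It shows why bump-allocated parts are charged only their requested (aligned) size, while AllocSet-backed transition values still pay a chunk header plus power-of-two rounding.

```c
/*
 * Standalone sketch of the revised per-entry size arithmetic.  The macro
 * values and example sizes are assumptions for illustration, not
 * PostgreSQL's actual definitions.
 */
#include <stdio.h>
#include <stddef.h>

#define MAXIMUM_ALIGNOF 8
#define MAXALIGN(len)	(((size_t) (len) + (MAXIMUM_ALIGNOF - 1)) & \
						 ~((size_t) (MAXIMUM_ALIGNOF - 1)))
#define CHUNKHDRSZ		16		/* assumed AllocSet chunk header size */

/* round up to the next power of two, like pg_nextpower2_size_t() */
static size_t
next_pow2(size_t n)
{
	size_t		p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int
main(void)
{
	size_t		tupleSize = 40;			/* minimal tuple header + tuple width */
	size_t		pergroupSize = 16;		/* numTrans * sizeof(AggStatePerGroupData) */
	size_t		transitionSpace = 24;	/* declared by-ref transition size */

	/* old estimate: every part charged an AllocSet chunk header */
	size_t		old_est = (CHUNKHDRSZ + tupleSize) +
		(CHUNKHDRSZ + pergroupSize) +
		(CHUNKHDRSZ + transitionSpace);

	/* new estimate: bump-allocated parts pay no header or rounding */
	size_t		new_est = MAXALIGN(tupleSize) + pergroupSize +
		(CHUNKHDRSZ + next_pow2(transitionSpace));

	printf("old per-entry estimate: %zu bytes\n", old_est);
	printf("new per-entry estimate: %zu bytes\n", new_est);
	return 0;
}
```
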
@@ -1864,8 +1869,11 @@ hash_agg_check_limits(AggState *aggstate)
 	uint64		ngroups = aggstate->hash_ngroups_current;
 	Size		meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt,
 													 true);
-	Size		hashkey_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
-														true);
+	Size		entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt,
+													  true);
+	Size		tval_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory,
+													 true);
+	Size		total_mem = meta_mem + entry_mem + tval_mem;
 	bool		do_spill = false;
 
 #ifdef USE_INJECTION_POINTS
@@ -1884,7 +1892,7 @@ hash_agg_check_limits(AggState *aggstate)
 	 * can be sure to make progress even in edge cases.
 	 */
 	if (aggstate->hash_ngroups_current > 0 &&
-		(meta_mem + hashkey_mem > aggstate->hash_mem_limit ||
+		(total_mem > aggstate->hash_mem_limit ||
 		 ngroups > aggstate->hash_ngroups_limit))
 	{
 		do_spill = true;
@@ -1939,6 +1947,7 @@ static void
 hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 {
 	Size		meta_mem;
+	Size		entry_mem;
 	Size		hashkey_mem;
 	Size		buffer_mem;
 	Size		total_mem;
@@ -1950,7 +1959,10 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 
 	/* memory for the hash table itself */
 	meta_mem = MemoryContextMemAllocated(aggstate->hash_metacxt, true);
-	/* memory for the group keys and transition states */
+	/* memory for hash entries */
+	entry_mem = MemoryContextMemAllocated(aggstate->hash_tablecxt, true);
+
+	/* memory for byref transition states */
 	hashkey_mem = MemoryContextMemAllocated(aggstate->hashcontext->ecxt_per_tuple_memory, true);
 
 	/* memory for read/write tape buffers, if spilled */
@@ -1959,7 +1971,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 		buffer_mem += HASHAGG_READ_BUFFER_SIZE;
 
 	/* update peak mem */
-	total_mem = meta_mem + hashkey_mem + buffer_mem;
+	total_mem = meta_mem + entry_mem + hashkey_mem + buffer_mem;
 	if (total_mem > aggstate->hash_mem_peak)
 		aggstate->hash_mem_peak = total_mem;
 
@@ -1981,6 +1993,64 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	}
 }
 
+/*
+ * Create memory contexts used for hash aggregation.
+ */
+static void
+hash_create_memory(AggState *aggstate)
+{
+	Size		maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
+
+	/*
+	 * The hashcontext's per-tuple memory will be used for byref transition
+	 * values and returned by AggCheckCallContext().
+	 */
+	aggstate->hashcontext = CreateWorkExprContext(aggstate->ss.ps.state);
+
+	/*
+	 * The meta context will be used for the bucket array of
+	 * TupleHashEntryData (or arrays, in the case of grouping sets). As the
+	 * hash table grows, the bucket array will double in size and the old one
+	 * will be freed, so an AllocSet is appropriate. For large bucket arrays,
+	 * the large allocation path will be used, so it's not worth worrying
+	 * about wasting space due to power-of-two allocations.
+	 */
+	aggstate->hash_metacxt = AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
+												   "HashAgg meta context",
+												   ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * The hash entries themselves, which include the grouping key
+	 * (firstTuple) and pergroup data, are stored in the table context. The
+	 * bump allocator can be used because the entries are not freed until the
+	 * entire hash table is reset. The bump allocator is faster for
+	 * allocations and avoids wasting space on the chunk header or
+	 * power-of-two allocations.
+	 *
+	 * Like CreateWorkExprContext(), use smaller sizings for smaller work_mem,
+	 * to avoid large jumps in memory usage.
+	 */
+
+	/*
+	 * Like CreateWorkExprContext(), use smaller sizings for smaller work_mem,
+	 * to avoid large jumps in memory usage.
+	 */
+	maxBlockSize = pg_prevpower2_size_t(work_mem * (Size) 1024 / 16);
+
+	/* But no bigger than ALLOCSET_DEFAULT_MAXSIZE */
+	maxBlockSize = Min(maxBlockSize, ALLOCSET_DEFAULT_MAXSIZE);
+
+	/* and no smaller than ALLOCSET_DEFAULT_INITSIZE */
+	maxBlockSize = Max(maxBlockSize, ALLOCSET_DEFAULT_INITSIZE);
+
+	aggstate->hash_tablecxt = BumpContextCreate(aggstate->ss.ps.state->es_query_cxt,
+												"HashAgg table context",
+												ALLOCSET_DEFAULT_MINSIZE,
+												ALLOCSET_DEFAULT_INITSIZE,
+												maxBlockSize);
+
+}
+
 /*
  * Choose a reasonable number of buckets for the initial hash table size.
  */
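
As a rough sanity check of the block-size clamping in hash_create_memory(), the sketch below reproduces the work_mem/16 scaling with the usual 8 kB / 8 MB defaults hard-coded as assumptions; pg_prevpower2_size_t(), Min(), and Max() are replaced by local equivalents.

```c
/*
 * Sketch of the maxBlockSize computation in hash_create_memory().  The
 * ALLOCSET_* values below are assumed (8 kB / 8 MB), for illustration only.
 */
#include <stdio.h>
#include <stddef.h>

#define ALLOCSET_DEFAULT_INITSIZE	(8 * 1024)
#define ALLOCSET_DEFAULT_MAXSIZE	(8 * 1024 * 1024)

/* round down to a power of two, like pg_prevpower2_size_t() */
static size_t
prev_pow2(size_t n)
{
	size_t		p = 1;

	while (p * 2 <= n)
		p *= 2;
	return p;
}

int
main(void)
{
	int			work_mem_kb[] = {64, 4096, 65536, 1048576};

	for (int i = 0; i < 4; i++)
	{
		size_t		maxBlockSize = prev_pow2((size_t) work_mem_kb[i] * 1024 / 16);

		/* but no bigger than ALLOCSET_DEFAULT_MAXSIZE ... */
		if (maxBlockSize > ALLOCSET_DEFAULT_MAXSIZE)
			maxBlockSize = ALLOCSET_DEFAULT_MAXSIZE;
		/* ... and no smaller than ALLOCSET_DEFAULT_INITSIZE */
		if (maxBlockSize < ALLOCSET_DEFAULT_INITSIZE)
			maxBlockSize = ALLOCSET_DEFAULT_INITSIZE;

		printf("work_mem = %7d kB -> bump max block size = %zu bytes\n",
			   work_mem_kb[i], maxBlockSize);
	}
	return 0;
}
```

With these assumed defaults, small work_mem settings pin the bump context's maximum block size at 8 kB, and anything at or above 128 MB of work_mem caps it at 8 MB.
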
@@ -2642,6 +2712,7 @@ agg_refill_hash_table(AggState *aggstate)
 
 	/* free memory and reset hash tables */
 	ReScanExprContext(aggstate->hashcontext);
+	MemoryContextReset(aggstate->hash_tablecxt);
 	for (int setno = 0; setno < aggstate->num_hashes; setno++)
 		ResetTupleHashTable(aggstate->perhash[setno].hashtable);
 
@@ -3326,7 +3397,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 	}
 
 	if (use_hashing)
-		aggstate->hashcontext = CreateWorkExprContext(estate);
+		hash_create_memory(aggstate);
 
 	ExecAssignExprContext(estate, &aggstate->ss.ps);
 
@@ -3621,9 +3692,6 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
 		Plan	   *outerplan = outerPlan(node);
 		uint64		totalGroups = 0;
 
-		aggstate->hash_metacxt = AllocSetContextCreate(aggstate->ss.ps.state->es_query_cxt,
-													   "HashAgg meta context",
-													   ALLOCSET_DEFAULT_SIZES);
 		aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc,
 															&TTSOpsMinimalTuple);
 		aggstate->hash_spill_wslot = ExecInitExtraTupleSlot(estate, scanDesc,
@@ -4368,6 +4436,12 @@ ExecEndAgg(AggState *node)
 		MemoryContextDelete(node->hash_metacxt);
 		node->hash_metacxt = NULL;
 	}
+	if (node->hash_tablecxt != NULL)
+	{
+		MemoryContextDelete(node->hash_tablecxt);
+		node->hash_tablecxt = NULL;
+	}
+
 
 	for (transno = 0; transno < node->numtrans; transno++)
 	{
@@ -4484,6 +4558,7 @@ ExecReScanAgg(AggState *node)
 		node->hash_ngroups_current = 0;
 
 		ReScanExprContext(node->hashcontext);
+		MemoryContextReset(node->hash_tablecxt);
 		/* Rebuild an empty hash table */
 		build_hash_tables(node);
 		node->table_filled = false;