Skip to content

Commit 8f29467

Browse files
committed
Change "long" numGroups fields to be Cardinality (i.e., double).
We've been nibbling away at removing uses of "long" for a long time, since its width is platform-dependent. Here's one more: change the remaining "long" fields in Plan nodes to Cardinality, since the three surviving examples all represent group-count estimates. The upstream planner code was converted to Cardinality some time ago; for example the corresponding fields in Path nodes are type Cardinality, as are the arguments of the make_foo_path functions. Downstream in the executor, it turns out that these all feed to the table-size argument of BuildTupleHashTable. Change that to "double" as well, and fix it so that it safely clamps out-of-range values to the uint32 limit of simplehash.h, as was not being done before. Essentially, this is removing all the artificial datatype-dependent limitations on these values from upstream processing, and applying just one clamp at the moment where we're forced to do so by the datatype choices of simplehash.h. Also, remove BuildTupleHashTable's misguided attempt to enforce work_mem/hash_mem_limit. It doesn't have enough information (particularly not the expected tuple width) to do that accurately, and it has no real business second-guessing the caller's choice. For all these plan types, it's really the planner's responsibility to not choose a hashed implementation if the hashtable is expected to exceed hash_mem_limit. The previous patch improved the accuracy of those estimates, and even if BuildTupleHashTable had more information it should arrive at the same conclusions. Reported-by: Jeff Janes <[email protected]> Author: Tom Lane <[email protected]> Reviewed-by: David Rowley <[email protected]> Discussion: https://postgr.es/m/CAMkU=1zia0JfW_QR8L5xA2vpa0oqVuiapm78h=WpNsHH13_9uw@mail.gmail.com
1 parent 1ea5bdb commit 8f29467

File tree

11 files changed

+55
-95
lines changed

11 files changed

+55
-95
lines changed

src/backend/executor/execGrouping.c

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
*/
1515
#include "postgres.h"
1616

17+
#include <math.h>
18+
1719
#include "access/htup_details.h"
1820
#include "access/parallel.h"
1921
#include "common/hashfn.h"
@@ -144,7 +146,7 @@ execTuplesHashPrepare(int numCols,
144146
* eqfuncoids: OIDs of equality comparison functions to use
145147
* hashfunctions: FmgrInfos of datatype-specific hashing functions to use
146148
* collations: collations to use in comparisons
147-
* nbuckets: initial estimate of hashtable size
149+
* nelements: initial estimate of hashtable size
148150
* additionalsize: size of data that may be stored along with the hash entry
149151
* metacxt: memory context for long-lived data and the simplehash table
150152
* tuplescxt: memory context in which to store the hashed tuples themselves
@@ -187,39 +189,39 @@ BuildTupleHashTable(PlanState *parent,
187189
const Oid *eqfuncoids,
188190
FmgrInfo *hashfunctions,
189191
Oid *collations,
190-
long nbuckets,
192+
double nelements,
191193
Size additionalsize,
192194
MemoryContext metacxt,
193195
MemoryContext tuplescxt,
194196
MemoryContext tempcxt,
195197
bool use_variable_hash_iv)
196198
{
197199
TupleHashTable hashtable;
198-
Size entrysize;
199-
Size hash_mem_limit;
200+
uint32 nbuckets;
200201
MemoryContext oldcontext;
201202
uint32 hash_iv = 0;
202203

203-
Assert(nbuckets > 0);
204+
/*
205+
* tuplehash_create requires a uint32 element count, so we had better
206+
* clamp the given nelements to fit in that. As long as we have to do
207+
* that, we might as well protect against completely insane input like
208+
* zero or NaN. But it is not our job here to enforce issues like staying
209+
* within hash_mem: the caller should have done that, and we don't have
210+
* enough info to second-guess.
211+
*/
212+
if (isnan(nelements) || nelements <= 0)
213+
nbuckets = 1;
214+
else if (nelements >= PG_UINT32_MAX)
215+
nbuckets = PG_UINT32_MAX;
216+
else
217+
nbuckets = (uint32) nelements;
204218

205219
/* tuplescxt must be separate, else ResetTupleHashTable breaks things */
206220
Assert(metacxt != tuplescxt);
207221

208222
/* ensure additionalsize is maxalign'ed */
209223
additionalsize = MAXALIGN(additionalsize);
210224

211-
/*
212-
* Limit initial table size request to not more than hash_mem.
213-
*
214-
* XXX this calculation seems pretty misguided, as it counts only overhead
215-
* and not the tuples themselves. But we have no knowledge of the
216-
* expected tuple width here.
217-
*/
218-
entrysize = sizeof(TupleHashEntryData) + additionalsize;
219-
hash_mem_limit = get_hash_memory_limit() / entrysize;
220-
if (nbuckets > hash_mem_limit)
221-
nbuckets = hash_mem_limit;
222-
223225
oldcontext = MemoryContextSwitchTo(metacxt);
224226

225227
hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));

src/backend/executor/nodeAgg.c

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -402,12 +402,12 @@ static void find_cols(AggState *aggstate, Bitmapset **aggregated,
402402
Bitmapset **unaggregated);
403403
static bool find_cols_walker(Node *node, FindColsContext *context);
404404
static void build_hash_tables(AggState *aggstate);
405-
static void build_hash_table(AggState *aggstate, int setno, long nbuckets);
405+
static void build_hash_table(AggState *aggstate, int setno, double nbuckets);
406406
static void hashagg_recompile_expressions(AggState *aggstate, bool minslot,
407407
bool nullcheck);
408408
static void hash_create_memory(AggState *aggstate);
409-
static long hash_choose_num_buckets(double hashentrysize,
410-
long ngroups, Size memory);
409+
static double hash_choose_num_buckets(double hashentrysize,
410+
double ngroups, Size memory);
411411
static int hash_choose_num_partitions(double input_groups,
412412
double hashentrysize,
413413
int used_bits,
@@ -1469,7 +1469,7 @@ build_hash_tables(AggState *aggstate)
14691469
for (setno = 0; setno < aggstate->num_hashes; ++setno)
14701470
{
14711471
AggStatePerHash perhash = &aggstate->perhash[setno];
1472-
long nbuckets;
1472+
double nbuckets;
14731473
Size memory;
14741474

14751475
if (perhash->hashtable != NULL)
@@ -1478,8 +1478,6 @@ build_hash_tables(AggState *aggstate)
14781478
continue;
14791479
}
14801480

1481-
Assert(perhash->aggnode->numGroups > 0);
1482-
14831481
memory = aggstate->hash_mem_limit / aggstate->num_hashes;
14841482

14851483
/* choose reasonable number of buckets per hashtable */
@@ -1505,7 +1503,7 @@ build_hash_tables(AggState *aggstate)
15051503
* Build a single hashtable for this grouping set.
15061504
*/
15071505
static void
1508-
build_hash_table(AggState *aggstate, int setno, long nbuckets)
1506+
build_hash_table(AggState *aggstate, int setno, double nbuckets)
15091507
{
15101508
AggStatePerHash perhash = &aggstate->perhash[setno];
15111509
MemoryContext metacxt = aggstate->hash_metacxt;
@@ -2053,24 +2051,28 @@ hash_create_memory(AggState *aggstate)
20532051
/*
20542052
* Choose a reasonable number of buckets for the initial hash table size.
20552053
*/
2056-
static long
2057-
hash_choose_num_buckets(double hashentrysize, long ngroups, Size memory)
2054+
static double
2055+
hash_choose_num_buckets(double hashentrysize, double ngroups, Size memory)
20582056
{
2059-
long max_nbuckets;
2060-
long nbuckets = ngroups;
2057+
double max_nbuckets;
2058+
double nbuckets = ngroups;
20612059

20622060
max_nbuckets = memory / hashentrysize;
20632061

20642062
/*
20652063
* Underestimating is better than overestimating. Too many buckets crowd
20662064
* out space for group keys and transition state values.
20672065
*/
2068-
max_nbuckets >>= 1;
2066+
max_nbuckets /= 2;
20692067

20702068
if (nbuckets > max_nbuckets)
20712069
nbuckets = max_nbuckets;
20722070

2073-
return Max(nbuckets, 1);
2071+
/*
2072+
* BuildTupleHashTable will clamp any obviously-insane result, so we don't
2073+
* need to be too careful here.
2074+
*/
2075+
return nbuckets;
20742076
}
20752077

20762078
/*
@@ -3686,7 +3688,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
36863688
if (use_hashing)
36873689
{
36883690
Plan *outerplan = outerPlan(node);
3689-
uint64 totalGroups = 0;
3691+
double totalGroups = 0;
36903692

36913693
aggstate->hash_spill_rslot = ExecInitExtraTupleSlot(estate, scanDesc,
36923694
&TTSOpsMinimalTuple);

src/backend/executor/nodeRecursiveunion.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ build_hash_table(RecursiveUnionState *rustate)
3535
TupleDesc desc = ExecGetResultType(outerPlanState(rustate));
3636

3737
Assert(node->numCols > 0);
38-
Assert(node->numGroups > 0);
3938

4039
/*
4140
* If both child plans deliver the same fixed tuple slot type, we can tell

src/backend/executor/nodeSetOp.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ build_hash_table(SetOpState *setopstate)
8888
TupleDesc desc = ExecGetResultType(outerPlanState(setopstate));
8989

9090
Assert(node->strategy == SETOP_HASHED);
91-
Assert(node->numGroups > 0);
9291

9392
/*
9493
* If both child plans deliver the same fixed tuple slot type, we can tell

src/backend/executor/nodeSubplan.c

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
#include "miscadmin.h"
3535
#include "nodes/makefuncs.h"
3636
#include "nodes/nodeFuncs.h"
37-
#include "optimizer/optimizer.h"
3837
#include "utils/array.h"
3938
#include "utils/lsyscache.h"
4039
#include "utils/memutils.h"
@@ -481,7 +480,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
481480
int ncols = node->numCols;
482481
ExprContext *innerecontext = node->innerecontext;
483482
MemoryContext oldcontext;
484-
long nbuckets;
483+
double nentries;
485484
TupleTableSlot *slot;
486485

487486
Assert(subplan->subLinkType == ANY_SUBLINK);
@@ -509,9 +508,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
509508
node->havehashrows = false;
510509
node->havenullrows = false;
511510

512-
nbuckets = clamp_cardinality_to_long(planstate->plan->plan_rows);
513-
if (nbuckets < 1)
514-
nbuckets = 1;
511+
nentries = planstate->plan->plan_rows;
515512

516513
if (node->hashtable)
517514
ResetTupleHashTable(node->hashtable);
@@ -524,7 +521,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
524521
node->tab_eq_funcoids,
525522
node->tab_hash_funcs,
526523
node->tab_collations,
527-
nbuckets,
524+
nentries,
528525
0, /* no additional data */
529526
node->planstate->state->es_query_cxt,
530527
node->tuplesContext,
@@ -534,12 +531,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
534531
if (!subplan->unknownEqFalse)
535532
{
536533
if (ncols == 1)
537-
nbuckets = 1; /* there can only be one entry */
534+
nentries = 1; /* there can only be one entry */
538535
else
539536
{
540-
nbuckets /= 16;
541-
if (nbuckets < 1)
542-
nbuckets = 1;
537+
nentries /= 16;
538+
if (nentries < 1)
539+
nentries = 1;
543540
}
544541

545542
if (node->hashnulls)
@@ -553,7 +550,7 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext)
553550
node->tab_eq_funcoids,
554551
node->tab_hash_funcs,
555552
node->tab_collations,
556-
nbuckets,
553+
nentries,
557554
0, /* no additional data */
558555
node->planstate->state->es_query_cxt,
559556
node->tuplesContext,

src/backend/optimizer/path/costsize.c

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -257,32 +257,6 @@ clamp_width_est(int64 tuple_width)
257257
return (int32) tuple_width;
258258
}
259259

260-
/*
261-
* clamp_cardinality_to_long
262-
* Cast a Cardinality value to a sane long value.
263-
*/
264-
long
265-
clamp_cardinality_to_long(Cardinality x)
266-
{
267-
/*
268-
* Just for paranoia's sake, ensure we do something sane with negative or
269-
* NaN values.
270-
*/
271-
if (isnan(x))
272-
return LONG_MAX;
273-
if (x <= 0)
274-
return 0;
275-
276-
/*
277-
* If "long" is 64 bits, then LONG_MAX cannot be represented exactly as a
278-
* double. Casting it to double and back may well result in overflow due
279-
* to rounding, so avoid doing that. We trust that any double value that
280-
* compares strictly less than "(double) LONG_MAX" will cast to a
281-
* representable "long" value.
282-
*/
283-
return (x < (double) LONG_MAX) ? (long) x : LONG_MAX;
284-
}
285-
286260

287261
/*
288262
* cost_seqscan

src/backend/optimizer/plan/createplan.c

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,7 @@ static RecursiveUnion *make_recursive_union(List *tlist,
226226
Plan *righttree,
227227
int wtParam,
228228
List *distinctList,
229-
long numGroups);
229+
Cardinality numGroups);
230230
static BitmapAnd *make_bitmap_and(List *bitmapplans);
231231
static BitmapOr *make_bitmap_or(List *bitmapplans);
232232
static NestLoop *make_nestloop(List *tlist,
@@ -301,7 +301,7 @@ static Gather *make_gather(List *qptlist, List *qpqual,
301301
int nworkers, int rescan_param, bool single_copy, Plan *subplan);
302302
static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
303303
List *tlist, Plan *lefttree, Plan *righttree,
304-
List *groupList, long numGroups);
304+
List *groupList, Cardinality numGroups);
305305
static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
306306
static Result *make_gating_result(List *tlist, Node *resconstantqual,
307307
Plan *subplan);
@@ -2564,7 +2564,6 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
25642564
List *tlist = build_path_tlist(root, &best_path->path);
25652565
Plan *leftplan;
25662566
Plan *rightplan;
2567-
long numGroups;
25682567

25692568
/*
25702569
* SetOp doesn't project, so tlist requirements pass through; moreover we
@@ -2575,16 +2574,13 @@ create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
25752574
rightplan = create_plan_recurse(root, best_path->rightpath,
25762575
flags | CP_LABEL_TLIST);
25772576

2578-
/* Convert numGroups to long int --- but 'ware overflow! */
2579-
numGroups = clamp_cardinality_to_long(best_path->numGroups);
2580-
25812577
plan = make_setop(best_path->cmd,
25822578
best_path->strategy,
25832579
tlist,
25842580
leftplan,
25852581
rightplan,
25862582
best_path->groupList,
2587-
numGroups);
2583+
best_path->numGroups);
25882584

25892585
copy_generic_path_info(&plan->plan, (Path *) best_path);
25902586

@@ -2604,23 +2600,19 @@ create_recursiveunion_plan(PlannerInfo *root, RecursiveUnionPath *best_path)
26042600
Plan *leftplan;
26052601
Plan *rightplan;
26062602
List *tlist;
2607-
long numGroups;
26082603

26092604
/* Need both children to produce same tlist, so force it */
26102605
leftplan = create_plan_recurse(root, best_path->leftpath, CP_EXACT_TLIST);
26112606
rightplan = create_plan_recurse(root, best_path->rightpath, CP_EXACT_TLIST);
26122607

26132608
tlist = build_path_tlist(root, &best_path->path);
26142609

2615-
/* Convert numGroups to long int --- but 'ware overflow! */
2616-
numGroups = clamp_cardinality_to_long(best_path->numGroups);
2617-
26182610
plan = make_recursive_union(tlist,
26192611
leftplan,
26202612
rightplan,
26212613
best_path->wtParam,
26222614
best_path->distinctList,
2623-
numGroups);
2615+
best_path->numGroups);
26242616

26252617
copy_generic_path_info(&plan->plan, (Path *) best_path);
26262618

@@ -5845,7 +5837,7 @@ make_recursive_union(List *tlist,
58455837
Plan *righttree,
58465838
int wtParam,
58475839
List *distinctList,
5848-
long numGroups)
5840+
Cardinality numGroups)
58495841
{
58505842
RecursiveUnion *node = makeNode(RecursiveUnion);
58515843
Plan *plan = &node->plan;
@@ -6582,15 +6574,11 @@ Agg *
65826574
make_agg(List *tlist, List *qual,
65836575
AggStrategy aggstrategy, AggSplit aggsplit,
65846576
int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, Oid *grpCollations,
6585-
List *groupingSets, List *chain, double dNumGroups,
6577+
List *groupingSets, List *chain, Cardinality numGroups,
65866578
Size transitionSpace, Plan *lefttree)
65876579
{
65886580
Agg *node = makeNode(Agg);
65896581
Plan *plan = &node->plan;
6590-
long numGroups;
6591-
6592-
/* Reduce to long, but 'ware overflow! */
6593-
numGroups = clamp_cardinality_to_long(dNumGroups);
65946582

65956583
node->aggstrategy = aggstrategy;
65966584
node->aggsplit = aggsplit;
@@ -6822,7 +6810,7 @@ make_gather(List *qptlist,
68226810
static SetOp *
68236811
make_setop(SetOpCmd cmd, SetOpStrategy strategy,
68246812
List *tlist, Plan *lefttree, Plan *righttree,
6825-
List *groupList, long numGroups)
6813+
List *groupList, Cardinality numGroups)
68266814
{
68276815
SetOp *node = makeNode(SetOp);
68286816
Plan *plan = &node->plan;

src/include/executor/executor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ extern TupleHashTable BuildTupleHashTable(PlanState *parent,
138138
const Oid *eqfuncoids,
139139
FmgrInfo *hashfunctions,
140140
Oid *collations,
141-
long nbuckets,
141+
double nelements,
142142
Size additionalsize,
143143
MemoryContext metacxt,
144144
MemoryContext tuplescxt,

0 commit comments

Comments
 (0)