Skip to content

Commit 9ca6765

Browse files
committed
Don't store intermediate hash values in ExprState->resvalue
adf97c1 made it so ExprStates could support hashing and changed Hash Join to use that instead of manually extracting Datums from tuples and hashing them one column at a time. When hashing multiple columns or expressions, the code added in that commit stored the intermediate hash value in the ExprState's resvalue field. That was a mistake as steps may be injected into the ExprState between each hashing step that look at or overwrite the stored intermediate hash value. EEOP_PARAM_SET is an example of such a step. Here we fix this by adding a new dedicated field for storing intermediate hash values and adjust the code so that all apart from the final hashing step store their result in the intermediate field. In passing, rename a variable so that it's more aligned to the surrounding code and also so a few lines stay within the 80 char margin. Reported-by: Andres Freund Reviewed-by: Alena Rybakina <[email protected]> Discussion: https://postgr.es/m/CAApHDvqo9eenEFXND5zZ9JxO_k4eTA4jKMGxSyjdTrsmYvnmZw@mail.gmail.com
1 parent 089aac6 commit 9ca6765

File tree

6 files changed

+129
-17
lines changed

6 files changed

+129
-17
lines changed

src/backend/executor/execExpr.c

+31-4
Original file line numberDiff line numberDiff line change
@@ -3996,6 +3996,7 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
39963996
{
39973997
ExprState *state = makeNode(ExprState);
39983998
ExprEvalStep scratch = {0};
3999+
NullableDatum *iresult = NULL;
39994000
List *adjust_jumps = NIL;
40004001
ListCell *lc;
40014002
ListCell *lc2;
@@ -4009,6 +4010,14 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
40094010
/* Insert setup steps as needed. */
40104011
ExecCreateExprSetupSteps(state, (Node *) hash_exprs);
40114012

4013+
/*
4014+
* When hashing more than 1 expression or if we have an init value, we
4015+
* need somewhere to store the intermediate hash value so that it's
4016+
* available to be combined with the result of subsequent hashing.
4017+
*/
4018+
if (list_length(hash_exprs) > 1 || init_value != 0)
4019+
iresult = palloc(sizeof(NullableDatum));
4020+
40124021
if (init_value == 0)
40134022
{
40144023
/*
@@ -4024,8 +4033,8 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
40244033
/* Set up operation to set the initial value. */
40254034
scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
40264035
scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
4027-
scratch.resvalue = &state->resvalue;
4028-
scratch.resnull = &state->resnull;
4036+
scratch.resvalue = &iresult->value;
4037+
scratch.resnull = &iresult->isnull;
40294038

40304039
ExprEvalPushStep(state, &scratch);
40314040

@@ -4063,8 +4072,26 @@ ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
40634072
&fcinfo->args[0].value,
40644073
&fcinfo->args[0].isnull);
40654074

4066-
scratch.resvalue = &state->resvalue;
4067-
scratch.resnull = &state->resnull;
4075+
if (i == list_length(hash_exprs) - 1)
4076+
{
4077+
/* the result for hashing the final expr is stored in the state */
4078+
scratch.resvalue = &state->resvalue;
4079+
scratch.resnull = &state->resnull;
4080+
}
4081+
else
4082+
{
4083+
Assert(iresult != NULL);
4084+
4085+
/* intermediate values are stored in an intermediate result */
4086+
scratch.resvalue = &iresult->value;
4087+
scratch.resnull = &iresult->isnull;
4088+
}
4089+
4090+
/*
4091+
* NEXT32 opcodes need to look at the intermediate result. We might
4092+
* as well just set this for all ops. FIRSTs won't look at it.
4093+
*/
4094+
scratch.d.hashdatum.iresult = iresult;
40684095

40694096
/* Initialize function call parameter structure too */
40704097
InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);

src/backend/executor/execExprInterp.c

+9-7
Original file line numberDiff line numberDiff line change
@@ -1600,10 +1600,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
16001600
EEO_CASE(EEOP_HASHDATUM_NEXT32)
16011601
{
16021602
FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
1603-
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
1603+
uint32 existinghash;
16041604

1605+
existinghash = DatumGetUInt32(op->d.hashdatum.iresult->value);
16051606
/* combine successive hash values by rotating */
1606-
existing_hash = pg_rotate_left32(existing_hash, 1);
1607+
existinghash = pg_rotate_left32(existinghash, 1);
16071608

16081609
/* leave the hash value alone on NULL inputs */
16091610
if (!fcinfo->args[0].isnull)
@@ -1612,10 +1613,10 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
16121613

16131614
/* execute hash func and combine with previous hash value */
16141615
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
1615-
existing_hash = existing_hash ^ hashvalue;
1616+
existinghash = existinghash ^ hashvalue;
16161617
}
16171618

1618-
*op->resvalue = UInt32GetDatum(existing_hash);
1619+
*op->resvalue = UInt32GetDatum(existinghash);
16191620
*op->resnull = false;
16201621

16211622
EEO_NEXT();
@@ -1638,15 +1639,16 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
16381639
}
16391640
else
16401641
{
1641-
uint32 existing_hash = DatumGetUInt32(*op->resvalue);
1642+
uint32 existinghash;
16421643
uint32 hashvalue;
16431644

1645+
existinghash = DatumGetUInt32(op->d.hashdatum.iresult->value);
16441646
/* combine successive hash values by rotating */
1645-
existing_hash = pg_rotate_left32(existing_hash, 1);
1647+
existinghash = pg_rotate_left32(existinghash, 1);
16461648

16471649
/* execute hash func and combine with previous hash value */
16481650
hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
1649-
*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
1651+
*op->resvalue = UInt32GetDatum(existinghash ^ hashvalue);
16501652
*op->resnull = false;
16511653
}
16521654

src/backend/jit/llvm/llvmjit_expr.c

+12-6
Original file line numberDiff line numberDiff line change
@@ -1940,13 +1940,16 @@ llvm_compile_expr(ExprState *state)
19401940
{
19411941
LLVMValueRef v_tmp1;
19421942
LLVMValueRef v_tmp2;
1943+
LLVMValueRef tmp;
1944+
1945+
tmp = l_ptr_const(&op->d.hashdatum.iresult->value,
1946+
l_ptr(TypeSizeT));
19431947

19441948
/*
19451949
* Fetch the previously hashed value from where the
1946-
* EEOP_HASHDATUM_FIRST operation stored it.
1950+
* previous hash operation stored it.
19471951
*/
1948-
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
1949-
"prevhash");
1952+
v_prevhash = l_load(b, TypeSizeT, tmp, "prevhash");
19501953

19511954
/*
19521955
* Rotate bits left by 1 bit. Be careful not to
@@ -2062,13 +2065,16 @@ llvm_compile_expr(ExprState *state)
20622065
{
20632066
LLVMValueRef v_tmp1;
20642067
LLVMValueRef v_tmp2;
2068+
LLVMValueRef tmp;
2069+
2070+
tmp = l_ptr_const(&op->d.hashdatum.iresult->value,
2071+
l_ptr(TypeSizeT));
20652072

20662073
/*
20672074
* Fetch the previously hashed value from where the
2068-
* EEOP_HASHDATUM_FIRST_STRICT operation stored it.
2075+
* previous hash operation stored it.
20692076
*/
2070-
v_prevhash = l_load(b, TypeSizeT, v_resvaluep,
2071-
"prevhash");
2077+
v_prevhash = l_load(b, TypeSizeT, tmp, "prevhash");
20722078

20732079
/*
20742080
* Rotate bits left by 1 bit. Be careful not to

src/include/executor/execExpr.h

+1
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,7 @@ typedef struct ExprEvalStep
580580
/* faster to access without additional indirection: */
581581
PGFunction fn_addr; /* actual call address */
582582
int jumpdone; /* jump here on null */
583+
NullableDatum *iresult; /* intermediate hash result */
583584
} hashdatum;
584585

585586
/* for EEOP_CONVERT_ROWTYPE */

src/test/regress/expected/join.out

+55
Original file line numberDiff line numberDiff line change
@@ -2358,6 +2358,61 @@ where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
23582358
----+----+----------+----------
23592359
(0 rows)
23602360

2361+
--
2362+
-- Test hash joins with multiple hash keys and subplans.
2363+
--
2364+
-- First ensure we get a hash join with multiple hash keys.
2365+
explain (costs off)
2366+
select t1.unique1,t2.unique1 from tenk1 t1
2367+
inner join tenk1 t2 on t1.two = t2.two
2368+
and t1.unique1 = (select min(unique1) from tenk1
2369+
where t2.unique1=unique1)
2370+
where t1.unique1 < 10 and t2.unique1 < 10
2371+
order by t1.unique1;
2372+
QUERY PLAN
2373+
----------------------------------------------------------------------------------------------------
2374+
Sort
2375+
Sort Key: t1.unique1
2376+
-> Hash Join
2377+
Hash Cond: ((t1.two = t2.two) AND (t1.unique1 = (SubPlan 2)))
2378+
-> Bitmap Heap Scan on tenk1 t1
2379+
Recheck Cond: (unique1 < 10)
2380+
-> Bitmap Index Scan on tenk1_unique1
2381+
Index Cond: (unique1 < 10)
2382+
-> Hash
2383+
-> Bitmap Heap Scan on tenk1 t2
2384+
Recheck Cond: (unique1 < 10)
2385+
-> Bitmap Index Scan on tenk1_unique1
2386+
Index Cond: (unique1 < 10)
2387+
SubPlan 2
2388+
-> Result
2389+
InitPlan 1
2390+
-> Limit
2391+
-> Index Only Scan using tenk1_unique1 on tenk1
2392+
Index Cond: ((unique1 IS NOT NULL) AND (unique1 = t2.unique1))
2393+
(19 rows)
2394+
2395+
-- Ensure we get the expected result
2396+
select t1.unique1,t2.unique1 from tenk1 t1
2397+
inner join tenk1 t2 on t1.two = t2.two
2398+
and t1.unique1 = (select min(unique1) from tenk1
2399+
where t2.unique1=unique1)
2400+
where t1.unique1 < 10 and t2.unique1 < 10
2401+
order by t1.unique1;
2402+
unique1 | unique1
2403+
---------+---------
2404+
0 | 0
2405+
1 | 1
2406+
2 | 2
2407+
3 | 3
2408+
4 | 4
2409+
5 | 5
2410+
6 | 6
2411+
7 | 7
2412+
8 | 8
2413+
9 | 9
2414+
(10 rows)
2415+
23612416
--
23622417
-- checks for correct handling of quals in multiway outer joins
23632418
--

src/test/regress/sql/join.sql

+21
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,27 @@ select a.f1, b.f1, t.thousand, t.tenthous from
441441
(select sum(f1) as f1 from int4_tbl i4b) b
442442
where b.f1 = t.thousand and a.f1 = b.f1 and (a.f1+b.f1+999) = t.tenthous;
443443

444+
--
445+
-- Test hash joins with multiple hash keys and subplans.
446+
--
447+
448+
-- First ensure we get a hash join with multiple hash keys.
449+
explain (costs off)
450+
select t1.unique1,t2.unique1 from tenk1 t1
451+
inner join tenk1 t2 on t1.two = t2.two
452+
and t1.unique1 = (select min(unique1) from tenk1
453+
where t2.unique1=unique1)
454+
where t1.unique1 < 10 and t2.unique1 < 10
455+
order by t1.unique1;
456+
457+
-- Ensure we get the expected result
458+
select t1.unique1,t2.unique1 from tenk1 t1
459+
inner join tenk1 t2 on t1.two = t2.two
460+
and t1.unique1 = (select min(unique1) from tenk1
461+
where t2.unique1=unique1)
462+
where t1.unique1 < 10 and t2.unique1 < 10
463+
order by t1.unique1;
464+
444465
--
445466
-- checks for correct handling of quals in multiway outer joins
446467
--

0 commit comments

Comments
 (0)