@@ -153,8 +153,11 @@ MultiExecPrivateHash(HashState *node)
153153 econtext = node -> ps .ps_ExprContext ;
154154
155155 /*
156- * Get all tuples from the node below the Hash node and insert into the
157- * hash table (or temp files).
156+ * Get all tuples from the node below the Hash node and insert the
157+ * potentially-matchable ones into the hash table (or temp files). Tuples
158+ * that can't possibly match because they have null join keys are dumped
159+ * into a separate tuplestore, or just summarily discarded if we don't
160+ * need to emit them with null-extension.
158161 */
159162 for (;;)
160163 {
@@ -174,6 +177,7 @@ MultiExecPrivateHash(HashState *node)
174177
175178 if (!isnull )
176179 {
180+ /* normal case with a non-null join key */
177181 uint32 hashvalue = DatumGetUInt32 (hashdatum );
178182 int bucketNumber ;
179183
@@ -192,6 +196,14 @@ MultiExecPrivateHash(HashState *node)
192196 }
193197 hashtable -> totalTuples += 1 ;
194198 }
199+ else if (node -> keep_null_tuples )
200+ {
201+ /* null join key, but we must save tuple to be emitted later */
202+ if (node -> null_tuple_store == NULL )
203+ node -> null_tuple_store = ExecHashBuildNullTupleStore (hashtable );
204+ tuplestore_puttupleslot (node -> null_tuple_store , slot );
205+ }
206+ /* else we can discard the tuple immediately */
195207 }
196208
197209 /* resize the hash table if needed (NTUP_PER_BUCKET exceeded) */
@@ -222,7 +234,6 @@ MultiExecParallelHash(HashState *node)
222234 HashJoinTable hashtable ;
223235 TupleTableSlot * slot ;
224236 ExprContext * econtext ;
225- uint32 hashvalue ;
226237 Barrier * build_barrier ;
227238 int i ;
228239
@@ -282,6 +293,7 @@ MultiExecParallelHash(HashState *node)
282293 for (;;)
283294 {
284295 bool isnull ;
296+ uint32 hashvalue ;
285297
286298 slot = ExecProcNode (outerNode );
287299 if (TupIsNull (slot ))
@@ -295,8 +307,19 @@ MultiExecParallelHash(HashState *node)
295307 & isnull ));
296308
297309 if (!isnull )
310+ {
311+ /* normal case with a non-null join key */
298312 ExecParallelHashTableInsert (hashtable , slot , hashvalue );
299- hashtable -> partialTuples ++ ;
313+ hashtable -> partialTuples ++ ;
314+ }
315+ else if (node -> keep_null_tuples )
316+ {
317+ /* null join key, but save tuple to be emitted later */
318+ if (node -> null_tuple_store == NULL )
319+ node -> null_tuple_store = ExecHashBuildNullTupleStore (hashtable );
320+ tuplestore_puttupleslot (node -> null_tuple_store , slot );
321+ }
322+ /* else we can discard the tuple immediately */
300323 }
301324
302325 /*
@@ -404,14 +427,10 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
404427
405428 Assert (node -> plan .qual == NIL );
406429
407- /*
408- * Delay initialization of hash_expr until ExecInitHashJoin(). We cannot
409- * build the ExprState here as we don't yet know the join type we're going
410- * to be hashing values for and we need to know that before calling
411- * ExecBuildHash32Expr as the keep_nulls parameter depends on the join
412- * type.
413- */
430+ /* these fields will be filled by ExecInitHashJoin() */
414431 hashstate -> hash_expr = NULL ;
432+ hashstate -> null_tuple_store = NULL ;
433+ hashstate -> keep_null_tuples = false;
415434
416435 return hashstate ;
417436}
@@ -2753,6 +2772,31 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
27532772 }
27542773}
27552774
2775+ /*
2776+ * Build a tuplestore suitable for holding null-keyed input tuples.
2777+ * (This function doesn't care whether it's for outer or inner tuples.)
2778+ *
+ * The hashtable argument is used only for its hashCxt, the memory context
+ * in which the new tuplestore is created. The return value is the newly
+ * created tuplestore. The callers visible in this file build it lazily,
+ * on the first null-keyed tuple seen while keep_null_tuples is set, and
+ * save the pointer in the Hash node's null_tuple_store field.
+ *
2779+ * Note that in a parallel hash join, each worker has its own tuplestore(s)
2780+ * for these. There's no need to interact with other workers to decide
2781+ * what to do with them. So they're always in private storage.
2782+ */
2783+ Tuplestorestate *
2784+ ExecHashBuildNullTupleStore (HashJoinTable hashtable )
2785+ {
2786+ Tuplestorestate * tstore ;
2787+ MemoryContext oldcxt ;
2788+
2789+ /*
2790+ * We keep the tuplestore in the hashCxt to ensure it won't go away too
2791+ * soon. Size it at work_mem/16 so that it doesn't bloat the node's space
2792+ * consumption too much.
+ *
+ * The caller's memory context is saved in oldcxt and restored right after
+ * the tuplestore is created, so only the creation itself runs in hashCxt.
+ * NOTE(review): the two "false" arguments are presumably randomAccess and
+ * interXact, and the third a limit in kilobytes -- confirm against
+ * tuplestore.h.
2793+ */
2794+ oldcxt = MemoryContextSwitchTo (hashtable -> hashCxt );
2795+ tstore = tuplestore_begin_heap (false, false, work_mem / 16 );
2796+ MemoryContextSwitchTo (oldcxt );
2797+ return tstore ;
2798+ }
2799+
27562800/*
27572801 * Reserve space in the DSM segment for instrumentation data.
27582802 */
0 commit comments