diff --git a/src/Makefile.global.in b/src/Makefile.global.in index 0aa389bc7101..ab53ca7421c1 100644 --- a/src/Makefile.global.in +++ b/src/Makefile.global.in @@ -201,9 +201,11 @@ with_liburing = @with_liburing@ with_libxml = @with_libxml@ with_libxslt = @with_libxslt@ with_llvm = @with_llvm@ +with_lz4 = @with_lz4@ with_system_tzdata = @with_system_tzdata@ with_uuid = @with_uuid@ with_zlib = @with_zlib@ +with_zstd = @with_zstd@ enable_rpath = @enable_rpath@ enable_nls = @enable_nls@ enable_debug = @enable_debug@ diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c index 5661ad768300..384265ca74ae 100644 --- a/src/backend/executor/nodeHashjoin.c +++ b/src/backend/executor/nodeHashjoin.c @@ -1434,7 +1434,7 @@ ExecHashJoinSaveTuple(MinimalTuple tuple, uint32 hashvalue, { MemoryContext oldctx = MemoryContextSwitchTo(hashtable->spillCxt); - file = BufFileCreateTemp(false); + file = BufFileCreateCompressTemp(false); *fileptr = file; MemoryContextSwitchTo(oldctx); diff --git a/src/backend/storage/file/buffile.c b/src/backend/storage/file/buffile.c index 366d70d38a19..cdb3f150ebf6 100644 --- a/src/backend/storage/file/buffile.c +++ b/src/backend/storage/file/buffile.c @@ -53,6 +53,27 @@ #include "storage/bufmgr.h" #include "storage/fd.h" #include "utils/resowner.h" +#include "utils/memutils.h" +#include "common/pg_lzcompress.h" + +#ifdef USE_LZ4 +#include <lz4.h> +#endif + +#ifdef HAVE_LIBZ +#include <zlib.h> +#endif + +#ifdef USE_ZSTD +#include <zstd.h> +#endif + +/* Compression types */ +#define TEMP_NONE_COMPRESSION 0 +#define TEMP_PGLZ_COMPRESSION 1 +#define TEMP_LZ4_COMPRESSION 2 +#define TEMP_GZIP_COMPRESSION 3 +#define TEMP_ZSTD_COMPRESSION 4 /* * We break BufFiles into gigabyte-sized segments, regardless of RELSEG_SIZE. @@ -62,6 +83,11 @@ #define MAX_PHYSICAL_FILESIZE 0x40000000 #define BUFFILE_SEG_SIZE (MAX_PHYSICAL_FILESIZE / BLCKSZ) +/* + * Optional transparent compression of temporary files. Disabled by default. + */ +int temp_file_compression = TEMP_NONE_COMPRESSION; + /* * This data structure represents a buffered file that consists of one or * more physical files (each accessed through a virtual file descriptor @@ -101,8 +127,28 @@ struct BufFile * wasting per-file alignment padding when some users create many files. */ PGAlignedBlock buffer; + + int compress; /* enabled compression for the file */ + char *cBuffer; /* compression buffer */ }; +/* + * Header written right before each chunk of data with compression enabled. + * The 'len' is the length of the data buffer written right after the header, + * and 'raw_len' is the length of uncompressed data. If the data ends up not + * being compressed (e.g. when pglz does not reach the compression ratio), + * the raw_len is set to -1 and the len is the raw (uncompressed) length. + * + * To make things simpler, we write these headers even for methods that do + * not fail (or rather when they fail, it's a proper error). The space for + * an extra integer seems negligible.
+ */ +typedef struct CompressHeader +{ + int len; /* data length (compressed, excluding header) */ + int raw_len; /* raw length (-1: not compressed) */ +} CompressHeader; + static BufFile *makeBufFileCommon(int nfiles); static BufFile *makeBufFile(File firstfile); static void extendBufFile(BufFile *file); @@ -127,6 +173,8 @@ makeBufFileCommon(int nfiles) file->curOffset = 0; file->pos = 0; file->nbytes = 0; + file->compress = TEMP_NONE_COMPRESSION; + file->cBuffer = NULL; return file; } @@ -215,6 +263,106 @@ BufFileCreateTemp(bool interXact) return file; } +/* + * BufFileCreateCompressTemp + * Create a temporary file with transparent compression. + * + * The temporary files will use compression, depending on the current value of + * the temp_file_compression GUC. + * + * Note: Compressed files do not support random access. A seek operation other + * than a seek to the beginning of the buffile will corrupt data. + * + * Note: The compression algorithm is determined by the temp_file_compression GUC. + * If set to "none" (TEMP_NONE_COMPRESSION), the file is not compressed. + * + * Note: We want to limit the number of memory allocations for the compression + * buffer. A single buffer is enough, compression happens one block at a time. + * + * XXX Is the allocation optimization worth it? Do we want to allocate stuff + * in TopMemoryContext? + * + * XXX We should prevent such silent data corruption, by erroring out after an + * incompatible seek. + */ +BufFile * +BufFileCreateCompressTemp(bool interXact) +{ + static char *buff = NULL; + static int allocated_for_compression = TEMP_NONE_COMPRESSION; + static int allocated_size = 0; + BufFile *file = BufFileCreateTemp(interXact); + + if (temp_file_compression != TEMP_NONE_COMPRESSION) + { + int size = 0; + + switch (temp_file_compression) + { + case TEMP_LZ4_COMPRESSION: +#ifdef USE_LZ4 + size = LZ4_compressBound(BLCKSZ) + sizeof(CompressHeader); +#endif + break; + + case TEMP_GZIP_COMPRESSION: + { +#ifdef HAVE_LIBZ + size = compressBound(BLCKSZ) + sizeof(CompressHeader); +#endif + break; + } + + case TEMP_ZSTD_COMPRESSION: + { +#ifdef USE_ZSTD + size = ZSTD_COMPRESSBOUND(BLCKSZ) + sizeof(CompressHeader); +#endif + break; + } + + case TEMP_PGLZ_COMPRESSION: + size = pglz_maximum_compressed_size(BLCKSZ, BLCKSZ) + sizeof(CompressHeader); + break; + } + + /* + * Allocate or reallocate buffer if needed - first call, compression + * method changed, or the buffer is too small. + * + * XXX Can the buffer be too small if the method did not change? Does the + * method matter, or just the size? + */ + if ((buff == NULL) || + (allocated_for_compression != temp_file_compression) || + (allocated_size < size)) + { + /* + * FIXME Isn't this pfree wrong? How do we know the buffer is not + * used by some existing temp file? + */ + if (buff != NULL) + pfree(buff); + + /* + * Persistent buffer for all temporary file compressions. + */ + buff = MemoryContextAlloc(TopMemoryContext, size); + allocated_for_compression = temp_file_compression; + allocated_size = size; + } + + file->compress = temp_file_compression; + file->cBuffer = buff; + } + + /* compression with buffer, or no compression and no buffer */ + Assert((!file->compress && file->cBuffer == NULL) || + (file->compress && file->cBuffer != NULL)); + + return file; +} + /* * Build the name for a given segment of a given BufFile. */ @@ -454,21 +602,165 @@ BufFileLoadBuffer(BufFile *file) else INSTR_TIME_SET_ZERO(io_start); - /* - * Read whatever we can get, up to a full bufferload.
- */ - file->nbytes = FileRead(thisfile, - file->buffer.data, - sizeof(file->buffer.data), - file->curOffset, - WAIT_EVENT_BUFFILE_READ); - if (file->nbytes < 0) + if (file->compress == TEMP_NONE_COMPRESSION) { - file->nbytes = 0; - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - FilePathName(thisfile)))); + /* + * Read whatever we can get, up to a full bufferload. + */ + file->nbytes = FileRead(thisfile, + file->buffer.data, + sizeof(file->buffer), + file->curOffset, + WAIT_EVENT_BUFFILE_READ); + if (file->nbytes < 0) + { + file->nbytes = 0; + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + FilePathName(thisfile)))); + } + } + else + { + /* + * Read and decompress data from a temporary file. We first read the + * header with compressed/raw lengths, and then the compressed data. + */ + int nread; + CompressHeader header; + + nread = FileRead(thisfile, + &header, + sizeof(header), + file->curOffset, + WAIT_EVENT_BUFFILE_READ); + + /* did we read the length of the next buffer? */ + if (nread == 0) + { + /* eof, nothing to do */ + } + else if (nread != sizeof(header)) + { + /* unexpected number of bytes, also covers (nread < 0) */ + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + FilePathName(thisfile)))); + } + else + { + /* read length of compressed data, read (and decompress) data */ + char *buff = file->cBuffer; + + Assert(file->cBuffer != NULL); + + /* advance past the length field */ + file->curOffset += sizeof(header); + + /* + * raw_len==-1 means the data was not compressed after all, which + * can happen e.g. for non-compressible data with pglz. In that + * case just copy the data in place. Otherwise do the decompression. + * + * XXX Maybe we should just do the FileRead first, and then either + * decompress or memcpy() for raw_len=-1. That'd be an extra memcpy, + * but it'd make the code simpler (this way we do the error checks + * twice, once for each branch). + */ + if (header.raw_len == -1) + { + nread = FileRead(thisfile, + file->buffer.data, + header.len, + file->curOffset, + WAIT_EVENT_BUFFILE_READ); + if (nread != header.len) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + FilePathName(thisfile)))); + } + + file->nbytes = nread; + file->curOffset += nread; + } + else + { + /* + * Read compressed data into the separate buffer, and then + * decompress into the target file buffer.
+ */ + nread = FileRead(thisfile, + buff, + header.len, + file->curOffset, + WAIT_EVENT_BUFFILE_READ); + if (nread != header.len) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + FilePathName(thisfile)))); + } + + switch (file->compress) + { + case TEMP_LZ4_COMPRESSION: +#ifdef USE_LZ4 + file->nbytes = LZ4_decompress_safe(buff, + file->buffer.data, header.len, + sizeof(file->buffer)); +#endif + break; + + case TEMP_GZIP_COMPRESSION: + { +#ifdef HAVE_LIBZ + int ret; + size_t len = sizeof(file->buffer); + + ret = uncompress((uint8 *) file->buffer.data, &len, + (uint8 *) buff, header.len); + if (ret != Z_OK) + elog(ERROR, "uncompress failed"); + + file->nbytes = len; +#endif + break; + } + case TEMP_ZSTD_COMPRESSION: + { +#ifdef USE_ZSTD + size_t ret; + + ret = ZSTD_decompress(file->buffer.data, sizeof(file->buffer), + buff, header.len); + if (ZSTD_isError(ret)) + elog(ERROR, "ZSTD_decompress failed"); + + file->nbytes = ret; +#endif + break; + } + case TEMP_PGLZ_COMPRESSION: + file->nbytes = pglz_decompress(buff, header.len, + file->buffer.data, header.raw_len, false); + break; + } + file->curOffset += nread; + + if (file->nbytes < 0) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compressed data is corrupt"))); + + /* should have got the expected length */ + Assert(file->nbytes == header.raw_len); + } + } } if (track_io_timing) @@ -494,8 +786,141 @@ static void BufFileDumpBuffer(BufFile *file) { int wpos = 0; - int bytestowrite; + int bytestowrite = 0; File thisfile; + char *DataToWrite = file->buffer.data; + int nbytesOriginal = file->nbytes; + + /* + * Compress the data if requested for this temporary file (and if enabled + * by the temp_file_compression GUC). + * + * The compressed data is written to the one shared compression buffer. + * There's only a single compression operation at any given time, so one + * buffer is enough. + * + * Then we simply point the "DataToWrite" buffer at the compressed buffer. + * + * XXX I'm not 100% happy with all the variables here, there seems to be + * more than necessary. + */ + if (file->compress != TEMP_NONE_COMPRESSION) + { + char *cData; + int cSize = 0; + CompressHeader header; + + Assert(file->cBuffer != NULL); + cData = file->cBuffer; + + /* initialize the header for compression */ + header.len = -1; + header.raw_len = nbytesOriginal; + + switch (file->compress) + { + case TEMP_LZ4_COMPRESSION: + { +#ifdef USE_LZ4 + int cBufferSize = LZ4_compressBound(file->nbytes); + + /* + * XXX We might use lz4 stream compression here. Depending + * on the data, that might improve the compression ratio. + * The length is stored at the beginning, we'll fill it in + * at the end. + */ + cSize = LZ4_compress_fast(file->buffer.data, + cData + sizeof(CompressHeader), + file->nbytes, cBufferSize, + 10); /* somewhat higher value */ + if (cSize == 0) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compression failed, compressed size %d, original size %d", + cSize, nbytesOriginal))); + } +#endif + break; + } + case TEMP_GZIP_COMPRESSION: + { +#ifdef HAVE_LIBZ + int ret; + size_t len = compressBound(file->nbytes); + + /* XXX maybe lower level? the default is pretty slow ... 
*/ + ret = compress2((uint8 *) (cData + sizeof(CompressHeader)), &len, + (uint8 *) file->buffer.data, file->nbytes, + Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compression failed, compressed size %d, original size %d", + cSize, nbytesOriginal))); + } + + cSize = len; +#endif + break; + } + case TEMP_ZSTD_COMPRESSION: + { +#ifdef USE_ZSTD + size_t ret; + size_t len = ZSTD_compressBound(file->nbytes); + + /* XXX maybe lower level? the default is pretty slow ... */ + ret = ZSTD_compress(cData + sizeof(CompressHeader), len, + file->buffer.data, file->nbytes, + ZSTD_defaultCLevel()); + if (ZSTD_isError(ret)) + { + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("compression failed, compressed size %d, original size %d", + cSize, nbytesOriginal))); + } + + cSize = ret; +#endif + break; + } + case TEMP_PGLZ_COMPRESSION: + cSize = pglz_compress(file->buffer.data, file->nbytes, + cData + sizeof(CompressHeader), + PGLZ_strategy_always); + + /* + * pglz returns -1 for non-compressible data. In that case + * just copy the raw data into the output buffer. + */ + if (cSize == -1) + { + memcpy(cData + sizeof(CompressHeader), file->buffer.data, + header.raw_len); + + cSize = header.raw_len; + header.raw_len = -1; + } + break; + } + + Assert(cSize != -1); + header.len = cSize; + + /* + * Write the header with compressed length at the beginning of the + * buffer. We store both the compressed and raw lengths, and use + * raw_len=-1 when the data was not compressed after all. + */ + memcpy(cData, &header, sizeof(CompressHeader)); + file->nbytes = header.len + sizeof(CompressHeader); + + DataToWrite = cData; + } /* * Unlike BufFileLoadBuffer, we must dump the whole buffer even if it @@ -535,7 +960,7 @@ BufFileDumpBuffer(BufFile *file) INSTR_TIME_SET_ZERO(io_start); bytestowrite = FileWrite(thisfile, - file->buffer.data + wpos, + DataToWrite + wpos, bytestowrite, file->curOffset, WAIT_EVENT_BUFFILE_WRITE); @@ -564,7 +989,17 @@ BufFileDumpBuffer(BufFile *file) * logical file position, ie, original value + pos, in case that is less * (as could happen due to a small backwards seek in a dirty buffer!) */ - file->curOffset -= (file->nbytes - file->pos); + if (!file->compress) + file->curOffset -= (file->nbytes - file->pos); + else if (nbytesOriginal - file->pos != 0) + { + /* + * curOffset must be corrected also if compression is enabled, nbytes + * was changed by compression but we have to use the original value of + * nbytes + */ + file->curOffset -= bytestowrite; + } if (file->curOffset < 0) /* handle possible segment crossing */ { file->curFile--; @@ -602,8 +1037,14 @@ BufFileReadCommon(BufFile *file, void *ptr, size_t size, bool exact, bool eofOK) { if (file->pos >= file->nbytes) { - /* Try to load more data into buffer. */ - file->curOffset += file->pos; + /* + * Try to load more data into buffer. 
+ * + * curOffset is moved within BufFileLoadBuffer because the stored data + * size differs from the loaded/decompressed size + */ + if (!file->compress) + file->curOffset += file->pos; file->pos = 0; file->nbytes = 0; BufFileLoadBuffer(file); diff --git a/src/backend/utils/misc/guc_parameters.dat b/src/backend/utils/misc/guc_parameters.dat index 1128167c0251..a3fef11adbea 100644 --- a/src/backend/utils/misc/guc_parameters.dat +++ b/src/backend/utils/misc/guc_parameters.dat @@ -738,6 +738,13 @@ boot_val => 'false', }, +{ name => 'temp_file_compression', type => 'enum', context => 'PGC_USERSET', group => 'CLIENT_CONN_STATEMENT', + short_desc => 'Sets the default compression method for temporary files.', + variable => 'temp_file_compression', + boot_val => 'TEMP_NONE_COMPRESSION', + options => 'temp_file_compression_options', +}, + { name => 'default_transaction_isolation', type => 'enum', context => 'PGC_USERSET', group => 'CLIENT_CONN_STATEMENT', short_desc => 'Sets the transaction isolation level of each new transaction.', variable => 'DefaultXactIsoLevel', diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 0209b2067a22..98b55a901bae 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -78,6 +78,7 @@ #include "replication/syncrep.h" #include "storage/aio.h" #include "storage/bufmgr.h" +#include "storage/buffile.h" #include "storage/bufpage.h" #include "storage/copydir.h" #include "storage/io_worker.h" @@ -464,6 +465,24 @@ static const struct config_enum_entry default_toast_compression_options[] = { {NULL, 0, false} }; +/* + * Compression methods for temporary files; only the methods this server + * was built with are offered. + */ +static const struct config_enum_entry temp_file_compression_options[] = { + {"no", TEMP_NONE_COMPRESSION, false}, + {"pglz", TEMP_PGLZ_COMPRESSION, false}, +#ifdef USE_LZ4 + {"lz4", TEMP_LZ4_COMPRESSION, false}, +#endif +#ifdef HAVE_LIBZ + {"gzip", TEMP_GZIP_COMPRESSION, false}, +#endif +#ifdef USE_ZSTD + {"zstd", TEMP_ZSTD_COMPRESSION, false}, +#endif + {NULL, 0, false} +}; + static const struct config_enum_entry wal_compression_options[] = { {"pglz", WAL_COMPRESSION_PGLZ, false}, #ifdef USE_LZ4 diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index f62b61967ef6..98a5ec9f16dc 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -182,6 +182,7 @@ #max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated # for NOTIFY / LISTEN queue +#temp_file_compression = 'no' # enables temporary file compression # - Kernel Resources - diff --git a/src/backend/utils/sort/tuplestore.c b/src/backend/utils/sort/tuplestore.c index c9aecab8d66c..db3de5cd6b20 100644 --- a/src/backend/utils/sort/tuplestore.c +++ b/src/backend/utils/sort/tuplestore.c @@ -860,7 +860,13 @@ tuplestore_puttuple_common(Tuplestorestate *state, void *tuple) */ oldcxt = MemoryContextSwitchTo(state->context->parent); - state->myfile = BufFileCreateTemp(state->interXact); + /* + * If random access was requested, we can't compress the temp file.
+ */ + if ((state->eflags & EXEC_FLAG_BACKWARD) != 0) + state->myfile = BufFileCreateTemp(state->interXact); + else + state->myfile = BufFileCreateCompressTemp(state->interXact); MemoryContextSwitchTo(oldcxt); diff --git a/src/include/storage/buffile.h b/src/include/storage/buffile.h index a2f4821f240b..ac6fe6029399 100644 --- a/src/include/storage/buffile.h +++ b/src/include/storage/buffile.h @@ -32,11 +32,23 @@ typedef struct BufFile BufFile; +typedef enum +{ + TEMP_NONE_COMPRESSION, + TEMP_PGLZ_COMPRESSION, + TEMP_LZ4_COMPRESSION, + TEMP_GZIP_COMPRESSION, + TEMP_ZSTD_COMPRESSION +} TempCompression; + +extern PGDLLIMPORT int temp_file_compression; + /* * prototypes for functions in buffile.c */ extern BufFile *BufFileCreateTemp(bool interXact); +extern BufFile *BufFileCreateCompressTemp(bool interXact); extern void BufFileClose(BufFile *file); pg_nodiscard extern size_t BufFileRead(BufFile *file, void *ptr, size_t size); extern void BufFileReadExact(BufFile *file, void *ptr, size_t size); diff --git a/src/test/regress/GNUmakefile b/src/test/regress/GNUmakefile index ef2bddf42cab..ddd993f88899 100644 --- a/src/test/regress/GNUmakefile +++ b/src/test/regress/GNUmakefile @@ -94,6 +94,18 @@ installdirs-tests: installdirs REGRESS_OPTS = --dlpath=. --max-concurrent-tests=20 \ $(EXTRA_REGRESS_OPTS) +ifeq ($(with_lz4),yes) +override EXTRA_TESTS := $(EXTRA_TESTS) join_hash_lz4 +endif + +ifeq ($(with_zlib),yes) +override EXTRA_TESTS := $(EXTRA_TESTS) join_hash_gzip +endif + +ifeq ($(with_zstd),yes) +override EXTRA_TESTS := $(EXTRA_TESTS) join_hash_zstd +endif + check: all $(pg_regress_check) $(REGRESS_OPTS) --schedule=$(srcdir)/parallel_schedule $(MAXCONNOPT) $(EXTRA_TESTS) diff --git a/src/test/regress/expected/join_hash_gzip.out b/src/test/regress/expected/join_hash_gzip.out new file mode 100644 index 000000000000..b8e04588b46c --- /dev/null +++ b/src/test/regress/expected/join_hash_gzip.out @@ -0,0 +1,1166 @@ +-- +-- exercises for the hash join code +-- +begin; +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'gzip'; +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; +-- Make a relation whose size we will under-estimate. 
We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. +create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; +-- Make a relation with a couple of enormous tuples. +create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN 
+------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); 
+ initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(6 rows) + +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +------------------------------------------------------------------ + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +--------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 1 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- The "ugly" case: increasing the number of batches during 
execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(6 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +----------------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 4 +(1 row) + +rollback to settings; +-- A couple of other hash join tests unrelated to work_mem management. +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + original | final +----------+------- + 1 | 1 +(1 row) + +rollback to settings; +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 
2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- A full outer join where every record is matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- A full outer join where every record is not matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Parallel Seq Scan on simple s + -> Parallel Hash + -> Parallel Seq Scan on simple r +(9 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + QUERY PLAN +---------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Left Join + Hash Cond: (wide.id = wide_1.id) + -> Parallel Seq Scan on wide + -> Parallel Hash + -> Parallel Seq Scan on wide wide_1 +(9 rows) + +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + length +-------- + 320000 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. 
+SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +ROLLBACK TO settings; +rollback; +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_1 + 
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + -> Hash + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + -> Hash + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + -> Seq Scan on public.hjtest_1 + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +ROLLBACK; +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl i8 + -> Sort + Sort Key: t1.fivethous, i4.f1 + -> Hash Join + Hash Cond: (t1.fivethous = (i4.f1 + i8.q2)) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on int4_tbl i4 +(9 rows) + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + q2 | fivethous | f1 +-----+-----------+---- + 456 | 456 | 0 + 456 | 456 | 0 + 123 | 123 | 0 + 123 | 123 | 0 +(4 rows) + +rollback; diff --git a/src/test/regress/expected/join_hash_lz4.out b/src/test/regress/expected/join_hash_lz4.out new file mode 100644 index 000000000000..966a5cd8f553 --- /dev/null +++ b/src/test/regress/expected/join_hash_lz4.out @@ -0,0 +1,1166 @@ +-- +-- exercises for the hash join code +-- +begin; +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'lz4'; +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. 
+create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; +-- Make a relation with a couple of enormous tuples. +create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | 
f +(1 row) + +rollback to settings; +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain 
(costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(6 rows) + +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +------------------------------------------------------------------ + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +--------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 1 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(6 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +----------------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 4 +(1 row) + +rollback to settings; +-- A couple of other hash join tests unrelated to work_mem management. +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + original | final +----------+------- + 1 | 1 +(1 row) + +rollback to settings; +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 
2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- A full outer join where every record is matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- A full outer join where every record is not matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Parallel Seq Scan on simple s + -> Parallel Hash + -> Parallel Seq Scan on simple r +(9 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + QUERY PLAN +---------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Left Join + Hash Cond: (wide.id = wide_1.id) + -> Parallel Seq Scan on wide + -> Parallel Hash + -> Parallel Seq Scan on wide wide_1 +(9 rows) + +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + length +-------- + 320000 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. 
+SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +ROLLBACK TO settings; +rollback; +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_1 + 
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + -> Hash + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + -> Hash + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + -> Seq Scan on public.hjtest_1 + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +ROLLBACK; +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
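The join_hash_lz4.out file above mirrors the stock join_hash expected output; the functional difference is essentially the "set local temp_file_compression = 'lz4'" at the top of the script, which routes every batch a hash join spills to disk through the compressed temporary-file path added by this patch. A minimal illustrative sketch of the same check, run by hand rather than through pg_regress (assuming a build with LZ4 support and a session that already has the "simple" table and the hash_join_batches() helper defined in the test above), could look like:

    begin;
    -- keep the hash table small so the join is forced to use multiple batches
    set local work_mem = '128kB';
    set local hash_mem_multiplier = 1.0;
    set local enable_hashjoin = on;
    -- spill those batches through LZ4-compressed temporary files
    set local temp_file_compression = 'lz4';
    -- the result must match the uncompressed run ...
    select count(*) from simple r join simple s using (id);
    -- ... and the helper should confirm that more than one batch was used,
    -- i.e. tuples really were written to and read back from temp files
    select original > 1 as initially_multibatch
      from hash_join_batches($$
        select count(*) from simple r join simple s using (id);
      $$);
    rollback;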
+begin; +set local enable_hashjoin = on; +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl i8 + -> Sort + Sort Key: t1.fivethous, i4.f1 + -> Hash Join + Hash Cond: (t1.fivethous = (i4.f1 + i8.q2)) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on int4_tbl i4 +(9 rows) + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + q2 | fivethous | f1 +-----+-----------+---- + 456 | 456 | 0 + 456 | 456 | 0 + 123 | 123 | 0 + 123 | 123 | 0 +(4 rows) + +rollback; diff --git a/src/test/regress/expected/join_hash_pglz.out b/src/test/regress/expected/join_hash_pglz.out new file mode 100644 index 000000000000..99c67f982af9 --- /dev/null +++ b/src/test/regress/expected/join_hash_pglz.out @@ -0,0 +1,1166 @@ +-- +-- exercises for the hash join code +-- +begin; +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'pglz'; +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. 
+create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; +-- Make a relation with a couple of enormous tuples. +create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | 
f +(1 row) + +rollback to settings; +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain 
(costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(6 rows) + +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +------------------------------------------------------------------ + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +--------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 1 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(6 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +----------------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 4 +(1 row) + +rollback to settings; +-- A couple of other hash join tests unrelated to work_mem management. +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + original | final +----------+------- + 1 | 1 +(1 row) + +rollback to settings; +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 
2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- A full outer join where every record is matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- A full outer join where every record is not matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Parallel Seq Scan on simple s + -> Parallel Hash + -> Parallel Seq Scan on simple r +(9 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + QUERY PLAN +---------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Left Join + Hash Cond: (wide.id = wide_1.id) + -> Parallel Seq Scan on wide + -> Parallel Hash + -> Parallel Seq Scan on wide wide_1 +(9 rows) + +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + length +-------- + 320000 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. 
+SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +ROLLBACK TO settings; +rollback; +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_1 + 
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + -> Hash + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + -> Hash + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + -> Seq Scan on public.hjtest_1 + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +ROLLBACK; +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl i8 + -> Sort + Sort Key: t1.fivethous, i4.f1 + -> Hash Join + Hash Cond: (t1.fivethous = (i4.f1 + i8.q2)) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on int4_tbl i4 +(9 rows) + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + q2 | fivethous | f1 +-----+-----------+---- + 456 | 456 | 0 + 456 | 456 | 0 + 123 | 123 | 0 + 123 | 123 | 0 +(4 rows) + +rollback; diff --git a/src/test/regress/expected/join_hash_zstd.out b/src/test/regress/expected/join_hash_zstd.out new file mode 100644 index 000000000000..ee342387041a --- /dev/null +++ b/src/test/regress/expected/join_hash_zstd.out @@ -0,0 +1,1166 @@ +-- +-- exercises for the hash join code +-- +begin; +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'zstd'; +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. 
+create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; +-- Make a relation with a couple of enormous tuples. +create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | 
f +(1 row) + +rollback to settings; +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r join simple s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + t | f +(1 row) + +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain 
(costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + QUERY PLAN +------------------------------------------------------ + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(6 rows) + +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +------------------------------------------------------------------ + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); + QUERY PLAN +--------------------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 1 + -> Partial Aggregate + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on bigger_than_it_looks s +(9 rows) + +select count(*) from simple r join bigger_than_it_looks s using (id); + count +------- + 20000 +(1 row) + +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); + initially_multibatch | increased_batches +----------------------+------------------- + f | t +(1 row) + +rollback to settings; +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
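Editorial note (not part of the patch): the reason growing the batch count cannot help in this "ugly" case is visible directly in the test data. Every row of extremely_skewed carries the same key, so however many times the batch count doubles, all 20,000 rows keep hashing into the same batch. A quick sanity check, assuming the tables created above are still in place:

select id, count(*) from extremely_skewed group by id;
--  id | count
-- ----+-------
--  42 | 20000
-- (1 row)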
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(6 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +-------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Hash + -> Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 2 +(1 row) + +rollback to settings; +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); + QUERY PLAN +----------------------------------------------------------------- + Aggregate + -> Gather + Workers Planned: 1 + -> Parallel Hash Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on extremely_skewed s +(8 rows) + +select count(*) from simple r join extremely_skewed s using (id); + count +------- + 20000 +(1 row) + +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); + original | final +----------+------- + 1 | 4 +(1 row) + +rollback to settings; +-- A couple of other hash join tests unrelated to work_mem management. +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); + original | final +----------+------- + 1 | 1 +(1 row) + +rollback to settings; +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
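Editorial note (not part of the patch): the hash_join_batches() helper defined at the top of each of these test files simply pulls the 'Original Hash Batches' and 'Hash Batches' keys out of the Hash node of an EXPLAIN (ANALYZE, FORMAT JSON) plan, which is also how instrumentation from non-participating leaders is checked above. The same counters can be read by eye from the text format, where, for a multi-batch join, the Hash node prints a line shaped roughly like the commented sketch below (the concrete numbers depend on work_mem and vary between runs):

set local work_mem = '128kB';
set local hash_mem_multiplier = 1.0;
explain (analyze, costs off, timing off, summary off)
  select count(*) from simple r join simple s using (id);
-- ...
--   ->  Hash (actual rows=... loops=...)
--         Buckets: ... (originally ...)  Batches: ... (originally ...)  Memory Usage: ...kB
-- ...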
+create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Hash + -> Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 
2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + QUERY PLAN +------------------------------------------------------------------------------------ + Aggregate + -> Nested Loop Left Join + Join Filter: ((join_foo.id < (b1.id + 1)) AND (join_foo.id > (b1.id - 1))) + -> Seq Scan on join_foo + -> Gather + Workers Planned: 2 + -> Parallel Hash Join + Hash Cond: (b1.id = b2.id) + -> Parallel Seq Scan on join_bar b1 + -> Parallel Hash + -> Parallel Seq Scan on join_bar b2 +(11 rows) + +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; + count +------- + 3 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); + multibatch +------------ + f +(1 row) + +rollback to settings; +-- A full outer join where every record is matched. 
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: (r.id = s.id) + -> Seq Scan on simple r + -> Hash + -> Seq Scan on simple s +(6 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: (r.id = s.id) + -> Parallel Seq Scan on simple r + -> Parallel Hash + -> Parallel Seq Scan on simple s +(9 rows) + +select count(*) from simple r full outer join simple s using (id); + count +------- + 20000 +(1 row) + +rollback to settings; +-- A full outer join where every record is not matched. 
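Editorial note (not part of the patch): in the next group of queries both tables hold ids 1..20000, so r.id is always positive while 0 - s.id is always negative; no pair ever matches, and the full join returns each side's 20,000 rows NULL-extended, i.e. 40,000 in total. The expected figure can be derived without the join:

select (select count(*) from simple) + (select count(*) from simple) as expected_rows;
--  expected_rows
-- ---------------
--          40000
-- (1 row)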
+-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +---------------------------------------- + Aggregate + -> Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Seq Scan on simple s + -> Hash + -> Seq Scan on simple r +(6 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + QUERY PLAN +------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Full Join + Hash Cond: ((0 - s.id) = r.id) + -> Parallel Seq Scan on simple s + -> Parallel Hash + -> Parallel Seq Scan on simple r +(9 rows) + +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); + count +------- + 40000 +(1 row) + +rollback to settings; +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + QUERY PLAN +---------------------------------------------------------------- + Finalize Aggregate + -> Gather + Workers Planned: 2 + -> Partial Aggregate + -> Parallel Hash Left Join + Hash Cond: (wide.id = wide_1.id) + -> Parallel Seq Scan on wide + -> Parallel Hash + -> Parallel Seq Scan on wide wide_1 +(9 rows) + +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); + length +-------- + 320000 +(1 row) + +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); + multibatch +------------ + t +(1 row) + +rollback to settings; +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. 
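Editorial note (not part of the patch): whether the UPDATE below really takes the HOT path can be cross-checked from the cumulative statistics views; statistics are flushed asynchronously, so outside a controlled environment a short delay may be needed before the counters move.

-- hypothetical verification that the UPDATE on hjtest_matchbits_t2 was HOT
SELECT n_tup_upd, n_tup_hot_upd
  FROM pg_stat_user_tables
 WHERE relname = 'hjtest_matchbits_t2';
-- once stats are flushed, both counters are expected to read 1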
+SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; + id | id +----+---- + 1 | + | 2 +(2 rows) + +ROLLBACK TO settings; +rollback; +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: ((hjtest_1.id = (SubPlan 1)) AND ((SubPlan 2) = (SubPlan 3))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_1 + 
Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + -> Hash + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + QUERY PLAN +------------------------------------------------------------------------------------------------ + Hash Join + Output: hjtest_1.a, hjtest_2.a, (hjtest_1.tableoid)::regclass, (hjtest_2.tableoid)::regclass + Hash Cond: (((SubPlan 1) = hjtest_1.id) AND ((SubPlan 3) = (SubPlan 2))) + Join Filter: (hjtest_1.a <> hjtest_2.b) + -> Seq Scan on public.hjtest_2 + Output: hjtest_2.a, hjtest_2.tableoid, hjtest_2.id, hjtest_2.c, hjtest_2.b + Filter: ((SubPlan 5) < 55) + SubPlan 5 + -> Result + Output: (hjtest_2.c * 5) + -> Hash + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + -> Seq Scan on public.hjtest_1 + Output: hjtest_1.a, hjtest_1.tableoid, hjtest_1.id, hjtest_1.b + Filter: ((SubPlan 4) < 50) + SubPlan 4 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 2 + -> Result + Output: (hjtest_1.b * 5) + SubPlan 1 + -> Result + Output: 1 + One-Time Filter: (hjtest_2.id = 1) + SubPlan 3 + -> Result + Output: (hjtest_2.c * 5) +(28 rows) + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + a1 | a2 | t1 | t2 +------+----+----------+---------- + text | t | hjtest_1 | hjtest_2 +(1 row) + +ROLLBACK; +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
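Editorial note (a hypothetical check, not part of the patch): because the inner hash key i4.f1 + i8.q2 references the outer relation i8, the hash table cannot be reused across rescans; under EXPLAIN ANALYZE the Hash node should therefore report loops equal to the number of rows in int8_tbl (5 in the standard regression database) rather than 1.

explain (analyze, costs off, timing off, summary off)
select i8.q2, ss.* from
int8_tbl i8,
lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4
         on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss;
-- expect the Hash node beneath the inner Hash Join to show loops=5,
-- i.e. one hash table build per outer row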
+begin; +set local enable_hashjoin = on; +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + QUERY PLAN +----------------------------------------------------------- + Nested Loop + -> Seq Scan on int8_tbl i8 + -> Sort + Sort Key: t1.fivethous, i4.f1 + -> Hash Join + Hash Cond: (t1.fivethous = (i4.f1 + i8.q2)) + -> Seq Scan on tenk1 t1 + -> Hash + -> Seq Scan on int4_tbl i4 +(9 rows) + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + q2 | fivethous | f1 +-----+-----------+---- + 456 | 456 | 0 + 456 | 456 | 0 + 123 | 123 | 0 + 123 | 123 | 0 +(4 rows) + +rollback; diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a0f5fab0f5df..e18fecbc6692 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -15,7 +15,6 @@ test: test_setup # The first group of parallel tests # ---------- test: boolean char name varchar text int2 int4 int8 oid float4 float8 bit numeric txid uuid enum money rangetypes pg_lsn regproc - # ---------- # The second group of parallel tests # multirangetypes depends on rangetypes @@ -140,3 +139,6 @@ test: fast_default # run tablespace test at the end because it drops the tablespace created during # setup that other tests may use. test: tablespace + +# this test is equivalent to join_hash test just the compression is enabled +test: join_hash_pglz diff --git a/src/test/regress/sql/join_hash_gzip.sql b/src/test/regress/sql/join_hash_gzip.sql new file mode 100644 index 000000000000..a70974f843eb --- /dev/null +++ b/src/test/regress/sql/join_hash_gzip.sql @@ -0,0 +1,626 @@ +-- +-- exercises for the hash join code +-- + +begin; + +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'gzip'; + +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; + +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; + +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. 
+create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; + +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. +create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; + +-- Make a relation with a couple of enormous tuples. +create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); + +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches 
+ from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that 
didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- A couple of other hash join tests unrelated to work_mem management. + +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
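Editorial note (not part of the patch): this gzip variant runs with temp_file_compression = 'gzip' for the whole file, and the multi-batch rescan joins below are exactly the kind of query that spills batch files to disk. A purely illustrative way to observe what the GUC changes is to watch the temporary-file counters around one of those queries; with compression enabled one would expect fewer temp bytes for the same join, though the exact accounting depends on the implementation.

-- snapshot the cumulative temp-file statistics for this database
select temp_files, temp_bytes
  from pg_stat_database
 where datname = current_database();

-- alternatively, have every temporary file logged together with its size
set log_temp_files = 0;   -- 0 logs all temp files; positive values are a size threshold in kB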
+ +create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); + +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set 
parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- A full outer join where every record is matched. + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- A full outer join where every record is not matched. 
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + + +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) + +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); +rollback to settings; + + +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. +SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; +ROLLBACK TO settings; + +rollback; + +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). 
It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order + +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); + +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +ROLLBACK; + +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; + +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +rollback; diff --git a/src/test/regress/sql/join_hash_lz4.sql b/src/test/regress/sql/join_hash_lz4.sql new file mode 100644 index 000000000000..1d19c1980e1d --- /dev/null +++ b/src/test/regress/sql/join_hash_lz4.sql @@ -0,0 +1,626 @@ +-- +-- exercises for the hash join code +-- + +begin; + +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'lz4'; + +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; + +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; + +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; + +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. +create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; + +-- Make a relation with a couple of enormous tuples. 
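Before running this lz4 variant it can be useful to check which compression methods a particular build actually accepts. The following is only a side sketch, not part of the test file: it assumes temp_file_compression is registered as an enum GUC (so pg_settings.enumvals is populated) and that 'lz4' is offered only when the server was configured with --with-lz4.

select name, setting, enumvals
  from pg_settings
 where name = 'temp_file_compression';

If 'lz4' is absent from enumvals, the SET in the preamble above would be expected to fail, and this file could not run as-is on that build.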
+create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); + +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as 
initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
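The multi-batch scenarios above and below (the "good", "bad" and "ugly" cases) are the ones that actually spill batches to temporary files, which is the code path the temp_file_compression setting targets. A rough way to watch the effect by hand, outside the regression framework (a sketch only: it assumes a session that is allowed to change log_temp_files and that compression is not already enabled by default), is to compare the logged spill sizes with and without lz4:

set log_temp_files = 0;        -- log every temporary file with its size
set work_mem = '128kB';
reset temp_file_compression;   -- baseline run without compression
select count(*) from simple r join simple s using (id);
set temp_file_compression = 'lz4';
select count(*) from simple r join simple s using (id);

The second run should normally log smaller temporary files while returning the same count; the regression test itself only asserts query results and batch counts, never on-disk sizes.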
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- A couple of other hash join tests unrelated to work_mem management. + +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+ +create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); + +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set 
parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- A full outer join where every record is matched. + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- A full outer join where every record is not matched. 
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + + +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) + +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); +rollback to settings; + + +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. +SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; +ROLLBACK TO settings; + +rollback; + +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). 
It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order + +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); + +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +ROLLBACK; + +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; + +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +rollback; diff --git a/src/test/regress/sql/join_hash_pglz.sql b/src/test/regress/sql/join_hash_pglz.sql new file mode 100644 index 000000000000..2686afab272d --- /dev/null +++ b/src/test/regress/sql/join_hash_pglz.sql @@ -0,0 +1,626 @@ +-- +-- exercises for the hash join code +-- + +begin; + +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'pglz'; + +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; + +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; + +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; + +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. +create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; + +-- Make a relation with a couple of enormous tuples. 
+create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); + +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage says within our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as 
initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
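Unlike the lz4 and zstd variants, this pglz copy of the test does not depend on any configure-time library, so the compressed spill path gets exercised on every build. As a minimal by-hand sanity check (again only a sketch, not something this file adds), a spill-heavy join such as the extremely_skewed cases below should return the same answer with and without pglz compression:

set work_mem = '128kB';
set temp_file_compression = 'pglz';
select count(*) from simple r join extremely_skewed s using (id);
reset temp_file_compression;
select count(*) from simple r join extremely_skewed s using (id);

Both counts should match exactly; only the temporary files written under the hood should differ.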
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- A couple of other hash join tests unrelated to work_mem management. + +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+ +create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); + +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set 
parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- A full outer join where every record is matched. + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- A full outer join where every record is not matched. 
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + + +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) + +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); +rollback to settings; + + +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. +SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; +ROLLBACK TO settings; + +rollback; + +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). 
It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order + +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); + +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +ROLLBACK; + +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; + +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +rollback; diff --git a/src/test/regress/sql/join_hash_zstd.sql b/src/test/regress/sql/join_hash_zstd.sql new file mode 100644 index 000000000000..3337ce028888 --- /dev/null +++ b/src/test/regress/sql/join_hash_zstd.sql @@ -0,0 +1,626 @@ +-- +-- exercises for the hash join code +-- + +begin; + +set local min_parallel_table_scan_size = 0; +set local parallel_setup_cost = 0; +set local enable_hashjoin = on; +set local temp_file_compression = 'zstd'; + +-- Extract bucket and batch counts from an explain analyze plan. In +-- general we can't make assertions about how many batches (or +-- buckets) will be required because it can vary, but we can in some +-- special cases and we can check for growth. +create or replace function find_hash(node json) +returns json language plpgsql +as +$$ +declare + x json; + child json; +begin + if node->>'Node Type' = 'Hash' then + return node; + else + for child in select json_array_elements(node->'Plans') + loop + x := find_hash(child); + if x is not null then + return x; + end if; + end loop; + return null; + end if; +end; +$$; +create or replace function hash_join_batches(query text) +returns table (original int, final int) language plpgsql +as +$$ +declare + whole_plan json; + hash_node json; +begin + for whole_plan in + execute 'explain (analyze, format ''json'') ' || query + loop + hash_node := find_hash(json_extract_path(whole_plan, '0', 'Plan')); + original := hash_node->>'Original Hash Batches'; + final := hash_node->>'Hash Batches'; + return next; + end loop; +end; +$$; + +-- Make a simple relation with well distributed keys and correctly +-- estimated size. +create table simple as + select generate_series(1, 20000) AS id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table simple set (parallel_workers = 2); +analyze simple; + +-- Make a relation whose size we will under-estimate. We want stats +-- to say 1000 rows, but actually there are 20,000 rows. +create table bigger_than_it_looks as + select generate_series(1, 20000) as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'; +alter table bigger_than_it_looks set (autovacuum_enabled = 'false'); +alter table bigger_than_it_looks set (parallel_workers = 2); +analyze bigger_than_it_looks; +update pg_class set reltuples = 1000 where relname = 'bigger_than_it_looks'; + +-- Make a relation whose size we underestimate and that also has a +-- kind of skew that breaks our batching scheme. We want stats to say +-- 2 rows, but actually there are 20,000 rows with the same key. +create table extremely_skewed (id int, t text); +alter table extremely_skewed set (autovacuum_enabled = 'false'); +alter table extremely_skewed set (parallel_workers = 2); +analyze extremely_skewed; +insert into extremely_skewed + select 42 as id, 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + from generate_series(1, 20000); +update pg_class + set reltuples = 2, relpages = pg_relation_size('extremely_skewed') / 8192 + where relname = 'extremely_skewed'; + +-- Make a relation with a couple of enormous tuples. 
+create table wide as select generate_series(1, 2) as id, rpad('', 320000, 'x') as t; +alter table wide set (parallel_workers = 2); + +-- The "optimal" case: the hash table fits in memory; we plan for 1 +-- batch, we stick to that number, and peak memory usage stays within +-- our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- The "good" case: batches required, but we plan the right number; we +-- plan for some number of batches, and we stick to that number, and +-- peak memory usage stays within our work_mem budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join simple s using (id); +select count(*) from simple r join simple s using (id); +select original > 1 as 
initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +-- parallel full multi-batch hash join +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- The "bad" case: during execution we need to increase number of +-- batches; in this case we plan for 1 batch, and increase at least a +-- couple of times, and peak memory usage stays within our work_mem +-- budget + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) FROM simple r JOIN bigger_than_it_looks s USING (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '192kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join bigger_than_it_looks s using (id); +select count(*) from simple r join bigger_than_it_looks s using (id); +select original > 1 as initially_multibatch, final > original as increased_batches + from hash_join_batches( +$$ + select count(*) from simple r join bigger_than_it_looks s using (id); +$$); +rollback to settings; + +-- The "ugly" case: increasing the number of batches during execution +-- doesn't help, so stop trying to fit in work_mem and hope for the +-- best; in this case we plan for 1 batch, increases just once and +-- then stop increasing because that didn't help at all, so we blow +-- right through the work_mem budget and hope for the best... 
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-oblivious hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = off; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- parallel with parallel-aware hash join +savepoint settings; +set local max_parallel_workers_per_gather = 1; +set local work_mem = '128kB'; +set local hash_mem_multiplier = 1.0; +set local enable_parallel_hash = on; +explain (costs off) + select count(*) from simple r join extremely_skewed s using (id); +select count(*) from simple r join extremely_skewed s using (id); +select * from hash_join_batches( +$$ + select count(*) from simple r join extremely_skewed s using (id); +$$); +rollback to settings; + +-- A couple of other hash join tests unrelated to work_mem management. + +-- Check that EXPLAIN ANALYZE has data even if the leader doesn't participate +savepoint settings; +set local max_parallel_workers_per_gather = 2; +set local work_mem = '4MB'; +set local hash_mem_multiplier = 1.0; +set local parallel_leader_participation = off; +select * from hash_join_batches( +$$ + select count(*) from simple r join simple s using (id); +$$); +rollback to settings; + +-- Exercise rescans. We'll turn off parallel_leader_participation so +-- that we can check that instrumentation comes back correctly. 
+ +create table join_foo as select generate_series(1, 3) as id, 'xxxxx'::text as t; +alter table join_foo set (parallel_workers = 0); +create table join_bar as select generate_series(1, 10000) as id, 'xxxxx'::text as t; +alter table join_bar set (parallel_workers = 2); + +-- multi-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-oblivious +savepoint settings; +set enable_parallel_hash = off; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- multi-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '64kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- single-batch with rescan, parallel-aware +savepoint settings; +set enable_parallel_hash = on; +set parallel_leader_participation = off; +set min_parallel_table_scan_size = 0; +set parallel_setup_cost = 0; +set 
parallel_tuple_cost = 0; +set max_parallel_workers_per_gather = 2; +set enable_material = off; +set enable_mergejoin = off; +set work_mem = '4MB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +select final > 1 as multibatch + from hash_join_batches( +$$ + select count(*) from join_foo + left join (select b1.id, b1.t from join_bar b1 join join_bar b2 using (id)) ss + on join_foo.id < ss.id + 1 and join_foo.id > ss.id - 1; +$$); +rollback to settings; + +-- A full outer join where every record is matched. + +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s using (id); +select count(*) from simple r full outer join simple s using (id); +rollback to settings; + +-- A full outer join where every record is not matched. 
+ +-- non-parallel +savepoint settings; +set local max_parallel_workers_per_gather = 0; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism not possible with parallel-oblivious full hash join +savepoint settings; +set enable_parallel_hash = off; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + +-- parallelism is possible with parallel-aware full hash join +savepoint settings; +set local max_parallel_workers_per_gather = 2; +explain (costs off) + select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +select count(*) from simple r full outer join simple s on (r.id = 0 - s.id); +rollback to settings; + + +-- exercise special code paths for huge tuples (note use of non-strict +-- expression and left join required to get the detoasted tuple into +-- the hash table) + +-- parallel with parallel-aware hash join (hits ExecParallelHashLoadTuple and +-- sts_puttuple oversized tuple cases because it's multi-batch) +savepoint settings; +set max_parallel_workers_per_gather = 2; +set enable_parallel_hash = on; +set work_mem = '128kB'; +set hash_mem_multiplier = 1.0; +explain (costs off) + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select length(max(s.t)) +from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +select final > 1 as multibatch + from hash_join_batches( +$$ + select length(max(s.t)) + from wide left join (select id, coalesce(t, '') || '' as t from wide) s using (id); +$$); +rollback to settings; + + +-- Hash join reuses the HOT status bit to indicate match status. This can only +-- be guaranteed to produce correct results if all the hash join tuple match +-- bits are reset before reuse. This is done upon loading them into the +-- hashtable. +SAVEPOINT settings; +SET enable_parallel_hash = on; +SET min_parallel_table_scan_size = 0; +SET parallel_setup_cost = 0; +SET parallel_tuple_cost = 0; +CREATE TABLE hjtest_matchbits_t1(id int); +CREATE TABLE hjtest_matchbits_t2(id int); +INSERT INTO hjtest_matchbits_t1 VALUES (1); +INSERT INTO hjtest_matchbits_t2 VALUES (2); +-- Update should create a HOT tuple. If this status bit isn't cleared, we won't +-- correctly emit the NULL-extended unmatching tuple in full hash join. +UPDATE hjtest_matchbits_t2 set id = 2; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id + ORDER BY t1.id; +-- Test serial full hash join. +-- Resetting parallel_setup_cost should force a serial plan. +-- Just to be safe, however, set enable_parallel_hash to off, as parallel full +-- hash joins are only supported with shared hashtables. +RESET parallel_setup_cost; +SET enable_parallel_hash = off; +SELECT * FROM hjtest_matchbits_t1 t1 FULL JOIN hjtest_matchbits_t2 t2 ON t1.id = t2.id; +ROLLBACK TO settings; + +rollback; + +-- Verify that hash key expressions reference the correct +-- nodes. Hashjoin's hashkeys need to reference its outer plan, Hash's +-- need to reference Hash's outer plan (which is below HashJoin's +-- inner plan). 
It's not trivial to verify that the references are +-- correct (we don't display the hashkeys themselves), but if the +-- hashkeys contain subplan references, those will be displayed. Force +-- subplans to appear just about everywhere. +-- +-- Bug report: +-- https://www.postgresql.org/message-id/CAPpHfdvGVegF_TKKRiBrSmatJL2dR9uwFCuR%2BteQ_8tEXU8mxg%40mail.gmail.com +-- +BEGIN; +SET LOCAL enable_sort = OFF; -- avoid mergejoins +SET LOCAL from_collapse_limit = 1; -- allows easy changing of join order + +CREATE TABLE hjtest_1 (a text, b int, id int, c bool); +CREATE TABLE hjtest_2 (a bool, id int, b text, c int); + +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 2, 1, false); -- matches +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 2, false); -- fails id join condition +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 20, 1, false); -- fails < 50 +INSERT INTO hjtest_1(a, b, id, c) VALUES ('text', 1, 1, false); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 2); -- matches +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 3, 'another', 7); -- fails id join condition +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 90); -- fails < 55 +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'another', 3); -- fails (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) +INSERT INTO hjtest_2(a, id, b, c) VALUES (true, 1, 'text', 1); -- fails hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_1, hjtest_2 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +EXPLAIN (COSTS OFF, VERBOSE) +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +SELECT hjtest_1.a a1, hjtest_2.a a2,hjtest_1.tableoid::regclass t1, hjtest_2.tableoid::regclass t2 +FROM hjtest_2, hjtest_1 +WHERE + hjtest_1.id = (SELECT 1 WHERE hjtest_2.id = 1) + AND (SELECT hjtest_1.b * 5) = (SELECT hjtest_2.c*5) + AND (SELECT hjtest_1.b * 5) < 50 + AND (SELECT hjtest_2.c * 5) < 55 + AND hjtest_1.a <> hjtest_2.b; + +ROLLBACK; + +-- Verify that we behave sanely when the inner hash keys contain parameters +-- (that is, outer or lateral references). This situation has to defeat +-- re-use of the inner hash table across rescans. 
+begin; +set local enable_hashjoin = on; + +explain (costs off) +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +select i8.q2, ss.* from +int8_tbl i8, +lateral (select t1.fivethous, i4.f1 from tenk1 t1 join int4_tbl i4 + on t1.fivethous = i4.f1+i8.q2 order by 1,2) ss; + +rollback; diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 432509277c98..b67b14824d1a 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3028,6 +3028,7 @@ Tcl_NotifierProcs Tcl_Obj Tcl_Size Tcl_Time +TempCompression TempNamespaceStatus TestDSMRegistryHashEntry TestDSMRegistryStruct
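A minimal interactive sketch of what these tests exercise, assuming the patch is applied and the server was built with zstd support. Only the temp_file_compression GUC and the 'zstd' value come from the patch itself; the table name compress_demo, the row count, and the work_mem setting are illustrative assumptions, and hash_join_batches() refers to the helper defined in the test file above.

-- Sketch only, not part of the patch: force a multi-batch hash join whose
-- spill files are written with zstd compression, and report batch counts.
begin;
set local temp_file_compression = 'zstd'; -- compress the join's temporary files
set local work_mem = '64kB';              -- small enough to force batching
set local hash_mem_multiplier = 1.0;
set local enable_mergejoin = off;         -- steer the planner toward a hash join
create temp table compress_demo as
  select generate_series(1, 20000) as id, repeat('x', 100) as t;
analyze compress_demo;
-- hash_join_batches() runs EXPLAIN (ANALYZE, FORMAT JSON) on the query and
-- returns the original and final hash batch counts; final > 1 means the join
-- spilled to temporary files, which are now written compressed.
select original, final
  from hash_join_batches(
$$
  select count(*) from compress_demo r join compress_demo s using (id);
$$);
rollback;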