diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 1b7a63cbaa40..1f2fec95de53 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,16 +4,17 @@ MODULE_big = amcheck
OBJS = \
$(WIN32RES) \
verify_common.o \
	verify_gin.o \
+	verify_gist.o \
verify_heapam.o \
verify_nbtree.o
EXTENSION = amcheck
DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
- amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql
+ amcheck--1.3--1.4.sql amcheck--1.4--1.5.sql amcheck--1.5--1.6.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
-REGRESS = check check_btree check_gin check_heap
+REGRESS = check check_btree check_gin check_gist check_heap
EXTRA_INSTALL = contrib/pg_walinspect
TAP_TESTS = 1
diff --git a/contrib/amcheck/amcheck--1.5--1.6.sql b/contrib/amcheck/amcheck--1.5--1.6.sql
new file mode 100644
index 000000000000..a6a1debff12c
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.5--1.6.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.5--1.6.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.6'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass,boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index c8ba6d7c9bc3..2f329ef2cf49 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.5'
+default_version = '1.6'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 000000000000..cbc3e27e6793
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,145 @@
+SELECT setseed(1);
+ setseed
+---------
+
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse pages
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check
+------------------
+
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+ attstorage
+------------
+ x
+(1 row)
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
+ gist_index_check
+------------------
+
+(1 row)
+
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index 1f0c347ed541..13b36b495ed9 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -5,6 +5,7 @@ amcheck_sources = files(
	'verify_gin.c',
+	'verify_gist.c',
	'verify_heapam.c',
	'verify_nbtree.c',
)
if host_system == 'windows'
@@ -27,6 +28,7 @@ install_data(
'amcheck--1.2--1.3.sql',
'amcheck--1.3--1.4.sql',
'amcheck--1.4--1.5.sql',
+ 'amcheck--1.5--1.6.sql',
kwargs: contrib_data_args,
)
@@ -39,6 +41,7 @@ tests += {
'check',
'check_btree',
'check_gin',
+ 'check_gist',
'check_heap',
],
},
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 000000000000..37966423b8b8
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,62 @@
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse pages
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
+
+--
+-- Similar to BUG #15597
+--
+CREATE TABLE toast_bug(c point,buggy text);
+ALTER TABLE toast_bug ALTER COLUMN buggy SET STORAGE extended;
+CREATE INDEX toasty ON toast_bug USING gist(c) INCLUDE(buggy);
+
+-- pg_attribute entry for toasty.buggy (the index) will have plain storage:
+UPDATE pg_attribute SET attstorage = 'p'
+WHERE attrelid = 'toasty'::regclass AND attname = 'buggy';
+
+-- Whereas pg_attribute entry for toast_bug.buggy (the table) still has extended storage:
+SELECT attstorage FROM pg_attribute
+WHERE attrelid = 'toast_bug'::regclass AND attname = 'buggy';
+
+-- Insert compressible heap tuple (comfortably exceeds TOAST_TUPLE_THRESHOLD):
+INSERT INTO toast_bug SELECT point(0,0), repeat('a', 2200);
+-- Should not get false positive report of corruption:
+SELECT gist_index_check('toasty', true);
diff --git a/contrib/amcheck/verify_common.c b/contrib/amcheck/verify_common.c
index a31ce06ed99a..e9b4887f65e4 100644
--- a/contrib/amcheck/verify_common.c
+++ b/contrib/amcheck/verify_common.c
@@ -13,6 +13,7 @@
#include "postgres.h"
#include "access/genam.h"
+#include "access/heaptoast.h"
#include "access/table.h"
#include "access/tableam.h"
#include "verify_common.h"
@@ -189,3 +190,114 @@ index_checkable(Relation rel, Oid am_id)
return amcheck_index_mainfork_expected(rel);
}
+
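+/*
+ * amcheck_normalize_tuple - produce a version of an index tuple that is
+ * suitable for fingerprinting.
+ *
+ * Returns the original tuple when no normalization is needed.  Otherwise,
+ * returns a palloc'd reformed tuple with compressed varlena datums
+ * decompressed and short varlenas converted to 1B headers, so that logically
+ * equal tuples fingerprint identically.  Callers should free the reformed
+ * tuple when it differs from the original.
+ */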
+IndexTuple
+amcheck_normalize_tuple(Relation irel, IndexTuple itup)
+{
+ TupleDesc tupleDescriptor = RelationGetDescr(irel);
+ Datum normalized[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ bool need_free[INDEX_MAX_KEYS];
+ bool formnewtup = false;
+ IndexTuple reformed;
+ int i;
+
+ /* Easy case: It's immediately clear that tuple has no varlena datums */
+ if (!IndexTupleHasVarwidths(itup))
+ return itup;
+
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ {
+ Form_pg_attribute att;
+
+ att = TupleDescAttr(tupleDescriptor, i);
+
+ /* Assume untoasted/already normalized datum initially */
+ need_free[i] = false;
+ normalized[i] = index_getattr(itup, att->attnum,
+ tupleDescriptor,
+ &isnull[i]);
+ if (att->attbyval || att->attlen != -1 || isnull[i])
+ continue;
+
+ /*
+ * Callers always pass a tuple that could safely be inserted into the
+ * index without further processing, so an external varlena header
+ * should never be encountered here
+ */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)),
+ RelationGetRelationName(irel))));
+ else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
+ VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
+ (att->attstorage == TYPSTORAGE_EXTENDED ||
+ att->attstorage == TYPSTORAGE_MAIN))
+ {
+ /*
+ * This value will be compressed by index_form_tuple() with the
+ * current storage settings. We may be here because this tuple
+ * was formed with different storage settings. So, force forming.
+ */
+ formnewtup = true;
+ }
+ else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
+ {
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
+ need_free[i] = true;
+ }
+
+ /*
+ * Short tuples may have 1B or 4B header. Convert 4B header of short
+ * tuples to 1B
+ */
+ else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
+ {
+ /* convert to short varlena */
+ Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
+ char *data = palloc(len);
+
+ SET_VARSIZE_SHORT(data, len);
+ memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
+
+ formnewtup = true;
+ normalized[i] = PointerGetDatum(data);
+ need_free[i] = true;
+ }
+ }
+
+ /*
+ * Easier case: Tuple has varlena datums, none of which are compressed or
+ * short with 4B header
+ */
+ if (!formnewtup)
+ return itup;
+
+ /*
+ * Hard case: Tuple had compressed varlena datums that necessitate
+ * creating normalized version of the tuple from uncompressed input datums
+ * (normalized input datums). This is rather naive, but shouldn't be
+ * necessary too often.
+ *
+ * In the heap, tuples may contain short varlena datums with both 1B
+ * header and 4B headers. But the corresponding index tuple should always
+ * have such varlena's with 1B headers. So, if there is a short varlena
+ * with 4B header, we need to convert it for fingerprinting.
+ *
+ * Note that we rely on deterministic index_form_tuple() TOAST compression
+ * of normalized input.
+ */
+ reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
+ reformed->t_tid = itup->t_tid;
+
+ /* Cannot leak memory here */
+ for (i = 0; i < tupleDescriptor->natts; i++)
+ if (need_free[i])
+ pfree(DatumGetPointer(normalized[i]));
+
+ return reformed;
+}
diff --git a/contrib/amcheck/verify_common.h b/contrib/amcheck/verify_common.h
index 3fa63d2121ab..ffe0d30beb36 100644
--- a/contrib/amcheck/verify_common.h
+++ b/contrib/amcheck/verify_common.h
@@ -26,3 +26,5 @@ extern void amcheck_lock_relation_and_check(Oid indrelid,
Oid am_id,
IndexDoCheckCallback check,
LOCKMODE lockmode, void *state);
+
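+/* Produce a normalized copy of an index tuple for fingerprinting */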
+extern IndexTuple amcheck_normalize_tuple(Relation irel, IndexTuple itup);
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 000000000000..c15cd6ab5564
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,664 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ * Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from children pages. Also, verification checks graph invariants:
+ * internal page must have at least one downlink, internal page can
+ * reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2025, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/heaptoast.h"
+#include "access/tableam.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "verify_common.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+ int depth;
+
+ /* Referenced block number to check next */
+ BlockNumber blkno;
+
+ /*
+ * Correctness of this parent tuple will be checked against contents of
+ * referenced page. This tuple will be NULL for the root block.
+ */
+ IndexTuple parenttup;
+
+ /*
+	 * LSN of the parent page when we scanned it.  Used to detect pages that
+	 * were split after we scanned the parent, so that we don't miss the
+	 * subtree moved to the right sibling.
+ */
+ XLogRecPtr parentlsn;
+
+ /*
+ * Reference to parent page for re-locking in case of found parent-child
+ * tuple discrepancies.
+ */
+ BlockNumber parentblk;
+
+	/* Pointer to the next stack item. */
+ struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+ /* GiST state */
+ GISTSTATE *state;
+ /* Bloom filter fingerprints index tuples */
+ bloom_filter *filter;
+
+ Snapshot snapshot;
+ Relation rel;
+ Relation heaprel;
+
+	/* Debug counter of heap tuples found present in the index */
+ int64 heaptuplespresent;
+
+ /* progress reporting stuff */
+ BlockNumber totalblocks;
+ BlockNumber reportedblocks;
+ BlockNumber scannedblocks;
+ BlockNumber deltablocks;
+
+ int leafdepth;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void giststate_init_heapallindexed(Relation rel, GistCheckState *result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly);
+static void gist_check_page(GistCheckState *check_state, GistScanItem *stack,
+ Page page, bool heapallindexed,
+ BufferAccessStrategy strategy);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+ BlockNumber childblkno,
+ BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+ Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+ Datum *values, bool *isnull,
+ bool tupleIsAlive, void *checkstate);
+static IndexTuple gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup);
+
+/*
+ * gist_index_check(index regclass, heapallindexed boolean)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+ Oid indrelid = PG_GETARG_OID(0);
+ bool heapallindexed = PG_GETARG_BOOL(1);
+
+ amcheck_lock_relation_and_check(indrelid,
+ GIST_AM_OID,
+ gist_check_parent_keys_consistency,
+ AccessShareLock,
+ &heapallindexed);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Initialize the parts of GistCheckState needed for the heapallindexed
+ * check: the Bloom filter and an MVCC snapshot.
+ */
+static void
+giststate_init_heapallindexed(Relation rel, GistCheckState *result)
+{
+ int64 total_pages;
+ int64 total_elems;
+ uint64 seed;
+
+ /*
+ * Size Bloom filter based on estimated number of tuples in index. This
+	 * logic is similar to the B-tree case; see verify_nbtree.c.
+ */
+ total_pages = result->totalblocks;
+ total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+ (int64) rel->rd_rel->reltuples);
+ seed = pg_prng_uint64(&pg_global_prng_state);
+ result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+ result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+ /*
+ * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+ * COMMITTED mode. A new snapshot is guaranteed to have all the entries
+ * it requires in the index.
+ *
+ * We must defend against the possibility that an old xact snapshot was
+ * returned at higher isolation levels when that snapshot is not safe for
+ * index scans of the target index. This is possible when the snapshot
+ * sees tuples that are before the index's indcheckxmin horizon. Throwing
+ * an error here should be very rare. It doesn't seem worth using a
+ * secondary snapshot to avoid this.
+ */
+ if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+ !TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+ result->snapshot->xmin))
+ ereport(ERROR,
+ (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+ errmsg("index \"%s\" cannot be verified using transaction snapshot",
+ RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for the GiST check.
+ *
+ * This function verifies that the tuples of internal pages cover the key
+ * space of all tuples on their child pages.  To do this we invoke
+ * gist_check_page() for every page.
+ *
+ * The check allocates a memory context and scans the GiST graph in a
+ * depth-first manner using a stack of GistScanItems.  Initially the stack
+ * contains only the root block number; on each iteration the top item is
+ * replaced by items for the block numbers it references.
+ *
+ * gist_check_page() in turn takes every parent tuple and tries to adjust it
+ * using the tuples on the referenced child page.  A parent tuple should
+ * never require any adjustment.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+ void *callback_state, bool readonly)
+{
+ BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+ GistScanItem *stack;
+ MemoryContext mctx;
+ MemoryContext oldcontext;
+ GISTSTATE *state;
+ bool heapallindexed = *((bool *) callback_state);
+ GistCheckState *check_state = palloc0(sizeof(GistCheckState));
+
+ mctx = AllocSetContextCreate(CurrentMemoryContext,
+ "amcheck context",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(mctx);
+
+ state = initGISTstate(rel);
+
+ check_state->state = state;
+ check_state->rel = rel;
+ check_state->heaprel = heaprel;
+
+ /*
+ * We don't know the height of the tree yet, but as soon as we encounter a
+ * leaf page, we will set 'leafdepth' to its depth.
+ */
+ check_state->leafdepth = -1;
+
+ check_state->totalblocks = RelationGetNumberOfBlocks(rel);
+ /* report every 100 blocks or 5%, whichever is bigger */
+ check_state->deltablocks = Max(check_state->totalblocks / 20, 100);
+
+ if (heapallindexed)
+ giststate_init_heapallindexed(rel, check_state);
+
+ /* Start the scan at the root page */
+ stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+ stack->depth = 0;
+ stack->parenttup = NULL;
+ stack->parentblk = InvalidBlockNumber;
+ stack->parentlsn = InvalidXLogRecPtr;
+ stack->blkno = GIST_ROOT_BLKNO;
+
+ /*
+	 * This scan visits the index in logical order, like GiST VACUUM did
+	 * before commit fe280694d introduced physical-order scanning.
+ */
+
+ while (stack)
+ {
+ GistScanItem *stack_next;
+ Buffer buffer;
+ Page page;
+ XLogRecPtr lsn;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Report progress */
+ if (check_state->scannedblocks > check_state->reportedblocks +
+ check_state->deltablocks)
+ {
+			elog(DEBUG1, "verified %u blocks of approximately %u total",
+ check_state->scannedblocks, check_state->totalblocks);
+ check_state->reportedblocks = check_state->scannedblocks;
+ }
+ check_state->scannedblocks++;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+ RBM_NORMAL, strategy);
+ LockBuffer(buffer, GIST_SHARE);
+ page = (Page) BufferGetPage(buffer);
+ lsn = BufferGetLSNAtomic(buffer);
+
+ /* Do basic sanity checks on the page headers */
+ check_index_page(rel, buffer, stack->blkno);
+
+ /*
+ * It's possible that the page was split since we looked at the
+		 * parent, in which case we missed the downlink to the right sibling
+ * when we scanned the parent. If so, add the right sibling to the
+ * stack now.
+ */
+ if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+ {
+ /* split page detected, install right link to the stack */
+ GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+ ptr->depth = stack->depth;
+			/* note: parenttup is NULL for items stacked from the root page */
+			ptr->parenttup = stack->parenttup ? CopyIndexTuple(stack->parenttup) : NULL;
+ ptr->parentblk = stack->parentblk;
+ ptr->parentlsn = stack->parentlsn;
+ ptr->blkno = GistPageGetOpaque(page)->rightlink;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+
+ gist_check_page(check_state, stack, page, heapallindexed, strategy);
+
+ if (!GistPageIsLeaf(page))
+ {
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /* Internal page, so recurse to the child */
+ GistScanItem *ptr;
+ ItemId iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+ ptr->depth = stack->depth + 1;
+ ptr->parenttup = CopyIndexTuple(idxtuple);
+ ptr->parentblk = stack->blkno;
+ ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+ ptr->parentlsn = lsn;
+ ptr->next = stack->next;
+ stack->next = ptr;
+ }
+ }
+
+ LockBuffer(buffer, GIST_UNLOCK);
+ ReleaseBuffer(buffer);
+
+		/* Step to the next item on the stack */
+ stack_next = stack->next;
+ if (stack->parenttup)
+ pfree(stack->parenttup);
+ pfree(stack);
+ stack = stack_next;
+ }
+
+ if (heapallindexed)
+ {
+ IndexInfo *indexinfo = BuildIndexInfo(rel);
+ TableScanDesc scan;
+
+ scan = table_beginscan_strat(heaprel, /* relation */
+ check_state->snapshot, /* snapshot */
+ 0, /* number of keys */
+ NULL, /* scan key */
+ true, /* buffer access strategy OK */
+ true); /* syncscan OK? */
+
+ /*
+ * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+ */
+ indexinfo->ii_Concurrent = true;
+
+ indexinfo->ii_Unique = false;
+ indexinfo->ii_ExclusionOps = NULL;
+ indexinfo->ii_ExclusionProcs = NULL;
+ indexinfo->ii_ExclusionStrats = NULL;
+
+ elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+ RelationGetRelationName(rel),
+ RelationGetRelationName(heaprel));
+
+ table_index_build_scan(heaprel, rel, indexinfo, true, false,
+ gist_tuple_present_callback, (void *) check_state, scan);
+
+ ereport(DEBUG1,
+ (errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+ check_state->heaptuplespresent,
+ RelationGetRelationName(heaprel),
+ 100.0 * bloom_prop_bits_set(check_state->filter))));
+
+ UnregisterSnapshot(check_state->snapshot);
+ bloom_free(check_state->filter);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(mctx);
+ pfree(check_state);
+}
+
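+/*
+ * Check a single index page: verify that leaf pages all appear at the same
+ * depth, that each tuple passes basic sanity checks and is consistent with
+ * its parent downlink, and, if heapallindexed is requested, fingerprint all
+ * leaf tuples into the Bloom filter.
+ */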
+static void
+gist_check_page(GistCheckState *check_state, GistScanItem *stack,
+ Page page, bool heapallindexed, BufferAccessStrategy strategy)
+{
+ OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+
+ /* Check that the tree has the same height in all branches */
+ if (GistPageIsLeaf(page))
+ {
+ if (check_state->leafdepth == -1)
+ check_state->leafdepth = stack->depth;
+ else if (stack->depth != check_state->leafdepth)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+ RelationGetRelationName(check_state->rel), stack->blkno)));
+ }
+
+ /*
+ * Check that each tuple looks valid, and is consistent with the downlink
+ * we followed when we stepped on this page.
+ */
+ for (OffsetNumber i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ ItemId iid = PageGetItemIdCareful(check_state->rel, stack->blkno, page, i);
+ IndexTuple idxtuple = (IndexTuple) PageGetItem(page, iid);
+ IndexTuple tmpTuple = NULL;
+
+ /*
+		 * Check that it's not a leftover invalid tuple from pre-9.1.  See also
+ * gistdoinsert() and gistbulkdelete() handling of such tuples. We do
+ * consider it an error here.
+ */
+ if (GistTupleIsInvalid(idxtuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i),
+ errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+ errhint("Please REINDEX it.")));
+
+ if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+
+ /*
+ * Check if this tuple is consistent with the downlink in the parent.
+ */
+ if (stack->parenttup)
+ tmpTuple = gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state);
+
+ if (tmpTuple)
+ {
+ /*
+			 * There was a discrepancy between the parent and child tuples.
+			 * We need to verify that it is not the result of a concurrent
+			 * call of gistplacetopage().  So, lock the parent and try to
+			 * re-find the downlink for the current page.  It may be missing
+			 * due to a concurrent page split; this is OK.
+			 *
+			 * Note that when we re-acquire the parent tuple now, we hold
+			 * locks on both the parent and child buffers.  Thus the parent
+			 * tuple must include the keyspace of the child.
+ */
+
+ pfree(tmpTuple);
+ pfree(stack->parenttup);
+ stack->parenttup = gist_refind_parent(check_state->rel, stack->parentblk,
+ stack->blkno, strategy);
+
+			/*
+			 * If the downlink is gone, a concurrent split must have moved
+			 * it; otherwise re-check consistency against the re-found
+			 * parent tuple before reporting corruption.
+			 */
+			if (!stack->parenttup)
+				elog(NOTICE, "unable to find parent tuple for block %u on block %u due to concurrent split",
+ stack->blkno, stack->parentblk);
+ else if (gistgetadjusted(check_state->rel, stack->parenttup, idxtuple, check_state->state))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+ RelationGetRelationName(check_state->rel), stack->blkno, i)));
+ }
+
+ if (GistPageIsLeaf(page))
+ {
+ if (heapallindexed)
+ {
+ IndexTuple norm;
+
+ norm = gistFormNormalizedTuple(check_state, idxtuple);
+
+ bloom_add_element(check_state->filter,
+ (unsigned char *) norm,
+ IndexTupleSize(norm));
+
+ /* Be tidy */
+ if (norm != idxtuple)
+ pfree(norm);
+ }
+ }
+ else
+ {
+ OffsetNumber off = ItemPointerGetOffsetNumber(&(idxtuple->t_tid));
+
+ if (off != TUPLE_IS_VALID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\" has item id on page %u offset %u not pointing to 0xffff, but %hu",
+ RelationGetRelationName(check_state->rel), stack->blkno, i, off)));
+ }
+ }
+}
+
+/*
+ * gistFormNormalizedTuple - analogue of gistFormTuple that also detoasts
+ * all included data (for covering indexes).  While we do not expect toasted
+ * attributes in normal indexes, this can happen as a result of manual
+ * intervention in the system catalogs.  Detoasting of key attributes is
+ * expected to be done by opclass decompression methods, if the indexed type
+ * might be toasted.
+ */
+static IndexTuple
+gistFormNormalizedTuple(GistCheckState *giststate,
+ IndexTuple itup)
+{
+ return amcheck_normalize_tuple(giststate->rel, itup);
+}
+
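+/*
+ * Per-tuple callback for table_index_build_scan().  Forms an index tuple for
+ * the heap tuple, normalizes it the same way scanned leaf tuples were
+ * normalized, and probes the Bloom filter populated during the index scan.
+ */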
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+ bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+ GistCheckState *state = (GistCheckState *) checkstate;
+	IndexTuple	itup;
+	IndexTuple	norm;
+ Datum compatt[INDEX_MAX_KEYS];
+
+ /* Generate a normalized index tuple for fingerprinting */
+ gistCompressValues(state->state, index, values, isnull, true, compatt);
+ itup = index_form_tuple(RelationGetDescr(index), compatt, isnull);
+ itup->t_tid = *tid;
+
+ norm = gistFormNormalizedTuple(state, itup);
+
+ /* Probe Bloom filter -- tuple should be present */
+ if (bloom_lacks_element(state->filter, (unsigned char *) norm,
+ IndexTupleSize(norm)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+ ItemPointerGetBlockNumber(&(norm->t_tid)),
+ ItemPointerGetOffsetNumber(&(norm->t_tid)),
+ RelationGetRelationName(state->heaprel),
+ RelationGetRelationName(state->rel))));
+
+ state->heaptuplespresent++;
+
+ pfree(itup);
+ /* Be tidy */
+ if (norm != itup)
+ pfree(norm);
+}
+
+/*
+ * check_index_page - verify basic invariants of GiST page data.
+ *
+ * This function does not do any tuple-level analysis.
+ */
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+ Page page = BufferGetPage(buffer);
+
+ gistcheckpage(rel, buffer);
+
+ if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has corrupted page %u",
+ RelationGetRelationName(rel), blockNo)));
+
+ if (GistPageIsDeleted(page))
+ {
+ if (!GistPageIsLeaf(page))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted internal page %u",
+ RelationGetRelationName(rel), blockNo)));
+ if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted page %u with tuples",
+ RelationGetRelationName(rel), blockNo)));
+ }
+ else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has page %u with too many tuples",
+ RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find the downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+ BlockNumber parentblkno, BlockNumber childblkno,
+ BufferAccessStrategy strategy)
+{
+ Buffer parentbuf;
+ Page parentpage;
+ OffsetNumber parent_maxoff;
+ IndexTuple result = NULL;
+
+ parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+ strategy);
+
+ LockBuffer(parentbuf, GIST_SHARE);
+ parentpage = BufferGetPage(parentbuf);
+
+ if (GistPageIsLeaf(parentpage))
+ {
+ /*
+ * Currently GiST never deletes internal pages, thus they can never
+		 * become leaves.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" internal page %u became a leaf",
+ RelationGetRelationName(rel), parentblkno)));
+ }
+
+ parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+ for (OffsetNumber o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+ {
+ ItemId p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+ IndexTuple itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+ if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+ {
+ /*
+ * Found it! Make a copy and return it while both parent and child
+			 * pages are locked.  This guarantees that at this particular
+			 * moment the tuples are coherent with each other.
+ */
+ result = CopyIndexTuple(itup);
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(parentbuf);
+
+ return result;
+}
+
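+/*
+ * PageGetItemId() wrapper that validates the returned line pointer, erroring
+ * out instead of letting a corrupt line pointer send us past the end of the
+ * page's tuple space.
+ */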
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+ OffsetNumber offset)
+{
+ ItemId itemid = PageGetItemId(page, offset);
+
+ if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+ BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("line pointer points past end of tuple space in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ /*
+	 * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since GiST
+ * never uses either. Verify that the line pointer has storage, too, since
+ * even LP_DEAD items should.
+ */
+ if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+ ItemIdGetLength(itemid) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("invalid line pointer storage in index \"%s\"",
+ RelationGetRelationName(rel)),
+ errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+ block, offset, ItemIdGetOffset(itemid),
+ ItemIdGetLength(itemid),
+ ItemIdGetFlags(itemid))));
+
+ return itemid;
+}
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 0949c88983ac..678528f2fd50 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2859,115 +2859,10 @@ bt_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
static IndexTuple
bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
{
- TupleDesc tupleDescriptor = RelationGetDescr(state->rel);
- Datum normalized[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- bool need_free[INDEX_MAX_KEYS];
- bool formnewtup = false;
- IndexTuple reformed;
- int i;
-
/* Caller should only pass "logical" non-pivot tuples here */
Assert(!BTreeTupleIsPosting(itup) && !BTreeTupleIsPivot(itup));
- /* Easy case: It's immediately clear that tuple has no varlena datums */
- if (!IndexTupleHasVarwidths(itup))
- return itup;
-
- for (i = 0; i < tupleDescriptor->natts; i++)
- {
- Form_pg_attribute att;
-
- att = TupleDescAttr(tupleDescriptor, i);
-
- /* Assume untoasted/already normalized datum initially */
- need_free[i] = false;
- normalized[i] = index_getattr(itup, att->attnum,
- tupleDescriptor,
- &isnull[i]);
- if (att->attbyval || att->attlen != -1 || isnull[i])
- continue;
-
- /*
- * Callers always pass a tuple that could safely be inserted into the
- * index without further processing, so an external varlena header
- * should never be encountered here
- */
- if (VARATT_IS_EXTERNAL(DatumGetPointer(normalized[i])))
- ereport(ERROR,
- (errcode(ERRCODE_INDEX_CORRUPTED),
- errmsg("external varlena datum in tuple that references heap row (%u,%u) in index \"%s\"",
- ItemPointerGetBlockNumber(&(itup->t_tid)),
- ItemPointerGetOffsetNumber(&(itup->t_tid)),
- RelationGetRelationName(state->rel))));
- else if (!VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])) &&
- VARSIZE(DatumGetPointer(normalized[i])) > TOAST_INDEX_TARGET &&
- (att->attstorage == TYPSTORAGE_EXTENDED ||
- att->attstorage == TYPSTORAGE_MAIN))
- {
- /*
- * This value will be compressed by index_form_tuple() with the
- * current storage settings. We may be here because this tuple
- * was formed with different storage settings. So, force forming.
- */
- formnewtup = true;
- }
- else if (VARATT_IS_COMPRESSED(DatumGetPointer(normalized[i])))
- {
- formnewtup = true;
- normalized[i] = PointerGetDatum(PG_DETOAST_DATUM(normalized[i]));
- need_free[i] = true;
- }
-
- /*
- * Short tuples may have 1B or 4B header. Convert 4B header of short
- * tuples to 1B
- */
- else if (VARATT_CAN_MAKE_SHORT(DatumGetPointer(normalized[i])))
- {
- /* convert to short varlena */
- Size len = VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(normalized[i]));
- char *data = palloc(len);
-
- SET_VARSIZE_SHORT(data, len);
- memcpy(data + 1, VARDATA(DatumGetPointer(normalized[i])), len - 1);
-
- formnewtup = true;
- normalized[i] = PointerGetDatum(data);
- need_free[i] = true;
- }
- }
-
- /*
- * Easier case: Tuple has varlena datums, none of which are compressed or
- * short with 4B header
- */
- if (!formnewtup)
- return itup;
-
- /*
- * Hard case: Tuple had compressed varlena datums that necessitate
- * creating normalized version of the tuple from uncompressed input datums
- * (normalized input datums). This is rather naive, but shouldn't be
- * necessary too often.
- *
- * In the heap, tuples may contain short varlena datums with both 1B
- * header and 4B headers. But the corresponding index tuple should always
- * have such varlena's with 1B headers. So, if there is a short varlena
- * with 4B header, we need to convert it for fingerprinting.
- *
- * Note that we rely on deterministic index_form_tuple() TOAST compression
- * of normalized input.
- */
- reformed = index_form_tuple(tupleDescriptor, normalized, isnull);
- reformed->t_tid = itup->t_tid;
-
- /* Cannot leak memory here */
- for (i = 0; i < tupleDescriptor->natts; i++)
- if (need_free[i])
- pfree(DatumGetPointer(normalized[i]));
-
- return reformed;
+ return amcheck_normalize_tuple(state->rel, itup);
}
/*
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 0aff0a6c8c6f..7e4b6c6f6927 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -208,6 +208,25 @@ ORDER BY c.relpages DESC LIMIT 10;
+  <varlistentry>
+   <term>
+    <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+    <indexterm>
+     <primary>gist_index_check</primary>
+    </indexterm>
+   </term>
+
+   <listitem>
+    <para>
+     <function>gist_index_check</function> tests that its target GiST
+     index has consistent parent-child tuple relations (no parent tuple
+     requires adjustment) and that its page graph respects balanced-tree
+     invariants (internal pages reference only leaf pages or only
+     internal pages).
+    </para>
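+    <para>
+     For example (the index name <literal>some_gist_index</literal> is a
+     placeholder for an actual GiST index):
+<screen>
+test=# SELECT gist_index_check('some_gist_index', true);
+ gist_index_check
+------------------
+
+(1 row)
+</screen>
+    </para>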
+   </listitem>
+  </varlistentry>
+