diff --git a/doc/src/sgml/func/func-info.sgml b/doc/src/sgml/func/func-info.sgml index d4508114a48e..62b912c6a047 100644 --- a/doc/src/sgml/func/func-info.sgml +++ b/doc/src/sgml/func/func-info.sgml @@ -2975,6 +2975,39 @@ acl | {postgres=arwdDxtm/postgres,foo=r/postgres} modify key columns. + + + + + pg_get_multixact_stats + + pg_get_multixact_stats () + record + ( num_mxids bigint, + num_members bigint, + members_size bigint, + oldest_multixact xid ) + + + Returns statistics about current multixact usage: + num_mxids is the total number of multixact IDs currently present in the system, + num_members is the total number of multixact member entries currently + present in the system, + members_size is the storage, in bytes, occupied by those member entries + in the pg_multixact/members directory, + oldest_multixact is the oldest multixact ID still in use. + + + The function reports statistics at the time it is invoked. Values may vary between calls, + even within a single transaction. + + + Returns NULL when multixact statistics are unavailable, + such as during startup before multixact initialization completes. + Specifically, this occurs when the oldest multixact offset + corresponding to a multixact referenced by a relation is not known. + + diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml index 120bac8875fd..28af2c458b93 100644 --- a/doc/src/sgml/maintenance.sgml +++ b/doc/src/sgml/maintenance.sgml @@ -813,14 +813,60 @@ HINT: Execute a database-wide VACUUM in that database. As a safety device, an aggressive vacuum scan will occur for any table whose multixact-age is greater than . Also, if the - storage occupied by multixacts members exceeds about 10GB, aggressive vacuum - scans will occur more often for all tables, starting with those that - have the oldest multixact-age. Both of these kinds of aggressive - scans will occur even if autovacuum is nominally disabled. The members storage - area can grow up to about 20GB before reaching wraparound. 
+ linkend="guc-autovacuum-multixact-freeze-max-age"/>. Also, if the number + of multixact member entries created exceeds approximately 2^31 entries + (occupying roughly 10GB in the pg_multixact/members directory), + aggressive vacuum scans will occur more often for all tables, starting with those that + have the oldest multixact-age. Both of these kinds of aggressive + scans will occur even if autovacuum is nominally disabled. The members storage area can grow + up to approximately 2^32 entries (occupying roughly 20GB in the + pg_multixact/members directory) before reaching wraparound. + + The pg_get_multixact_stats() function described in + provides a way to monitor + multixact allocation and usage patterns in real time, for example: + +postgres=# SELECT *,pg_size_pretty(members_size) members_size_pretty FROM pg_catalog.pg_get_multixact_stats(); + num_mxids | num_members | members_size | oldest_multixact | members_size_pretty +-----------+-------------+--------------+------------------+--------------------- + 311740299 | 2785241176 | 13926205880 | 2 | 13 GB +(1 row) + + This output shows a system with significant multixact activity: about 312 million + multixact IDs and 2.8 billion member entries consuming 13 GB of storage space. + By leveraging this information, the function helps: + + + + Identify unusual multixact activity from concurrent row-level locks + or foreign key operations. For example, a spike in num_mxids might indicate + multiple sessions running UPDATE statements with foreign key checks, + concurrent SELECT FOR SHARE operations, or frequent use of savepoints + causing lock contention. + + + + + Track multixact cleanup efficiency by monitoring oldest_multixact. + If this value remains unchanged while num_members grows, it could indicate + that long-running transactions are preventing cleanup, or autovacuum is + not keeping up with the workload. + + + + + Detect potential performance impacts before they become critical. 
+ For instance, high multixact usage from frequent row-level locking or + foreign key operations can lead to increased I/O and CPU overhead during + vacuum operations. Monitoring these stats helps tune autovacuum frequency + and transaction patterns. + + + + + Similar to the XID case, if autovacuum fails to clear old MXIDs from a table, the system will begin to emit warning messages when the database's oldest MXIDs reach forty diff --git a/src/backend/utils/adt/multixactfuncs.c b/src/backend/utils/adt/multixactfuncs.c index e74ea9383486..286676d3829f 100644 --- a/src/backend/utils/adt/multixactfuncs.c +++ b/src/backend/utils/adt/multixactfuncs.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/multixact.h" +#include "access/htup_details.h" #include "funcapi.h" #include "utils/builtins.h" @@ -85,3 +86,48 @@ pg_get_multixact_members(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funccxt); } + +/* + * pg_get_multixact_stats + * + * Returns statistics about current multixact usage. + * + * Returns NULL if the oldest referenced offset is unknown. + */ +Datum +pg_get_multixact_stats(PG_FUNCTION_ARGS) +{ + TupleDesc tupdesc; + Datum values[4]; + bool nulls[4]; + MultiXactOffset members; + MultiXactId oldestMultiXactId; + uint32 multixacts; + MultiXactOffset oldestOffset; + int64 membersBytes; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("return type must be a row type"))); + + if (GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset)) + { + /* + * Calculate storage space for members. Members are stored in groups of 4, + * with each group taking 20 bytes, resulting in 5 bytes per member. 
+ * Note: This ignores small page overhead (12 bytes per 8KB) + */ + membersBytes = (int64) members * 5; + + values[0] = UInt32GetDatum(multixacts); + values[1] = UInt32GetDatum(members); + values[2] = Int64GetDatum(membersBytes); + values[3] = UInt32GetDatum(oldestMultiXactId); + memset(nulls, false, sizeof(nulls)); + + return HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)); + } + + PG_RETURN_NULL(); +} diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 5cf9e12fcb9a..cf8633abff8f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12604,4 +12604,14 @@ proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', prosrc => 'pg_get_aios' }, +# Get multixact usage +{ oid => '9001', descr => 'get current multixact usage statistics', + proname => 'pg_get_multixact_stats', + provolatile => 'v', proparallel => 's', prorettype => 'record', + proargtypes => '', + proallargtypes => '{int8,int8,int8,xid}', + proargmodes => '{o,o,o,o}', + proargnames => '{num_mxids,num_members,members_size,oldest_multixact}', + prosrc => 'pg_get_multixact_stats'}, + ] diff --git a/src/test/isolation/expected/multixact_stats.out b/src/test/isolation/expected/multixact_stats.out new file mode 100644 index 000000000000..27a6510c4ad5 --- /dev/null +++ b/src/test/isolation/expected/multixact_stats.out @@ -0,0 +1,89 @@ +Parsed test spec with 2 sessions + +starting permutation: snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit +step snap0: + CREATE TEMP TABLE snap0 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step s1_begin: BEGIN; +step s1_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; +?column? 
+-------- + 1 +(1 row) + +step snap1: + CREATE TEMP TABLE snap1 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step s2_begin: BEGIN; +step s2_lock: SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; +?column? +-------- + 1 +(1 row) + +step snap2: + CREATE TEMP TABLE snap2 AS + SELECT num_mxids, num_members, oldest_multixact + FROM pg_get_multixact_stats(); + +step check_while_pinned: + SELECT r.assertion, r.ok + FROM snap0 s0 + JOIN snap1 s1 ON TRUE + JOIN snap2 s2 ON TRUE, + LATERAL unnest( + ARRAY[ + 'is_init_mxids', + 'is_init_members', + 'is_init_oldest_mxid', + 'is_init_oldest_off', + 'is_oldest_mxid_nondec_01', + 'is_oldest_mxid_nondec_12', + 'is_oldest_off_nondec_01', + 'is_oldest_off_nondec_12', + 'is_members_increased_ge1', + 'is_mxids_nondec_01', + 'is_mxids_nondec_12', + 'is_members_nondec_01', + 'is_members_nondec_12' + ], + ARRAY[ + (s2.num_mxids IS NOT NULL), + (s2.num_members IS NOT NULL), + (s2.oldest_multixact IS NOT NULL), + + (s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)), + (s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)), + + (s2.num_members >= COALESCE(s1.num_members, 0) + 1), + + (s1.num_mxids >= COALESCE(s0.num_mxids, 0)), + (s2.num_mxids >= COALESCE(s1.num_mxids, 0)), + (s1.num_members >= COALESCE(s0.num_members, 0)), + (s2.num_members >= COALESCE(s1.num_members, 0)) + ] + ) AS r(assertion, ok); + +assertion |ok +------------------------+-- +is_init_mxids |t +is_init_members |t +is_init_oldest_mxid |t +is_init_oldest_off |t +is_oldest_mxid_nondec_01|t +is_oldest_mxid_nondec_12|t +is_oldest_off_nondec_01 |t +is_oldest_off_nondec_12 |t +is_members_increased_ge1|t +is_mxids_nondec_01 |t +is_mxids_nondec_12 | +is_members_nondec_01 | +is_members_nondec_12 | +(13 rows) + +step s1_commit: COMMIT; +step s2_commit: COMMIT; diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule index 
# Test invariants for pg_get_multixact_stats()
#
# We create exactly one fresh MultiXact on a brand-new table.  While it is
# pinned by two open transactions, we assert only invariants that background
# VACUUM/FREEZE cannot violate:
#   - num_members increased by >= 1 when the second session locked the row,
#   - num_mxids / num_members did not decrease vs. earlier snapshots,
#   - oldest_multixact never decreases.
# We make NO assertions after releasing locks (freezing/truncation may shrink
# deltas).
#
# Terminology (global counters):
#   num_mxids, num_members : "in-use" deltas derived from global horizons
#   oldest_multixact       : oldest horizon; it moves forward, never backward
#
# All assertions execute while our multixact is pinned by open txns, which
# protects the truncation horizon (VACUUM can't advance past our pinned multi).

setup
{
    CREATE TABLE mxq(id int PRIMARY KEY, v int);
    INSERT INTO mxq (id, v) VALUES (1, 42);
}

teardown
{
    DROP TABLE mxq;
}

# Two sessions that lock on the same tuple -> one MultiXact with >= 2 members.
session "s1"
setup { SET client_min_messages = warning; SET lock_timeout = '5s'; }
step s1_begin { BEGIN; }
step s1_lock { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; }
step s1_commit { COMMIT; }

session "s2"
setup { SET client_min_messages = warning; SET lock_timeout = '5s'; }
step s2_begin { BEGIN; }
step s2_lock { SELECT 1 FROM mxq WHERE id=1 FOR KEY SHARE; }
step s2_commit { COMMIT; }

# Baseline BEFORE any locking; may be NULLs if multixact isn't initialized yet.
step snap0 {
    CREATE TEMP TABLE snap0 AS
    SELECT num_mxids, num_members, oldest_multixact
    FROM pg_get_multixact_stats();
}

# After s1 has locked the row.
step snap1 {
    CREATE TEMP TABLE snap1 AS
    SELECT num_mxids, num_members, oldest_multixact
    FROM pg_get_multixact_stats();
}

# After s2 joins on the SAME tuple -> multixact with >= 2 members.
step snap2 {
    CREATE TEMP TABLE snap2 AS
    SELECT num_mxids, num_members, oldest_multixact
    FROM pg_get_multixact_stats();
}

# Pretty, deterministic key/value output of boolean checks.
#
# The label array and the value array below MUST have the same number of
# elements, in the same order: multi-argument unnest() pads the shorter array
# with NULLs, which would silently pair labels with the wrong check.
#
# Keys (one per value expression, in order):
#   is_init_mxids            : num_mxids is non-NULL
#   is_init_members          : num_members is non-NULL
#   is_init_oldest_mxid      : oldest_multixact is non-NULL
#   is_oldest_mxid_nondec_01 : oldest_multixact did not decrease (snap0->snap1)
#   is_oldest_mxid_nondec_12 : oldest_multixact did not decrease (snap1->snap2)
#   is_members_increased_ge1 : members increased by at least 1 when s2 joined
#   is_mxids_nondec_01       : num_mxids did not decrease (snap0->snap1)
#   is_mxids_nondec_12       : num_mxids did not decrease (snap1->snap2)
#   is_members_nondec_01     : num_members did not decrease (snap0->snap1)
#   is_members_nondec_12     : num_members did not decrease (snap1->snap2)
step check_while_pinned {
    SELECT r.assertion, r.ok
    FROM snap0 s0
    JOIN snap1 s1 ON TRUE
    JOIN snap2 s2 ON TRUE,
    LATERAL unnest(
        ARRAY[
            'is_init_mxids',
            'is_init_members',
            'is_init_oldest_mxid',
            'is_oldest_mxid_nondec_01',
            'is_oldest_mxid_nondec_12',
            'is_members_increased_ge1',
            'is_mxids_nondec_01',
            'is_mxids_nondec_12',
            'is_members_nondec_01',
            'is_members_nondec_12'
        ],
        ARRAY[
            (s2.num_mxids IS NOT NULL),
            (s2.num_members IS NOT NULL),
            (s2.oldest_multixact IS NOT NULL),

            (s1.oldest_multixact::text::bigint >= COALESCE(s0.oldest_multixact::text::bigint, 0)),
            (s2.oldest_multixact::text::bigint >= COALESCE(s1.oldest_multixact::text::bigint, 0)),

            (s2.num_members >= COALESCE(s1.num_members, 0) + 1),

            (s1.num_mxids >= COALESCE(s0.num_mxids, 0)),
            (s2.num_mxids >= COALESCE(s1.num_mxids, 0)),
            (s1.num_members >= COALESCE(s0.num_members, 0)),
            (s2.num_members >= COALESCE(s1.num_members, 0))
        ]
    ) AS r(assertion, ok);
}

permutation snap0 s1_begin s1_lock snap1 s2_begin s2_lock snap2 check_while_pinned s1_commit s2_commit