From 2d73ea26657f27c57af93a4f9f0d596285d8f4c2 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 12 Nov 2025 14:19:32 +0200
Subject: [PATCH 01/10] Move pg_multixact SLRU page format definitions to
 separate header

---
 src/backend/access/transam/multixact.c  | 119 --------------------
 src/include/access/multixact_internal.h | 140 ++++++++++++++++++++++++
 2 files changed, 140 insertions(+), 119 deletions(-)
 create mode 100644 src/include/access/multixact_internal.h

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 9d5f130af7ef..acb2a6788f93 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -89,125 +89,6 @@
 #include "utils/memutils.h"
 
 
-/*
- * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
- * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
- */
-
-/* We need four bytes per offset */
-#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-
-static inline int64
-MultiXactIdToOffsetPage(MultiXactId multi)
-{
-	return multi / MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int
-MultiXactIdToOffsetEntry(MultiXactId multi)
-{
-	return multi % MULTIXACT_OFFSETS_PER_PAGE;
-}
-
-static inline int64
-MultiXactIdToOffsetSegment(MultiXactId multi)
-{
-	return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/*
- * The situation for members is a bit more complex: we store one byte of
- * additional flag bits for each TransactionId.  To do this without getting
- * into alignment issues, we store four bytes of flags, and then the
- * corresponding 4 Xids.  Each such 5-word (20-byte) set we call a "group", and
- * are stored as a whole in pages.  Thus, with 8kB BLCKSZ, we keep 409 groups
- * per page.  This wastes 12 bytes per page, but that's OK -- simplicity (and
- * performance) trumps space efficiency here.
- *
- * Note that the "offset" macros work with byte offset, not array indexes, so
- * arithmetic must be done using "char *" pointers.
- */
-/* We need eight bits per xact, so one xact fits in a byte */
-#define MXACT_MEMBER_BITS_PER_XACT			8
-#define MXACT_MEMBER_FLAGS_PER_BYTE			1
-#define MXACT_MEMBER_XACT_BITMASK	((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
-
-/* how many full bytes of flags are there in a group? */
-#define MULTIXACT_FLAGBYTES_PER_GROUP		4
-#define MULTIXACT_MEMBERS_PER_MEMBERGROUP	\
-	(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
-/* size in bytes of a complete group */
-#define MULTIXACT_MEMBERGROUP_SIZE \
-	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
-#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
-#define MULTIXACT_MEMBERS_PER_PAGE	\
-	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
-
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages.  (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
- */
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
-		((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
-/* page in which a member is to be found */
-static inline int64
-MXOffsetToMemberPage(MultiXactOffset offset)
-{
-	return offset / MULTIXACT_MEMBERS_PER_PAGE;
-}
-
-static inline int64
-MXOffsetToMemberSegment(MultiXactOffset offset)
-{
-	return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
-}
-
-/* Location (byte offset within page) of flag word for a given member */
-static inline int
-MXOffsetToFlagsOffset(MultiXactOffset offset)
-{
-	MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-	int			grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
-	int			byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
-
-	return byteoff;
-}
-
-static inline int
-MXOffsetToFlagsBitShift(MultiXactOffset offset)
-{
-	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-	int			bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
-
-	return bshift;
-}
-
-/* Location (byte offset within page) of TransactionId of given member */
-static inline int
-MXOffsetToMemberOffset(MultiXactOffset offset)
-{
-	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
-
-	return MXOffsetToFlagsOffset(offset) +
-		MULTIXACT_FLAGBYTES_PER_GROUP +
-		member_in_group * sizeof(TransactionId);
-}
-
 /* Multixact members wraparound thresholds. */
 #define MULTIXACT_MEMBER_SAFE_THRESHOLD		(MaxMultiXactOffset / 2)
 #define MULTIXACT_MEMBER_DANGER_THRESHOLD	\
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h
new file mode 100644
index 000000000000..9b56deaef31f
--- /dev/null
+++ b/src/include/access/multixact_internal.h
@@ -0,0 +1,140 @@
+/*
+ * multixact_internal.h
+ *
+ * PostgreSQL multi-transaction-log manager internal declarations
+ *
+ * These functions and definitions are for dealing with pg_multixact pages.
+ * They are internal to multixact.c, but they are exported here to allow
+ * pg_upgrade to write pg_multixact files directly.
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/multixact_internal.h
+ */
+#ifndef MULTIXACT_INTERNAL_H
+#define MULTIXACT_INTERNAL_H
+
+#include "access/multixact.h"
+
+
+/*
+ * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
+ * used everywhere else in Postgres.
+ *
+ * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
+ * MultiXact page numbering also wraps around at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
+ * take no explicit notice of that fact in this module, except when comparing
+ * segment and page numbers in TruncateMultiXact (see
+ * MultiXactOffsetPagePrecedes).
+ */
+
+/* We need four bytes per offset */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
+
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+	return multi / MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+	return multi % MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int64
+MultiXactIdToOffsetSegment(MultiXactId multi)
+{
+	return MultiXactIdToOffsetPage(multi) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId.  To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids.  Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages.  Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page.  This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT			8
+#define MXACT_MEMBER_FLAGS_PER_BYTE			1
+#define MXACT_MEMBER_XACT_BITMASK	((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP		4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP	\
+	(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE	\
+	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/*
+ * Because the number of items per page is not a divisor of the last item
+ * number (member 0xFFFFFFFF), the last segment does not use the maximum number
+ * of pages, and moreover the last used page therein does not use the same
+ * number of items as previous pages.  (Another way to say it is that the
+ * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
+ * has some empty space after that item.)
+ *
+ * This constant is the number of members in the last page of the last segment.
+ */
+#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
+		((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
+
+/* page in which a member is to be found */
+static inline int64
+MXOffsetToMemberPage(MultiXactOffset offset)
+{
+	return offset / MULTIXACT_MEMBERS_PER_PAGE;
+}
+
+static inline int64
+MXOffsetToMemberSegment(MultiXactOffset offset)
+{
+	return MXOffsetToMemberPage(offset) / SLRU_PAGES_PER_SEGMENT;
+}
+
+/* Location (byte offset within page) of flag word for a given member */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset offset)
+{
+	MultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+	int			grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+	int			byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
+
+	return byteoff;
+}
+
+static inline int
+MXOffsetToFlagsBitShift(MultiXactOffset offset)
+{
+	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+	int			bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
+
+	return bshift;
+}
+
+/* Location (byte offset within page) of TransactionId of given member */
+static inline int
+MXOffsetToMemberOffset(MultiXactOffset offset)
+{
+	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+	return MXOffsetToFlagsOffset(offset) +
+		MULTIXACT_FLAGBYTES_PER_GROUP +
+		member_in_group * sizeof(TransactionId);
+}
+
+#endif							/* MULTIXACT_INTERNAL_H */

From 2b0caf0eeab3c65d63d4194cafd129a6e0a41bbd Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Wed, 7 Aug 2024 16:35:22 +0300
Subject: [PATCH 02/10] Use 64-bit multixact offsets

Switching to 64-bit multitransaction offsets removes wraparound and the
2^32 limit on their total number.

Author: Maxim Orlov <orlovmg@gmail.com>
Discussion: FIXME
---
 src/backend/access/rmgrdesc/mxactdesc.c   |   4 +-
 src/backend/access/rmgrdesc/xlogdesc.c    |   2 +-
 src/backend/access/transam/multixact.c    | 390 ++++------------------
 src/backend/access/transam/xlog.c         |   2 +-
 src/backend/access/transam/xlogrecovery.c |   2 +-
 src/backend/commands/vacuum.c             |   2 +-
 src/backend/postmaster/autovacuum.c       |   4 +-
 src/bin/pg_controldata/pg_controldata.c   |   2 +-
 src/bin/pg_resetwal/pg_resetwal.c         |  30 +-
 src/bin/pg_resetwal/t/001_basic.pl        |   4 +-
 src/include/access/multixact.h            |   3 -
 src/include/access/multixact_internal.h   |  24 +-
 src/include/c.h                           |   2 +-
 13 files changed, 95 insertions(+), 376 deletions(-)

diff --git a/src/backend/access/rmgrdesc/mxactdesc.c b/src/backend/access/rmgrdesc/mxactdesc.c
index 3ca0582db364..052dd0a4ce56 100644
--- a/src/backend/access/rmgrdesc/mxactdesc.c
+++ b/src/backend/access/rmgrdesc/mxactdesc.c
@@ -65,7 +65,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
 		xl_multixact_create *xlrec = (xl_multixact_create *) rec;
 		int			i;
 
-		appendStringInfo(buf, "%u offset %u nmembers %d: ", xlrec->mid,
+		appendStringInfo(buf, "%u offset %" PRIu64 " nmembers %d: ", xlrec->mid,
 						 xlrec->moff, xlrec->nmembers);
 		for (i = 0; i < xlrec->nmembers; i++)
 			out_member(buf, &xlrec->members[i]);
@@ -74,7 +74,7 @@ multixact_desc(StringInfo buf, XLogReaderState *record)
 	{
 		xl_multixact_truncate *xlrec = (xl_multixact_truncate *) rec;
 
-		appendStringInfo(buf, "offsets [%u, %u), members [%u, %u)",
+		appendStringInfo(buf, "offsets [%u, %u), members [%" PRIu64 ", %" PRIu64 ")",
 						 xlrec->startTruncOff, xlrec->endTruncOff,
 						 xlrec->startTruncMemb, xlrec->endTruncMemb);
 	}
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index cd6c2a2f650a..441034f5929c 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -66,7 +66,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 		CheckPoint *checkpoint = (CheckPoint *) rec;
 
 		appendStringInfo(buf, "redo %X/%08X; "
-						 "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
+						 "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %" PRIu64 "; "
 						 "oldest xid %u in DB %u; oldest multi %u in DB %u; "
 						 "oldest/newest commit timestamp xid: %u/%u; "
 						 "oldest running xid %u; %s",
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index acb2a6788f93..34a745c07be4 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -69,6 +69,7 @@
 #include "postgres.h"
 
 #include "access/multixact.h"
+#include "access/multixact_internal.h"
 #include "access/slru.h"
 #include "access/twophase.h"
 #include "access/twophase_rmgr.h"
@@ -89,10 +90,14 @@
 #include "utils/memutils.h"
 
 
-/* Multixact members wraparound thresholds. */
-#define MULTIXACT_MEMBER_SAFE_THRESHOLD		(MaxMultiXactOffset / 2)
-#define MULTIXACT_MEMBER_DANGER_THRESHOLD	\
-	(MaxMultiXactOffset - MaxMultiXactOffset / 4)
+/*
+ * Multixact members warning threshold.
+ *
+ * If the difference between nextOffset and oldestOffset exceeds this value,
+ * we trigger autovacuum in order to release disk space consumed by the
+ * members SLRU.
+ */
+#define MULTIXACT_MEMBER_AUTOVAC_THRESHOLD		UINT64CONST(4000000000)
 
 static inline MultiXactId
 PreviousMultiXactId(MultiXactId multi)
@@ -149,9 +154,6 @@ typedef struct MultiXactStateData
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
 
-	/* support for members anti-wraparound measures */
-	MultiXactOffset offsetStopLimit;	/* known if oldestOffsetKnown */
-
 	/*
 	 * This is used to sleep until a multixact offset is written when we want
 	 * to create the next one.
@@ -282,8 +284,6 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 									MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
 static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
-									 MultiXactOffset start, uint32 distance);
 static bool SetOffsetVacuumLimit(bool is_startup);
 static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
 static void WriteMTruncateXlogRec(Oid oldestMultiDB,
@@ -1023,90 +1023,22 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	ExtendMultiXactOffset(result);
 
 	/*
-	 * Reserve the members space, similarly to above.  Also, be careful not to
-	 * return zero as the starting offset for any multixact. See
-	 * GetMultiXactIdMembers() for motivation.
+	 * Reserve the members space, similarly to above.
 	 */
 	nextOffset = MultiXactState->nextOffset;
-	if (nextOffset == 0)
-	{
-		*offset = 1;
-		nmembers++;				/* allocate member slot 0 too */
-	}
-	else
-		*offset = nextOffset;
-
-	/*----------
-	 * Protect against overrun of the members space as well, with the
-	 * following rules:
-	 *
-	 * If we're past offsetStopLimit, refuse to generate more multis.
-	 * If we're close to offsetStopLimit, emit a warning.
-	 *
-	 * Arbitrarily, we start emitting warnings when we're 20 segments or less
-	 * from offsetStopLimit.
-	 *
-	 * Note we haven't updated the shared state yet, so if we fail at this
-	 * point, the multixact ID we grabbed can still be used by the next guy.
-	 *
-	 * Note that there is no point in forcing autovacuum runs here: the
-	 * multixact freeze settings would have to be reduced for that to have any
-	 * effect.
-	 *----------
-	 */
-#define OFFSET_WARN_SEGMENTS	20
-	if (MultiXactState->oldestOffsetKnown &&
-		MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit, nextOffset,
-								 nmembers))
-	{
-		/* see comment in the corresponding offsets wraparound case */
-		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-
-		ereport(ERROR,
-				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg("multixact \"members\" limit exceeded"),
-				 errdetail_plural("This command would create a multixact with %u members, but the remaining space is only enough for %u member.",
-								  "This command would create a multixact with %u members, but the remaining space is only enough for %u members.",
-								  MultiXactState->offsetStopLimit - nextOffset - 1,
-								  nmembers,
-								  MultiXactState->offsetStopLimit - nextOffset - 1),
-				 errhint("Execute a database-wide VACUUM in database with OID %u with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.",
-						 MultiXactState->oldestMultiXactDB)));
-	}
 
 	/*
-	 * Check whether we should kick autovacuum into action, to prevent members
-	 * wraparound. NB we use a much larger window to trigger autovacuum than
-	 * just the warning limit. The warning is just a measure of last resort -
-	 * this is in line with GetNewTransactionId's behaviour.
+	 * Offsets are 64-bit integers and will never wrap around.  Firstly, it
+	 * would take an unrealistic amount of time and resources to consume 2^64
+	 * offsets.  Secondly, multixid creation is WAL-logged, so you would run
+	 * out of LSNs before reaching offset wraparound.  Nevertheless, check for
+	 * wraparound as a sanity check.
 	 */
-	if (!MultiXactState->oldestOffsetKnown ||
-		(MultiXactState->nextOffset - MultiXactState->oldestOffset
-		 > MULTIXACT_MEMBER_SAFE_THRESHOLD))
-	{
-		/*
-		 * To avoid swamping the postmaster with signals, we issue the autovac
-		 * request only when crossing a segment boundary. With default
-		 * compilation settings that's roughly after 50k members.  This still
-		 * gives plenty of chances before we get into real trouble.
-		 */
-		if ((MXOffsetToMemberPage(nextOffset) / SLRU_PAGES_PER_SEGMENT) !=
-			(MXOffsetToMemberPage(nextOffset + nmembers) / SLRU_PAGES_PER_SEGMENT))
-			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
-	}
-
-	if (MultiXactState->oldestOffsetKnown &&
-		MultiXactOffsetWouldWrap(MultiXactState->offsetStopLimit,
-								 nextOffset,
-								 nmembers + MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT * OFFSET_WARN_SEGMENTS))
-		ereport(WARNING,
+	if (nextOffset + nmembers < nextOffset)
+		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-				 errmsg_plural("database with OID %u must be vacuumed before %d more multixact member is used",
-							   "database with OID %u must be vacuumed before %d more multixact members are used",
-							   MultiXactState->offsetStopLimit - nextOffset + nmembers,
-							   MultiXactState->oldestMultiXactDB,
-							   MultiXactState->offsetStopLimit - nextOffset + nmembers),
-				 errhint("Execute a database-wide VACUUM in that database with reduced \"vacuum_multixact_freeze_min_age\" and \"vacuum_multixact_freeze_table_age\" settings.")));
+				 errmsg("MultiXact members would wrap around")));
+	*offset = nextOffset;
 
 	ExtendMultiXactMember(nextOffset, nmembers);
 
@@ -1127,8 +1059,7 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 	 * the next iteration.  But note that nextMXact may be InvalidMultiXactId
 	 * or the first value on a segment-beginning page after this routine
 	 * exits, so anyone else looking at the variable must be prepared to deal
-	 * with either case.  Similarly, nextOffset may be zero, but we won't use
-	 * that as the actual start offset of the next multixact.
+	 * with either case.
 	 */
 	(MultiXactState->nextMXact)++;
 
@@ -1136,7 +1067,8 @@ GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
 
 	LWLockRelease(MultiXactGenLock);
 
-	debug_elog4(DEBUG2, "GetNew: returning %u offset %u", result, *offset);
+	debug_elog4(DEBUG2, "GetNew: returning %u offset %" PRIu64, result,
+				*offset);
 	return result;
 }
 
@@ -1178,7 +1110,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	MultiXactOffset *offptr;
 	MultiXactOffset offset;
 	int			length;
-	int			truelength;
 	MultiXactId oldestMXact;
 	MultiXactId nextMXact;
 	MultiXactId tmpMXact;
@@ -1277,16 +1208,7 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	 * we have just for this; the process in charge will signal the CV as soon
 	 * as it has finished writing the multixact offset.
 	 *
-	 * 3. Because GetNewMultiXactId increments offset zero to offset one to
-	 * handle case #2, there is an ambiguity near the point of offset
-	 * wraparound.  If we see next multixact's offset is one, is that our
-	 * multixact's actual endpoint, or did it end at zero with a subsequent
-	 * increment?  We handle this using the knowledge that if the zero'th
-	 * member slot wasn't filled, it'll contain zero, and zero isn't a valid
-	 * transaction ID so it can't be a multixact member.  Therefore, if we
-	 * read a zero from the members array, just ignore it.
-	 *
-	 * This is all pretty messy, but the mess occurs only in infrequent corner
+	 * This is a little messy, but the mess occurs only in infrequent corner
 	 * cases, so it seems better than holding the MultiXactGenLock for a long
 	 * time on every multixact creation.
 	 */
@@ -1372,6 +1294,9 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 	LWLockRelease(lock);
 	lock = NULL;
 
+	/* A multixid with zero members should not happen */
+	Assert(length > 0);
+
 	/*
 	 * If we slept above, clean up state; it's no longer needed.
 	 */
@@ -1380,7 +1305,6 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 
 	ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
 
-	truelength = 0;
 	prev_pageno = -1;
 	for (int i = 0; i < length; i++, offset++)
 	{
@@ -1417,37 +1341,27 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 
 		xactptr = (TransactionId *)
 			(MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
-
-		if (!TransactionIdIsValid(*xactptr))
-		{
-			/* Corner case 3: we must be looking at unused slot zero */
-			Assert(offset == 0);
-			continue;
-		}
+		Assert(TransactionIdIsValid(*xactptr));
 
 		flagsoff = MXOffsetToFlagsOffset(offset);
 		bshift = MXOffsetToFlagsBitShift(offset);
 		flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
 
-		ptr[truelength].xid = *xactptr;
-		ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
-		truelength++;
+		ptr[i].xid = *xactptr;
+		ptr[i].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
 	}
 
 	LWLockRelease(lock);
 
-	/* A multixid with zero members should not happen */
-	Assert(truelength > 0);
-
 	/*
 	 * Copy the result into the local cache.
 	 */
-	mXactCachePut(multi, truelength, ptr);
+	mXactCachePut(multi, length, ptr);
 
 	debug_elog3(DEBUG2, "GetMembers: no cache for %s",
-				mxid_to_string(multi, truelength, ptr));
+				mxid_to_string(multi, length, ptr));
 	*members = ptr;
-	return truelength;
+	return length;
 }
 
 /*
@@ -1854,7 +1768,7 @@ MultiXactShmemInit(void)
 				  "pg_multixact/members", LWTRANCHE_MULTIXACTMEMBER_BUFFER,
 				  LWTRANCHE_MULTIXACTMEMBER_SLRU,
 				  SYNC_HANDLER_MULTIXACT_MEMBER,
-				  false);
+				  true);
 	/* doesn't call SimpleLruTruncate() or meet criteria for unit tests */
 
 	/* Initialize our shared state struct */
@@ -2031,7 +1945,6 @@ TrimMultiXact(void)
 		slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, true, nextMXact);
 		offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 		offptr += entryno;
-
 		MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
 
 		MultiXactOffsetCtl->shared->page_dirty[slotno] = true;
@@ -2104,7 +2017,7 @@ MultiXactGetCheckptMulti(bool is_shutdown,
 	LWLockRelease(MultiXactGenLock);
 
 	debug_elog6(DEBUG2,
-				"MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
+				"MultiXact: checkpoint is nextMulti %u, nextOffset %" PRIu64 ", oldestMulti %u in DB %u",
 				*nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
 }
 
@@ -2139,7 +2052,7 @@ void
 MultiXactSetNextMXact(MultiXactId nextMulti,
 					  MultiXactOffset nextMultiOffset)
 {
-	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
+	debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %" PRIu64,
 				nextMulti, nextMultiOffset);
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->nextMXact = nextMulti;
@@ -2330,7 +2243,7 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
 	}
 	if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
 	{
-		debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
+		debug_elog3(DEBUG2, "MultiXact: setting next offset to %" PRIU64,
 					minMultiOffset);
 		MultiXactState->nextOffset = minMultiOffset;
 	}
@@ -2432,23 +2345,8 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 			LWLockRelease(lock);
 		}
 
-		/*
-		 * Compute the number of items till end of current page.  Careful: if
-		 * addition of unsigned ints wraps around, we're at the last page of
-		 * the last segment; since that page holds a different number of items
-		 * than other pages, we need to do it differently.
-		 */
-		if (offset + MAX_MEMBERS_IN_LAST_MEMBERS_PAGE < offset)
-		{
-			/*
-			 * This is the last page of the last segment; we can compute the
-			 * number of items left to allocate in it without modulo
-			 * arithmetic.
-			 */
-			difference = MaxMultiXactOffset - offset + 1;
-		}
-		else
-			difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+		/* Compute the number of items till end of current page. */
+		difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
 
 		/*
 		 * Advance to next page, taking care to properly handle the wraparound
@@ -2514,15 +2412,14 @@ GetOldestMultiXactId(void)
 }
 
 /*
- * Determine how aggressively we need to vacuum in order to prevent member
- * wraparound.
+ * Determine if we need to vacuum to keep the size of the members SLRU in
+ * check.
  *
  * To do so determine what's the oldest member offset and install the limit
  * info in MultiXactState, where it can be used to prevent overrun of old data
  * in the members SLRU area.
  *
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * The return value is true if autovacuum is required and false otherwise.
  */
 static bool
 SetOffsetVacuumLimit(bool is_startup)
@@ -2534,8 +2431,6 @@ SetOffsetVacuumLimit(bool is_startup)
 	MultiXactOffset nextOffset;
 	bool		oldestOffsetKnown = false;
 	bool		prevOldestOffsetKnown;
-	MultiXactOffset offsetStopLimit = 0;
-	MultiXactOffset prevOffsetStopLimit;
 
 	/*
 	 * NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2550,7 +2445,6 @@ SetOffsetVacuumLimit(bool is_startup)
 	nextOffset = MultiXactState->nextOffset;
 	prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
 	prevOldestOffset = MultiXactState->oldestOffset;
-	prevOffsetStopLimit = MultiXactState->offsetStopLimit;
 	Assert(MultiXactState->finishedStartup);
 	LWLockRelease(MultiXactGenLock);
 
@@ -2581,13 +2475,9 @@ SetOffsetVacuumLimit(bool is_startup)
 		oldestOffsetKnown =
 			find_multixact_start(oldestMultiXactId, &oldestOffset);
 
-		if (oldestOffsetKnown)
-			ereport(DEBUG1,
-					(errmsg_internal("oldest MultiXactId member is at offset %u",
-									 oldestOffset)));
-		else
+		if (!oldestOffsetKnown)
 			ereport(LOG,
-					(errmsg("MultiXact member wraparound protections are disabled because oldest checkpointed MultiXact %u does not exist on disk",
+					(errmsg("oldest checkpointed MultiXact %u does not exist on disk",
 							oldestMultiXactId)));
 	}
 
@@ -2597,97 +2487,32 @@ SetOffsetVacuumLimit(bool is_startup)
 	 * If we can, compute limits (and install them MultiXactState) to prevent
 	 * overrun of old data in the members SLRU area. We can only do so if the
 	 * oldest offset is known though.
+	 *
+	 * FIXME: Is !oldestOffsetKnown possible anymore? At least update the comment:
+	 * we won't overrun members anymore.
 	 */
-	if (oldestOffsetKnown)
-	{
-		/* move back to start of the corresponding segment */
-		offsetStopLimit = oldestOffset - (oldestOffset %
-										  (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT));
-
-		/* always leave one segment before the wraparound point */
-		offsetStopLimit -= (MULTIXACT_MEMBERS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);
-
-		if (!prevOldestOffsetKnown && !is_startup)
-			ereport(LOG,
-					(errmsg("MultiXact member wraparound protections are now enabled")));
-
-		ereport(DEBUG1,
-				(errmsg_internal("MultiXact member stop limit is now %u based on MultiXact %u",
-								 offsetStopLimit, oldestMultiXactId)));
-	}
-	else if (prevOldestOffsetKnown)
+	if (prevOldestOffsetKnown)
 	{
 		/*
 		 * If we failed to get the oldest offset this time, but we have a
 		 * value from a previous pass through this function, use the old
-		 * values rather than automatically forcing an emergency autovacuum
-		 * cycle again.
+		 * values rather than automatically forcing an autovacuum cycle again.
 		 */
 		oldestOffset = prevOldestOffset;
 		oldestOffsetKnown = true;
-		offsetStopLimit = prevOffsetStopLimit;
 	}
 
 	/* Install the computed values */
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->oldestOffset = oldestOffset;
 	MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
-	MultiXactState->offsetStopLimit = offsetStopLimit;
 	LWLockRelease(MultiXactGenLock);
 
 	/*
-	 * Do we need an emergency autovacuum?	If we're not sure, assume yes.
+	 * Do we need autovacuum?	If we're not sure, assume yes.
 	 */
 	return !oldestOffsetKnown ||
-		(nextOffset - oldestOffset > MULTIXACT_MEMBER_SAFE_THRESHOLD);
-}
-
-/*
- * Return whether adding "distance" to "start" would move past "boundary".
- *
- * We use this to determine whether the addition is "wrapping around" the
- * boundary point, hence the name.  The reason we don't want to use the regular
- * 2^31-modulo arithmetic here is that we want to be able to use the whole of
- * the 2^32-1 space here, allowing for more multixacts than would fit
- * otherwise.
- */
-static bool
-MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start,
-						 uint32 distance)
-{
-	MultiXactOffset finish;
-
-	/*
-	 * Note that offset number 0 is not used (see GetMultiXactIdMembers), so
-	 * if the addition wraps around the UINT_MAX boundary, skip that value.
-	 */
-	finish = start + distance;
-	if (finish < start)
-		finish++;
-
-	/*-----------------------------------------------------------------------
-	 * When the boundary is numerically greater than the starting point, any
-	 * value numerically between the two is not wrapped:
-	 *
-	 *	<----S----B---->
-	 *	[---)			 = F wrapped past B (and UINT_MAX)
-	 *		 [---)		 = F not wrapped
-	 *			  [----] = F wrapped past B
-	 *
-	 * When the boundary is numerically less than the starting point (i.e. the
-	 * UINT_MAX wraparound occurs somewhere in between) then all values in
-	 * between are wrapped:
-	 *
-	 *	<----B----S---->
-	 *	[---)			 = F not wrapped past B (but wrapped past UINT_MAX)
-	 *		 [---)		 = F wrapped past B (and UINT_MAX)
-	 *			  [----] = F not wrapped
-	 *-----------------------------------------------------------------------
-	 */
-	if (start < boundary)
-		return finish >= boundary || finish < start;
-	else
-		return finish >= boundary && finish < start;
+		(nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD);
 }
 
 /*
@@ -2727,6 +2552,7 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
 	offptr += entryno;
 	offset = *offptr;
+
 	LWLockRelease(SimpleLruGetBankLock(MultiXactOffsetCtl, pageno));
 
 	*result = offset;
@@ -2774,73 +2600,6 @@ GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
 	return true;
 }
 
-/*
- * Multixact members can be removed once the multixacts that refer to them
- * are older than every datminmxid.  autovacuum_multixact_freeze_max_age and
- * vacuum_multixact_freeze_table_age work together to make sure we never have
- * too many multixacts; we hope that, at least under normal circumstances,
- * this will also be sufficient to keep us from using too many offsets.
- * However, if the average multixact has many members, we might exhaust the
- * members space while still using few enough members that these limits fail
- * to trigger relminmxid advancement by VACUUM.  At that point, we'd have no
- * choice but to start failing multixact-creating operations with an error.
- *
- * To prevent that, if more than a threshold portion of the members space is
- * used, we effectively reduce autovacuum_multixact_freeze_max_age and
- * to a value just less than the number of multixacts in use.  We hope that
- * this will quickly trigger autovacuuming on the table or tables with the
- * oldest relminmxid, thus allowing datminmxid values to advance and removing
- * some members.
- *
- * As the fraction of the member space currently in use grows, we become
- * more aggressive in clamping this value.  That not only causes autovacuum
- * to ramp up, but also makes any manual vacuums the user issues more
- * aggressive.  This happens because vacuum_get_cutoffs() will clamp the
- * freeze table and the minimum freeze age cutoffs based on the effective
- * autovacuum_multixact_freeze_max_age this function returns.  In the worst
- * case, we'll claim the freeze_max_age to zero, and every vacuum of any
- * table will freeze every multixact.
- */
-int
-MultiXactMemberFreezeThreshold(void)
-{
-	MultiXactOffset members;
-	uint32		multixacts;
-	uint32		victim_multixacts;
-	double		fraction;
-	int			result;
-	MultiXactId oldestMultiXactId;
-	MultiXactOffset oldestOffset;
-
-	/* If we can't determine member space utilization, assume the worst. */
-	if (!GetMultiXactInfo(&multixacts, &members, &oldestMultiXactId, &oldestOffset))
-		return 0;
-
-	/* If member space utilization is low, no special action is required. */
-	if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
-		return autovacuum_multixact_freeze_max_age;
-
-	/*
-	 * Compute a target for relminmxid advancement.  The number of multixacts
-	 * we try to eliminate from the system is based on how far we are past
-	 * MULTIXACT_MEMBER_SAFE_THRESHOLD.
-	 */
-	fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
-		(MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
-	victim_multixacts = multixacts * fraction;
-
-	/* fraction could be > 1.0, but lowest possible freeze age is zero */
-	if (victim_multixacts > multixacts)
-		return 0;
-	result = multixacts - victim_multixacts;
-
-	/*
-	 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
-	 * autovacuum less aggressive than it would otherwise be.
-	 */
-	return Min(result, autovacuum_multixact_freeze_max_age);
-}
-
 typedef struct mxtruncinfo
 {
 	int64		earliestExistingPage;
@@ -2867,36 +2626,12 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int64 segpage, void *data
 
 /*
  * Delete members segments [oldest, newOldest)
- *
- * The members SLRU can, in contrast to the offsets one, be filled to almost
- * the full range at once. This means SimpleLruTruncate() can't trivially be
- * used - instead the to-be-deleted range is computed using the offsets
- * SLRU. C.f. TruncateMultiXact().
  */
 static void
 PerformMembersTruncation(MultiXactOffset oldestOffset, MultiXactOffset newOldestOffset)
 {
-	const int64 maxsegment = MXOffsetToMemberSegment(MaxMultiXactOffset);
-	int64		startsegment = MXOffsetToMemberSegment(oldestOffset);
-	int64		endsegment = MXOffsetToMemberSegment(newOldestOffset);
-	int64		segment = startsegment;
-
-	/*
-	 * Delete all the segments but the last one. The last segment can still
-	 * contain, possibly partially, valid data.
-	 */
-	while (segment != endsegment)
-	{
-		elog(DEBUG2, "truncating multixact members segment %" PRIx64,
-			 segment);
-		SlruDeleteSegment(MultiXactMemberCtl, segment);
-
-		/* move to next segment, handling wraparound correctly */
-		if (segment == maxsegment)
-			segment = 0;
-		else
-			segment += 1;
-	}
+	SimpleLruTruncate(MultiXactMemberCtl,
+					  MXOffsetToMemberPage(newOldestOffset));
 }
 
 /*
@@ -3040,7 +2775,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 
 	elog(DEBUG1, "performing multixact truncation: "
 		 "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
-		 "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+		 "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
 		 oldestMulti, newOldestMulti,
 		 MultiXactIdToOffsetSegment(oldestMulti),
 		 MultiXactIdToOffsetSegment(newOldestMulti),
@@ -3120,20 +2855,13 @@ MultiXactOffsetPagePrecedes(int64 page1, int64 page2)
 
 /*
  * Decide whether a MultiXactMember page number is "older" for truncation
- * purposes.  There is no "invalid offset number" so use the numbers verbatim.
+ * purposes.  There is no "invalid offset number" and members never wrap
+ * around, so use the numbers verbatim.
  */
 static bool
 MultiXactMemberPagePrecedes(int64 page1, int64 page2)
 {
-	MultiXactOffset offset1;
-	MultiXactOffset offset2;
-
-	offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
-	offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
-
-	return (MultiXactOffsetPrecedes(offset1, offset2) &&
-			MultiXactOffsetPrecedes(offset1,
-									offset2 + MULTIXACT_MEMBERS_PER_PAGE - 1));
+	return page1 < page2;
 }
 
 /*
@@ -3171,7 +2899,7 @@ MultiXactIdPrecedesOrEquals(MultiXactId multi1, MultiXactId multi2)
 static bool
 MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 {
-	int32		diff = (int32) (offset1 - offset2);
+	int64		diff = (int64) (offset1 - offset2);
 
 	return (diff < 0);
 }
@@ -3268,7 +2996,7 @@ multixact_redo(XLogReaderState *record)
 
 		elog(DEBUG1, "replaying multixact truncation: "
 			 "offsets [%u, %u), offsets segments [%" PRIx64 ", %" PRIx64 "), "
-			 "members [%u, %u), members segments [%" PRIx64 ", %" PRIx64 ")",
+			 "members [%" PRIu64 ", %" PRIu64 "), members segments [%" PRIx64 ", %" PRIx64 ")",
 			 xlrec.startTruncOff, xlrec.endTruncOff,
 			 MultiXactIdToOffsetSegment(xlrec.startTruncOff),
 			 MultiXactIdToOffsetSegment(xlrec.endTruncOff),
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 22d0a2e8c3a6..ef405d66b3bd 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5139,7 +5139,7 @@ BootStrapXLOG(uint32 data_checksum_version)
 		FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
 	checkPoint.nextOid = FirstGenbkiObjectId;
 	checkPoint.nextMulti = FirstMultiXactId;
-	checkPoint.nextMultiOffset = 0;
+	checkPoint.nextMultiOffset = 1;
 	checkPoint.oldestXid = FirstNormalTransactionId;
 	checkPoint.oldestXidDB = Template1DbOid;
 	checkPoint.oldestMulti = FirstMultiXactId;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 21b8f179ba0d..51dea342a4d1 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -886,7 +886,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 							 U64FromFullTransactionId(checkPoint.nextXid),
 							 checkPoint.nextOid)));
 	ereport(DEBUG1,
-			(errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
+			(errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %" PRIu64,
 							 checkPoint.nextMulti, checkPoint.nextMultiOffset)));
 	ereport(DEBUG1,
 			(errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e785dd55ce56..100e1a72c221 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1148,7 +1148,7 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams params,
 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 	 * short of multixact member space.
 	 */
-	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+	effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
 
 	/*
 	 * Almost ready to set freeze output parameters; check if OldestXmin or
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 1c38488f2cbb..bf66f494e3ae 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1151,7 +1151,7 @@ do_start_worker(void)
 
 	/* Also determine the oldest datminmxid we will consider. */
 	recentMulti = ReadNextMultiXactId();
-	multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
+	multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
 	if (multiForceLimit < FirstMultiXactId)
 		multiForceLimit -= FirstMultiXactId;
 
@@ -1939,7 +1939,7 @@ do_autovacuum(void)
 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 	 * short of multixact member space.
 	 */
-	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
+	effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
 
 	/*
 	 * Find the pg_database entry and select the default freeze ages. We use
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 30ad46912e18..a4060309ae0e 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -271,7 +271,7 @@ main(int argc, char *argv[])
 		   ControlFile->checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
 		   ControlFile->checkPointCopy.nextMulti);
-	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
+	printf(_("Latest checkpoint's NextMultiOffset:  %" PRIu64 "\n"),
 		   ControlFile->checkPointCopy.nextMultiOffset);
 	printf(_("Latest checkpoint's oldestXID:        %u\n"),
 		   ControlFile->checkPointCopy.oldestXid);
diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c
index a31e7643cf09..7c6c2741a17a 100644
--- a/src/bin/pg_resetwal/pg_resetwal.c
+++ b/src/bin/pg_resetwal/pg_resetwal.c
@@ -92,6 +92,7 @@ static void KillExistingArchiveStatus(void);
 static void KillExistingWALSummaries(void);
 static void WriteEmptyXLOG(void);
 static void usage(void);
+static uint64 strtou64_strict(const char *s, char **endptr, int base);
 
 
 int
@@ -120,7 +121,6 @@ main(int argc, char *argv[])
 	MultiXactId set_oldestmxid = 0;
 	char	   *endptr;
 	char	   *endptr2;
-	int64		tmpi64;
 	char	   *DataDir = NULL;
 	char	   *log_fname = NULL;
 	int			fd;
@@ -269,17 +269,14 @@ main(int argc, char *argv[])
 
 			case 'O':
 				errno = 0;
-				tmpi64 = strtoi64(optarg, &endptr, 0);
+				set_mxoff = strtou64_strict(optarg, &endptr, 0);
 				if (endptr == optarg || *endptr != '\0' || errno != 0)
 				{
 					pg_log_error("invalid argument for option %s", "-O");
 					pg_log_error_hint("Try \"%s --help\" for more information.", progname);
 					exit(1);
 				}
-				if (tmpi64 < 0 || tmpi64 > (int64) MaxMultiXactOffset)
-					pg_fatal("multitransaction offset (-O) must be between 0 and %u", MaxMultiXactOffset);
 
-				set_mxoff = (MultiXactOffset) tmpi64;
 				mxoff_given = true;
 				break;
 
@@ -749,7 +746,7 @@ PrintControlValues(bool guessed)
 		   ControlFile.checkPointCopy.nextOid);
 	printf(_("Latest checkpoint's NextMultiXactId:  %u\n"),
 		   ControlFile.checkPointCopy.nextMulti);
-	printf(_("Latest checkpoint's NextMultiOffset:  %u\n"),
+	printf(_("Latest checkpoint's NextMultiOffset:  %" PRIu64 "\n"),
 		   ControlFile.checkPointCopy.nextMultiOffset);
 	printf(_("Latest checkpoint's oldestXID:        %u\n"),
 		   ControlFile.checkPointCopy.oldestXid);
@@ -825,7 +822,7 @@ PrintNewControlValues(void)
 
 	if (mxoff_given)
 	{
-		printf(_("NextMultiOffset:                      %u\n"),
+		printf(_("NextMultiOffset:                      %" PRIu64 "\n"),
 			   ControlFile.checkPointCopy.nextMultiOffset);
 	}
 
@@ -1210,3 +1207,22 @@ usage(void)
 	printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 	printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
 }
+
+/* Like strtou64(), but negative values are not accepted. */
+static uint64
+strtou64_strict(const char *s, char **endptr, int base)
+{
+	/* skip leading whitespace */
+	while (isspace(*s))
+		s++;
+
+	/* reject negative values */
+	if (*s == '-')
+	{
+		*endptr = (char *) s;
+		errno = ERANGE;
+		return UINT64_MAX;
+	}
+
+	return strtou64(s, endptr, base);
+}
diff --git a/src/bin/pg_resetwal/t/001_basic.pl b/src/bin/pg_resetwal/t/001_basic.pl
index 90ecb8afe187..5a175e285d19 100644
--- a/src/bin/pg_resetwal/t/001_basic.pl
+++ b/src/bin/pg_resetwal/t/001_basic.pl
@@ -145,7 +145,7 @@
 	'fails with incorrect -O option');
 command_fails_like(
 	[ 'pg_resetwal', '-O' => '-1', $node->data_dir ],
-	qr/must be between 0 and 4294967295/,
+	qr/error: invalid argument for option -O/,
 	'fails with -O value -1');
 # --wal-segsize
 command_fails_like(
@@ -215,7 +215,7 @@ sub get_slru_files
   sprintf("%d,%d", hex($files[0]) == 0 ? 3 : hex($files[0]), hex($files[-1]));
 
 @files = get_slru_files('pg_multixact/offsets');
-$mult = 32 * $blcksz / 4;
+$mult = 32 * $blcksz / 8;
 # --multixact-ids argument is "new,old"
 push @cmd,
   '--multixact-ids' => sprintf("%d,%d",
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 82e4bb90dd58..7d98fe0fe32f 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -28,8 +28,6 @@
 
 #define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
 
-#define MaxMultiXactOffset	((MultiXactOffset) 0xFFFFFFFF)
-
 /*
  * Possible multixact lock modes ("status").  The first four modes are for
  * tuple locks (FOR KEY SHARE, FOR SHARE, FOR NO KEY UPDATE, FOR UPDATE); the
@@ -147,7 +145,6 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
 extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
 									  MultiXactOffset minMultiOffset);
 extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
-extern int	MultiXactMemberFreezeThreshold(void);
 
 extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info,
 									   void *recdata, uint32 len);
diff --git a/src/include/access/multixact_internal.h b/src/include/access/multixact_internal.h
index 9b56deaef31f..b0227759e39f 100644
--- a/src/include/access/multixact_internal.h
+++ b/src/include/access/multixact_internal.h
@@ -17,21 +17,12 @@
 
 #include "access/multixact.h"
 
-
 /*
  * Defines for MultiXactOffset page sizes.  A page is the same BLCKSZ as is
  * used everywhere else in Postgres.
- *
- * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
- * MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT.  We need
- * take no explicit notice of that fact in this module, except when comparing
- * segment and page numbers in TruncateMultiXact (see
- * MultiXactOffsetPagePrecedes).
  */
 
-/* We need four bytes per offset */
+/* We need 8 bytes per offset */
 #define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
 
 static inline int64
@@ -80,19 +71,6 @@ MultiXactIdToOffsetSegment(MultiXactId multi)
 #define MULTIXACT_MEMBERS_PER_PAGE	\
 	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
 
-/*
- * Because the number of items per page is not a divisor of the last item
- * number (member 0xFFFFFFFF), the last segment does not use the maximum number
- * of pages, and moreover the last used page therein does not use the same
- * number of items as previous pages.  (Another way to say it is that the
- * 0xFFFFFFFF member is somewhere in the middle of the last page, so the page
- * has some empty space after that item.)
- *
- * This constant is the number of members in the last page of the last segment.
- */
-#define MAX_MEMBERS_IN_LAST_MEMBERS_PAGE \
-		((uint32) ((0xFFFFFFFF % MULTIXACT_MEMBERS_PER_PAGE) + 1))
-
 /* page in which a member is to be found */
 static inline int64
 MXOffsetToMemberPage(MultiXactOffset offset)
diff --git a/src/include/c.h b/src/include/c.h
index 757dfff47825..bc92a6f4565c 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -670,7 +670,7 @@ typedef uint32 SubTransactionId;
 /* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
 typedef TransactionId MultiXactId;
 
-typedef uint32 MultiXactOffset;
+typedef uint64 MultiXactOffset;
 
 typedef uint32 CommandId;
 

From 27b6c1f398aecfb11e4877d5f9a2625061269dd6 Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Fri, 24 Oct 2025 11:47:50 +0300
Subject: [PATCH 03/10] TEST: bump catversion

To avoid constant CF-bot complains, make catversion bump in a separate
commit.

NOTE: keep it in sync with MULTIXACTOFFSET_FORMATCHANGE_CAT_VER
---
 src/include/catalog/catversion.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 7eefca1ae429..b0162c2bf63b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,7 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202511101
+// FIXME: bump it
+#define CATALOG_VERSION_NO	999999999
 
 #endif

From 009657f130a382062c7a05065fbd9d00cba0e1a7 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 12 Nov 2025 21:47:44 +0200
Subject: [PATCH 04/10] Add pg_upgrade for 64 bit multixact offsets

Author: Maxim Orlov <orlovmg@gmail.com>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
---
 src/backend/access/transam/multixact.c        |  56 ---
 src/bin/pg_upgrade/Makefile                   |   3 +
 src/bin/pg_upgrade/meson.build                |   4 +
 src/bin/pg_upgrade/multixact_new.c            | 101 ++++++
 src/bin/pg_upgrade/multixact_new.h            |  23 ++
 src/bin/pg_upgrade/multixact_old.c            | 297 ++++++++++++++++
 src/bin/pg_upgrade/multixact_old.h            |  29 ++
 src/bin/pg_upgrade/pg_upgrade.c               | 108 +++++-
 src/bin/pg_upgrade/pg_upgrade.h               |   5 +
 src/bin/pg_upgrade/slru_io.c                  | 242 +++++++++++++
 src/bin/pg_upgrade/slru_io.h                  |  52 +++
 .../pg_upgrade/t/007_multixact_conversion.pl  | 329 ++++++++++++++++++
 src/test/perl/PostgreSQL/Test/Cluster.pm      |  21 +-
 src/tools/pgindent/typedefs.list              |   3 +
 14 files changed, 1204 insertions(+), 69 deletions(-)
 create mode 100644 src/bin/pg_upgrade/multixact_new.c
 create mode 100644 src/bin/pg_upgrade/multixact_new.h
 create mode 100644 src/bin/pg_upgrade/multixact_old.c
 create mode 100644 src/bin/pg_upgrade/multixact_old.h
 create mode 100644 src/bin/pg_upgrade/slru_io.c
 create mode 100644 src/bin/pg_upgrade/slru_io.h
 create mode 100644 src/bin/pg_upgrade/t/007_multixact_conversion.pl

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 34a745c07be4..e0323ec1014f 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -1824,48 +1824,6 @@ BootStrapMultiXact(void)
 	SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
 }
 
-/*
- * MaybeExtendOffsetSlru
- *		Extend the offsets SLRU area, if necessary
- *
- * After a binary upgrade from <= 9.2, the pg_multixact/offsets SLRU area might
- * contain files that are shorter than necessary; this would occur if the old
- * installation had used multixacts beyond the first page (files cannot be
- * copied, because the on-disk representation is different).  pg_upgrade would
- * update pg_control to set the next offset value to be at that position, so
- * that tuples marked as locked by such MultiXacts would be seen as visible
- * without having to consult multixact.  However, trying to create and use a
- * new MultiXactId would result in an error because the page on which the new
- * value would reside does not exist.  This routine is in charge of creating
- * such pages.
- */
-static void
-MaybeExtendOffsetSlru(void)
-{
-	int64		pageno;
-	LWLock	   *lock;
-
-	pageno = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
-	lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
-
-	LWLockAcquire(lock, LW_EXCLUSIVE);
-
-	if (!SimpleLruDoesPhysicalPageExist(MultiXactOffsetCtl, pageno))
-	{
-		int			slotno;
-
-		/*
-		 * Fortunately for us, SimpleLruWritePage is already prepared to deal
-		 * with creating a new segment file even if the page we're writing is
-		 * not the first in it, so this is enough.
-		 */
-		slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-	}
-
-	LWLockRelease(lock);
-}
-
 /*
  * This must be called ONCE during postmaster or standalone-backend startup.
  *
@@ -2058,20 +2016,6 @@ MultiXactSetNextMXact(MultiXactId nextMulti,
 	MultiXactState->nextMXact = nextMulti;
 	MultiXactState->nextOffset = nextMultiOffset;
 	LWLockRelease(MultiXactGenLock);
-
-	/*
-	 * During a binary upgrade, make sure that the offsets SLRU is large
-	 * enough to contain the next value that would be created.
-	 *
-	 * We need to do this pretty early during the first startup in binary
-	 * upgrade mode: before StartupMultiXact() in fact, because this routine
-	 * is called even before that by StartupXLOG().  And we can't do it
-	 * earlier than at this point, because during that first call of this
-	 * routine we determine the MultiXactState->nextMXact value that
-	 * MaybeExtendOffsetSlru needs.
-	 */
-	if (IsBinaryUpgrade)
-		MaybeExtendOffsetSlru();
 }
 
 /*
diff --git a/src/bin/pg_upgrade/Makefile b/src/bin/pg_upgrade/Makefile
index 69fcf593caec..42995d53b0bc 100644
--- a/src/bin/pg_upgrade/Makefile
+++ b/src/bin/pg_upgrade/Makefile
@@ -18,11 +18,14 @@ OBJS = \
 	file.o \
 	function.o \
 	info.o \
+	multixact_new.o \
+	multixact_old.o \
 	option.o \
 	parallel.o \
 	pg_upgrade.o \
 	relfilenumber.o \
 	server.o \
+	slru_io.o \
 	tablespace.o \
 	task.o \
 	util.o \
diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index ac992f0d14b1..fff0db3b560d 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -8,11 +8,14 @@ pg_upgrade_sources = files(
   'file.c',
   'function.c',
   'info.c',
+  'multixact_new.c',
+  'multixact_old.c',
   'option.c',
   'parallel.c',
   'pg_upgrade.c',
   'relfilenumber.c',
   'server.c',
+  'slru_io.c',
   'tablespace.c',
   'task.c',
   'util.c',
@@ -47,6 +50,7 @@ tests += {
       't/004_subscription.pl',
       't/005_char_signedness.pl',
       't/006_transfer_modes.pl',
+      't/007_multixact_conversion.pl',
     ],
     'test_kwargs': {'priority': 40}, # pg_upgrade tests are slow
   },
diff --git a/src/bin/pg_upgrade/multixact_new.c b/src/bin/pg_upgrade/multixact_new.c
new file mode 100644
index 000000000000..8284a2015fc3
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_new.c
@@ -0,0 +1,101 @@
+/*
+ * multixact_new.c
+ *
+ * Functions to write multixacts in the v19 format with 64-bit
+ * MultiXactOffsets
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_new.c
+ */
+
+#include "postgres_fe.h"
+
+#include "access/multixact_internal.h"
+#include "multixact_new.h"
+
+MultiXactWriter *
+AllocMultiXactWrite(const char *pgdata, MultiXactId firstMulti,
+					MultiXactOffset firstOffset)
+{
+	MultiXactWriter *state = pg_malloc(sizeof(*state));
+	char		dir[MAXPGPATH] = {0};
+
+	pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
+	state->offset = AllocSlruWrite(dir, false);
+	SlruWriteSwitchPage(state->offset, MultiXactIdToOffsetPage(firstMulti));
+
+	pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
+	state->members = AllocSlruWrite(dir, true /* use long segment names */ );
+	SlruWriteSwitchPage(state->members, MXOffsetToMemberPage(firstOffset));
+
+	return state;
+}
+
+/*
+ * Write a new multixact with members.
+ *
+ * Simplified version of the correspoding server function, hence the name.
+ */
+void
+RecordNewMultiXact(MultiXactWriter *state, MultiXactOffset offset,
+				   MultiXactId multi, int nmembers, MultiXactMember *members)
+{
+	int64		pageno;
+	int64		prev_pageno;
+	int			entryno;
+	char	   *buf;
+	MultiXactOffset *offptr;
+
+	pageno = MultiXactIdToOffsetPage(multi);
+	entryno = MultiXactIdToOffsetEntry(multi);
+
+	/* Store the offset */
+	buf = SlruWriteSwitchPage(state->offset, pageno);
+	offptr = (MultiXactOffset *) buf;
+	offptr[entryno] = offset;
+
+	/* Store the members */
+	prev_pageno = -1;
+	for (int i = 0; i < nmembers; i++, offset++)
+	{
+		TransactionId *memberptr;
+		uint32	   *flagsptr;
+		uint32		flagsval;
+		int			bshift;
+		int			flagsoff;
+		int			memberoff;
+
+		Assert(members[i].status <= MultiXactStatusUpdate);
+
+		pageno = MXOffsetToMemberPage(offset);
+		memberoff = MXOffsetToMemberOffset(offset);
+		flagsoff = MXOffsetToFlagsOffset(offset);
+		bshift = MXOffsetToFlagsBitShift(offset);
+
+		if (pageno != prev_pageno)
+		{
+			buf = SlruWriteSwitchPage(state->members, pageno);
+			prev_pageno = pageno;
+		}
+
+		memberptr = (TransactionId *) (buf + memberoff);
+
+		*memberptr = members[i].xid;
+
+		flagsptr = (uint32 *) (buf + flagsoff);
+
+		flagsval = *flagsptr;
+		flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+		flagsval |= (members[i].status << bshift);
+		*flagsptr = flagsval;
+	}
+}
+
+void
+FreeMultiXactWrite(MultiXactWriter *state)
+{
+	FreeSlruWrite(state->offset);
+	FreeSlruWrite(state->members);
+
+	pfree(state);
+}
diff --git a/src/bin/pg_upgrade/multixact_new.h b/src/bin/pg_upgrade/multixact_new.h
new file mode 100644
index 000000000000..f66e6af7e45e
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_new.h
@@ -0,0 +1,23 @@
+/*
+ * multixact_new.h
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_new.h
+ */
+#include "access/multixact.h"
+
+#include "slru_io.h"
+
+typedef struct MultiXactWriter
+{
+	SlruSegState *offset;
+	SlruSegState *members;
+} MultiXactWriter;
+
+extern MultiXactWriter *AllocMultiXactWrite(const char *pgdata,
+											MultiXactId firstMulti,
+											MultiXactOffset firstOffset);
+extern void RecordNewMultiXact(MultiXactWriter *state, MultiXactOffset offset,
+							   MultiXactId multi, int nmembers,
+							   MultiXactMember *members);
+extern void FreeMultiXactWrite(MultiXactWriter *writer);
diff --git a/src/bin/pg_upgrade/multixact_old.c b/src/bin/pg_upgrade/multixact_old.c
new file mode 100644
index 000000000000..7bf7db4b009a
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_old.c
@@ -0,0 +1,297 @@
+/*
+ * multixact_old.c
+ *
+ * Functions to read pre-v19 multixacts
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_old.c
+ */
+
+#include "postgres_fe.h"
+
+#include "multixact_old.h"
+#include "pg_upgrade.h"
+
+/*
+ * NOTE: below are a bunch of definitions that are copy-pasted from
+ * multixact.c from version 18.  The only difference is that we use the
+ * OldMultiXactOffset type equal to uint32 instead of MultiXactOffset which
+ * became uint64.
+ */
+
+/* We need four bytes per offset and 8 bytes per base for each page. */
+#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(OldMultiXactOffset))
+
+static inline int64
+MultiXactIdToOffsetPage(MultiXactId multi)
+{
+	return multi / MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+static inline int
+MultiXactIdToOffsetEntry(MultiXactId multi)
+{
+	return multi % MULTIXACT_OFFSETS_PER_PAGE;
+}
+
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId.  To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids.  Each such 5-word (20-byte) set we call a "group", and
+ * are stored as a whole in pages.  Thus, with 8kB BLCKSZ, we keep 409 groups
+ * per page.  This wastes 12 bytes per page, but that's OK -- simplicity (and
+ * performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offset, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT			8
+#define MXACT_MEMBER_FLAGS_PER_BYTE			1
+#define MXACT_MEMBER_XACT_BITMASK	((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP		4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP	\
+	(MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+	(sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE	\
+	(MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+static inline int64
+MXOffsetToMemberPage(OldMultiXactOffset offset)
+{
+	return offset / MULTIXACT_MEMBERS_PER_PAGE;
+}
+
+/* Location (byte offset within page) of flag word for a given member */
+static inline int
+MXOffsetToFlagsOffset(MultiXactOffset offset)
+{
+	OldMultiXactOffset group = offset / MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+	int			grouponpg = group % MULTIXACT_MEMBERGROUPS_PER_PAGE;
+	int			byteoff = grouponpg * MULTIXACT_MEMBERGROUP_SIZE;
+
+	return byteoff;
+}
+
+/* Location (byte offset within page) of TransactionId of given member */
+static inline int
+MXOffsetToMemberOffset(OldMultiXactOffset offset)
+{
+	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+
+	return MXOffsetToFlagsOffset(offset) +
+		MULTIXACT_FLAGBYTES_PER_GROUP +
+		member_in_group * sizeof(TransactionId);
+}
+
+static inline int
+MXOffsetToFlagsBitShift(OldMultiXactOffset offset)
+{
+	int			member_in_group = offset % MULTIXACT_MEMBERS_PER_MEMBERGROUP;
+	int			bshift = member_in_group * MXACT_MEMBER_BITS_PER_XACT;
+
+	return bshift;
+}
+
+/*
+ * Construct reader of old multixacts.
+ *
+ * Returns the malloced memory used by the all other calls in this module.
+ */
+OldMultiXactReader *
+AllocOldMultiXactRead(char *pgdata, MultiXactId nextMulti,
+					  OldMultiXactOffset nextOffset)
+{
+	OldMultiXactReader *state = state = pg_malloc(sizeof(*state));
+	char		dir[MAXPGPATH] = {0};
+
+	state->nextMXact = nextMulti;
+	state->nextOffset = nextOffset;
+
+	pg_sprintf(dir, "%s/pg_multixact/offsets", pgdata);
+	state->offset = AllocSlruRead(dir, false);
+
+	pg_sprintf(dir, "%s/pg_multixact/members", pgdata);
+	state->members = AllocSlruRead(dir, false);
+
+	return state;
+}
+
+/*
+ * This is a simplified version of the GetMultiXactIdMembers() server function.
+ *
+ * - Only return the updating member, if any. Upgrade only cares about the
+ *   updaters. If there is no updating member, return the first locking-only
+ *   member. We don't have any way to represent "no members", but we also don't
+ *   need to preserve all the locking members.
+ *
+ * - We don't need to worry about locking and some corner cases because there's
+ *   no concurrent activity.
+ */
+void
+GetOldMultiXactIdSingleMember(OldMultiXactReader *state, MultiXactId multi,
+							  TransactionId *result, MultiXactStatus *status)
+{
+	MultiXactId nextMXact,
+				nextOffset,
+				tmpMXact;
+	int64		pageno,
+				prev_pageno;
+	int			entryno,
+				length;
+	char	   *buf;
+	OldMultiXactOffset *offptr,
+				offset;
+	TransactionId result_xid = InvalidTransactionId;
+	bool		result_isupdate = false;
+
+	nextMXact = state->nextMXact;
+	nextOffset = state->nextOffset;
+
+	/*
+	 * See GetMultiXactIdMembers in multixact.c
+	 *
+	 * Find out the offset at which we need to start reading MultiXactMembers
+	 * and the number of members in the multixact.  We determine the latter as
+	 * the difference between this multixact's starting offset and the next
+	 * one's.  However, there are some corner cases to worry about:
+	 *
+	 * 1. This multixact may be the latest one created, in which case there is
+	 * no next one to look at.  In this case the nextOffset value we just
+	 * saved is the correct endpoint.
+	 *
+	 * 2. The next multixact may still be in process of being filled in...
+	 * This cannot happen during upgrade.
+	 *
+	 * 3. Because GetNewMultiXactId increments offset zero to offset one to
+	 * handle case #2, there is an ambiguity near the point of offset
+	 * wraparound.  If we see next multixact's offset is one, is that our
+	 * multixact's actual endpoint, or did it end at zero with a subsequent
+	 * increment?  We handle this using the knowledge that if the zero'th
+	 * member slot wasn't filled, it'll contain zero, and zero isn't a valid
+	 * transaction ID so it can't be a multixact member.  Therefore, if we
+	 * read a zero from the members array, just ignore it.
+	 */
+
+	pageno = MultiXactIdToOffsetPage(multi);
+	entryno = MultiXactIdToOffsetEntry(multi);
+
+	buf = SlruReadSwitchPage(state->offset, pageno);
+	offptr = (OldMultiXactOffset *) buf;
+	offptr += entryno;
+	offset = *offptr;
+
+	Assert(offset != 0);
+
+	/*
+	 * Use the same increment rule as GetNewMultiXactId(), that is, don't
+	 * handle wraparound explicitly until needed.
+	 */
+	tmpMXact = multi + 1;
+
+	if (nextMXact == tmpMXact)
+	{
+		/* Corner case 1: there is no next multixact */
+		length = nextOffset - offset;
+	}
+	else
+	{
+		OldMultiXactOffset nextMXOffset;
+
+		/* handle wraparound if needed */
+		if (tmpMXact < FirstMultiXactId)
+			tmpMXact = FirstMultiXactId;
+
+		prev_pageno = pageno;
+
+		pageno = MultiXactIdToOffsetPage(tmpMXact);
+		entryno = MultiXactIdToOffsetEntry(tmpMXact);
+
+		if (pageno != prev_pageno)
+			buf = SlruReadSwitchPage(state->offset, pageno);
+
+		offptr = (OldMultiXactOffset *) buf;
+		offptr += entryno;
+		nextMXOffset = *offptr;
+
+		/*
+		 * Corner case 2: next multixact is still being filled in, this must
+		 * not happen during upgrade.
+		 */
+		Assert(nextMXOffset != 0);
+
+		length = nextMXOffset - offset;
+	}
+
+	prev_pageno = -1;
+	for (int i = 0; i < length; i++, offset++)
+	{
+		TransactionId *xactptr;
+		uint32	   *flagsptr;
+		int			flagsoff;
+		int			bshift;
+		int			memberoff;
+		MultiXactStatus st;
+
+		pageno = MXOffsetToMemberPage(offset);
+		memberoff = MXOffsetToMemberOffset(offset);
+
+		if (pageno != prev_pageno)
+		{
+			buf = SlruReadSwitchPage(state->members, pageno);
+			prev_pageno = pageno;
+		}
+
+		xactptr = (TransactionId *) (buf + memberoff);
+		if (!TransactionIdIsValid(*xactptr))
+		{
+			/* Corner case 3: we must be looking at unused slot zero */
+			Assert(offset == 0);
+			continue;
+		}
+
+		flagsoff = MXOffsetToFlagsOffset(offset);
+		bshift = MXOffsetToFlagsBitShift(offset);
+		flagsptr = (uint32 *) (buf + flagsoff);
+
+		st = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
+
+		/* Verify that there is a single update Xid among the given members. */
+		if (ISUPDATE_from_mxstatus(st))
+		{
+			if (result_isupdate)
+				pg_fatal("multixact %u has more than one updating member",
+						 multi);
+			result_xid = *xactptr;
+			result_isupdate = true;
+		}
+		else if (!TransactionIdIsValid(result_xid))
+			result_xid = *xactptr;
+	}
+
+	/* A multixid with zero members should not happen */
+	Assert(TransactionIdIsValid(result_xid));
+
+	*result = result_xid;
+	*status = result_isupdate ? MultiXactStatusUpdate :
+		MultiXactStatusForKeyShare;
+}
+
+/*
+ * Frees the malloced reader.
+ */
+void
+FreeOldMultiXactReader(OldMultiXactReader *state)
+{
+	FreeSlruRead(state->offset);
+	FreeSlruRead(state->members);
+
+	pfree(state);
+}
diff --git a/src/bin/pg_upgrade/multixact_old.h b/src/bin/pg_upgrade/multixact_old.h
new file mode 100644
index 000000000000..8eb5af2ccafd
--- /dev/null
+++ b/src/bin/pg_upgrade/multixact_old.h
@@ -0,0 +1,29 @@
+/*
+ * multixact_old.h
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/multixact_old.h
+ */
+
+#include "access/multixact.h"
+#include "slru_io.h"
+
+typedef uint32 OldMultiXactOffset;
+
+typedef struct OldMultiXactReader
+{
+	MultiXactId nextMXact;
+	OldMultiXactOffset nextOffset;
+
+	SlruSegState *offset;
+	SlruSegState *members;
+} OldMultiXactReader;
+
+extern OldMultiXactReader *AllocOldMultiXactRead(char *pgdata,
+												 MultiXactId nextMulti,
+												 OldMultiXactOffset nextOffset);
+extern void GetOldMultiXactIdSingleMember(OldMultiXactReader *state,
+										  MultiXactId multi,
+										  TransactionId *result,
+										  MultiXactStatus *status);
+extern void FreeOldMultiXactReader(OldMultiXactReader *reader);
diff --git a/src/bin/pg_upgrade/pg_upgrade.c b/src/bin/pg_upgrade/pg_upgrade.c
index 490e98fa26f2..0fdd05c127c1 100644
--- a/src/bin/pg_upgrade/pg_upgrade.c
+++ b/src/bin/pg_upgrade/pg_upgrade.c
@@ -48,6 +48,8 @@
 #include "common/logging.h"
 #include "common/restricted_token.h"
 #include "fe_utils/string_utils.h"
+#include "multixact_old.h"
+#include "multixact_new.h"
 #include "pg_upgrade.h"
 
 /*
@@ -769,6 +771,81 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
 	check_ok();
 }
 
+/*
+ * Convert pg_multixact/offset and /members to new format with 64-bit offsets.
+ */
+static void
+convert_multixacts(MultiXactId *new_nxtmulti, MultiXactOffset *new_nxtmxoff)
+{
+	MultiXactId oldest_multi,
+				next_multi;
+	OldMultiXactReader *old_reader;
+	MultiXactWriter *new_writer;
+	MultiXactOffset next_offset;
+
+	/*
+	 * The range of valid multi XIDs is unchanged by the conversion (they are
+	 * referenced from the heap tables), but the members SLRU is rewritten to
+	 * start from offset 1.
+	 */
+	oldest_multi = old_cluster.controldata.chkpnt_oldstMulti;
+	next_multi = old_cluster.controldata.chkpnt_nxtmulti;
+	next_offset = 1;
+
+	old_reader = AllocOldMultiXactRead(old_cluster.pgdata,
+									   old_cluster.controldata.chkpnt_nxtmulti,
+									   old_cluster.controldata.chkpnt_nxtmxoff);
+	new_writer = AllocMultiXactWrite(new_cluster.pgdata,
+									 oldest_multi, next_offset);
+
+	/* handle wraparound */
+	if (next_multi < FirstMultiXactId)
+		next_multi = FirstMultiXactId;
+
+	/*
+	 * Read multixids from old files one by one, and write them back in the
+	 * new format.
+	 */
+	for (MultiXactId multi = oldest_multi; multi != next_multi;)
+	{
+		TransactionId xid;
+		MultiXactStatus status;
+		MultiXactMember member;
+
+		/*
+		 * Read the old multixid.  The locking-only XIDs that may be part of
+		 * multi-xids don't matter after upgrade, as there can be no
+		 * transactions running across upgrade.  So as a little optimization,
+		 * we only read one member from each multixid: the one updating one,
+		 * or if there was no update, arbitrarily the first locking xid.
+		 */
+		GetOldMultiXactIdSingleMember(old_reader, multi, &xid, &status);
+
+		/* Write it out in new format */
+		member.xid = xid;
+		member.status = status;
+		RecordNewMultiXact(new_writer, next_offset, multi, 1, &member);
+
+		next_offset += 1;
+		multi++;
+		/* handle wraparound */
+		if (multi < FirstMultiXactId)
+			multi = FirstMultiXactId;
+	}
+
+	/*
+	 * Update the nextMXact/Offset values in the control file to match what we
+	 * wrote.  The nextMXact is unchanged, but nextOffset will be different.
+	 */
+	Assert(next_multi == old_cluster.controldata.chkpnt_nxtmulti);
+	*new_nxtmulti = next_multi;
+	*new_nxtmxoff = next_offset;
+
+	/* Release resources */
+	FreeMultiXactWrite(new_writer);
+	FreeOldMultiXactReader(old_reader);
+}
+
 static void
 copy_xact_xlog_xid(void)
 {
@@ -816,8 +893,29 @@ copy_xact_xlog_xid(void)
 	if (old_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER &&
 		new_cluster.controldata.cat_ver >= MULTIXACT_FORMATCHANGE_CAT_VER)
 	{
-		copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
-		copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+		MultiXactId new_nxtmulti = old_cluster.controldata.chkpnt_nxtmulti;
+		MultiXactOffset new_nxtmxoff = old_cluster.controldata.chkpnt_nxtmxoff;
+
+		/*
+		 * If the old server is before the
+		 * MULTIXACTOFFSET_FORMATCHANGE_CAT_VER it must have 32-bit multixid
+		 * offsets, thus it should be converted.
+		 */
+		if (old_cluster.controldata.cat_ver < MULTIXACTOFFSET_FORMATCHANGE_CAT_VER &&
+			new_cluster.controldata.cat_ver >= MULTIXACTOFFSET_FORMATCHANGE_CAT_VER)
+		{
+			remove_new_subdir("pg_multixact/members", false);
+			remove_new_subdir("pg_multixact/offsets", false);
+
+			prep_status("Converting pg_multixact/offsets to 64-bit");
+			convert_multixacts(&new_nxtmulti, &new_nxtmxoff);
+			check_ok();
+		}
+		else
+		{
+			copy_subdir_files("pg_multixact/offsets", "pg_multixact/offsets");
+			copy_subdir_files("pg_multixact/members", "pg_multixact/members");
+		}
 
 		prep_status("Setting next multixact ID and offset for new cluster");
 
@@ -826,10 +924,8 @@ copy_xact_xlog_xid(void)
 		 * counters here and the oldest multi present on system.
 		 */
 		exec_prog(UTILITY_LOG_FILE, NULL, true, true,
-				  "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
-				  new_cluster.bindir,
-				  old_cluster.controldata.chkpnt_nxtmxoff,
-				  old_cluster.controldata.chkpnt_nxtmulti,
+				  "\"%s/pg_resetwal\" -O %" PRIu64 " -m %u,%u \"%s\"",
+				  new_cluster.bindir, new_nxtmxoff, new_nxtmulti,
 				  old_cluster.controldata.chkpnt_oldstMulti,
 				  new_cluster.pgdata);
 		check_ok();
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index e86336f4be95..127b2cb00fab 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -114,6 +114,11 @@ extern char *output_files[];
  */
 #define MULTIXACT_FORMATCHANGE_CAT_VER 201301231
 
+/*
+ * Swicth from 32-bit to 64-bit for multixid offsets.
+ */
+#define MULTIXACTOFFSET_FORMATCHANGE_CAT_VER 999999999
+
 /*
  * large object chunk size added to pg_controldata,
  * commit 5f93c37805e7485488480916b4585e098d3cc883
diff --git a/src/bin/pg_upgrade/slru_io.c b/src/bin/pg_upgrade/slru_io.c
new file mode 100644
index 000000000000..010094184be4
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.c
@@ -0,0 +1,242 @@
+/*
+ * slru_io.c
+ *
+ * Routines for reading and writing SLRU files during upgrade.
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.c
+ */
+
+#include "postgres_fe.h"
+
+#include <fcntl.h>
+
+#include "common/fe_memutils.h"
+#include "common/file_perm.h"
+#include "common/file_utils.h"
+#include "port/pg_iovec.h"
+#include "pg_upgrade.h"
+#include "slru_io.h"
+
+static SlruSegState *AllocSlruSegState(const char *dir);
+static char *SlruFileName(SlruSegState *state, int64 segno);
+static void SlruFlush(SlruSegState *state);
+
+static SlruSegState *
+AllocSlruSegState(const char *dir)
+{
+	SlruSegState *state = pg_malloc(sizeof(*state));
+
+	state->dir = pstrdup(dir);
+	state->fn = NULL;
+	state->fd = -1;
+	state->segno = -1;
+	state->pageno = 0;
+
+	return state;
+}
+
+/* similar to the backend function with the same name */
+static char *
+SlruFileName(SlruSegState *state, int64 segno)
+{
+	if (state->long_segment_names)
+	{
+		Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFFFFFFFFFFF));
+		return psprintf("%s/%015" PRIX64, state->dir, segno);
+	}
+	else
+	{
+		Assert(segno >= 0 && segno <= INT64CONST(0xFFFFFF));
+		return psprintf("%s/%04X", state->dir, (unsigned int) segno);
+	}
+}
+
+/*
+ * Create slru reader for dir.
+ *
+ * Returns the malloced memory used by the all other read calls in this module.
+ */
+SlruSegState *
+AllocSlruRead(const char *dir, bool long_segment_names)
+{
+	SlruSegState *state = AllocSlruSegState(dir);
+
+	state->writing = false;
+	state->long_segment_names = long_segment_names;
+
+	return state;
+}
+
+/*
+ * Open given page for reading.
+ *
+ * Reading can be done in random order.
+ */
+char *
+SlruReadSwitchPageSlow(SlruSegState *state, uint64 pageno)
+{
+	int64		segno;
+
+	Assert(!state->writing);	/* read only mode */
+
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	if (segno != state->segno)
+	{
+		if (state->segno != -1)
+		{
+			close(state->fd);
+			state->fd = -1;
+
+			pg_free(state->fn);
+			state->fn = NULL;
+
+			state->segno = -1;
+		}
+
+		/* Open new segment */
+		state->fn = SlruFileName(state, segno);
+		if ((state->fd = open(state->fn, O_RDONLY | PG_BINARY, 0)) < 0)
+			pg_fatal("could not open file \"%s\": %m", state->fn);
+	}
+
+	state->segno = segno;
+
+	{
+		struct iovec iovec = {
+			.iov_base = &state->buf,
+			.iov_len = BLCKSZ,
+		};
+		off_t		offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+		if (pg_preadv(state->fd, &iovec, 1, offset) < 0)
+			pg_fatal("could not read file \"%s\": %m", state->fn);
+
+		state->pageno = pageno;
+	}
+
+	return state->buf.data;
+}
+
+/*
+ * Frees the malloced reader.
+ */
+void
+FreeSlruRead(SlruSegState *state)
+{
+	Assert(!state->writing);	/* read only mode */
+
+	if (state->fd != -1)
+		close(state->fd);
+	pg_free(state);
+}
+
+/*
+ * Create slru writer for dir.
+ *
+ * Returns the malloced memory used by the all other write calls in this module.
+ */
+SlruSegState *
+AllocSlruWrite(const char *dir, bool long_segment_names)
+{
+	SlruSegState *state = AllocSlruSegState(dir);
+
+	state->writing = true;
+	state->long_segment_names = long_segment_names;
+
+	return state;
+}
+
+/*
+ * Open the given page for writing.
+ *
+ * NOTE: This uses O_EXCL when stepping to a new segment, so this assumes that
+ * each segment is written in full before moving on to next one.  This
+ * limitation would be easy to lift if needed, but it fits the usage pattern of
+ * current callers.
+ */
+char *
+SlruWriteSwitchPageSlow(SlruSegState *state, uint64 pageno)
+{
+	int64		segno;
+	off_t		offset;
+
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+
+	segno = pageno / SLRU_PAGES_PER_SEGMENT;
+	offset = (pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	SlruFlush(state);
+	memset(state->buf.data, 0, BLCKSZ);
+
+	if (segno != state->segno)
+	{
+		if (state->segno != -1)
+		{
+			close(state->fd);
+			state->fd = -1;
+
+			pg_free(state->fn);
+			state->fn = NULL;
+
+			state->segno = -1;
+		}
+
+		/* Create the segment */
+		state->fn = SlruFileName(state, segno);
+		if ((state->fd = open(state->fn, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+							  pg_file_create_mode)) < 0)
+		{
+			pg_fatal("could not create file \"%s\": %m", state->fn);
+		}
+
+		state->segno = segno;
+
+		if (offset > 0)
+		{
+			if (pg_pwrite_zeros(state->fd, offset, 0) < 0)
+				pg_fatal("could not write file \"%s\": %m", state->fn);
+		}
+	}
+
+	state->pageno = pageno;
+
+	return state->buf.data;
+}
+
+static void
+SlruFlush(SlruSegState *state)
+{
+	struct iovec iovec = {
+		.iov_base = &state->buf,
+		.iov_len = BLCKSZ,
+	};
+	off_t		offset;
+
+	if (state->segno == -1)
+		return;
+
+	offset = (state->pageno % SLRU_PAGES_PER_SEGMENT) * BLCKSZ;
+
+	if (pg_pwritev_with_retry(state->fd, &iovec, 1, offset) < 0)
+		pg_fatal("could not write file \"%s\": %m", state->fn);
+}
+
+/*
+ * Frees the malloced writer.
+ */
+void
+FreeSlruWrite(SlruSegState *state)
+{
+	Assert(state->writing);
+
+	SlruFlush(state);
+
+	if (state->fd != -1)
+		close(state->fd);
+	pg_free(state);
+}
diff --git a/src/bin/pg_upgrade/slru_io.h b/src/bin/pg_upgrade/slru_io.h
new file mode 100644
index 000000000000..5c80a679b4d1
--- /dev/null
+++ b/src/bin/pg_upgrade/slru_io.h
@@ -0,0 +1,52 @@
+/*
+ * slru_io.h
+ *
+ * Copyright (c) 2025, PostgreSQL Global Development Group
+ * src/bin/pg_upgrade/slru_io.h
+ */
+
+#ifndef SLRU_IO_H
+#define SLRU_IO_H
+
+/*
+ * State for reading or writing an SLRU, with a one page buffer.
+ */
+typedef struct SlruSegState
+{
+	bool		writing;
+	bool		long_segment_names;
+
+	char	   *dir;
+	char	   *fn;
+	int			fd;
+	int64		segno;
+	uint64		pageno;
+
+	PGAlignedBlock buf;
+} SlruSegState;
+
+extern SlruSegState *AllocSlruRead(const char *dir, bool long_segment_names);
+extern char *SlruReadSwitchPageSlow(SlruSegState *state, uint64 pageno);
+extern void FreeSlruRead(SlruSegState *state);
+
+static inline char *
+SlruReadSwitchPage(SlruSegState *state, uint64 pageno)
+{
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+	return SlruReadSwitchPageSlow(state, pageno);
+}
+
+extern SlruSegState *AllocSlruWrite(const char *dir, bool long_segment_names);
+extern char *SlruWriteSwitchPageSlow(SlruSegState *state, uint64 pageno);
+extern void FreeSlruWrite(SlruSegState *state);
+
+static inline char *
+SlruWriteSwitchPage(SlruSegState *state, uint64 pageno)
+{
+	if (state->segno != -1 && pageno == state->pageno)
+		return state->buf.data;
+	return SlruWriteSwitchPageSlow(state, pageno);
+}
+
+#endif							/* SLRU_IO_H */
diff --git a/src/bin/pg_upgrade/t/007_multixact_conversion.pl b/src/bin/pg_upgrade/t/007_multixact_conversion.pl
new file mode 100644
index 000000000000..fe8da9aded2c
--- /dev/null
+++ b/src/bin/pg_upgrade/t/007_multixact_conversion.pl
@@ -0,0 +1,329 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Version 19 expanded MultiXactOffset from 32 to 64 bits. Upgrading
+# across that requires rewriting the SLRU files to the new format.
+# This file contains tests for the conversion.
+#
+# To run, set 'oldinstall' ENV variable to point to a pre-v19
+# installation. If it's not set, or if it points to a v19 or above
+# installation, this still performs a very basic test, upgrading a
+# cluster with some multixacts. It's not very interesting, however,
+# because there's no conversion involved in that case.
+
+use strict;
+use warnings FATAL => 'all';
+
+use Math::BigInt;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Temp dir for a dumps.
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
+# A workload that consumes multixids. The purpose of this is to
+# generate some multixids in the old cluster, so that we can test
+# upgrading them. The workload is a mix of KEY SHARE locking queries
+# and UPDATEs, and commits and aborts. It consumes around 3000
+# multixids with 30000 members. That's enough to span more than one
+# multixids 'offsets' page, and more than one 'members' segment.
+#
+# The workload leaves behind a table called 'mxofftest' containing a
+# small number of rows referencing some of the generated multixids.
+#
+# Because this function is used to generate test data on the old
+# installation, it needs to work with older PostgreSQL server
+# versions.
+#
+# The first argument is the cluster to connect to, the second argument
+# is a cluster using the new version. We need the 'psql' binary from
+# the new version, the new cluster is otherwise unused. (We need to
+# use the new 'psql' because some of the more advanced background psql
+# perl module features depend on a fairly recent psql version.)
+sub mxact_workload
+{
+	my $node = shift;       # Cluster to connect to
+	my $binnode = shift;    # Use the psql binary from this cluster
+
+	my $connstr = $node->connstr('postgres');
+
+	$node->start;
+	$node->safe_psql('postgres', qq[
+		CREATE TABLE mxofftest (id INT PRIMARY KEY, n_updated INT)
+		  WITH (AUTOVACUUM_ENABLED=FALSE);
+		INSERT INTO mxofftest SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;
+	]);
+
+	my $nclients = 20;
+	my $update_every = 13;
+	my $abort_every = 11;
+	my @connections = ();
+
+	# Open multiple connections to the database. Start a transaction
+	# in each connection.
+	for (0 .. $nclients)
+	{
+		# Use the psql binary from the new installation. The
+		# BackgroundPsql functionality doesn't work with older psql
+		# versions.
+		my $conn = $binnode->background_psql('',
+			connstr => $node->connstr('postgres'));
+		$conn->query_safe("SET enable_seqscan=off");
+		$conn->query_safe("BEGIN");
+
+		push(@connections, $conn);
+	}
+
+	# Run queries using cycling through the connections in a
+	# round-robin fashion. We keep a transaction open in each
+	# connection at all times, and lock/update the rows. With 10
+	# connections, each SELECT FOR KEY SHARE query generates a new
+	# multixid, containing the 10 XIDs of all the transactions running
+	# at the time.
+	for (my $i = 0; $i < 3000; $i++)
+	{
+		my $conn = $connections[ $i % $nclients ];
+
+		my $sql;
+		if ($i % $abort_every == 0)
+		{
+			$sql = "ABORT; ";
+		}
+		else
+		{
+			$sql = "COMMIT; ";
+		}
+		$sql .= "BEGIN; ";
+
+		if ($i % $update_every == 0)
+		{
+			$sql .= qq[
+			  UPDATE mxofftest SET n_updated = n_updated + 1 WHERE id = ${i} % 50;
+			];
+		}
+		else
+		{
+			my $threshold = int($i / 3000 * 50);
+			$sql .= qq[
+			  select count(*) from (
+				SELECT * FROM mxofftest WHERE id >= $threshold FOR KEY SHARE
+			  ) as x
+			];
+		}
+		$conn->query_safe($sql);
+	}
+
+	for my $conn (@connections)
+	{
+		$conn->quit();
+	}
+
+	$node->stop;
+	return;
+}
+
+# Read NextMultiOffset from the control file
+#
+# Note: This is used on both the old and the new installation, so the
+# command arguments and the output parsing used here must work with
+# all PostgreSQL versions supported by the test.
+sub read_next_mxoff
+{
+	my $node = shift;
+
+	my $pg_controldata_path = $node->installed_command('pg_controldata');
+	my ($stdout, $stderr) =
+	  run_command([ $pg_controldata_path, $node->data_dir ]);
+	$stdout =~ /^Latest checkpoint's NextMultiOffset:\s*(.*)$/m
+	  or die "could not read NextMultiOffset from pg_controldata";
+	return $1;
+}
+
+# Reset a cluster's oldest multixact-offset to given offset.
+#
+# Note: This is used on both the old and the new installation, so the
+# command arguments and the output parsing used here must work with
+# all PostgreSQL versions supported by the test.
+sub reset_mxoff_pre_v19
+{
+	my $node = shift;
+	my $offset = shift;
+
+	my $pg_resetwal_path = $node->installed_command('pg_resetwal');
+	# Get block size
+	my ($out, $err) =
+	  run_command([ $pg_resetwal_path, '--dry-run', $node->data_dir ]);
+	$out =~ /^Database block size: *(\d+)$/m or die;
+	my $blcksz = $1;
+	# SLRU_PAGES_PER_SEGMENT is always 32 on pre-19 version
+	my $slru_pages_per_segment = 32;
+
+	# Verify that no multixids are currently in use. Resetting would
+	# destroy them. (A freshly initialized cluster has no multixids.)
+	$out =~ /^Latest checkpoint's NextMultiXactId: *(\d+)$/m or die;
+	my $next_mxid = $1;
+	$out =~ /^Latest checkpoint's oldestMultiXid: *(\d+)$/m or die;
+	my $oldest_mxid = $1;
+	die "cluster has some multixids in use" unless $next_mxid == $oldest_mxid;
+
+	# Reset to new offset using pg_resetwal
+	my @cmd = (
+		$pg_resetwal_path,
+		'--pgdata' => $node->data_dir,
+		'--multixact-offset' => $offset);
+	command_ok(\@cmd, 'set oldest multixact-offset');
+
+	# pg_resetwal just updates the control file. The cluster will
+	# refuse to start up, if the SLRU segment corresponding to the
+	# offset does not exist. Create a dummy segment that covers the
+	# given offset, filled with zeros. But first remove any old
+	# segments.
+	unlink glob $node->data_dir . "/pg_multixact/members/*";
+
+	my $mult = 32 * int($blcksz / 20) * 4;
+	my $segname = sprintf "%04X", $offset / $mult;
+
+	my $path = $node->data_dir . "/pg_multixact/members/" . $segname;
+
+	my $null_block = "\x00" x $blcksz;
+	open(my $dh, '>', $path)
+	  || die "could not open $path for writing $!";
+	for (0 .. $slru_pages_per_segment)
+	{
+		print $dh $null_block;
+	}
+	close($dh);
+}
+
+# Dump contents of the 'mxofftest' table, created by mxact_workload
+sub get_dump_for_comparison
+{
+	my ($node, $file_prefix) = @_;
+
+	my $contents = $node->safe_psql('postgres',
+		"SELECT ctid, xmin, xmax, * FROM mxofftest");
+
+	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
+	open(my $dh, '>', $dumpfile)
+	  || die "could not open $dumpfile for writing $!";
+	print $dh $contents;
+	close($dh);
+
+	return $dumpfile;
+}
+
+# Main test workhorse routine.
+# Dump data on old version, run pg_upgrade, compare data after upgrade.
+sub upgrade_and_compare
+{
+	my $tag = shift;
+	my $oldnode = shift;
+	my $newnode = shift;
+
+	command_ok(
+		[
+			'pg_upgrade', '--no-sync',
+			'--old-datadir' => $oldnode->data_dir,
+			'--new-datadir' => $newnode->data_dir,
+			'--old-bindir' => $oldnode->config_data('--bindir'),
+			'--new-bindir' => $newnode->config_data('--bindir'),
+			'--socketdir' => $newnode->host,
+			'--old-port' => $oldnode->port,
+			'--new-port' => $newnode->port,
+		],
+		'run of pg_upgrade for new instance');
+
+	# Note: we do this *after* running pg_upgrade, to ensure that we
+	# don't set all the hint bits before upgrade by doing the SELECT
+	# on the table.
+	$oldnode->start;
+	my $old_dump = get_dump_for_comparison($oldnode, "oldnode_${tag}_dump");
+	$oldnode->stop;
+
+	$newnode->start;
+	my $new_dump = get_dump_for_comparison($newnode, "newnode_${tag}_dump");
+	$newnode->stop;
+
+	compare_files($old_dump, $new_dump,
+		'dump outputs from original and restored regression databases match');
+}
+
+my $old_version;
+
+# Basic scenario: Create a cluster using old installation, run
+# multixid-creating workload on it, then upgrade.
+#
+# This works even even if the old and new version is the same,
+# although it's not very interesting as the conversion routines only
+# run when upgrading from a pre-v19 cluster.
+{
+	my $tag = 'basic';
+	my $old =
+	  PostgreSQL::Test::Cluster->new("${tag}_oldnode",
+		install_path => $ENV{oldinstall});
+	my $new = PostgreSQL::Test::Cluster->new("${tag}_newnode");
+
+	$old->init(extra => ['-k']);
+
+	$old_version = $old->pg_version;
+	note "old installation is version $old_version\n";
+
+	# Run the workload
+	my $start_mxoff = read_next_mxoff($old);
+	mxact_workload($old, $new);
+	my $finish_mxoff = read_next_mxoff($old);
+
+	$new->init;
+	upgrade_and_compare($tag, $old, $new);
+
+	my $new_next_mxoff = read_next_mxoff($new);
+
+	note ">>> case #${tag}\n"
+	  . " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n"
+	  . " newnode mxoff ${new_next_mxoff}\n";
+}
+
+# Wraparound scenario: This is the same as the basic scenario, but the
+# old cluster goes through mxoffset wraparound.
+#
+# This requires the old installation to be version 19 of older,
+# because the hacks we use to reset the old cluster to a state just
+# before the wraparound rely on the pre-v19 file format. In version
+# 19, offsets no longer wrap around anyway.
+SKIP:
+{
+	skip
+	  "skipping mxoffset conversion tests because upgrading from the old version does not require conversion"
+	  if ($old_version >= '19devel');
+
+	my $tag = 'wraparound';
+	my $old =
+	  PostgreSQL::Test::Cluster->new("${tag}_oldnode",
+		install_path => $ENV{oldinstall});
+	my $new = PostgreSQL::Test::Cluster->new("${tag}_newnode");
+
+	$old->init(extra => ['-k']);
+
+	# Reset the NextMultiOffset value in the  old cluster to just before 32-bit wraparound.
+	reset_mxoff_pre_v19($old, 0xFFFFEC77);
+
+	# Run the workload. This crosses the wraparound.
+	my $start_mxoff = read_next_mxoff($old);
+	mxact_workload($old, $new);
+	my $finish_mxoff = read_next_mxoff($old);
+
+	# Verify that wraparound happened.
+	cmp_ok($finish_mxoff, '<', $start_mxoff,
+		"mxoff wrapped around in old cluster");
+
+	$new->init;
+	upgrade_and_compare($tag, $old, $new);
+
+	my $new_next_mxoff = read_next_mxoff($new);
+
+	note ">>> case #${tag}\n"
+	  . " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n"
+	  . " newnode mxoff ${new_next_mxoff}\n";
+}
+
+done_testing();
diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm
index 35413f140198..34f07d52cd82 100644
--- a/src/test/perl/PostgreSQL/Test/Cluster.pm
+++ b/src/test/perl/PostgreSQL/Test/Cluster.pm
@@ -1793,13 +1793,20 @@ sub _get_env
 	return (%inst_env);
 }
 
-# Private routine to get an installation path qualified command.
-#
-# IPC::Run maintains a cache, %cmd_cache, mapping commands to paths.  Tests
-# which use nodes spanning more than one postgres installation path need to
-# avoid confusing which installation's binaries get run.  Setting $ENV{PATH} is
-# insufficient, as IPC::Run does not check to see if the path has changed since
-# caching a command.
+=pod
+
+=item $node->installed_command(cmd)
+
+Get an installation path qualified command.
+
+IPC::Run maintains a cache, %cmd_cache, mapping commands to paths.  Tests
+which use nodes spanning more than one postgres installation path need to
+avoid confusing which installation's binaries get run.  Setting $ENV{PATH} is
+insufficient, as IPC::Run does not check to see if the path has changed since
+caching a command.
+
+=cut
+
 sub installed_command
 {
 	my ($self, $cmd) = @_;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 23bce72ae64b..f9ddd06ec1db 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1725,6 +1725,7 @@ MultiXactMember
 MultiXactOffset
 MultiXactStateData
 MultiXactStatus
+MultiXactWriter
 MultirangeIOData
 MultirangeParseState
 MultirangeType
@@ -1808,6 +1809,7 @@ OffsetVarNodes_context
 Oid
 OidOptions
 OkeysState
+OldMultiXactReader
 OldToNewMapping
 OldToNewMappingData
 OnCommitAction
@@ -2804,6 +2806,7 @@ SlruCtlData
 SlruErrorCause
 SlruPageStatus
 SlruScanCallback
+SlruSegState
 SlruShared
 SlruSharedData
 SlruWriteAll

From fca1e2daeeaf497c7801cf17f61b455f7e162a1f Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Thu, 6 Nov 2025 16:20:18 +0300
Subject: [PATCH 05/10] Remove oldestOffset/oldestOffsetKnown from multixact

Since we rewrite all multitransactions during pg_upgrade, the oldest
offset for a new cluster will no longer be missing on disc.
---
 src/backend/access/transam/multixact.c | 101 ++-----------------------
 src/include/access/multixact.h         |   3 -
 2 files changed, 5 insertions(+), 99 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index e0323ec1014f..78ba6d72a92f 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -140,14 +140,6 @@ typedef struct MultiXactStateData
 	MultiXactId oldestMultiXactId;
 	Oid			oldestMultiXactDB;
 
-	/*
-	 * Oldest multixact offset that is potentially referenced by a multixact
-	 * referenced by a relation.  We don't always know this value, so there's
-	 * a flag here to indicate whether or not we currently do.
-	 */
-	MultiXactOffset oldestOffset;
-	bool		oldestOffsetKnown;
-
 	/* support for anti-wraparound measures */
 	MultiXactId multiVacLimit;
 	MultiXactId multiWarnLimit;
@@ -2371,10 +2363,7 @@ SetOffsetVacuumLimit(bool is_startup)
 	MultiXactId oldestMultiXactId;
 	MultiXactId nextMXact;
 	MultiXactOffset oldestOffset = 0;	/* placate compiler */
-	MultiXactOffset prevOldestOffset;
 	MultiXactOffset nextOffset;
-	bool		oldestOffsetKnown = false;
-	bool		prevOldestOffsetKnown;
 
 	/*
 	 * NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2387,8 +2376,6 @@ SetOffsetVacuumLimit(bool is_startup)
 	oldestMultiXactId = MultiXactState->oldestMultiXactId;
 	nextMXact = MultiXactState->nextMXact;
 	nextOffset = MultiXactState->nextOffset;
-	prevOldestOffsetKnown = MultiXactState->oldestOffsetKnown;
-	prevOldestOffset = MultiXactState->oldestOffset;
 	Assert(MultiXactState->finishedStartup);
 	LWLockRelease(MultiXactGenLock);
 
@@ -2406,57 +2393,20 @@ SetOffsetVacuumLimit(bool is_startup)
 		 * offset.
 		 */
 		oldestOffset = nextOffset;
-		oldestOffsetKnown = true;
 	}
-	else
+	else if (!find_multixact_start(oldestMultiXactId, &oldestOffset))
 	{
-		/*
-		 * Figure out where the oldest existing multixact's offsets are
-		 * stored. Due to bugs in early release of PostgreSQL 9.3.X and 9.4.X,
-		 * the supposedly-earliest multixact might not really exist.  We are
-		 * careful not to fail in that case.
-		 */
-		oldestOffsetKnown =
-			find_multixact_start(oldestMultiXactId, &oldestOffset);
-
-		if (!oldestOffsetKnown)
-			ereport(LOG,
-					(errmsg("oldest checkpointed MultiXact %u does not exist on disk",
-							oldestMultiXactId)));
+		ereport(LOG,
+				(errmsg("oldest checkpointed MultiXact %u does not exist on disk",
+						oldestMultiXactId)));
 	}
 
 	LWLockRelease(MultiXactTruncationLock);
 
-	/*
-	 * If we can, compute limits (and install them MultiXactState) to prevent
-	 * overrun of old data in the members SLRU area. We can only do so if the
-	 * oldest offset is known though.
-	 *
-	 * FIXME: Is !oldestOffsetKnown possible anymore? At least update the comment:
-	 * we won't overrun members anymore.
-	 */
-	if (prevOldestOffsetKnown)
-	{
-		/*
-		 * If we failed to get the oldest offset this time, but we have a
-		 * value from a previous pass through this function, use the old
-		 * values rather than automatically forcing an autovacuum cycle again.
-		 */
-		oldestOffset = prevOldestOffset;
-		oldestOffsetKnown = true;
-	}
-
-	/* Install the computed values */
-	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
-	MultiXactState->oldestOffset = oldestOffset;
-	MultiXactState->oldestOffsetKnown = oldestOffsetKnown;
-	LWLockRelease(MultiXactGenLock);
-
 	/*
 	 * Do we need autovacuum?	If we're not sure, assume yes.
 	 */
-	return !oldestOffsetKnown ||
-		(nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD);
+	return nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD;
 }
 
 /*
@@ -2503,47 +2453,6 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	return true;
 }
 
-/*
- * GetMultiXactInfo
- *
- * Returns information about the current MultiXact state, as of:
- * multixacts: Number of MultiXacts (nextMultiXactId - oldestMultiXactId)
- * members: Number of member entries (nextOffset - oldestOffset)
- * oldestMultiXactId: Oldest MultiXact ID still in use
- * oldestOffset: Oldest offset still in use
- *
- * Returns false if unable to determine, the oldest offset being unknown.
- */
-bool
-GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
-				 MultiXactId *oldestMultiXactId, MultiXactOffset *oldestOffset)
-{
-	MultiXactOffset nextOffset;
-	MultiXactId nextMultiXactId;
-	bool		oldestOffsetKnown;
-
-	LWLockAcquire(MultiXactGenLock, LW_SHARED);
-	nextOffset = MultiXactState->nextOffset;
-	*oldestMultiXactId = MultiXactState->oldestMultiXactId;
-	nextMultiXactId = MultiXactState->nextMXact;
-	*oldestOffset = MultiXactState->oldestOffset;
-	oldestOffsetKnown = MultiXactState->oldestOffsetKnown;
-	LWLockRelease(MultiXactGenLock);
-
-	if (!oldestOffsetKnown)
-	{
-		*members = 0;
-		*multixacts = 0;
-		*oldestMultiXactId = InvalidMultiXactId;
-		*oldestOffset = 0;
-		return false;
-	}
-
-	*members = nextOffset - *oldestOffset;
-	*multixacts = nextMultiXactId - *oldestMultiXactId;
-	return true;
-}
-
 typedef struct mxtruncinfo
 {
 	int64		earliestExistingPage;
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index 7d98fe0fe32f..d688b547c547 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -109,9 +109,6 @@ extern bool MultiXactIdIsRunning(MultiXactId multi, bool isLockOnly);
 extern void MultiXactIdSetOldestMember(void);
 extern int	GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
 								  bool from_pgupgrade, bool isLockOnly);
-extern bool GetMultiXactInfo(uint32 *multixacts, MultiXactOffset *members,
-							 MultiXactId *oldestMultiXactId,
-							 MultiXactOffset *oldestOffset);
 extern bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
 extern bool MultiXactIdPrecedesOrEquals(MultiXactId multi1,
 										MultiXactId multi2);

From cb0a6a21d3aa32711136b82c2e584da6a5d95b11 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 13 Nov 2025 12:38:41 +0200
Subject: [PATCH 06/10] Reintroduce MultiXactMemberFreezeThreshold

---
 src/backend/access/transam/multixact.c | 202 ++++++++++++++++++++-----
 src/backend/access/transam/xlog.c      |   4 +-
 src/backend/commands/vacuum.c          |   6 +-
 src/backend/postmaster/autovacuum.c    |   4 +-
 src/include/access/multixact.h         |   4 +-
 5 files changed, 170 insertions(+), 50 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 78ba6d72a92f..c72b2cd70903 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -91,13 +91,13 @@
 
 
 /*
- * Multixact members warning threshold.
- *
- * If the difference between nextOffset and oldestOffset exceeds this value,
- * we trigger autovacuum in order to release disk space consumed by the
- * members SLRU.
+ * Thresholds used to keep members disk usage in check when multixids have a
+ * lot of members.  When MULTIXACT_MEMBER_LOW_THRESHOLD is reached, vacuum
+ * starts freezing multixids more aggressively, even if the normal multixid
+ * age limits haven't been reached yet.
  */
-#define MULTIXACT_MEMBER_AUTOVAC_THRESHOLD		UINT64CONST(4000000000)
+#define MULTIXACT_MEMBER_LOW_THRESHOLD		UINT64CONST(2000000000)
+#define MULTIXACT_MEMBER_HIGH_THRESHOLD		UINT64CONST(4000000000)
 
 static inline MultiXactId
 PreviousMultiXactId(MultiXactId multi)
@@ -140,6 +140,12 @@ typedef struct MultiXactStateData
 	MultiXactId oldestMultiXactId;
 	Oid			oldestMultiXactDB;
 
+	/*
+	 * Oldest multixact offset that is potentially referenced by a multixact
+	 * referenced by a relation.
+	 */
+	MultiXactOffset oldestOffset;
+
 	/* support for anti-wraparound measures */
 	MultiXactId multiVacLimit;
 	MultiXactId multiWarnLimit;
@@ -276,7 +282,7 @@ static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
 									MultiXactOffset offset2);
 static void ExtendMultiXactOffset(MultiXactId multi);
 static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static bool SetOffsetVacuumLimit(bool is_startup);
+static void SetOffsetVacuumLimit(void);
 static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
 static void WriteMTruncateXlogRec(Oid oldestMultiDB,
 								  MultiXactId startTruncOff,
@@ -1945,8 +1951,8 @@ TrimMultiXact(void)
 	MultiXactState->finishedStartup = true;
 	LWLockRelease(MultiXactGenLock);
 
-	/* Now compute how far away the next members wraparound is. */
-	SetMultiXactIdLimit(oldestMXact, oldestMXactDB, true);
+	/* Now compute how far away the next multixid wraparound is. */
+	SetMultiXactIdLimit(oldestMXact, oldestMXactDB);
 }
 
 /*
@@ -2015,28 +2021,24 @@ MultiXactSetNextMXact(MultiXactId nextMulti,
  * datminmxid (ie, the oldest MultiXactId that might exist in any database
  * of our cluster), and the OID of the (or a) database with that value.
  *
- * is_startup is true when we are just starting the cluster, false when we
- * are updating state in a running cluster.  This only affects log messages.
+ * This also updates MultiXactState->oldestOffset, by looking up the offset of
+ * MultiXactState->oldestMultiXactId.
  */
 void
-SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
-					bool is_startup)
+SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
 {
 	MultiXactId multiVacLimit;
 	MultiXactId multiWarnLimit;
 	MultiXactId multiStopLimit;
 	MultiXactId multiWrapLimit;
 	MultiXactId curMulti;
-	bool		needs_offset_vacuum;
 
 	Assert(MultiXactIdIsValid(oldest_datminmxid));
 
 	/*
 	 * We pretend that a wrap will happen halfway through the multixact ID
 	 * space, but that's not really true, because multixacts wrap differently
-	 * from transaction IDs.  Note that, separately from any concern about
-	 * multixact IDs wrapping, we must ensure that multixact members do not
-	 * wrap.  Limits for that are set in SetOffsetVacuumLimit, not here.
+	 * from transaction IDs.
 	 */
 	multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
 	if (multiWrapLimit < FirstMultiXactId)
@@ -2104,8 +2106,13 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
 
 	Assert(!InRecovery);
 
-	/* Set limits for offset vacuum. */
-	needs_offset_vacuum = SetOffsetVacuumLimit(is_startup);
+	/*
+	 * Offsets are 64-bits wide and never wrap around, so we don't need to
+	 * consider them for emergency autovacuum purposes.  But now that we're in
+	 * a consistent state, determine MultiXactState->oldestOffset, to be used
+	 * to calculate freezing cutoff to keep the offsets disk usage in check.
+	 */
+	SetOffsetVacuumLimit();
 
 	/*
 	 * If past the autovacuum force point, immediately signal an autovac
@@ -2114,8 +2121,7 @@ SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid,
 	 * database, it'll call here, and we'll signal the postmaster to start
 	 * another iteration immediately if there are still any old databases.
 	 */
-	if ((MultiXactIdPrecedes(multiVacLimit, curMulti) ||
-		 needs_offset_vacuum) && IsUnderPostmaster)
+	if (MultiXactIdPrecedes(multiVacLimit, curMulti) && IsUnderPostmaster)
 		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
 
 	/* Give an immediate warning if past the wrap warn point */
@@ -2198,7 +2204,7 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
 	Assert(InRecovery);
 
 	if (MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
-		SetMultiXactIdLimit(oldestMulti, oldestMultiDB, false);
+		SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
 }
 
 /*
@@ -2348,22 +2354,17 @@ GetOldestMultiXactId(void)
 }
 
 /*
- * Determine if we need to vacuum to keep the size of the members SLRU in
- * check.
- *
- * To do so determine what's the oldest member offset and install the limit
- * info in MultiXactState, where it can be used to prevent overrun of old data
- * in the members SLRU area.
- *
- * The return value is true if autovacuum is required and false otherwise.
+ * Determine what's the oldest member offset and install it in MultiXactState,
+ * where it can be used to adjust multixid freezing cutoffs.
  */
-static bool
-SetOffsetVacuumLimit(bool is_startup)
+static void
+SetOffsetVacuumLimit(void)
 {
 	MultiXactId oldestMultiXactId;
 	MultiXactId nextMXact;
 	MultiXactOffset oldestOffset = 0;	/* placate compiler */
 	MultiXactOffset nextOffset;
+	bool		oldestOffsetKnown = false;
 
 	/*
 	 * NB: Have to prevent concurrent truncation, we might otherwise try to
@@ -2393,20 +2394,37 @@ SetOffsetVacuumLimit(bool is_startup)
 		 * offset.
 		 */
 		oldestOffset = nextOffset;
+		oldestOffsetKnown = true;
 	}
-	else if (!find_multixact_start(oldestMultiXactId, &oldestOffset))
+	else
 	{
-		ereport(LOG,
-				(errmsg("oldest checkpointed MultiXact %u does not exist on disk",
-						oldestMultiXactId)));
+		/*
+		 * Figure out the offset at which oldest existing multixact's members
+		 * are stored.  If we cannot find it, be careful not to fail.  (We had
+		 * bugs in early releases of PostgreSQL 9.3.X and 9.4.X, the
+		 * supposedly-earliest multixact might not really exist.  Those should
+		 * be long gone by now, but let's nevertheless be careful not to fail
+		 * in that case.)
+		 */
+		oldestOffsetKnown =
+			find_multixact_start(oldestMultiXactId, &oldestOffset);
+
+		if (!oldestOffsetKnown)
+			ereport(LOG,
+					(errmsg("oldest checkpointed MultiXact %u does not exist on disk",
+							oldestMultiXactId)));
+		return;
 	}
 
 	LWLockRelease(MultiXactTruncationLock);
 
-	/*
-	 * Do we need autovacuum?	If we're not sure, assume yes.
-	 */
-	return nextOffset - oldestOffset > MULTIXACT_MEMBER_AUTOVAC_THRESHOLD;
+	/* Install the computed value */
+	if (oldestOffsetKnown)
+	{
+		LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+		MultiXactState->oldestOffset = oldestOffset;
+		LWLockRelease(MultiXactGenLock);
+	}
 }
 
 /*
@@ -2453,6 +2471,107 @@ find_multixact_start(MultiXactId multi, MultiXactOffset *result)
 	return true;
 }
 
+/*
+ * Determine how many multixacts, and how many multixact members, currently
+ * exist.
+ */
+static void
+ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
+{
+	MultiXactOffset nextOffset;
+	MultiXactOffset oldestOffset;
+	MultiXactId oldestMultiXactId;
+	MultiXactId nextMultiXactId;
+
+	LWLockAcquire(MultiXactGenLock, LW_SHARED);
+	nextOffset = MultiXactState->nextOffset;
+	oldestMultiXactId = MultiXactState->oldestMultiXactId;
+	nextMultiXactId = MultiXactState->nextMXact;
+	oldestOffset = MultiXactState->oldestOffset;
+	LWLockRelease(MultiXactGenLock);
+
+	*members = nextOffset - oldestOffset;
+	*multixacts = nextMultiXactId - oldestMultiXactId;
+}
+
+/*
+ * Multixact members can be removed once the multixacts that refer to them are
+ * older than every datminmxid.  autovacuum_multixact_freeze_max_age and
+ * vacuum_multixact_freeze_table_age work together to make sure we never have
+ * too many multixacts; we hope that, at least under normal circumstances,
+ * this will also be sufficient to keep us from using too many offsets.
+ * However, if the average multixact has many members, we might accumulate a
+ * huge amount of members, consuming disk space, while still using few enough
+ * multixids that the multixid limits fail to trigger relminmxid advancement
+ * by VACUUM.
+ *
+ * To prevent that, if more than a certain amount of members space is used
+ * (MULTIXACT_MEMBER_LOW_THRESHOLD), we effectively reduce
+ * autovacuum_multixact_freeze_max_age to a value just less than the number of
+ * multixacts in use.  We hope that this will quickly trigger autovacuuming on
+ * the table or tables with the oldest relminmxid, thus allowing datminmxid
+ * values to advance and removing some members.
+ *
+ * As the amount of the member space in use grows, we become more aggressive
+ * in clamping this value.  That not only causes autovacuum to ramp up, but
+ * also makes any manual vacuums the user issues more aggressive.  This
+ * happens because vacuum_get_cutoffs() will clamp the freeze table and the
+ * minimum freeze age cutoffs based on the effective
+ * autovacuum_multixact_freeze_max_age this function returns.  At the extreme,
+ * when the members usage reaches MULTIXACT_MEMBER_HIGH_THRESHOLD, we'll clamp
+ * freeze_max_age to zero, and every vacuum of any table will freeze every
+ * multixact.
+ */
+int
+MultiXactMemberFreezeThreshold(void)
+{
+	MultiXactOffset members;
+	uint32		multixacts;
+	uint32		victim_multixacts;
+	double		fraction;
+	int			result;
+
+	/*
+	 * Read the current offsets and members usage.
+	 *
+	 * Note: In the case that we have been unable to calculate oldestOffset,
+	 * because we failed to find the offset of the oldest multixid, we assume
+	 * the worst because oldestOffset will be left to zero in that case.
+	 */
+	ReadMultiXactCounts(&multixacts, &members);
+
+	/* If member space utilization is low, no special action is required. */
+	if (members <= MULTIXACT_MEMBER_LOW_THRESHOLD)
+		return autovacuum_multixact_freeze_max_age;
+
+	/*
+	 * Compute a target for relminmxid advancement.  The number of multixacts
+	 * we try to eliminate from the system is based on how far we are past
+	 * MULTIXACT_MEMBER_LOW_THRESHOLD.
+	 *
+	 * The way this formula works is that when members is exactly at the low
+	 * threshold, fraction == 0.0, and we set freeze_max_age equal to
+	 * mxid_age(oldestMultiXactId).  As members grows further, towards the
+	 * high threshold, fraction grows linearly from 0.0 to 1.0, and the result
+	 * shrinks from mxid_age(oldestMultiXactId) to 0.  Beyond the high
+	 * threshold, fraction > 1.0 and the result is clamped to 0.
+	 */
+	fraction = (double) (members - MULTIXACT_MEMBER_LOW_THRESHOLD) /
+		(MULTIXACT_MEMBER_HIGH_THRESHOLD - MULTIXACT_MEMBER_LOW_THRESHOLD);
+	victim_multixacts = multixacts * fraction;
+
+	/* fraction could be > 1.0, but lowest possible freeze age is zero */
+	if (victim_multixacts > multixacts)
+		return 0;
+	result = multixacts - victim_multixacts;
+
+	/*
+	 * Clamp to autovacuum_multixact_freeze_max_age, so that we never make
+	 * autovacuum less aggressive than it would otherwise be.
+	 */
+	return Min(result, autovacuum_multixact_freeze_max_age);
+}
+
 typedef struct mxtruncinfo
 {
 	int64		earliestExistingPage;
@@ -2669,6 +2788,7 @@ TruncateMultiXact(MultiXactId newOldestMulti, Oid newOldestMultiDB)
 	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
 	MultiXactState->oldestMultiXactId = newOldestMulti;
 	MultiXactState->oldestMultiXactDB = newOldestMultiDB;
+	MultiXactState->oldestOffset = newOldestOffset;
 	LWLockRelease(MultiXactGenLock);
 
 	/* First truncate members */
@@ -2864,7 +2984,7 @@ multixact_redo(XLogReaderState *record)
 		 * Advance the horizon values, so they're current at the end of
 		 * recovery.
 		 */
-		SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB, false);
+		SetMultiXactIdLimit(xlrec.endTruncOff, xlrec.oldestMultiDB);
 
 		PerformMembersTruncation(xlrec.startTruncMemb, xlrec.endTruncMemb);
 
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ef405d66b3bd..a000b8bd509a 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -5155,7 +5155,7 @@ BootStrapXLOG(uint32 data_checksum_version)
 	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 	AdvanceOldestClogXid(checkPoint.oldestXid);
 	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
-	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
+	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
 	SetCommitTsLimit(InvalidTransactionId, InvalidTransactionId);
 
 	/* Set up the XLOG page header */
@@ -5636,7 +5636,7 @@ StartupXLOG(void)
 	MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
 	AdvanceOldestClogXid(checkPoint.oldestXid);
 	SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
-	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
+	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
 	SetCommitTsLimit(checkPoint.oldestCommitTsXid,
 					 checkPoint.newestCommitTsXid);
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 100e1a72c221..bd4278cd250d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1146,9 +1146,9 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams params,
 	/*
 	 * Also compute the multixact age for which freezing is urgent.  This is
 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
-	 * short of multixact member space.
+	 * short of multixact member space. XXX update comment
 	 */
-	effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
+	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 
 	/*
 	 * Almost ready to set freeze output parameters; check if OldestXmin or
@@ -1971,7 +1971,7 @@ vac_truncate_clog(TransactionId frozenXID,
 	 * signaling twice?
 	 */
 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
-	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+	SetMultiXactIdLimit(minMulti, minmulti_datoid);
 
 	LWLockRelease(WrapLimitsVacuumLock);
 }
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index bf66f494e3ae..1c38488f2cbb 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -1151,7 +1151,7 @@ do_start_worker(void)
 
 	/* Also determine the oldest datminmxid we will consider. */
 	recentMulti = ReadNextMultiXactId();
-	multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age;
+	multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
 	if (multiForceLimit < FirstMultiXactId)
 		multiForceLimit -= FirstMultiXactId;
 
@@ -1939,7 +1939,7 @@ do_autovacuum(void)
 	 * normally autovacuum_multixact_freeze_max_age, but may be less if we are
 	 * short of multixact member space.
 	 */
-	effective_multixact_freeze_max_age = autovacuum_multixact_freeze_max_age;
+	effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
 
 	/*
 	 * Find the pg_database entry and select the default freeze ages. We use
diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h
index d688b547c547..cfff86f655f3 100644
--- a/src/include/access/multixact.h
+++ b/src/include/access/multixact.h
@@ -126,8 +126,7 @@ extern void BootStrapMultiXact(void);
 extern void StartupMultiXact(void);
 extern void TrimMultiXact(void);
 extern void SetMultiXactIdLimit(MultiXactId oldest_datminmxid,
-								Oid oldest_datoid,
-								bool is_startup);
+								Oid oldest_datoid);
 extern void MultiXactGetCheckptMulti(bool is_shutdown,
 									 MultiXactId *nextMulti,
 									 MultiXactOffset *nextMultiOffset,
@@ -142,6 +141,7 @@ extern void MultiXactSetNextMXact(MultiXactId nextMulti,
 extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
 									  MultiXactOffset minMultiOffset);
 extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
+extern int	MultiXactMemberFreezeThreshold(void);
 
 extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info,
 									   void *recdata, uint32 len);

From fcae346df34a23d212f27320d78d8d0c97a2a8ab Mon Sep 17 00:00:00 2001
From: Maxim Orlov <orlovmg@gmail.com>
Date: Tue, 28 Oct 2025 19:08:26 +0300
Subject: [PATCH 07/10] TEST: Add test for 64-bit mxoff in pg_resetwal

---
 src/bin/pg_resetwal/meson.build    |   1 +
 src/bin/pg_resetwal/t/003_mxoff.pl | 170 +++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 src/bin/pg_resetwal/t/003_mxoff.pl

diff --git a/src/bin/pg_resetwal/meson.build b/src/bin/pg_resetwal/meson.build
index 290832b22996..1e2dfb38a5b7 100644
--- a/src/bin/pg_resetwal/meson.build
+++ b/src/bin/pg_resetwal/meson.build
@@ -25,6 +25,7 @@ tests += {
     'tests': [
       't/001_basic.pl',
       't/002_corrupted.pl',
+      't/003_mxoff.pl',
     ],
   },
 }
diff --git a/src/bin/pg_resetwal/t/003_mxoff.pl b/src/bin/pg_resetwal/t/003_mxoff.pl
new file mode 100644
index 000000000000..3c1b7fa1d335
--- /dev/null
+++ b/src/bin/pg_resetwal/t/003_mxoff.pl
@@ -0,0 +1,170 @@
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use Math::BigInt;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+sub mxact_eater
+{
+	my $node = shift;
+	my $tbl = shift;
+
+	$node->start;
+	$node->safe_psql('postgres',
+		"CREATE TABLE ${tbl} (I INT PRIMARY KEY, N_UPDATED INT) " .
+		"       WITH (AUTOVACUUM_ENABLED=FALSE);" .
+		"INSERT INTO ${tbl} SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;");
+
+	# consume around 10k multixact-offsetfs
+	my $nclients = 10;
+	my $update_every = 75;
+	my @connections = ();
+
+	for (0..$nclients)
+	{
+		my $conn = $node->background_psql('postgres');
+		$conn->query_safe("BEGIN");
+
+		push(@connections, $conn);
+	}
+
+	for (my $i = 0; $i < 1000; $i++)
+	{
+		my $conn = $connections[$i % $nclients];
+
+		$conn->query_safe("COMMIT;");
+		$conn->query_safe("BEGIN");
+
+		if ($i % $update_every == 0)
+		{
+			$conn->query_safe(
+				"UPDATE ${tbl} SET " .
+				"N_UPDATED = N_UPDATED + 1 " .
+				"WHERE I = ${i} % 50");
+		}
+		else
+		{
+			$conn->query_safe(
+				"SELECT * FROM ${tbl} FOR KEY SHARE");
+		}
+	}
+
+	for my $conn (@connections)
+	{
+		$conn->quit();
+	}
+
+	$node->stop;
+}
+
+sub next_mxoff
+{
+	my $node = shift;
+	my ($stdout, $stderr) =
+	  run_command([ 'pg_controldata', $node->data_dir ]);
+	my @control_data = split("\n", $stdout);
+	my $next_mxoff = undef;
+
+	foreach (@control_data)
+	{
+		if ($_ =~ /^Latest checkpoint's NextMultiOffset:\s*(.*)$/mg)
+		{
+			$next_mxoff = $1;
+			last;
+		}
+	}
+	die "NextMultiOffset not found in control file\n"
+		unless defined($next_mxoff);
+
+	return $next_mxoff;
+}
+
+sub reset_mxoff
+{
+	my $node = shift;
+	my $offset = shift;
+		$offset = Math::BigInt->new($offset);
+
+	# Get block size
+	my $out = (run_command([ 'pg_resetwal', '--dry-run', $node->data_dir ]))[0];
+		$out =~ /^Database block size: *(\d+)$/m or die;
+	my $blcksz = $1;
+
+	# Reset to new offset
+	my @cmd = ('pg_resetwal', '--pgdata' => $node->data_dir);
+	push @cmd, '--multixact-offset' => $offset->as_hex();
+	command_ok(\@cmd, 'set oldest multixact-offset');
+
+	# Fill empty pg_multixact/members segment
+	my $mult = 32 * int($blcksz / 20) * 4;
+	my $segname = sprintf "%015X", $offset / $mult;
+
+	my @dd = ('dd');
+	push @dd, "if=/dev/zero";
+	push @dd, "of=" . $node->data_dir . "/pg_multixact/members/" . $segname;
+	push @dd, "bs=$blcksz";
+	push @dd, "count=32";
+	command_ok(\@dd, 'fill empty multixact-members');
+}
+
+my ($off1, $off2);
+
+# start from defaults
+my $node1 = PostgreSQL::Test::Cluster->new('node1');
+$node1->init;
+$off1 = next_mxoff($node1);
+mxact_eater($node1, "FOO");
+$off2 = next_mxoff($node1);
+note "> start from $off1, finished at $off2\n";
+
+# start from before 32-bit wraparound
+my $node2 = PostgreSQL::Test::Cluster->new('node2');
+$node2->init;
+reset_mxoff($node2, 0xFFFF0000);
+$off1 = next_mxoff($node2);
+mxact_eater($node2, "FOO");
+$off2 = next_mxoff($node2);
+note "> start from $off1, finished at $off2\n";
+
+# start near 32-bit wraparound
+my $node3 = PostgreSQL::Test::Cluster->new('node3');
+$node3->init;
+reset_mxoff($node3, 0xFFFFEC77);
+$off1 = next_mxoff($node3);
+mxact_eater($node3, "FOO");
+$off2 = next_mxoff($node3);
+note "> start from $off1, finished at $off2\n";
+
+# start over 32-bit wraparound
+my $node4 = PostgreSQL::Test::Cluster->new('node4');
+$node4->init;
+reset_mxoff($node4, '0xFFFFFFFF0000');
+$off1 = next_mxoff($node4);
+mxact_eater($node4, "FOO");
+$off2 = next_mxoff($node3);
+note "> start from $off1, finished at $off2\n";
+
+# check invariant
+$node1->start;
+$node2->start;
+$node3->start;
+$node4->start;
+
+my $var1 = $node1->safe_psql('postgres', 'TABLE FOO');
+my $var2 = $node2->safe_psql('postgres', 'TABLE FOO');
+my $var3 = $node3->safe_psql('postgres', 'TABLE FOO');
+my $var4 = $node4->safe_psql('postgres', 'TABLE FOO');
+ok($var1 eq $var2 eq $var3 eq $var4,
+	'check table invariant in all nodes');
+
+$node4->stop;
+$node3->stop;
+$node2->stop;
+$node1->stop;
+
+done_testing();

From d77208a12efd964f53da59511290ab52b9f530d3 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Wed, 12 Nov 2025 13:36:09 +0200
Subject: [PATCH 08/10] TEST: Add test for wraparound of next new multi in
 pg_upgrade

Related to BUG #18863 and BUG #18865
---
 src/bin/pg_upgrade/meson.build         |   1 +
 src/bin/pg_upgrade/t/008_multi_wrap.pl | 176 +++++++++++++++++++++++++
 2 files changed, 177 insertions(+)
 create mode 100644 src/bin/pg_upgrade/t/008_multi_wrap.pl

diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index fff0db3b560d..28cd29d666e8 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -51,6 +51,7 @@ tests += {
       't/005_char_signedness.pl',
       't/006_transfer_modes.pl',
       't/007_multixact_conversion.pl',
+      't/008_multi_wrap.pl',
     ],
     'test_kwargs': {'priority': 40}, # pg_upgrade tests are slow
   },
diff --git a/src/bin/pg_upgrade/t/008_multi_wrap.pl b/src/bin/pg_upgrade/t/008_multi_wrap.pl
new file mode 100644
index 000000000000..0ad8fd599068
--- /dev/null
+++ b/src/bin/pg_upgrade/t/008_multi_wrap.pl
@@ -0,0 +1,176 @@
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use Math::BigInt;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::AdjustDump;
+use PostgreSQL::Test::AdjustUpgrade;
+use Test::More;
+
+# Temp dir for a dumps.
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
+# Can be changed to test the other modes.
+my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';
+
+# Handy pg_resetwal wrapper
+sub reset_mxoff
+{
+	my %args = @_;
+
+	my $node = $args{node};
+	my $offset = $args{offset};
+	my $multi = $args{multi};
+	my $blcksz = sub # Get block size
+	{
+		my $out = (run_command([ 'pg_resetwal', '--dry-run',
+								 $node->data_dir ]))[0];
+		$out =~ /^Database block size: *(\d+)$/m or die;
+		return $1;
+	}->();
+
+	my @cmd;
+
+	# Reset cluster
+	@cmd = ('pg_resetwal', '--pgdata' => $node->data_dir);
+	if (defined($offset))
+	{
+		push @cmd, '--multixact-offset' => $offset;
+	}
+	if (defined($multi))
+	{
+		push @cmd, "--multixact-ids=$multi,$multi";
+	}
+	command_ok(\@cmd, 'reset multi/offset');
+
+	my $n_items;
+	my $segname;
+
+	# Fill empty pg_multixact segments
+	if (defined($offset))
+	{
+		$n_items = 32 * int($blcksz / 20) * 4;
+		$segname = sprintf "%015X", ($offset / $n_items);
+		$segname = $node->data_dir . "/pg_multixact/members/" . $segname;
+
+		@cmd = ('dd');
+		push @cmd, "if=/dev/zero";
+		push @cmd, "of=" . $segname;
+		push @cmd, "bs=$blcksz";
+		push @cmd, "count=32";
+		command_ok(\@cmd, 'fill empty multixact-members');
+	}
+
+	if (defined($multi))
+	{
+		$n_items = 32 * int($blcksz / 8);
+		$segname = sprintf "%04X", $multi / $n_items;
+		$segname = $node->data_dir . "/pg_multixact/offsets/" . $segname;
+
+		@cmd = ('dd');
+		push @cmd, "if=/dev/zero";
+		push @cmd, "of=" . $segname;
+		push @cmd, "bs=$blcksz";
+		push @cmd, "count=32";
+		command_ok(\@cmd, 'fill empty multixact-offsets');
+	}
+}
+
+sub get_dump_for_comparison
+{
+	my ($node, $db, $file_prefix, $adjust_child_columns) = @_;
+
+	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
+	my $dump_adjusted = "${dumpfile}_adjusted";
+
+	open(my $dh, '>', $dump_adjusted)
+	  || die "could not open $dump_adjusted for writing $!";
+
+	$node->run_log(
+		[
+			'pg_dump', '--no-sync',
+			'--restrict-key' => 'test',
+			'-d' => $node->connstr($db),
+			'-f' => $dumpfile
+		]);
+
+	print $dh adjust_regress_dumpfile(slurp_file($dumpfile),
+		$adjust_child_columns);
+	close($dh);
+
+	return $dump_adjusted;
+}
+
+# Create old node
+my $old = PostgreSQL::Test::Cluster->new("old");
+$old->init;
+reset_mxoff(node => $old, multi => 4294967295, offset => 429496729);
+
+$old->start;
+$old->safe_psql('postgres',
+qq(
+	CREATE TABLE test_table (id integer NOT NULL PRIMARY KEY, val text);
+	INSERT INTO test_table VALUES (1, 'a');
+));
+
+my $conn1 = $old->background_psql('postgres');
+my $conn2 = $old->background_psql('postgres');
+
+$conn1->query_safe(qq(
+	BEGIN;
+	SELECT * FROM test_table WHERE id = 1 FOR SHARE;
+));
+$conn2->query_safe(qq(
+	BEGIN;
+	SELECT * FROM test_table WHERE id = 1 FOR SHARE;
+));
+
+$conn1->query_safe(qq(COMMIT;));
+$conn2->query_safe(qq(COMMIT;));
+
+$conn1->quit;
+$conn2->quit;
+
+$old->stop;
+
+# Create new node
+my $new = PostgreSQL::Test::Cluster->new("new");
+$new->init;
+
+# Run pg_upgrade
+command_ok(
+	[
+		'pg_upgrade', '--no-sync',
+		'--old-datadir' => $old->data_dir,
+		'--new-datadir' => $new->data_dir,
+		'--old-bindir' => $old->config_data('--bindir'),
+		'--new-bindir' => $new->config_data('--bindir'),
+		'--socketdir' => $new->host,
+		'--old-port' => $old->port,
+		'--new-port' => $new->port,
+		$mode,
+	],
+	'run of pg_upgrade for new instance');
+ok( !-d $new->data_dir . "/pg_upgrade_output.d",
+	"pg_upgrade_output.d/ removed after pg_upgrade success");
+
+$old->start;
+my $src_dump =
+	get_dump_for_comparison($old, 'postgres',
+							"oldnode_1_dump", 0);
+$old->stop;
+
+$new->start;
+my $dst_dump =
+	get_dump_for_comparison($new, 'postgres',
+							"newnode_1_dump", 0);
+$new->stop;
+
+compare_files($src_dump, $dst_dump,
+	'dump outputs from original and restored regression databases match');
+
+done_testing();

From 459a5af3ea89a4d17a4f6685a1add518a708ee6a Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Tue, 1 Apr 2025 21:01:07 +0300
Subject: [PATCH 09/10] TEST: add consume_multixids function

---
 src/test/modules/xid_wraparound/Makefile      |  1 +
 src/test/modules/xid_wraparound/meson.build   |  1 +
 .../xid_wraparound/multixid_wraparound.c      | 96 +++++++++++++++++++
 .../xid_wraparound/xid_wraparound--1.0.sql    |  4 +
 4 files changed, 102 insertions(+)
 create mode 100644 src/test/modules/xid_wraparound/multixid_wraparound.c

diff --git a/src/test/modules/xid_wraparound/Makefile b/src/test/modules/xid_wraparound/Makefile
index 7a6e0f667629..ebb3d8fcb3ec 100644
--- a/src/test/modules/xid_wraparound/Makefile
+++ b/src/test/modules/xid_wraparound/Makefile
@@ -3,6 +3,7 @@
 MODULE_big = xid_wraparound
 OBJS = \
 	$(WIN32RES) \
+	multixid_wraparound.o \
 	xid_wraparound.o
 PGFILEDESC = "xid_wraparound - tests for XID wraparound"
 
diff --git a/src/test/modules/xid_wraparound/meson.build b/src/test/modules/xid_wraparound/meson.build
index 3aec430df8c7..ce4ac4688301 100644
--- a/src/test/modules/xid_wraparound/meson.build
+++ b/src/test/modules/xid_wraparound/meson.build
@@ -1,6 +1,7 @@
 # Copyright (c) 2023-2025, PostgreSQL Global Development Group
 
 xid_wraparound_sources = files(
+  'multixid_wraparound.c',
   'xid_wraparound.c',
 )
 
diff --git a/src/test/modules/xid_wraparound/multixid_wraparound.c b/src/test/modules/xid_wraparound/multixid_wraparound.c
new file mode 100644
index 000000000000..af567c6e541a
--- /dev/null
+++ b/src/test/modules/xid_wraparound/multixid_wraparound.c
@@ -0,0 +1,96 @@
+/*--------------------------------------------------------------------------
+ *
+ * multixid_wraparound.c
+ *		Utilities for testing multixids
+ *
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		src/test/modules/xid_wraparound/multixid_wraparound.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/multixact.h"
+#include "access/xact.h"
+#include "miscadmin.h"
+#include "storage/proc.h"
+#include "utils/xid8.h"
+
+static int mxactMemberComparator(const void *arg1, const void *arg2);
+
+/*
+ * Consume the specified number of multi-XIDs, with specified number of
+ * members each.
+ */
+PG_FUNCTION_INFO_V1(consume_multixids);
+Datum
+consume_multixids(PG_FUNCTION_ARGS)
+{
+	int64		nmultis = PG_GETARG_INT64(0);
+	int32		nmembers = PG_GETARG_INT32(1);
+	MultiXactMember *members;
+	MultiXactId	lastmxid = InvalidMultiXactId;
+
+	if (nmultis < 0)
+		elog(ERROR, "invalid nxids argument: %" PRId64, nmultis);
+	if (nmembers < 1)
+		elog(ERROR, "invalid nmembers argument: %d", nmembers);
+
+	/*
+	 * We consume XIDs by calling GetNewTransactionId(true), which marks the
+	 * consumed XIDs as subtransactions of the current top-level transaction.
+	 * For that to work, this transaction must have a top-level XID.
+	 *
+	 * GetNewTransactionId registers them in the subxid cache in PGPROC, until
+	 * the cache overflows, but beyond that, we don't keep track of the
+	 * consumed XIDs.
+	 */
+	(void) GetTopTransactionId();
+
+	members = palloc((nmultis + nmembers) * sizeof(MultiXactMember));
+	for (int32 i = 0; i < nmultis + nmembers; i++)
+	{
+		FullTransactionId xid;
+
+		xid = GetNewTransactionId(true);
+		members[i].xid = XidFromFullTransactionId(xid);
+		members[i].status = MultiXactStatusForKeyShare;
+	}
+	/*
+	 * pre-sort the array like mXactCacheGetBySet does, so that the qsort call
+	 * in mXactCacheGetBySet() is cheaper.
+	 */
+	qsort(members, nmultis + nmembers, sizeof(MultiXactMember), mxactMemberComparator);
+
+	for (int64 i = 0; i < nmultis; i++)
+	{
+		lastmxid = MultiXactIdCreateFromMembers(nmembers, &members[i]);
+		CHECK_FOR_INTERRUPTS();
+	}
+
+	pfree(members);
+
+	PG_RETURN_TRANSACTIONID(lastmxid);
+}
+
+/* copied from multixact.c */
+static int
+mxactMemberComparator(const void *arg1, const void *arg2)
+{
+	MultiXactMember member1 = *(const MultiXactMember *) arg1;
+	MultiXactMember member2 = *(const MultiXactMember *) arg2;
+
+	if (member1.xid > member2.xid)
+		return 1;
+	if (member1.xid < member2.xid)
+		return -1;
+	if (member1.status > member2.status)
+		return 1;
+	if (member1.status < member2.status)
+		return -1;
+	return 0;
+}
diff --git a/src/test/modules/xid_wraparound/xid_wraparound--1.0.sql b/src/test/modules/xid_wraparound/xid_wraparound--1.0.sql
index 96356b4b9745..ed7520c3d86c 100644
--- a/src/test/modules/xid_wraparound/xid_wraparound--1.0.sql
+++ b/src/test/modules/xid_wraparound/xid_wraparound--1.0.sql
@@ -10,3 +10,7 @@ AS 'MODULE_PATHNAME' LANGUAGE C;
 CREATE FUNCTION consume_xids_until(targetxid xid8)
 RETURNS xid8 VOLATILE PARALLEL UNSAFE STRICT
 AS 'MODULE_PATHNAME' LANGUAGE C;
+
+CREATE FUNCTION consume_multixids(nmultis bigint, nmembers int4)
+RETURNS bigint VOLATILE PARALLEL UNSAFE STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;

From f56c43b781167fce7f57e21645a265a099b92b09 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Fri, 14 Nov 2025 00:31:37 +0200
Subject: [PATCH 10/10] TEST: Original pg_upgrade test case

---
 src/bin/pg_upgrade/meson.build         |   1 +
 src/bin/pg_upgrade/t/009_mxoff_orig.pl | 463 +++++++++++++++++++++++++
 2 files changed, 464 insertions(+)
 create mode 100644 src/bin/pg_upgrade/t/009_mxoff_orig.pl

diff --git a/src/bin/pg_upgrade/meson.build b/src/bin/pg_upgrade/meson.build
index 28cd29d666e8..dbe2ce9de9e1 100644
--- a/src/bin/pg_upgrade/meson.build
+++ b/src/bin/pg_upgrade/meson.build
@@ -52,6 +52,7 @@ tests += {
       't/006_transfer_modes.pl',
       't/007_multixact_conversion.pl',
       't/008_multi_wrap.pl',
+      't/009_mxoff_orig.pl',
     ],
     'test_kwargs': {'priority': 40}, # pg_upgrade tests are slow
   },
diff --git a/src/bin/pg_upgrade/t/009_mxoff_orig.pl b/src/bin/pg_upgrade/t/009_mxoff_orig.pl
new file mode 100644
index 000000000000..7204325f873d
--- /dev/null
+++ b/src/bin/pg_upgrade/t/009_mxoff_orig.pl
@@ -0,0 +1,463 @@
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+use strict;
+use warnings FATAL => 'all';
+
+use Math::BigInt;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::AdjustDump;
+use PostgreSQL::Test::AdjustUpgrade;
+use Test::More;
+
+# This test involves different multitransaction states, similarly to that of
+# 002_pg_upgrade.pl.
+
+unless (defined($ENV{oldinstall}))
+{
+	plan skip_all =>
+		'to run test set oldinstall environment variable to the pre 64-bit mxoff cluster';
+}
+
+# Temp dir for a dumps.
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
+# Can be changed to test the other modes.
+my $mode = $ENV{PG_TEST_PG_UPGRADE_MODE} || '--copy';
+
+sub utility_path
+{
+	my $node = shift;
+	my $name = shift;
+
+	my $bin_path = defined($node->install_path) ?
+		$node->install_path . "/bin/$name" : $name;
+
+	return $bin_path;
+}
+
+# Get NextMultiOffset.
+sub next_mxoff
+{
+	my $node = shift;
+
+	my $pg_controldata_path = utility_path($node, 'pg_controldata');
+	my ($stdout, $stderr) = run_command([ $pg_controldata_path,
+											$node->data_dir ]);
+	my @control_data = split("\n", $stdout);
+	my $next_mxoff = undef;
+
+	foreach (@control_data)
+	{
+		if ($_ =~ /^Latest checkpoint's NextMultiOffset:\s*(.*)$/mg)
+		{
+			$next_mxoff = $1;
+			last;
+		}
+	}
+	die "NextMultiOffset not found in control file\n"
+		unless defined($next_mxoff);
+
+	return $next_mxoff;
+}
+
+# Consume around 10k of mxoffsets.
+sub mxact_eater
+{
+	my $node = shift;
+	my $tbl = 'FOO';
+
+	my ($mxoff1, $mxoff2);
+
+	$mxoff1 = next_mxoff($node);
+	$node->start;
+	$node->safe_psql('postgres',
+		"CREATE TABLE ${tbl} (I INT PRIMARY KEY, N_UPDATED INT) " .
+		"       WITH (AUTOVACUUM_ENABLED=FALSE);" .
+		"INSERT INTO ${tbl} SELECT G, 0 FROM GENERATE_SERIES(1, 50) G;");
+
+	# consume around 10k mxoff
+	my $nclients = 10;
+	my $update_every = 75;
+	my @connections = ();
+
+	for (0..$nclients)
+	{
+		my $conn = $node->background_psql('postgres');
+		$conn->query_safe("BEGIN");
+
+		push(@connections, $conn);
+	}
+
+	for (my $i = 0; $i < 1000; $i++)
+	{
+		my $conn = $connections[$i % $nclients];
+
+		$conn->query_safe("COMMIT;");
+		$conn->query_safe("BEGIN");
+
+		if ($i % $update_every == 0)
+		{
+			$conn->query_safe(
+				"UPDATE ${tbl} SET " .
+				"N_UPDATED = N_UPDATED + 1 " .
+				"WHERE I = ${i} % 50");
+		}
+		else
+		{
+			$conn->query_safe(
+				"SELECT * FROM ${tbl} FOR KEY SHARE");
+		}
+	}
+
+	for my $conn (@connections)
+	{
+		$conn->quit();
+	}
+
+	$node->stop;
+	$mxoff2 = next_mxoff($node);
+
+	return $mxoff1, $mxoff2;
+}
+
+# Consume around 1M of mxoffsets.
+sub mxact_huge_eater
+{
+	my $node = shift;
+	my $tbl = 'FOO';
+
+	my ($mxoff1, $mxoff2);
+
+	$mxoff1 = next_mxoff($node);
+	$node->start;
+	$node->safe_psql('postgres',
+		"CREATE TABLE ${tbl} (I INT PRIMARY KEY, N_UPDATED INT) " .
+		"       WITH (AUTOVACUUM_ENABLED=FALSE);" .
+		"INSERT INTO ${tbl} SELECT G, 0 FROM GENERATE_SERIES(1, 4) G;");
+
+	my $nclients = 100;
+	my @connections = ();
+	my $timeout = 10 * $PostgreSQL::Test::Utils::timeout_default;
+
+	for (0..$nclients)
+	{
+		my $conn = $node->background_psql('postgres',
+										  timeout => $timeout);
+		$conn->query_safe("BEGIN");
+
+		push(@connections, $conn);
+	}
+
+	# It's a long process, better to tell about progress.
+	my $n_steps = 100_000;
+	my $step = int($n_steps / 10);
+
+	diag "\nstart to consume mxoffsets ...\n";
+	for (my $i = 0; $i < $n_steps; $i++)
+	{
+		my $conn = $connections[$i % $nclients];
+
+		$conn->query_safe("COMMIT;");
+		$conn->query_safe("BEGIN");
+
+		{
+			$conn->query_safe(
+				"SELECT * FROM ${tbl} " .
+				"FOR KEY SHARE");
+		}
+
+		if ($i % $step == 0)
+		{
+			my $done = int(($i / $n_steps) * 100);
+			diag "$done% done...";
+		}
+	}
+
+	for my $conn (@connections)
+	{
+		$conn->quit();
+	}
+
+	$node->stop;
+	$mxoff2 = next_mxoff($node);
+
+	return $mxoff1, $mxoff2;
+}
+
+# Set oldest multixact-offset
+sub reset_mxoff
+{
+	my $node = shift;
+	my $offset = shift;
+
+	my $pg_resetwal_path = utility_path($node, 'pg_resetwal');
+	# Get block size
+	my $out = (run_command([ $pg_resetwal_path, '--dry-run',
+							 $node->data_dir ]))[0];
+		$out =~ /^Database block size: *(\d+)$/m or die;
+	my $blcksz = $1;
+
+	# Reset to new offset
+	my @cmd = ($pg_resetwal_path, '--pgdata' => $node->data_dir);
+	push @cmd, '--multixact-offset' => $offset;
+	command_ok(\@cmd, 'set oldest multixact-offset');
+
+	# Fill empty pg_multixact/members segment
+	my $mult = 32 * int($blcksz / 20) * 4;
+	my $segname = sprintf "%04X", $offset / $mult;
+
+	my @dd = ('dd');
+	push @dd, "if=/dev/zero";
+	push @dd, "of=" . $node->data_dir . "/pg_multixact/members/" . $segname;
+	push @dd, "bs=$blcksz";
+	push @dd, "count=32";
+	command_ok(\@dd, 'fill empty multixact-members');
+}
+
+sub get_dump_for_comparison
+{
+	my ($node, $db, $file_prefix, $adjust_child_columns) = @_;
+
+	my $dumpfile = $tempdir . '/' . $file_prefix . '.sql';
+	my $dump_adjusted = "${dumpfile}_adjusted";
+
+	open(my $dh, '>', $dump_adjusted)
+	  || die "could not open $dump_adjusted for writing $!";
+
+	my $pg_dump_path = utility_path($node, 'pg_dump');
+
+	$node->run_log(
+		[
+			$pg_dump_path, '--no-sync',
+			'--restrict-key' => 'test',
+			'-d' => $node->connstr($db),
+			'-f' => $dumpfile
+		]);
+
+	print $dh adjust_regress_dumpfile(slurp_file($dumpfile),
+		$adjust_child_columns);
+	close($dh);
+
+	return $dump_adjusted;
+}
+
+# Main test workhorse routine.
+# Make pg_upgrade, dump data and compare it.
+sub run_test
+{
+	my $tag = shift;
+	my $oldnode = shift;
+	my $newnode = shift;
+
+	my $pg_upgrade_path = utility_path($newnode, 'pg_upgrade');
+
+	command_ok(
+		[
+			$pg_upgrade_path, '--no-sync',
+			'--old-datadir' => $oldnode->data_dir,
+			'--new-datadir' => $newnode->data_dir,
+			'--old-bindir' => $oldnode->config_data('--bindir'),
+			'--new-bindir' => $newnode->config_data('--bindir'),
+			'--socketdir' => $newnode->host,
+			'--old-port' => $oldnode->port,
+			'--new-port' => $newnode->port,
+			$mode,
+		],
+		'run of pg_upgrade for new instance');
+	ok( !-d $newnode->data_dir . "/pg_upgrade_output.d",
+		"pg_upgrade_output.d/ removed after pg_upgrade success");
+
+	$oldnode->start;
+	my $src_dump =
+		get_dump_for_comparison($oldnode, 'postgres',
+								"oldnode_${tag}_dump", 0);
+	$oldnode->stop;
+
+	$newnode->start;
+	my $dst_dump =
+		get_dump_for_comparison($newnode, 'postgres',
+								"newnode_${tag}_dump", 0);
+	$newnode->stop;
+
+	compare_files($src_dump, $dst_dump,
+		'dump outputs from original and restored regression databases match');
+}
+
+sub to_hex
+{
+	my $arg = shift;
+
+	$arg = Math::BigInt->new($arg);
+	$arg = $arg->as_hex();
+
+	return $arg;
+}
+
+# case #1: start old node from defaults
+{
+	my $tag = 1;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+	$old->init(extra => ['-k']);
+
+	my ($start_mxoff, $finish_mxoff) = mxact_eater($old);
+
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+# case #2: start old node from before 32-bit wraparound
+{
+	my $tag = 2;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+
+	$old->init(extra => ['-k']);
+	reset_mxoff($old, 0xFFFF0000);
+
+	my ($start_mxoff, $finish_mxoff) = mxact_eater($old);
+
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+# case #3: start old node near 32-bit wraparound and reach wraparound state.
+{
+	my $tag = 3;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+
+	$old->init(extra => ['-k']);
+
+	reset_mxoff($old, 0xFFFFEC77);
+	my ($start_mxoff, $finish_mxoff) = mxact_eater($old);
+
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+# case #4: start old node from defaults
+{
+	my $tag = 4;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+
+	$old->init(extra => ['-k']);
+	$old->append_conf("postgresql.conf", "max_connections = 128");
+
+	diag "test #${tag} for multiple mxoff segments";
+	my ($start_mxoff, $finish_mxoff) = mxact_huge_eater($old);
+
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+# case #5: start old node from before 32-bit wraparound
+{
+	my $tag = 5;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+
+	$old->init(extra => ['-k']);
+	$old->append_conf("postgresql.conf", "max_connections = 128");
+	reset_mxoff($old, 0xFF000000);
+
+	diag "test #${tag} for multiple mxoff segments";
+	my ($start_mxoff, $finish_mxoff) = mxact_huge_eater($old);
+
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+# case #6: start old node near 32-bit wraparound and reach wraparound state.
+{
+	my $tag = 6;
+	my $old =
+		PostgreSQL::Test::Cluster->new("oldnode${tag}",
+									   install_path => $ENV{oldinstall});
+
+	$old->init(extra => ['-k']);
+
+	reset_mxoff($old, 0xFFFFFFFF - 500_000);
+	$old->append_conf("postgresql.conf", "max_connections = 128");
+	my ($start_mxoff, $finish_mxoff) = mxact_huge_eater($old);
+
+	diag "test #${tag} for multiple mxoff segments";
+	my $new = PostgreSQL::Test::Cluster->new("newnode${tag}");
+	$new->init;
+
+	run_test($tag, $old, $new);
+
+	$start_mxoff = to_hex($start_mxoff);
+	$finish_mxoff = to_hex($finish_mxoff);
+
+	my $next_mxoff = to_hex(next_mxoff($new));
+
+	note ">>> case #${tag}\n" .
+		 " oldnode mxoff from ${start_mxoff} to ${finish_mxoff}\n" .
+		 " newnode mxoff ${next_mxoff}\n";
+}
+
+done_testing();