From f4240a7477bb748d4ff1095f262952a15e1de63b Mon Sep 17 00:00:00 2001 From: Zhijie Hou Date: Tue, 16 Sep 2025 11:24:20 +0800 Subject: [PATCH] Stabilize the tests in 035_conflicts The test used VACUUM to remove the deleted tuple, but that might not be stable because a concurrent bgwriter or checkpoint could lock the page where the deleted tuple exists. The test has already confirmed that the replication slot's xmin has advanced, which should be sufficient to prove that the feature works correctly, so this commit removes these unstable VACUUM tests. Additionally, the test has a check to verify the resumption of retention for conflict-relevant information after setting max_retention_duration to 0. However, in some cases, the apply worker resumes retention immediately after the inactive slot is removed from the synchronized_standby_slots configuration, prior to setting max_retention_duration to 0. This can occur if the system applies remote changes in less than 1ms, leading to test timeouts while waiting for the resumption log at a later position. To ensure stability, this commit delays dropping the inactive slot and resetting synchronized_standby_slots until after max_retention_duration has been set to 0. 
--- src/test/subscription/t/035_conflicts.pl | 31 +++++++----------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index f2aee0f70df2..21e992cea50c 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -342,15 +342,6 @@ ), "the xmin value of slot 'pg_conflict_detection' is updated on Node A"); -# Confirm that the dead tuple can be removed now -($cmdret, $stdout, $stderr) = $node_A->psql( - 'postgres', qq(VACUUM (verbose) public.tab;) -); - -ok( $stderr =~ - qr/1 removed, 1 remain, 0 are dead but not yet removable/, - 'the deleted column is removed'); - ############################################################################### # Ensure that the deleted tuple needed to detect an update_deleted conflict is # accessible via a sequential table scan. @@ -555,13 +546,6 @@ "the xmin value of slot 'pg_conflict_detection' is updated on subscriber" ); - # Confirm that the dead tuple can be removed now - ($cmdret, $stdout, $stderr) = - $node_A->psql('postgres', qq(VACUUM (verbose) public.tab;)); - - ok($stderr =~ qr/1 removed, 0 remain, 0 are dead but not yet removable/, - 'the deleted column is removed'); - # Get the commit timestamp for the publisher's update my $pub_ts = $node_B->safe_psql('postgres', "SELECT pg_xact_commit_timestamp(xmin) from tab where a=1;"); @@ -625,12 +609,6 @@ "SELECT subretentionactive FROM pg_subscription WHERE subname='$subname_AB';"); is($result, qq(f), 'retention is inactive'); -# Drop the physical slot and reset the synchronized_standby_slots setting -$node_B->safe_psql('postgres', - "SELECT * FROM pg_drop_replication_slot('blocker');"); -$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''"); -$node_B->reload; - ############################################################################### # Check that dead tuple retention resumes when the max_retention_duration is 
set # 0. @@ -642,6 +620,15 @@ $node_A->safe_psql('postgres', "ALTER SUBSCRIPTION $subname_AB SET (max_retention_duration = 0);"); +# Drop the physical slot and reset the synchronized_standby_slots setting. We +# change this after setting max_retention_duration to 0, ensuring the apply +# worker does not resume prematurely without noticing the updated +# max_retention_duration value. +$node_B->safe_psql('postgres', + "SELECT * FROM pg_drop_replication_slot('blocker');"); +$node_B->adjust_conf('postgresql.conf', 'synchronized_standby_slots', "''"); +$node_B->reload; + # Confirm that the retention resumes $node_A->wait_for_log( qr/logical replication worker for subscription "tap_sub_a_b" will resume retaining the information for detecting conflicts