|
65 | 65 | #include "utils/pg_lsn.h" |
66 | 66 | #include "utils/ps_status.h" |
67 | 67 | #include "utils/pg_rusage.h" |
| 68 | +#include "utils/timeout.h" |
68 | 69 |
|
69 | 70 | /* Unsupported old recovery command file names (relative to $PGDATA) */ |
70 | 71 | #define RECOVERY_COMMAND_FILE "recovery.conf" |
@@ -148,6 +149,13 @@ bool InArchiveRecovery = false; |
148 | 149 | static bool StandbyModeRequested = false; |
149 | 150 | bool StandbyMode = false; |
150 | 151 |
|
| 152 | +/* |
| 153 | + * Whether we are currently in the process of applying recovery records while |
| 154 | + * allowing downstream replication instances |
| 155 | + */ |
| 156 | +#define StandbyWithCascadeReplication() \ |
| 157 | + (AmStartupProcess() && StandbyMode && AllowCascadeReplication()) |
| 158 | + |
151 | 159 | /* was a signal file present at startup? */ |
152 | 160 | static bool standby_signal_file_found = false; |
153 | 161 | static bool recovery_signal_file_found = false; |
@@ -304,6 +312,28 @@ bool reachedConsistency = false; |
304 | 312 | static char *replay_image_masked = NULL; |
305 | 313 | static char *primary_image_masked = NULL; |
306 | 314 |
|
| 315 | +/* |
| 316 | + * Maximum number of applied records in a batch before notifying walsenders |
| 317 | + * during cascade replication |
| 318 | + */ |
| 319 | +int cascadeReplicationMaxBatchSize; |
| 320 | + |
| 321 | +/* |
| 322 | + * Maximum batching delay before notifying walsender during cascade replication |
| 323 | + */ |
| 324 | +int cascadeReplicationMaxBatchDelay; |
| 325 | + |
| 326 | +/* Current cascade replication batching delay used while enabling timer */ |
| 327 | +static int cascadeDelayCurrent = 0; |
| 328 | + |
| 329 | +/* Counter for applied records which are not yet signaled to walsenders */ |
| 330 | +static int appliedRecords = 0; |
| 331 | + |
| 332 | +/* |
| 333 | + * True if downstream walsenders need to be notified about pending WAL records, |
| 334 | + * set by timeout handler. |
| 335 | + */ |
| 336 | +volatile sig_atomic_t replicationNotificationPending = false; |
307 | 337 |
|
308 | 338 | /* |
309 | 339 | * Shared-memory state for WAL recovery. |
@@ -1846,6 +1876,15 @@ PerformWalRecovery(void) |
1846 | 1876 | * end of main redo apply loop |
1847 | 1877 | */ |
1848 | 1878 |
|
| 1879 | + /* Send notification for batched records once the loop has ended */ |
| 1880 | + if (StandbyWithCascadeReplication() && appliedRecords > 0) |
| 1881 | + { |
| 1882 | + if (cascadeDelayCurrent > 0) |
| 1883 | + disable_timeout(STANDBY_CASCADE_WAL_SEND_TIMEOUT, false); |
| 1884 | + appliedRecords = 0; |
| 1885 | + WalSndWakeup(false, true); |
| 1886 | + } |
| 1887 | + |
1849 | 1888 | if (reachedRecoveryTarget) |
1850 | 1889 | { |
1851 | 1890 | if (!reachedConsistency) |
@@ -2044,8 +2083,45 @@ ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *repl |
2044 | 2083 | * be created otherwise) |
2045 | 2084 | * ------ |
2046 | 2085 | */ |
2047 | | - if (AllowCascadeReplication()) |
2048 | | - WalSndWakeup(switchedTLI, true); |
| 2086 | + |
| 2087 | + if (StandbyWithCascadeReplication()) |
| 2088 | + { |
| 2089 | + if (cascadeReplicationMaxBatchSize <= 1 && appliedRecords == 0) |
| 2090 | + WalSndWakeup(switchedTLI, true); |
| 2091 | + else |
| 2092 | + { |
| 2093 | + /* |
| 2094 | + * If the timeline has switched, then we will immediately notify both |
| 2095 | + * physical and logical downstream walsenders here, as we do not |
| 2096 | + * want to introduce additional delay in such a case. Otherwise we |
| 2097 | + * will wait until we apply the specified number of records before |
| 2098 | + * notifying downstream logical walsenders. |
| 2099 | + */ |
| 2100 | + bool batchFlushRequired = |
| 2101 | + ++appliedRecords >= cascadeReplicationMaxBatchSize || |
| 2102 | + replicationNotificationPending || |
| 2103 | + switchedTLI; |
| 2104 | + |
| 2105 | + if (batchFlushRequired) |
| 2106 | + { |
| 2107 | + if (cascadeDelayCurrent > 0) |
| 2108 | + disable_timeout(STANDBY_CASCADE_WAL_SEND_TIMEOUT, false); |
| 2109 | + appliedRecords = 0; |
| 2110 | + replicationNotificationPending = false; |
| 2111 | + } |
| 2112 | + |
| 2113 | + WalSndWakeup(switchedTLI, batchFlushRequired); |
| 2114 | + |
| 2115 | + /* Setup timeout to limit maximum delay for notifications */ |
| 2116 | + if (appliedRecords == 1) |
| 2117 | + { |
| 2118 | + cascadeDelayCurrent = cascadeReplicationMaxBatchDelay; |
| 2119 | + if (cascadeDelayCurrent > 0) |
| 2120 | + enable_timeout_after(STANDBY_CASCADE_WAL_SEND_TIMEOUT, |
| 2121 | + cascadeDelayCurrent); |
| 2122 | + } |
| 2123 | + } |
| 2124 | + } |
2049 | 2125 |
|
2050 | 2126 | /* |
2051 | 2127 | * If rm_redo called XLogRequestWalReceiverReply, then we wake up the |
@@ -5095,3 +5171,50 @@ assign_recovery_target_xid(const char *newval, void *extra) |
5095 | 5171 | else |
5096 | 5172 | recoveryTarget = RECOVERY_TARGET_UNSET; |
5097 | 5173 | } |
| 5174 | + |
| 5175 | +/* |
| 5176 | + * GUC assign_hook shared by cascade_replication_batch_size and |
| 5177 | + * cascade_replication_batch_delay; new_value and extra are not used here. |
| 5178 | + */ |
| 5179 | +void |
| 5180 | +assign_cascade_replication_batch_values(int new_value, void *extra) |
| 5181 | +{ |
| 5182 | + /* |
| 5183 | + * If either cascade_replication_batch_size or |
| 5184 | + * cascade_replication_batch_delay is changed, then we cancel the |
| 5185 | + * current timer (if any) and request a flush of the current batch. New |
| 5186 | + * values will be picked up once the next WAL record is applied. |
| 5187 | + */ |
| 5188 | + if (cascadeDelayCurrent > 0) |
| 5189 | + { |
| 5190 | + cascadeDelayCurrent = 0; |
| 5191 | + disable_timeout(STANDBY_CASCADE_WAL_SEND_TIMEOUT, false); |
| 5192 | + } |
| 5193 | + /* Only sets the flag; it will be processed by ProcessStartupProcInterrupts */ |
| 5194 | + replicationNotificationPending = true; |
| 5195 | +} |
| 5196 | + |
| 5197 | +/* |
| 5198 | + * Wake up downstream walsenders if there are batched records, then reset the batch counter; the pending flag is always cleared |
| 5199 | + */ |
| 5200 | +void |
| 5201 | +StandbyWalCheckSendNotify(void) |
| 5202 | +{ |
| 5203 | + if (appliedRecords > 0) |
| 5204 | + { |
| 5205 | + WalSndWakeup(false, true); |
| 5206 | + appliedRecords = 0; |
| 5207 | + } |
| 5208 | + replicationNotificationPending = false; |
| 5209 | +} |
| 5210 | + |
| 5211 | +/* |
| 5212 | + * Timer handler for batch notifications in cascade replication: marks a notification as pending and wakes up recovery |
| 5213 | + */ |
| 5214 | +void |
| 5215 | +StandbyWalSendTimeoutHandler(void) |
| 5216 | +{ |
| 5217 | + replicationNotificationPending = true; |
| 5218 | + /* The process is most likely waiting for WAL records to arrive, so wake it up */ |
| 5219 | + WakeupRecovery(); |
| 5220 | +} |
0 commit comments