Skip to content

Commit 4d703a6

Browse files
committed
don't block master node startup if recover_after_time is set and nodes settings are not met (we already handle it with the not recovered state)
1 parent 424941b commit 4d703a6

File tree

1 file changed

+19
-37
lines changed

1 file changed

+19
-37
lines changed

modules/elasticsearch/src/main/java/org/elasticsearch/gateway/GatewayService.java

Lines changed: 19 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
package org.elasticsearch.gateway;
2121

2222
import org.elasticsearch.ElasticSearchException;
23-
import org.elasticsearch.ElasticSearchInterruptedException;
2423
import org.elasticsearch.cluster.*;
2524
import org.elasticsearch.cluster.block.ClusterBlock;
2625
import org.elasticsearch.cluster.block.ClusterBlockLevel;
@@ -30,7 +29,6 @@
3029
import org.elasticsearch.cluster.metadata.MetaData;
3130
import org.elasticsearch.cluster.metadata.MetaDataCreateIndexService;
3231
import org.elasticsearch.cluster.node.DiscoveryNodes;
33-
import org.elasticsearch.common.Nullable;
3432
import org.elasticsearch.common.component.AbstractLifecycleComponent;
3533
import org.elasticsearch.common.inject.Inject;
3634
import org.elasticsearch.common.settings.Settings;
@@ -41,7 +39,6 @@
4139
import java.io.IOException;
4240
import java.util.Map;
4341
import java.util.concurrent.CountDownLatch;
44-
import java.util.concurrent.TimeUnit;
4542
import java.util.concurrent.atomic.AtomicBoolean;
4643
import java.util.concurrent.atomic.AtomicInteger;
4744

@@ -66,7 +63,6 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
6663

6764
private final MetaDataCreateIndexService createIndexService;
6865

69-
private final TimeValue initialStateTimeout;
7066
private final TimeValue recoverAfterTime;
7167
private final int recoverAfterNodes;
7268
private final int expectedNodes;
@@ -86,7 +82,6 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
8682
this.discoveryService = discoveryService;
8783
this.createIndexService = createIndexService;
8884
this.threadPool = threadPool;
89-
this.initialStateTimeout = componentSettings.getAsTime("initial_state_timeout", TimeValue.timeValueSeconds(30));
9085
// allow to control a delay of when indices will get created
9186
this.recoverAfterTime = componentSettings.getAsTime("recover_after_time", null);
9287
this.recoverAfterNodes = componentSettings.getAsInt("recover_after_nodes", -1);
@@ -115,24 +110,24 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
115110
} else if (recoverAfterMasterNodes != -1 && nodes.masterNodes().size() < recoverAfterMasterNodes) {
116111
logger.debug("not recovering from gateway, nodes_size (master) [" + nodes.masterNodes().size() + "] < recover_after_master_nodes [" + recoverAfterMasterNodes + "]");
117112
} else {
118-
boolean ignoreTimeout;
113+
boolean ignoreRecoverAfterTime;
119114
if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
120115
// no expected is set, don't ignore the timeout
121-
ignoreTimeout = false;
116+
ignoreRecoverAfterTime = false;
122117
} else {
123118
// one of the expected is set, see if all of them meet the need, and ignore the timeout in this case
124-
ignoreTimeout = true;
119+
ignoreRecoverAfterTime = true;
125120
if (expectedNodes != -1 && (nodes.masterAndDataNodes().size() < expectedNodes)) { // does not meet the expected...
126-
ignoreTimeout = false;
121+
ignoreRecoverAfterTime = false;
127122
}
128123
if (expectedMasterNodes != -1 && (nodes.masterNodes().size() < expectedMasterNodes)) { // does not meet the expected...
129-
ignoreTimeout = false;
124+
ignoreRecoverAfterTime = false;
130125
}
131126
if (expectedDataNodes != -1 && (nodes.dataNodes().size() < expectedDataNodes)) { // does not meet the expected...
132-
ignoreTimeout = false;
127+
ignoreRecoverAfterTime = false;
133128
}
134129
}
135-
performStateRecovery(initialStateTimeout, ignoreTimeout);
130+
performStateRecovery(ignoreRecoverAfterTime);
136131
}
137132
}
138133
} else {
@@ -151,7 +146,7 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
151146
}
152147

153148
@Override public void clusterChanged(final ClusterChangedEvent event) {
154-
if (!lifecycle.started()) {
149+
if (lifecycle.stoppedOrClosed()) {
155150
return;
156151
}
157152
if (event.localNodeMaster() && event.state().blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
@@ -164,42 +159,37 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
164159
} else if (recoverAfterMasterNodes != -1 && nodes.masterNodes().size() < recoverAfterMasterNodes) {
165160
logger.debug("not recovering from gateway, nodes_size (master) [" + nodes.masterNodes().size() + "] < recover_after_master_nodes [" + recoverAfterMasterNodes + "]");
166161
} else {
167-
boolean ignoreTimeout;
162+
boolean ignoreRecoverAfterTime;
168163
if (expectedNodes == -1 && expectedMasterNodes == -1 && expectedDataNodes == -1) {
169164
// no expected is set, don't ignore the timeout
170-
ignoreTimeout = false;
165+
ignoreRecoverAfterTime = false;
171166
} else {
172167
// one of the expected is set, see if all of them meet the need, and ignore the timeout in this case
173-
ignoreTimeout = true;
168+
ignoreRecoverAfterTime = true;
174169
if (expectedNodes != -1 && (nodes.masterAndDataNodes().size() < expectedNodes)) { // does not meet the expected...
175-
ignoreTimeout = false;
170+
ignoreRecoverAfterTime = false;
176171
}
177172
if (expectedMasterNodes != -1 && (nodes.masterNodes().size() < expectedMasterNodes)) { // does not meet the expected...
178-
ignoreTimeout = false;
173+
ignoreRecoverAfterTime = false;
179174
}
180175
if (expectedDataNodes != -1 && (nodes.dataNodes().size() < expectedDataNodes)) { // does not meet the expected...
181-
ignoreTimeout = false;
176+
ignoreRecoverAfterTime = false;
182177
}
183178
}
184-
final boolean fIgnoreTimeout = ignoreTimeout;
179+
final boolean fIgnoreRecoverAfterTime = ignoreRecoverAfterTime;
185180
threadPool.cached().execute(new Runnable() {
186181
@Override public void run() {
187-
performStateRecovery(null, fIgnoreTimeout);
182+
performStateRecovery(fIgnoreRecoverAfterTime);
188183
}
189184
});
190185
}
191186
}
192187
}
193188

194-
private void performStateRecovery(@Nullable TimeValue timeout) {
195-
performStateRecovery(null, false);
196-
}
197-
198-
private void performStateRecovery(@Nullable TimeValue timeout, boolean ignoreTimeout) {
199-
final CountDownLatch latch = new CountDownLatch(1);
200-
final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener(latch);
189+
private void performStateRecovery(boolean ignoreRecoverAfterTime) {
190+
final Gateway.GatewayStateRecoveredListener recoveryListener = new GatewayRecoveryListener(new CountDownLatch(1));
201191

202-
if (!ignoreTimeout && recoverAfterTime != null) {
192+
if (!ignoreRecoverAfterTime && recoverAfterTime != null) {
203193
if (scheduledRecovery.compareAndSet(false, true)) {
204194
logger.debug("delaying initial state recovery for [{}]", recoverAfterTime);
205195
threadPool.schedule(recoverAfterTime, ThreadPool.Names.CACHED, new Runnable() {
@@ -215,14 +205,6 @@ private void performStateRecovery(@Nullable TimeValue timeout, boolean ignoreTim
215205
gateway.performStateRecovery(recoveryListener);
216206
}
217207
}
218-
219-
if (timeout != null) {
220-
try {
221-
latch.await(timeout.millis(), TimeUnit.MILLISECONDS);
222-
} catch (InterruptedException e) {
223-
throw new ElasticSearchInterruptedException(e.getMessage(), e);
224-
}
225-
}
226208
}
227209

228210
class GatewayRecoveryListener implements Gateway.GatewayStateRecoveredListener {

0 commit comments

Comments
 (0)