Skip to content

Commit 2b42a0f

Browse files
committed
Override DefaultExceptionHandler to filter out certain exceptions
Some tests fail because they don't handle the EsRejectedExecutionException that is thrown when a node shuts down. It is OK to ignore this exception rather than fail the test. We also suffer from OOMs caused by being unable to create native threads, but we don't get thread dumps for those failures. This patch prints the thread stacks once we catch an OOM that can't create native threads.
1 parent 74bfa27 commit 2b42a0f

File tree

4 files changed

+128
-25
lines changed

4 files changed

+128
-25
lines changed

pom.xml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@
5353
<version>1.3</version>
5454
<scope>test</scope>
5555
</dependency>
56+
<dependency>
57+
<groupId>com.carrotsearch.randomizedtesting</groupId>
58+
<artifactId>randomizedtesting-runner</artifactId>
59+
<version>2.0.15</version>
60+
<scope>test</scope>
61+
</dependency>
5662
<dependency>
5763
<groupId>org.apache.lucene</groupId>
5864
<artifactId>lucene-test-framework</artifactId>
@@ -323,7 +329,7 @@
323329
<plugin>
324330
<groupId>com.carrotsearch.randomizedtesting</groupId>
325331
<artifactId>junit4-maven-plugin</artifactId>
326-
<version>2.0.14</version>
332+
<version>2.0.15</version>
327333
<executions>
328334
<execution>
329335
<id>tests</id>

src/main/java/org/elasticsearch/common/util/concurrent/EsAbortPolicy.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
public class EsAbortPolicy implements XRejectedExecutionHandler {
3232

3333
private final CounterMetric rejected = new CounterMetric();
34+
public static final String SHUTTING_DOWN_KEY = "(shutting down)";
3435

3536
@Override
3637
public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
@@ -51,7 +52,7 @@ public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
5152
rejected.inc();
5253
StringBuilder sb = new StringBuilder("rejected execution ");
5354
if (executor.isShutdown()) {
54-
sb.append("(shutting down) ");
55+
sb.append(SHUTTING_DOWN_KEY + " ");
5556
} else {
5657
if (executor.getQueue() instanceof SizeBlockingQueue) {
5758
sb.append("(queue capacity ").append(((SizeBlockingQueue) executor.getQueue()).capacity()).append(") ");

src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
import com.carrotsearch.randomizedtesting.SeedUtils;
2222
import com.google.common.base.Joiner;
23-
import org.apache.lucene.util.AbstractRandomizedTest.IntegrationTests;
23+
import org.apache.lucene.util.AbstractRandomizedTest;
2424
import org.elasticsearch.ExceptionsHelper;
2525
import org.elasticsearch.action.ActionListener;
2626
import org.elasticsearch.action.ShardOperationFailedException;
@@ -129,7 +129,7 @@
129129
* </p>
130130
*/
131131
@Ignore
132-
@IntegrationTests
132+
@AbstractRandomizedTest.IntegrationTests
133133
public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase {
134134

135135

@@ -170,26 +170,34 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
170170

171171
@Before
172172
public final void before() throws IOException {
173-
final Scope currentClusterScope = getCurrentClusterScope();
174-
switch (currentClusterScope) {
175-
case GLOBAL:
176-
clearClusters();
177-
currentCluster = GLOBAL_CLUSTER;
178-
break;
179-
case SUITE:
180-
currentCluster = buildAndPutCluster(currentClusterScope, false);
181-
break;
182-
case TEST:
183-
currentCluster = buildAndPutCluster(currentClusterScope, true);
184-
break;
185-
default:
186-
assert false : "Unknown Scope: [" + currentClusterScope + "]";
173+
assert Thread.getDefaultUncaughtExceptionHandler() instanceof ElasticsearchUncaughtExceptionHandler;
174+
try {
175+
final Scope currentClusterScope = getCurrentClusterScope();
176+
switch (currentClusterScope) {
177+
case GLOBAL:
178+
clearClusters();
179+
currentCluster = GLOBAL_CLUSTER;
180+
break;
181+
case SUITE:
182+
currentCluster = buildAndPutCluster(currentClusterScope, false);
183+
break;
184+
case TEST:
185+
currentCluster = buildAndPutCluster(currentClusterScope, true);
186+
break;
187+
default:
188+
assert false : "Unknown Scope: [" + currentClusterScope + "]";
189+
}
190+
currentCluster.beforeTest(getRandom(), getPerTestTransportClientRatio());
191+
wipeIndices();
192+
wipeTemplates();
193+
randomIndexTemplate();
194+
logger.info("[{}#{}]: before test", getTestClass().getSimpleName(), getTestName());
195+
} catch (OutOfMemoryError e) {
196+
if (e.getMessage().contains("unable to create new native thread")) {
197+
ElasticsearchTestCase.printStackDump(logger);
198+
}
199+
throw e;
187200
}
188-
currentCluster.beforeTest(getRandom(), getPerTestTransportClientRatio());
189-
wipeIndices();
190-
wipeTemplates();
191-
randomIndexTemplate();
192-
logger.info("[{}#{}]: before test", getTestClass().getSimpleName(), getTestName());
193201
}
194202

195203
public TestCluster buildAndPutCluster(Scope currentClusterScope, boolean createIfExists) throws IOException {
@@ -234,6 +242,11 @@ public final void after() throws IOException {
234242
ensureAllSearchersClosed();
235243
ensureAllFilesClosed();
236244
logger.info("[{}#{}]: cleaned up after test", getTestClass().getSimpleName(), getTestName());
245+
} catch (OutOfMemoryError e) {
246+
if (e.getMessage().contains("unable to create new native thread")) {
247+
ElasticsearchTestCase.printStackDump(logger);
248+
}
249+
throw e;
237250
} finally {
238251
currentCluster.afterTest();
239252
currentCluster = null;

src/test/java/org/elasticsearch/test/ElasticsearchTestCase.java

Lines changed: 85 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,12 @@
2929
import org.elasticsearch.Version;
3030
import org.elasticsearch.common.logging.ESLogger;
3131
import org.elasticsearch.common.logging.Loggers;
32+
import org.elasticsearch.common.util.concurrent.EsAbortPolicy;
33+
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
3234
import org.elasticsearch.test.junit.listeners.LoggingListener;
3335
import org.elasticsearch.test.engine.MockRobinEngine;
3436
import org.elasticsearch.test.store.MockDirectoryHelper;
37+
import org.junit.AfterClass;
3538
import org.junit.BeforeClass;
3639

3740
import java.io.Closeable;
@@ -52,6 +55,8 @@
5255
@TimeoutSuite(millis = TimeUnits.HOUR) // timeout the suite after 1h and fail the test.
5356
@Listeners(LoggingListener.class)
5457
public abstract class ElasticsearchTestCase extends AbstractRandomizedTest {
58+
59+
private static Thread.UncaughtExceptionHandler defaultHandler;
5560

5661
protected final ESLogger logger = Loggers.getLogger(getClass());
5762

@@ -168,6 +173,13 @@ public void close() throws IOException {
168173
ensureAllSearchersClosed();
169174
}
170175
});
176+
defaultHandler = Thread.getDefaultUncaughtExceptionHandler();
177+
Thread.setDefaultUncaughtExceptionHandler(new ElasticsearchUncaughtExceptionHandler(defaultHandler));
178+
}
179+
180+
@AfterClass
181+
public static void resetUncaughtExceptionHandler() {
182+
Thread.setDefaultUncaughtExceptionHandler(defaultHandler);
171183
}
172184

173185
public static boolean maybeDocValues() {
@@ -215,5 +227,76 @@ public static Version randomVersion() {
215227
public static Version randomVersion(Random random) {
216228
return SORTED_VERSIONS.get(random.nextInt(SORTED_VERSIONS.size()));
217229
}
218-
219-
}
230+
231+
static final class ElasticsearchUncaughtExceptionHandler implements Thread.UncaughtExceptionHandler {
232+
233+
private final Thread.UncaughtExceptionHandler parent;
234+
private final ESLogger logger = Loggers.getLogger(getClass());
235+
236+
private ElasticsearchUncaughtExceptionHandler(Thread.UncaughtExceptionHandler parent) {
237+
this.parent = parent;
238+
}
239+
240+
241+
@Override
242+
public void uncaughtException(Thread t, Throwable e) {
243+
if (e instanceof EsRejectedExecutionException) {
244+
if (e.getMessage().contains(EsAbortPolicy.SHUTTING_DOWN_KEY)) {
245+
return; // ignore the EsRejectedExecutionException when a node shuts down
246+
}
247+
} else if (e instanceof OutOfMemoryError) {
248+
if (e.getMessage().contains("unable to create new native thread")) {
249+
printStackDump(logger);
250+
}
251+
}
252+
parent.uncaughtException(t, e);
253+
}
254+
255+
}
256+
257+
protected static final void printStackDump(ESLogger logger) {
258+
// print stack traces if we can't create any native thread anymore
259+
Map<Thread, StackTraceElement[]> allStackTraces = Thread.getAllStackTraces();
260+
logger.error(formatThreadStacks(allStackTraces));
261+
}
262+
263+
/**
264+
* Dump threads and their current stack trace.
265+
*/
266+
private static String formatThreadStacks(Map<Thread,StackTraceElement[]> threads) {
267+
StringBuilder message = new StringBuilder();
268+
int cnt = 1;
269+
final Formatter f = new Formatter(message, Locale.ENGLISH);
270+
for (Map.Entry<Thread,StackTraceElement[]> e : threads.entrySet()) {
271+
if (e.getKey().isAlive())
272+
f.format(Locale.ENGLISH, "\n %2d) %s", cnt++, threadName(e.getKey())).flush();
273+
if (e.getValue().length == 0) {
274+
message.append("\n at (empty stack)");
275+
} else {
276+
for (StackTraceElement ste : e.getValue()) {
277+
message.append("\n at ").append(ste);
278+
}
279+
}
280+
}
281+
return message.toString();
282+
}
283+
284+
private static String threadName(Thread t) {
285+
return "Thread[" +
286+
"id=" + t.getId() +
287+
", name=" + t.getName() +
288+
", state=" + t.getState() +
289+
", group=" + groupName(t.getThreadGroup()) +
290+
"]";
291+
}
292+
293+
private static String groupName(ThreadGroup threadGroup) {
294+
if (threadGroup == null) {
295+
return "{null group}";
296+
} else {
297+
return threadGroup.getName();
298+
}
299+
}
300+
301+
302+
}

0 commit comments

Comments
 (0)