Skip to content

Commit 2f119bc

Browse files
committed Oct 6, 2020
Retry CCR shard follow task when no seed node left (#63225)
If the connection between clusters is disconnected or the leader cluster is offline, then CCR shard-follow tasks can stop with "no seed node left". CCR should retry on this error.
1 parent cd38a51 commit 2f119bc

File tree

2 files changed

+13
-8
lines changed

2 files changed

+13
-8
lines changed
 

‎x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTask.java

+1
Original file line number | Diff line number | Diff line change
@@ -525,6 +525,7 @@ static boolean shouldRetry(final Exception e) {
525525
actual instanceof NodeClosedException ||
526526
actual instanceof NoSuchRemoteClusterException ||
527527
(actual.getMessage() != null && actual.getMessage().contains("TransportService is closed")) ||
528+
(actual instanceof IllegalStateException && "no seed node left".equals(actual.getMessage())) ||
528529
actual instanceof EsRejectedExecutionException ||
529530
actual instanceof CircuitBreakingException;
530531
}

‎x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/ShardFollowNodeTaskTests.java

+12 -8
Original file line number | Diff line number | Diff line change
@@ -263,12 +263,11 @@ public void testReceiveRetryableError() {
263263
int max = randomIntBetween(1, 30);
264264
final Exception[] exceptions = new Exception[max];
265265
for (int i = 0; i < max; i++) {
266-
final Exception exception;
267-
if (randomBoolean()) {
268-
exception = new ShardNotFoundException(new ShardId("leader_index", "", 0));
269-
} else {
270-
exception = new EsRejectedExecutionException("leader_index rejected");
271-
}
266+
final Exception exception = randomFrom(
267+
new ShardNotFoundException(new ShardId("leader_index", "", 0)),
268+
new EsRejectedExecutionException("leader_index rejected"),
269+
new IllegalStateException("no seed node left")
270+
);
272271
exceptions[i] = exception;
273272
readFailures.add(exception);
274273
}
@@ -286,16 +285,21 @@ public void testReceiveRetryableError() {
286285
final Map.Entry<Long, Tuple<Integer, ElasticsearchException>> entry = status.readExceptions().entrySet().iterator().next();
287286
assertThat(entry.getValue().v1(), equalTo(Math.toIntExact(retryCounter.get())));
288287
assertThat(entry.getKey(), equalTo(0L));
289-
if (exceptions[Math.toIntExact(retryCounter.get()) - 1] instanceof ShardNotFoundException) {
288+
final Exception error = exceptions[Math.toIntExact(retryCounter.get()) - 1];
289+
if (error instanceof ShardNotFoundException) {
290290
assertThat(entry.getValue().v2(), instanceOf(ShardNotFoundException.class));
291291
final ShardNotFoundException shardNotFoundException = (ShardNotFoundException) entry.getValue().v2();
292292
assertThat(shardNotFoundException.getShardId().getIndexName(), equalTo("leader_index"));
293293
assertThat(shardNotFoundException.getShardId().getId(), equalTo(0));
294-
} else {
294+
} else if (error instanceof EsRejectedExecutionException) {
295295
assertThat(entry.getValue().v2().getCause(), instanceOf(EsRejectedExecutionException.class));
296296
final EsRejectedExecutionException rejectedExecutionException =
297297
(EsRejectedExecutionException) entry.getValue().v2().getCause();
298298
assertThat(rejectedExecutionException.getMessage(), equalTo("leader_index rejected"));
299+
} else {
300+
assertThat(entry.getValue().v2().getCause(), instanceOf(IllegalStateException.class));
301+
final IllegalStateException noSeedError = (IllegalStateException) entry.getValue().v2().getCause();
302+
assertThat(noSeedError.getMessage(), equalTo("no seed node left"));
299303
}
300304
}
301305
retryCounter.incrementAndGet();

0 commit comments

Comments
 (0)
Please sign in to comment.