Skip to content

Commit 2f490f2

Browse files
committed
Fix recovery path for searchable snapshots (opensearch-project#4813)
Replicas are bootstrapped using the recovery path, as opposed to the restore path used for creating the primary shard. This has been broken in the initial implementation of searchable snapshots. The fix here is to put in the appropriate checks to avoid failing during recovery. I've also updated the integration test to ensure the recovery path is always exercised during testing. Signed-off-by: Andrew Ross <andrross@amazon.com> (cherry picked from commit 6ac0c7c)
1 parent 52ff342 commit 2f490f2

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

CHANGELOG.md

+1-3
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
6666
- [Bug]: Alias filter lost after rollover ([#4499](https://github.com/opensearch-project/OpenSearch/pull/4499))
6767
- Fixing Gradle warnings associated with publishPluginZipPublicationToXxx tasks ([#4696](https://github.com/opensearch-project/OpenSearch/pull/4696))
6868
- Fixed randomly failing test ([4774](https://github.com/opensearch-project/OpenSearch/pull/4774))
69-
- Skip uppercase regex tests before 2.4.0 ([4869](https://github.com/opensearch-project/OpenSearch/pull/4869))
70-
- Compatibility issue with /_mget: RHLC 2.x connected to OpenSearch Cluster 1.x ([#4812](https://github.com/opensearch-project/OpenSearch/pull/4812))
71-
69+
- Fix recovery path for searchable snapshots ([4813](https://github.com/opensearch-project/OpenSearch/pull/4813))
7270
### Security
7371
- CVE-2022-25857 org.yaml:snakeyaml DOS vulnerability ([#4341](https://github.com/opensearch-project/OpenSearch/pull/4341))
7472

server/src/internalClusterTest/java/org/opensearch/snapshots/SearchableSnapshotIT.java

+11-2
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,24 @@ protected boolean addMockInternalEngine() {
4949
}
5050

5151
public void testCreateSearchableSnapshot() throws Exception {
52+
final int numReplicasIndex1 = randomIntBetween(1, 4);
53+
final int numReplicasIndex2 = randomIntBetween(0, 2);
54+
internalCluster().ensureAtLeastNumDataNodes(Math.max(numReplicasIndex1, numReplicasIndex2) + 1);
5255
final Client client = client();
5356
createRepository("test-repo", "fs");
5457
createIndex(
5558
"test-idx-1",
56-
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1").build()
59+
Settings.builder()
60+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, Integer.toString(numReplicasIndex1))
61+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1")
62+
.build()
5763
);
5864
createIndex(
5965
"test-idx-2",
60-
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, "0").put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1").build()
66+
Settings.builder()
67+
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, Integer.toString(numReplicasIndex2))
68+
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, "1")
69+
.build()
6170
);
6271
ensureGreen();
6372
indexRandomDocs("test-idx-1", 100);

server/src/main/java/org/opensearch/indices/recovery/PeerRecoveryTargetService.java

+6-3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@
5454
import org.opensearch.common.unit.TimeValue;
5555
import org.opensearch.common.util.CancellableThreads;
5656
import org.opensearch.common.util.concurrent.AbstractRunnable;
57+
import org.opensearch.index.IndexModule;
5758
import org.opensearch.index.IndexNotFoundException;
5859
import org.opensearch.index.engine.RecoveryEngineException;
5960
import org.opensearch.index.mapper.MapperException;
@@ -244,16 +245,18 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi
244245
assert recoveryTarget.sourceNode() != null : "can not do a recovery without a source node";
245246
logger.trace("{} preparing shard for peer recovery", recoveryTarget.shardId());
246247
indexShard.prepareForIndexRecovery();
247-
boolean remoteTranslogEnabled = recoveryTarget.state().getPrimary() == false && indexShard.isRemoteTranslogEnabled();
248-
final long startingSeqNo = indexShard.recoverLocallyAndFetchStartSeqNo(!remoteTranslogEnabled);
248+
final boolean hasRemoteTranslog = recoveryTarget.state().getPrimary() == false && indexShard.isRemoteTranslogEnabled();
249+
final boolean hasNoTranslog = IndexModule.Type.REMOTE_SNAPSHOT.match(indexShard.indexSettings());
250+
final boolean verifyTranslog = (hasRemoteTranslog || hasNoTranslog) == false;
251+
final long startingSeqNo = indexShard.recoverLocallyAndFetchStartSeqNo(!hasRemoteTranslog);
249252
assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget.state().getStage() == RecoveryState.Stage.TRANSLOG
250253
: "unexpected recovery stage [" + recoveryTarget.state().getStage() + "] starting seqno [ " + startingSeqNo + "]";
251254
startRequest = getStartRecoveryRequest(
252255
logger,
253256
clusterService.localNode(),
254257
recoveryTarget,
255258
startingSeqNo,
256-
!remoteTranslogEnabled
259+
verifyTranslog
257260
);
258261
requestToSend = startRequest;
259262
actionName = PeerRecoverySourceService.Actions.START_RECOVERY;

server/src/main/java/org/opensearch/indices/recovery/RecoveryTarget.java

+7-4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import org.opensearch.common.bytes.BytesReference;
4646
import org.opensearch.common.lucene.Lucene;
4747
import org.opensearch.common.util.CancellableThreads;
48+
import org.opensearch.index.IndexModule;
4849
import org.opensearch.index.engine.Engine;
4950
import org.opensearch.index.mapper.MapperException;
5051
import org.opensearch.index.seqno.ReplicationTracker;
@@ -402,10 +403,12 @@ public void cleanFiles(
402403
try {
403404
store.cleanupAndVerify("recovery CleanFilesRequestHandler", sourceMetadata);
404405

405-
// If Segment Replication is enabled, we need to reuse the primary's translog UUID already stored in the index.
406-
// With Segrep, replicas should never create their own commit points. This ensures the index and xlog share the same
407-
// UUID without the extra step to associate the index with a new xlog.
408-
if (indexShard.indexSettings().isSegRepEnabled()) {
406+
// Replicas for segment replication or remote snapshot indices do not create
407+
// their own commit points and therefore do not modify the commit user data
408+
// in their store. In these cases, reuse the primary's translog UUID.
409+
final boolean reuseTranslogUUID = indexShard.indexSettings().isSegRepEnabled()
410+
|| IndexModule.Type.REMOTE_SNAPSHOT.match(indexShard.indexSettings());
411+
if (reuseTranslogUUID) {
409412
final String translogUUID = store.getMetadata().getCommitUserData().get(TRANSLOG_UUID_KEY);
410413
Translog.createEmptyTranslog(
411414
indexShard.shardPath().resolveTranslog(),

0 commit comments

Comments
 (0)