36
36
import org .apache .logging .log4j .Logger ;
37
37
import org .apache .logging .log4j .message .ParameterizedMessage ;
38
38
import org .apache .lucene .store .AlreadyClosedException ;
39
+ import org .opensearch .ExceptionsHelper ;
39
40
import org .opensearch .LegacyESVersion ;
40
41
import org .opensearch .OpenSearchException ;
41
42
import org .opensearch .OpenSearchTimeoutException ;
42
- import org .opensearch .ExceptionsHelper ;
43
43
import org .opensearch .action .ActionListener ;
44
44
import org .opensearch .action .ActionRunnable ;
45
45
import org .opensearch .cluster .ClusterState ;
@@ -219,6 +219,12 @@ protected void reestablishRecovery(final StartRecoveryRequest request, final Str
219
219
threadPool .scheduleUnlessShuttingDown (retryAfter , ThreadPool .Names .GENERIC , new RecoveryRunner (recoveryId , request ));
220
220
}
221
221
222
+ /**
223
+ * Initiates recovery of the replica. TODO: this needs to be revisited together with PRRL and later changes; see the
224
+ * <a href="https://github.com/opensearch-project/OpenSearch/issues/4502">GitHub issue</a> tracking it.
225
+ * @param recoveryId recovery id
226
+ * @param preExistingRequest start recovery request
227
+ */
222
228
private void doRecovery (final long recoveryId , final StartRecoveryRequest preExistingRequest ) {
223
229
final String actionName ;
224
230
final TransportRequest requestToSend ;
@@ -238,10 +244,17 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi
238
244
assert recoveryTarget .sourceNode () != null : "can not do a recovery without a source node" ;
239
245
logger .trace ("{} preparing shard for peer recovery" , recoveryTarget .shardId ());
240
246
indexShard .prepareForIndexRecovery ();
241
- final long startingSeqNo = indexShard .recoverLocallyUpToGlobalCheckpoint ();
247
+ boolean remoteTranslogEnabled = recoveryTarget .state ().getPrimary () == false && indexShard .isRemoteTranslogEnabled ();
248
+ final long startingSeqNo = indexShard .recoverLocallyAndFetchStartSeqNo (!remoteTranslogEnabled );
242
249
assert startingSeqNo == UNASSIGNED_SEQ_NO || recoveryTarget .state ().getStage () == RecoveryState .Stage .TRANSLOG
243
250
: "unexpected recovery stage [" + recoveryTarget .state ().getStage () + "] starting seqno [ " + startingSeqNo + "]" ;
244
- startRequest = getStartRecoveryRequest (logger , clusterService .localNode (), recoveryTarget , startingSeqNo );
251
+ startRequest = getStartRecoveryRequest (
252
+ logger ,
253
+ clusterService .localNode (),
254
+ recoveryTarget ,
255
+ startingSeqNo ,
256
+ !remoteTranslogEnabled
257
+ );
245
258
requestToSend = startRequest ;
246
259
actionName = PeerRecoverySourceService .Actions .START_RECOVERY ;
247
260
} catch (final Exception e ) {
@@ -270,44 +283,58 @@ private void doRecovery(final long recoveryId, final StartRecoveryRequest preExi
270
283
);
271
284
}
272
285
286
/**
 * Prepare the start recovery request with translog verification enabled.
 * <p>
 * Convenience overload: delegates to the five-argument variant, passing {@code true} for {@code verifyTranslog}.
 *
 * @param logger         the logger
 * @param localNode      the local node of the recovery target
 * @param recoveryTarget the target of the recovery
 * @param startingSeqNo  a sequence number that an operation-based peer recovery can start with
 * @return a start recovery request
 */
+ public static StartRecoveryRequest getStartRecoveryRequest (
287
+ Logger logger ,
288
+ DiscoveryNode localNode ,
289
+ RecoveryTarget recoveryTarget ,
290
+ long startingSeqNo
291
+ ) {
292
+ return getStartRecoveryRequest (logger , localNode , recoveryTarget , startingSeqNo , true );
293
+ }
294
+
273
295
/**
274
296
* Prepare the start recovery request.
275
297
*
276
- * @param logger the logger
277
- * @param localNode the local node of the recovery target
278
- * @param recoveryTarget the target of the recovery
279
- * @param startingSeqNo a sequence number that an operation-based peer recovery can start with.
280
- * This is the first operation after the local checkpoint of the safe commit if exists.
298
+ * @param logger the logger
299
+ * @param localNode the local node of the recovery target
300
+ * @param recoveryTarget the target of the recovery
301
+ * @param startingSeqNo a sequence number that an operation-based peer recovery can start with.
302
+ * This is the first operation after the local checkpoint of the safe commit if exists.
303
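+ * @param verifyTranslog whether to check that the global checkpoint can be read from the local translog using the translog UUID recorded in the store metadata snapshot; if that read fails, the request falls back to an empty metadata snapshot and an unassigned starting sequence number.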
281
304
* @return a start recovery request
282
305
*/
283
306
public static StartRecoveryRequest getStartRecoveryRequest (
284
307
Logger logger ,
285
308
DiscoveryNode localNode ,
286
309
RecoveryTarget recoveryTarget ,
287
- long startingSeqNo
310
+ long startingSeqNo ,
311
+ boolean verifyTranslog
288
312
) {
289
313
final StartRecoveryRequest request ;
290
314
logger .trace ("{} collecting local files for [{}]" , recoveryTarget .shardId (), recoveryTarget .sourceNode ());
291
315
292
316
Store .MetadataSnapshot metadataSnapshot ;
293
317
try {
294
318
metadataSnapshot = recoveryTarget .indexShard ().snapshotStoreMetadata ();
295
- // Make sure that the current translog is consistent with the Lucene index; otherwise, we have to throw away the Lucene index.
296
- try {
297
- final String expectedTranslogUUID = metadataSnapshot .getCommitUserData ().get (Translog .TRANSLOG_UUID_KEY );
298
- final long globalCheckpoint = Translog .readGlobalCheckpoint (recoveryTarget .translogLocation (), expectedTranslogUUID );
299
- assert globalCheckpoint + 1 >= startingSeqNo : "invalid startingSeqNo " + startingSeqNo + " >= " + globalCheckpoint ;
300
- } catch (IOException | TranslogCorruptedException e ) {
301
- logger .warn (
302
- new ParameterizedMessage (
303
- "error while reading global checkpoint from translog, "
304
- + "resetting the starting sequence number from {} to unassigned and recovering as if there are none" ,
305
- startingSeqNo
306
- ),
307
- e
308
- );
309
- metadataSnapshot = Store .MetadataSnapshot .EMPTY ;
310
- startingSeqNo = UNASSIGNED_SEQ_NO ;
319
+ if (verifyTranslog ) {
320
+ // Make sure that the current translog is consistent with the Lucene index; otherwise, we have to throw away the Lucene
321
+ // index.
322
+ try {
323
+ final String expectedTranslogUUID = metadataSnapshot .getCommitUserData ().get (Translog .TRANSLOG_UUID_KEY );
324
+ final long globalCheckpoint = Translog .readGlobalCheckpoint (recoveryTarget .translogLocation (), expectedTranslogUUID );
325
+ assert globalCheckpoint + 1 >= startingSeqNo : "invalid startingSeqNo " + startingSeqNo + " >= " + globalCheckpoint ;
326
+ } catch (IOException | TranslogCorruptedException e ) {
327
+ logger .warn (
328
+ new ParameterizedMessage (
329
+ "error while reading global checkpoint from translog, "
330
+ + "resetting the starting sequence number from {} to unassigned and recovering as if there are none" ,
331
+ startingSeqNo
332
+ ),
333
+ e
334
+ );
335
+ metadataSnapshot = Store .MetadataSnapshot .EMPTY ;
336
+ startingSeqNo = UNASSIGNED_SEQ_NO ;
337
+ }
311
338
}
312
339
} catch (final org .apache .lucene .index .IndexNotFoundException e ) {
313
340
// happens on an empty folder. no need to log
0 commit comments