@@ -470,23 +470,27 @@ protected void onStart() throws JobMasterException {
470
470
/** Suspend the job and shutdown all other services including rpc. */
471
471
@ Override
472
472
public CompletableFuture <Void > onStop () {
473
- log .info (
474
- "Stopping the JobMaster for job '{}' ({})." ,
475
- executionPlan .getName (),
476
- executionPlan .getJobID ());
477
-
478
- // make sure there is a graceful exit
479
- return stopJobExecution (
480
- new FlinkException (
481
- String .format (
482
- "Stopping JobMaster for job '%s' (%s)." ,
483
- executionPlan .getName (), executionPlan .getJobID ())))
484
- .exceptionally (
485
- exception -> {
486
- throw new CompletionException (
487
- new JobMasterException (
488
- "Could not properly stop the JobMaster." , exception ));
489
- });
473
+ try (MdcUtils .MdcCloseable ignored =
474
+ MdcUtils .withContext (MdcUtils .asContextData (executionPlan .getJobID ()))) {
475
+ log .info (
476
+ "Stopping the JobMaster for job '{}' ({})." ,
477
+ executionPlan .getName (),
478
+ executionPlan .getJobID ());
479
+
480
+ // make sure there is a graceful exit
481
+ return stopJobExecution (
482
+ new FlinkException (
483
+ String .format (
484
+ "Stopping JobMaster for job '%s' (%s)." ,
485
+ executionPlan .getName (), executionPlan .getJobID ())))
486
+ .exceptionally (
487
+ exception -> {
488
+ throw new CompletionException (
489
+ new JobMasterException (
490
+ "Could not properly stop the JobMaster." ,
491
+ exception ));
492
+ });
493
+ }
490
494
}
491
495
492
496
// ----------------------------------------------------------------------------------------------
@@ -597,12 +601,14 @@ public void acknowledgeCheckpoint(
597
601
final long checkpointId ,
598
602
final CheckpointMetrics checkpointMetrics ,
599
603
@ Nullable final SerializedValue <TaskStateSnapshot > checkpointState ) {
600
- schedulerNG .acknowledgeCheckpoint (
601
- jobID ,
602
- executionAttemptID ,
603
- checkpointId ,
604
- checkpointMetrics ,
605
- deserializeTaskStateSnapshot (checkpointState , getClass ().getClassLoader ()));
604
+ try (MdcUtils .MdcCloseable ignored = MdcUtils .withContext (MdcUtils .asContextData (jobID ))) {
605
+ schedulerNG .acknowledgeCheckpoint (
606
+ jobID ,
607
+ executionAttemptID ,
608
+ checkpointId ,
609
+ checkpointMetrics ,
610
+ deserializeTaskStateSnapshot (checkpointState , getClass ().getClassLoader ()));
611
+ }
606
612
}
607
613
608
614
@ Override
@@ -1212,9 +1218,13 @@ private CompletableFuture<Void> stopJobExecution(final Exception cause) {
1212
1218
return FutureUtils .runAfterwardsAsync (
1213
1219
terminationFuture ,
1214
1220
() -> {
1215
- shuffleMaster .unregisterJob (executionPlan .getJobID ());
1216
- disconnectTaskManagerResourceManagerConnections (cause );
1217
- stopJobMasterServices ();
1221
+ try (MdcUtils .MdcCloseable ignored =
1222
+ MdcUtils .withContext (
1223
+ MdcUtils .asContextData (executionPlan .getJobID ()))) {
1224
+ shuffleMaster .unregisterJob (executionPlan .getJobID ());
1225
+ disconnectTaskManagerResourceManagerConnections (cause );
1226
+ stopJobMasterServices ();
1227
+ }
1218
1228
},
1219
1229
getMainThreadExecutor ());
1220
1230
}
@@ -1486,10 +1496,12 @@ private class ResourceManagerLeaderListener implements LeaderRetrievalListener {
1486
1496
@ Override
1487
1497
public void notifyLeaderAddress (final String leaderAddress , final UUID leaderSessionID ) {
1488
1498
runAsync (
1489
- () ->
1490
- notifyOfNewResourceManagerLeader (
1491
- leaderAddress ,
1492
- ResourceManagerId .fromUuidOrNull (leaderSessionID )));
1499
+ MdcUtils .wrapRunnable (
1500
+ MdcUtils .asContextData (executionPlan .getJobID ()),
1501
+ () ->
1502
+ notifyOfNewResourceManagerLeader (
1503
+ leaderAddress ,
1504
+ ResourceManagerId .fromUuidOrNull (leaderSessionID ))));
1493
1505
}
1494
1506
1495
1507
@ Override
@@ -1670,13 +1682,16 @@ private class ResourceManagerHeartbeatListener implements HeartbeatListener<Void
1670
1682
1671
1683
@ Override
1672
1684
public void notifyHeartbeatTimeout (final ResourceID resourceId ) {
1673
- final String message =
1674
- String .format (
1675
- "The heartbeat of ResourceManager with id %s timed out." ,
1676
- resourceId .getStringWithMetadata ());
1677
- log .info (message );
1685
+ try (MdcUtils .MdcCloseable ignored =
1686
+ MdcUtils .withContext (MdcUtils .asContextData (executionPlan .getJobID ()))) {
1687
+ final String message =
1688
+ String .format (
1689
+ "The heartbeat of ResourceManager with id %s timed out." ,
1690
+ resourceId .getStringWithMetadata ());
1691
+ log .info (message );
1678
1692
1679
- handleResourceManagerConnectionLoss (resourceId , new TimeoutException (message ));
1693
+ handleResourceManagerConnectionLoss (resourceId , new TimeoutException (message ));
1694
+ }
1680
1695
}
1681
1696
1682
1697
private void handleResourceManagerConnectionLoss (ResourceID resourceId , Exception cause ) {
@@ -1691,13 +1706,16 @@ private void handleResourceManagerConnectionLoss(ResourceID resourceId, Exceptio
1691
1706
1692
1707
@ Override
1693
1708
public void notifyTargetUnreachable (ResourceID resourceID ) {
1694
- final String message =
1695
- String .format (
1696
- "ResourceManager with id %s is no longer reachable." ,
1697
- resourceID .getStringWithMetadata ());
1698
- log .info (message );
1709
+ try (MdcUtils .MdcCloseable ignored =
1710
+ MdcUtils .withContext (MdcUtils .asContextData (executionPlan .getJobID ()))) {
1711
+ final String message =
1712
+ String .format (
1713
+ "ResourceManager with id %s is no longer reachable." ,
1714
+ resourceID .getStringWithMetadata ());
1715
+ log .info (message );
1699
1716
1700
- handleResourceManagerConnectionLoss (resourceID , new JobMasterException (message ));
1717
+ handleResourceManagerConnectionLoss (resourceID , new JobMasterException (message ));
1718
+ }
1701
1719
}
1702
1720
1703
1721
@ Override
0 commit comments