@@ -121,18 +121,17 @@ class TaskCounter {
121
121
private:
122
122
mutable absl::Mutex mu_;
123
123
// Tracks all tasks submitted to this worker by state, is_retry.
124
- CounterMap<std::tuple<std::string, TaskStatusType, bool >> counter_
125
- ABSL_GUARDED_BY (&mu_);
124
+ CounterMap<std::tuple<std::string, TaskStatusType, bool >> counter_ ABSL_GUARDED_BY (mu_);
126
125
127
126
// Additionally tracks the sub-states of RUNNING_IN_RAY_GET/WAIT. The counters here
128
127
// overlap with those of counter_.
129
- CounterMap<std::pair<std::string, bool >> running_in_get_counter_ ABSL_GUARDED_BY (& mu_);
130
- CounterMap<std::pair<std::string, bool >> running_in_wait_counter_ ABSL_GUARDED_BY (& mu_);
128
+ CounterMap<std::pair<std::string, bool >> running_in_get_counter_ ABSL_GUARDED_BY (mu_);
129
+ CounterMap<std::pair<std::string, bool >> running_in_wait_counter_ ABSL_GUARDED_BY (mu_);
131
130
132
- std::string job_id_ ABSL_GUARDED_BY (& mu_);
131
+ std::string job_id_ ABSL_GUARDED_BY (mu_);
133
132
// Used for actor state tracking.
134
- std::string actor_name_ ABSL_GUARDED_BY (& mu_);
135
- int64_t num_tasks_running_ ABSL_GUARDED_BY (& mu_) = 0;
133
+ std::string actor_name_ ABSL_GUARDED_BY (mu_);
134
+ int64_t num_tasks_running_ ABSL_GUARDED_BY (mu_) = 0;
136
135
};
137
136
138
137
struct TaskToRetry {
@@ -294,7 +293,10 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
294
293
/// generator task.
295
294
void AsyncDelObjectRefStream (const ObjectID &generator_id);
296
295
297
- void TryDeleteObjectRefStreams ();
296
+ // Attempt to delete ObjectRefStreams that could not be deleted when
297
+ // AsyncDelObjectRefStream was called (stored in generator_ids_pending_deletion_).
298
+ // This function is called periodically on the io_service_.
299
+ void TryDelPendingObjectRefStreams ();
298
300
299
301
const PlacementGroupID &GetCurrentPlacementGroupId () const {
300
302
return worker_context_.GetCurrentPlacementGroupId ();
@@ -1903,7 +1905,16 @@ class CoreWorker : public rpc::CoreWorkerServiceHandler {
1903
1905
/// Worker's PID
1904
1906
uint32_t pid_;
1905
1907
1906
- absl::flat_hash_set<ObjectID> deleted_generator_ids_;
1908
+ // Guards generator_ids_pending_deletion_.
1909
+ absl::Mutex generator_ids_pending_deletion_mutex_;
1910
+
1911
+ // A set of generator IDs that have gone out of scope but couldn't be deleted from
1912
+ // the task manager yet (e.g., due to lineage references). We will periodically
1913
+ // attempt to delete them in the background until deletion succeeds.
1914
+ // This field is accessed on the destruction path of an ObjectRefGenerator as well as
1915
+ // by a background thread attempting later deletion, so it must be guarded by a lock.
1916
+ absl::flat_hash_set<ObjectID> generator_ids_pending_deletion_
1917
+ ABSL_GUARDED_BY (generator_ids_pending_deletion_mutex_);
1907
1918
1908
1919
/// TODO(hjiang):
1909
1920
/// 1. Cached job runtime env info, it's not implemented in the first place since
0 commit comments