Skip to content

Commit ecd0709

Browse files
authored
[llm.serving] Fix using uni executor when world size == 1 (#50849) (#50863)
Cherry-pick: #50849 Signed-off-by: Gene Su <e870252314@gmail.com>
1 parent cd9e467 commit ecd0709

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

python/ray/llm/_internal/serve/deployments/llm/vllm/vllm_engine.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -191,15 +191,18 @@ def __init__(self, ipc_path, engine_args, engine_config):
191191
# Adapted from vllm.engine.multiprocessing.engine.MQLLMEngine.from_engine_args
192192
vllm.plugins.load_general_plugins()
193193

194-
executor_class = vllm.engine.llm_engine.LLMEngine._get_executor_cls(
195-
engine_config
196-
)
194+
# Note (genesu): A bug in vllm 0.7.2 forces the use of the uni-process
195+
# executor when world_size is 1. This vllm 0.7.2 bug
196+
# is fixed by https://github.com/vllm-project/vllm/pull/12934 which is shipped
197+
# with vllm 0.7.3. However, in Ray's llm package, we will enforce the use of
198+
# ray distributed executor for all cases so it's always compatible with Ray.
199+
from vllm.executor.ray_distributed_executor import RayDistributedExecutor
197200

198201
self.engine = MQLLMEngine(
199202
ipc_path=ipc_path,
200203
use_async_sockets=engine_config.model_config.use_async_output_proc,
201204
vllm_config=engine_config,
202-
executor_class=executor_class,
205+
executor_class=RayDistributedExecutor,
203206
log_requests=not engine_args.disable_log_requests,
204207
log_stats=not engine_args.disable_log_stats,
205208
usage_context=vllm.usage.usage_lib.UsageContext.API_SERVER,

0 commit comments

Comments
 (0)