From 67e30b0ccee54eb1be39217267895422bda77783 Mon Sep 17 00:00:00 2001 From: Matthias Reso <13337103+mreso@users.noreply.github.com> Date: Wed, 11 Sep 2024 22:59:00 +0000 Subject: [PATCH] Use spawn instead of fork method for vllm --- ts/torch_handler/vllm_handler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ts/torch_handler/vllm_handler.py b/ts/torch_handler/vllm_handler.py index 927efe93e2b..02f7ee77b0a 100644 --- a/ts/torch_handler/vllm_handler.py +++ b/ts/torch_handler/vllm_handler.py @@ -1,5 +1,6 @@ import asyncio import logging +import os import pathlib import time from unittest.mock import MagicMock @@ -40,6 +41,8 @@ def initialize(self, ctx): vllm_engine_config = self._get_vllm_engine_config( ctx.model_yaml_config.get("handler", {}) ) + + os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" self.vllm_engine = AsyncLLMEngine.from_engine_args(vllm_engine_config)