From 3be27f81f78065a993fe17007a1d3d4d898a9b99 Mon Sep 17 00:00:00 2001
From: irexyc
Date: Fri, 11 Oct 2024 02:38:10 +0000
Subject: [PATCH] release vision model

---
 lmdeploy/pytorch/engine/engine.py |  3 ---
 lmdeploy/serve/async_engine.py    |  5 +++++
 lmdeploy/serve/vl_async_engine.py |  4 ++++
 lmdeploy/turbomind/turbomind.py   |  4 +++-
 lmdeploy/vl/engine.py             | 13 +++++++++++++
 5 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/pytorch/engine/engine.py b/lmdeploy/pytorch/engine/engine.py
index 694d56aa92..d4ad707c45 100644
--- a/lmdeploy/pytorch/engine/engine.py
+++ b/lmdeploy/pytorch/engine/engine.py
@@ -250,9 +250,6 @@ def close(self):
         self._seq_length_buf = None
         self._inputs = None
         torch._C._cuda_clearCublasWorkspaces()
-        torch.cuda.empty_cache()
-        import gc
-        gc.collect()
 
     def _start_loop(self):
         """start loop."""
diff --git a/lmdeploy/serve/async_engine.py b/lmdeploy/serve/async_engine.py
index 5645cbee22..3c0d8dfecd 100644
--- a/lmdeploy/serve/async_engine.py
+++ b/lmdeploy/serve/async_engine.py
@@ -187,10 +187,15 @@ def __init__(self,
         self.request_logger = RequestLogger(max_log_len)
 
     def close(self):
+        self.gens_set.clear()
         if hasattr(self, 'engine'):
             if isinstance(self.backend_config, PytorchEngineConfig):
                 self.engine.close()
             del self.engine
+        import torch
+        torch.cuda.empty_cache()
+        import gc
+        gc.collect()
 
     def _build_turbomind(
         self,
diff --git a/lmdeploy/serve/vl_async_engine.py b/lmdeploy/serve/vl_async_engine.py
index 3971e068c0..aaefbe953c 100644
--- a/lmdeploy/serve/vl_async_engine.py
+++ b/lmdeploy/serve/vl_async_engine.py
@@ -32,6 +32,10 @@ def __init__(self, model_path: str, **kwargs) -> None:
         self.vl_prompt_template = get_vl_prompt_template(
             model_path, self.chat_template, self.model_name)
 
+    def close(self):
+        self.vl_encoder.close()
+        super().close()
+
     def _convert_prompts(self,
                          prompts: Union[VLPromptType, List[Dict],
                                         List[VLPromptType], List[List[Dict]]]):
diff --git a/lmdeploy/turbomind/turbomind.py b/lmdeploy/turbomind/turbomind.py
index 0d1596fefa..f7b2c0b2b2 100644
--- a/lmdeploy/turbomind/turbomind.py
+++ b/lmdeploy/turbomind/turbomind.py
@@ -4,6 +4,7 @@
 import json
 import os.path as osp
 import sys
+import weakref
 from concurrent.futures import ThreadPoolExecutor
 from dataclasses import asdict
 from itertools import repeat
@@ -318,7 +319,8 @@ def create_instance(self, cuda_stream_id=0):
         Returns:
             TurboMindInstance: an instance of turbomind
         """
-        return TurboMindInstance(self, self.config, cuda_stream_id)
+        return TurboMindInstance(weakref.proxy(self), self.config,
+                                 cuda_stream_id)
 
 
 class TurboMindInstance:
diff --git a/lmdeploy/vl/engine.py b/lmdeploy/vl/engine.py
index 124fd537c6..9d5235817c 100644
--- a/lmdeploy/vl/engine.py
+++ b/lmdeploy/vl/engine.py
@@ -95,9 +95,20 @@ def __init__(self,
         torch.cuda.empty_cache()
         self._que: asyncio.Queue = None
         self._loop_task: asyncio.Task = None
+        self._stop = False
         if vision_config.thread_safe:
             self._create_thread_safe_task()
 
+    def close(self):
+        if self.model is not None:
+            self._stop = True
+            if self.vision_config.thread_safe:
+                self._loop_thread.join()
+            else:
+                if hasattr(self, '_loop'):
+                    self._loop.run_until_complete(self._loop_task)
+            self.model = None
+
     def _create_thread_safe_task(self):
         """thread safe loop task."""
         self._loop = asyncio.new_event_loop()
@@ -138,6 +149,8 @@ async def _forward_loop(self):
             while record.total == 0 or (self._que.qsize()
                                         and record.total < self.max_batch_size):
                 while self._que.qsize() == 0:
+                    if self._stop and record.total == 0:
+                        return
                     await asyncio.sleep(0.01)
                 item = await self._que.get()
                 record.enqueue(item[0], item[1], item[2])
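
With this change, a vision-language pipeline can release its GPU resources explicitly: VLAsyncEngine.close() stops the vision encoder's forward loop and drops the vision model, then AsyncEngine.close() empties the CUDA cache and runs the garbage collector. A minimal usage sketch; the model path and image URL below are placeholders, not part of the patch:

    from lmdeploy import pipeline
    from lmdeploy.vl import load_image

    # Placeholder model; any vision-language model lmdeploy serves works here.
    pipe = pipeline('OpenGVLab/InternVL2-2B')

    image = load_image('https://example.com/tiger.jpg')  # placeholder URL
    response = pipe(('describe this image', image))
    print(response)

    # Added by this patch: signals ImageEncoder._forward_loop to exit,
    # releases the vision model, then torch.cuda.empty_cache() / gc.collect()
    # return the freed memory to the device.
    pipe.close()

Passing weakref.proxy(self) into TurboMindInstance keeps an instance from holding a strong reference back to its parent TurboMind engine, so dropping the engine releases it promptly instead of leaving a reference cycle for the cycle collector.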