Skip to content

Commit

Permalink
release vision model
Browse files Browse the repository at this point in the history
  • Loading branch information
irexyc committed Oct 11, 2024
1 parent 4d35004 commit 3be27f8
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 4 deletions.
3 changes: 0 additions & 3 deletions lmdeploy/pytorch/engine/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,6 @@ def close(self):
self._seq_length_buf = None
self._inputs = None
torch._C._cuda_clearCublasWorkspaces()
torch.cuda.empty_cache()
import gc
gc.collect()

def _start_loop(self):
"""start loop."""
Expand Down
5 changes: 5 additions & 0 deletions lmdeploy/serve/async_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,15 @@ def __init__(self,
self.request_logger = RequestLogger(max_log_len)

def close(self):
    """Release the inference engine and reclaim GPU memory.

    Safe to call when the engine was never built (guarded by hasattr).
    """
    # Forget all pooled generator instances first.
    self.gens_set.clear()
    if hasattr(self, 'engine'):
        # Only the PyTorch backend exposes an explicit close(); the other
        # backend is torn down by dropping the reference below.
        if isinstance(self.backend_config, PytorchEngineConfig):
            self.engine.close()
        del self.engine
        # Local imports keep torch/gc out of module import time. After the
        # engine object is gone, collect reference cycles and release the
        # CUDA caching allocator's unused blocks so GPU memory actually
        # returns to the driver.
        import torch
        torch.cuda.empty_cache()
        import gc
        gc.collect()

def _build_turbomind(
self,
Expand Down
4 changes: 4 additions & 0 deletions lmdeploy/serve/vl_async_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ def __init__(self, model_path: str, **kwargs) -> None:
self.vl_prompt_template = get_vl_prompt_template(
model_path, self.chat_template, self.model_name)

def close(self):
    """Shut down the vision encoder before closing the base engine."""
    encoder = self.vl_encoder
    encoder.close()
    super().close()

def _convert_prompts(self,
prompts: Union[VLPromptType, List[Dict],
List[VLPromptType], List[List[Dict]]]):
Expand Down
4 changes: 3 additions & 1 deletion lmdeploy/turbomind/turbomind.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import os.path as osp
import sys
import weakref
from concurrent.futures import ThreadPoolExecutor
from dataclasses import asdict
from itertools import repeat
Expand Down Expand Up @@ -318,7 +319,8 @@ def create_instance(self, cuda_stream_id=0):
Returns:
TurboMindInstance: an instance of turbomind
"""
return TurboMindInstance(self, self.config, cuda_stream_id)
return TurboMindInstance(weakref.proxy(self), self.config,
cuda_stream_id)


class TurboMindInstance:
Expand Down
13 changes: 13 additions & 0 deletions lmdeploy/vl/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,20 @@ def __init__(self,
torch.cuda.empty_cache()
self._que: asyncio.Queue = None
self._loop_task: asyncio.Task = None
self._stop = False
if vision_config.thread_safe:
self._create_thread_safe_task()

def close(self):
    """Stop the background forward loop and release the vision model.

    Idempotent: a second call (or a call before the model was ever
    loaded) is a no-op because ``self.model`` is already ``None``.
    """
    if self.model is None:
        return
    # Signal _forward_loop to exit once its queue drains.
    self._stop = True
    if self.vision_config.thread_safe:
        # The event loop runs in a dedicated thread; wait for it to end.
        self._loop_thread.join()
    elif hasattr(self, '_loop'):
        # Loop lives on the current thread; drive the pending task to
        # completion before dropping the model.
        self._loop.run_until_complete(self._loop_task)
    self.model = None

def _create_thread_safe_task(self):
"""thread safe loop task."""
self._loop = asyncio.new_event_loop()
Expand Down Expand Up @@ -138,6 +149,8 @@ async def _forward_loop(self):
while record.total == 0 or (self._que.qsize() and
record.total < self.max_batch_size):
while self._que.qsize() == 0:
if self._stop and record.total == 0:
return
await asyncio.sleep(0.01)
item = await self._que.get()
record.enqueue(item[0], item[1], item[2])
Expand Down

0 comments on commit 3be27f8

Please sign in to comment.