Skip to content

Commit

Permalink
Merge pull request #889 from roboflow/serialized-owlv2-model
Browse files Browse the repository at this point in the history
Serialized owlv2 model
  • Loading branch information
grzegorz-roboflow authored Dec 20, 2024
2 parents 2a4e5d3 + 4d79682 commit f6473e4
Show file tree
Hide file tree
Showing 4 changed files with 298 additions and 27 deletions.
1 change: 1 addition & 0 deletions inference/core/roboflow_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def get_roboflow_model_type(
class ModelEndpointType(Enum):
    """Roboflow API endpoint families used when fetching model artifacts."""
    ORT = "ort"
    CORE_MODEL = "core_model"
    OWLV2 = "owlv2"


@wrap_roboflow_api_errors()
Expand Down
239 changes: 216 additions & 23 deletions inference/models/owlv2/owlv2.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import hashlib
import os
import pickle
import weakref
from collections import defaultdict
from typing import Any, Dict, List, Literal, NewType, Tuple, Union
from typing import Any, Dict, List, Literal, NewType, Optional, Tuple, Union

import numpy as np
import torch
Expand All @@ -11,6 +12,8 @@
from transformers import Owlv2ForObjectDetection, Owlv2Processor
from transformers.models.owlv2.modeling_owlv2 import box_iou

from inference.core.cache.model_artifacts import save_bytes_in_cache
from inference.core.entities.requests.inference import ObjectDetectionInferenceRequest
from inference.core.entities.responses.inference import (
InferenceResponseImage,
ObjectDetectionInferenceResponse,
Expand All @@ -19,15 +22,23 @@
from inference.core.env import (
DEVICE,
MAX_DETECTIONS,
MODEL_CACHE_DIR,
OWLV2_IMAGE_CACHE_SIZE,
OWLV2_MODEL_CACHE_SIZE,
OWLV2_VERSION_ID,
)
from inference.core.exceptions import ModelArtefactError
from inference.core.models.roboflow import (
DEFAULT_COLOR_PALETTE,
RoboflowCoreModel,
RoboflowInferenceModel,
draw_detection_predictions,
)
from inference.core.roboflow_api import (
ModelEndpointType,
get_from_url,
get_roboflow_model_data,
)
from inference.core.utils.image_utils import (
ImageType,
extract_image_payload_and_type,
Expand Down Expand Up @@ -71,6 +82,26 @@ def _check_size_limit(self):
self.popitem(last=False)


class Owlv2Singleton:
    """Shares one compiled OWLv2 model per HuggingFace id.

    Instances are tracked in a ``WeakValueDictionary``, so a cached model is
    released automatically once no caller holds a reference to its wrapper.
    """

    _instances = weakref.WeakValueDictionary()

    def __new__(cls, huggingface_id: str):
        # Reuse an existing wrapper (and its loaded model) when available.
        existing = cls._instances.get(huggingface_id)
        if existing is not None:
            return existing
        instance = super().__new__(cls)
        instance.huggingface_id = huggingface_id
        # Load the model eagerly so every consumer shares the same weights.
        model = (
            Owlv2ForObjectDetection.from_pretrained(huggingface_id)
            .eval()
            .to(DEVICE)
        )
        # Compile the vision backbone; suppress dynamo errors so torch
        # versions that cannot compile it fall back to eager execution.
        torch._dynamo.config.suppress_errors = True
        model.owlv2.vision_model = torch.compile(model.owlv2.vision_model)
        instance.model = model
        cls._instances[huggingface_id] = instance
        return instance


def preprocess_image(
np_image: np.ndarray,
image_size: Tuple[int, int],
Expand Down Expand Up @@ -258,7 +289,7 @@ def hash_wrapped_training_data(wrapped_training_data: List[Dict[str, Any]]) -> H
return hash_function(pickle.dumps(just_hash_relevant_data))


class OwlV2(RoboflowCoreModel):
class OwlV2(RoboflowInferenceModel):
task_type = "object-detection"
box_format = "xywh"

Expand All @@ -273,21 +304,14 @@ def __init__(self, *args, model_id=f"owlv2/{OWLV2_VERSION_ID}", **kwargs):
self.image_std = torch.tensor(
processor.image_processor.image_std, device=DEVICE
).view(1, 3, 1, 1)
self.model = Owlv2ForObjectDetection.from_pretrained(hf_id).eval().to(DEVICE)
self.model = Owlv2Singleton(hf_id).model
self.reset_cache()

# compile forward pass of the visual backbone of the model
# NOTE that this is able to fix the manual attention implementation used in OWLv2
# so we don't have to force in flash attention by ourselves
# however that is only true if torch version 2.4 or later is used
# for torch < 2.4, this is a LOT slower and using flash attention by ourselves is faster
# this also breaks in torch < 2.1 so we supress torch._dynamo errors
torch._dynamo.config.suppress_errors = True
self.model.owlv2.vision_model = torch.compile(self.model.owlv2.vision_model)

def reset_cache(self):
# each entry should be on the order of 300*4KB, so 1000 is 400MB of CUDA memory
self.image_embed_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
# no need for limit here, as we're only storing on CPU
self.cpu_image_embed_cache = dict()
# each entry should be on the order of 10 bytes, so 1000 is 10KB
self.image_size_cache = LimitedSizeDict(size_limit=OWLV2_IMAGE_CACHE_SIZE)
# entry size will vary depending on the number of samples, but 10 should be safe
Expand Down Expand Up @@ -323,6 +347,16 @@ def download_weights(self) -> None:
# Download from huggingface
pass

def get_image_embeds(self, image_hash: Hash) -> Optional[torch.Tensor]:
    """Return cached embeddings for *image_hash*, or None on a cache miss.

    GPU-resident entries are returned as-is; entries stored in the CPU
    cache are moved onto DEVICE before being returned.
    """
    device_hit = self.image_embed_cache.get(image_hash)
    if device_hit is not None:
        return device_hit
    cpu_hit = self.cpu_image_embed_cache.get(image_hash)
    if cpu_hit is not None:
        return tuple(tensor.to(DEVICE) for tensor in cpu_hit)
    return None

def compute_image_size(
self, image: Union[np.ndarray, LazyImageRetrievalWrapper]
) -> Tuple[int, int]:
Expand All @@ -342,7 +376,7 @@ def embed_image(self, image: Union[np.ndarray, LazyImageRetrievalWrapper]) -> Ha
else:
image_hash = hash_function(image.tobytes())

if image_hash in self.image_embed_cache:
if (image_embeds := self.get_image_embeds(image_hash)) is not None:
return image_hash

np_image = (
Expand Down Expand Up @@ -402,12 +436,10 @@ def get_query_embedding(
# NOTE: for now we're handling each image seperately
query_embeds = []
for image_hash, query_boxes in query_spec.items():
try:
_objectness, image_boxes, image_class_embeds, _, _ = (
self.image_embed_cache[image_hash]
)
except KeyError as error:
raise KeyError("We didn't embed the image first!") from error
image_embeds = self.get_image_embeds(image_hash)
if image_embeds is None:
raise KeyError("We didn't embed the image first!")
_objectness, image_boxes, image_class_embeds, _, _ = image_embeds

query_boxes_tensor = torch.tensor(
query_boxes, dtype=image_boxes.dtype, device=image_boxes.device
Expand Down Expand Up @@ -438,7 +470,10 @@ def infer_from_embed(
confidence: float,
iou_threshold: float,
) -> List[Dict]:
_, image_boxes, image_class_embeds, _, _ = self.image_embed_cache[image_hash]
image_embeds = self.get_image_embeds(image_hash)
if image_embeds is None:
raise KeyError("We didn't embed the image first!")
_, image_boxes, image_class_embeds, _, _ = image_embeds
class_map, class_names = make_class_map(query_embeddings)
all_predicted_boxes, all_predicted_classes, all_predicted_scores = [], [], []
for class_name, pos_neg_embedding_dict in query_embeddings.items():
Expand Down Expand Up @@ -494,14 +529,25 @@ def infer(
self,
image: Any,
training_data: Dict,
confidence=0.99,
iou_threshold=0.3,
confidence: float = 0.99,
iou_threshold: float = 0.3,
**kwargs,
):
class_embeddings_dict = self.make_class_embeddings_dict(
training_data, iou_threshold
)
return self.infer_from_embedding_dict(
image, class_embeddings_dict, confidence, iou_threshold
)

def infer_from_embedding_dict(
self,
image: Any,
class_embeddings_dict: Dict[str, PosNegDictType],
confidence: float,
iou_threshold: float,
**kwargs,
):
if not isinstance(image, list):
images = [image]
else:
Expand All @@ -526,7 +572,10 @@ def infer(
)

def make_class_embeddings_dict(
self, training_data: List[Any], iou_threshold: float
self,
training_data: List[Any],
iou_threshold: float,
return_image_embeds: bool = False,
) -> Dict[str, PosNegDictType]:
wrapped_training_data = [
{
Expand All @@ -547,9 +596,16 @@ def make_class_embeddings_dict(
class_embeddings_dict = defaultdict(lambda: {"positive": [], "negative": []})

bool_to_literal = {True: "positive", False: "negative"}
return_image_embeds_dict = dict()
for train_image in wrapped_training_data:
# grab and embed image
image_hash = self.embed_image(train_image["image"])
if return_image_embeds:
if (image_embeds := self.get_image_embeds(image_hash)) is None:
raise KeyError("We didn't embed the image first!")
return_image_embeds_dict[image_hash] = tuple(
t.to("cpu") for t in image_embeds
)

# grab and normalize box prompts for this image
image_size = self.compute_image_size(train_image["image"])
Expand Down Expand Up @@ -586,6 +642,8 @@ def make_class_embeddings_dict(
}

self.class_embeddings_cache[wrapped_training_data_hash] = class_embeddings_dict
if return_image_embeds:
return class_embeddings_dict, return_image_embeds_dict

return class_embeddings_dict

Expand Down Expand Up @@ -614,3 +672,138 @@ def make_response(self, predictions, image_sizes, class_names):
for ind, batch_predictions in enumerate(predictions)
]
return responses


class SerializedOwlV2(RoboflowInferenceModel):
    """OWLv2 detector restored from serialized training embeddings.

    Rather than recomputing class embeddings from raw training images at
    load time, this model reads a ``weights.pt`` blob (written by
    :meth:`serialize_training_data` / :meth:`save_model`) bundling the
    class-embedding dict, the CPU image-embedding cache, and the backing
    HuggingFace / Roboflow model ids.
    """

    task_type = "object-detection"
    box_format = "xywh"
    # Filename of the serialized blob inside the cache / save directory.
    weights_file_path = "weights.pt"

    @classmethod
    def serialize_training_data(
        cls,
        training_data: List[Any],
        hf_id: str = f"google/{OWLV2_VERSION_ID}",
        iou_threshold: float = 0.3,
        save_dir: str = os.path.join(MODEL_CACHE_DIR, "owl-v2-serialized-data"),
    ):
        """Embed *training_data* with a fresh OwlV2 and serialize the result.

        Returns the path of the written weights file.
        """
        roboflow_id = hf_id.replace("google/", "owlv2/")
        embedder = OwlV2(model_id=roboflow_id)
        embeddings_dict, image_embeds = embedder.make_class_embeddings_dict(
            training_data, iou_threshold, return_image_embeds=True
        )
        return cls.save_model(
            hf_id, roboflow_id, embeddings_dict, image_embeds, save_dir
        )

    @classmethod
    def save_model(
        cls,
        hf_id: str,
        roboflow_id: str,
        train_data_dict: Dict,
        image_embeds: Dict,
        save_dir: str,
    ):
        """Write ids, embeddings, and class names to ``weights.pt``.

        Returns the path of the written file.
        """
        # Use a distinct name for the serialized payload so it does not
        # shadow the `train_data_dict` parameter it is built from.
        payload = {
            "huggingface_id": hf_id,
            "train_data_dict": train_data_dict,
            "class_names": list(train_data_dict.keys()),
            "roboflow_id": roboflow_id,
            "image_embeds": image_embeds,
        }
        os.makedirs(save_dir, exist_ok=True)
        destination = os.path.join(save_dir, cls.weights_file_path)
        torch.save(payload, destination)
        return destination

    def infer_from_request(
        self,
        request: ObjectDetectionInferenceRequest,
    ) -> Union[
        List[ObjectDetectionInferenceResponse], ObjectDetectionInferenceResponse
    ]:
        # Thin passthrough; all request handling lives in the base class.
        return super().infer_from_request(request)

    def __init__(self, model_id, *args, **kwargs):
        super().__init__(model_id, *args, **kwargs)
        self.get_model_artifacts()

    def get_infer_bucket_file_list(self):
        # No auxiliary artifacts beyond the single weights blob.
        return []

    def download_model_artefacts_from_s3(self):
        raise NotImplementedError("Owlv2 not currently supported on hosted inference")

    def download_model_artifacts_from_roboflow_api(self):
        """Fetch the serialized weights blob via the Roboflow API and cache it."""
        api_data = get_roboflow_model_data(
            api_key=self.api_key,
            model_id=self.endpoint,
            endpoint_type=ModelEndpointType.OWLV2,
            device_id=self.device_id,
        )["owlv2"]
        if "model" not in api_data:
            raise ModelArtefactError(
                "Could not find `model` key in roboflow API model description response."
            )
        weights_response = get_from_url(api_data["model"], json_response=False)
        save_bytes_in_cache(
            content=weights_response.content,
            file=self.weights_file,
            model_id=self.endpoint,
        )

    def load_model_artifacts_from_cache(self):
        """Load the serialized blob and hydrate a shared OwlV2 with its caches."""
        # Force tensors onto the CPU when no accelerator is configured;
        # otherwise defer to torch.load's default placement.
        map_location = "cpu" if DEVICE == "cpu" else None
        self.model_data = torch.load(
            self.cache_file(self.weights_file), map_location=map_location
        )
        self.class_names = self.model_data["class_names"]
        self.train_data_dict = self.model_data["train_data_dict"]
        self.huggingface_id = self.model_data["huggingface_id"]
        self.roboflow_id = self.model_data["roboflow_id"]
        # each model can have its own OwlV2 instance because we use a singleton
        self.owlv2 = OwlV2(model_id=self.roboflow_id)
        self.owlv2.cpu_image_embed_cache = self.model_data["image_embeds"]

    @property
    def weights_file(self):
        # The cached artifact name is fixed for all serialized OWLv2 models.
        return self.weights_file_path

    def infer(
        self, image, confidence: float = 0.99, iou_threshold: float = 0.3, **kwargs
    ):
        """Run detection on *image* using the deserialized class embeddings."""
        return self.owlv2.infer_from_embedding_dict(
            image,
            self.train_data_dict,
            confidence=confidence,
            iou_threshold=iou_threshold,
            **kwargs,
        )

    def draw_predictions(
        self,
        inference_request: ObjectDetectionInferenceRequest,
        inference_response: ObjectDetectionInferenceResponse,
    ):
        """Delegate visualization to the wrapped OwlV2 instance."""
        return self.owlv2.draw_predictions(
            inference_request,
            inference_response,
        )

    def save_small_model_without_image_embeds(
        self, save_dir: str = os.path.join(MODEL_CACHE_DIR, "owl-v2-serialized-data")
    ):
        """Re-serialize without the image-embedding cache to shrink the file."""
        # Clearing the cache both shrinks the saved blob and frees memory.
        self.owlv2.cpu_image_embed_cache = dict()
        return self.save_model(
            self.huggingface_id,
            self.roboflow_id,
            self.train_data_dict,
            self.owlv2.cpu_image_embed_cache,
            save_dir,
        )
3 changes: 2 additions & 1 deletion inference/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,9 +314,10 @@
pass

try:
    # OWLv2 is an optional extra: transformers/torch may be absent, in which
    # case this import fails and the model types are simply not registered.
    from inference.models.owlv2.owlv2 import OwlV2, SerializedOwlV2

    ROBOFLOW_MODEL_TYPES[("object-detection", "owlv2")] = OwlV2
    ROBOFLOW_MODEL_TYPES[("object-detection", "owlv2-finetuned")] = SerializedOwlV2
except Exception:
    # A bare `except:` would also swallow KeyboardInterrupt/SystemExit;
    # catching Exception keeps the deliberate best-effort registration
    # without hiding interpreter-level signals.
    pass

Expand Down
Loading

0 comments on commit f6473e4

Please sign in to comment.