[HWORKS-1885] Add vllm-openai deployment and support for config files
javierdlrm committed Jan 9, 2025
1 parent 84abef2 commit 16145a7
Showing 9 changed files with 49 additions and 20 deletions.
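
Taken together, the changes below add a `config_file` option across the deployment API and route LLM inference through the vLLM OpenAI-compatible server instead of the `predict` method. A minimal usage sketch, assuming a registered model object `my_model` from the hsml model registry; the deployment and file names are illustrative and not part of this commit:

```python
# Sketch only: "my_model" is a model registered in the model registry;
# "vllm_config.yaml" is an illustrative server config file in the project.
deployment = my_model.deploy(
    name="myllm",
    config_file="vllm_config.yaml",  # new parameter introduced by this commit
)
deployment.start()
```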
4 changes: 0 additions & 4 deletions python/hsml/core/serving_api.py
@@ -29,7 +29,6 @@
InferOutput,
InferRequest,
)
- from hsml.constants import ARTIFACT_VERSION
from hsml.constants import INFERENCE_ENDPOINTS as IE


@@ -419,7 +418,4 @@ def _get_hopsworks_inference_path(self, project_id: int, deployment_instance):
]

def _get_istio_inference_path(self, deployment_instance):
if deployment_instance.model_server == "VLLM":
return ["openai", "v1", "completions"]

return ["v1", "models", deployment_instance.name + ":predict"]
9 changes: 9 additions & 0 deletions python/hsml/deployment.py
@@ -415,6 +415,15 @@ def script_file(self):
def script_file(self, script_file: str):
self._predictor.script_file = script_file

+ @property
+ def config_file(self):
+ """Config file passed to the predictor."""
+ return self._predictor.config_file
+
+ @config_file.setter
+ def config_file(self, config_file: str):
+ self._predictor.config_file = config_file
+
@property
def resources(self):
"""Resource configuration for the predictor."""
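With the new property, a deployment's server config file can be read or updated like any other predictor attribute. A small sketch; the deployment object and file name are illustrative:

```python
# Sketch: "deployment" is an existing Deployment object; the file name is
# illustrative and must already exist in the project.
deployment.config_file = "vllm_config.yaml"
print(deployment.config_file)  # -> "vllm_config.yaml"
```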
8 changes: 5 additions & 3 deletions python/hsml/engine/serving_engine.py
@@ -563,11 +563,13 @@ def predict(
inputs: Union[Dict, List[Dict]],
):
# validate user-provided payload
if deployment_instance.model_server != "VLLM":
self._validate_inference_payload(
deployment_instance.api_protocol, data, inputs
if deployment_instance.model_server == PREDICTOR.MODEL_SERVER_VLLM:
raise ModelServingException(
"Inference requests to LLM deployments are not supported by the `predict` method. Please, use any OpenAI API-compatible client instead."
)

self._validate_inference_payload(deployment_instance.api_protocol, data, inputs)

# build inference payload based on API protocol
payload = self._build_inference_payload(
deployment_instance.api_protocol, data, inputs
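Since `predict` now rejects vLLM deployments outright, inference requests are expected to go through an OpenAI API-compatible client. A hedged sketch with the official `openai` package; the base URL, path, and API key handling are assumptions, since the exposed endpoint is not shown in this diff:

```python
from openai import OpenAI

# All connection details are placeholders: the real host, path, and API key
# come from the deployment, not from this commit.
client = OpenAI(
    base_url="https://<serving-host>/openai/v1",
    api_key="<api-key>",
)

completion = client.completions.create(
    model="myllm",  # deployment name, illustrative
    prompt="What is a feature store?",
    max_tokens=64,
)
print(completion.choices[0].text)
```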
3 changes: 3 additions & 0 deletions python/hsml/model.py
@@ -171,6 +171,7 @@ def deploy(
artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
serving_tool: Optional[str] = None,
script_file: Optional[str] = None,
+ config_file: Optional[str] = None,
resources: Optional[Union[PredictorResources, dict]] = None,
inference_logger: Optional[Union[InferenceLogger, dict]] = None,
inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -202,6 +203,7 @@
or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
serving_tool: Serving tool used to deploy the model server.
script_file: Path to a custom predictor script implementing the Predict class.
+ config_file: Server configuration file to be passed to the model deployment.
resources: Resources to be allocated for the predictor.
inference_logger: Inference logger configuration.
inference_batcher: Inference batcher configuration.
@@ -223,6 +225,7 @@
artifact_version=artifact_version,
serving_tool=serving_tool,
script_file=script_file,
+ config_file=config_file,
resources=resources,
inference_logger=inference_logger,
inference_batcher=inference_batcher,
3 changes: 3 additions & 0 deletions python/hsml/model_serving.py
@@ -160,6 +160,7 @@ def create_predictor(
artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
serving_tool: Optional[str] = None,
script_file: Optional[str] = None,
+ config_file: Optional[str] = None,
resources: Optional[Union[PredictorResources, dict]] = None,
inference_logger: Optional[Union[InferenceLogger, dict, str]] = None,
inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -197,6 +198,7 @@
or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
serving_tool: Serving tool used to deploy the model server.
script_file: Path to a custom predictor script implementing the Predict class.
+ config_file: Server configuration file to be passed to the model deployment.
resources: Resources to be allocated for the predictor.
inference_logger: Inference logger configuration.
inference_batcher: Inference batcher configuration.
@@ -216,6 +218,7 @@
artifact_version=artifact_version,
serving_tool=serving_tool,
script_file=script_file,
+ config_file=config_file,
resources=resources,
inference_logger=inference_logger,
inference_batcher=inference_batcher,
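The same parameter also flows through the lower-level `create_predictor` API. A sketch assuming `ms` is a Model Serving handle obtained from the project and `my_model` a registered model:

```python
# Sketch: "ms" is assumed to be a ModelServing handle; the config file name
# is illustrative.
predictor = ms.create_predictor(
    my_model,
    config_file="vllm_config.yaml",
)
deployment = predictor.deploy()
```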
23 changes: 18 additions & 5 deletions python/hsml/predictor.py
@@ -48,6 +48,7 @@ def __init__(
model_server: str,
serving_tool: Optional[str] = None,
script_file: Optional[str] = None,
+ config_file: Optional[str] = None,
resources: Optional[Union[PredictorResources, dict, Default]] = None, # base
inference_logger: Optional[
Union[InferenceLogger, dict, Default]
@@ -87,6 +88,7 @@ def __init__(
self._artifact_version = artifact_version
self._serving_tool = serving_tool
self._model_server = model_server
+ self._config_file = config_file
self._id = id
self._description = description
self._created_at = created_at
@@ -167,12 +169,9 @@ def _validate_serving_tool(cls, serving_tool):

@classmethod
def _validate_script_file(cls, model_framework, script_file):
- if script_file is None and (
- model_framework == MODEL.FRAMEWORK_PYTHON
- or model_framework == MODEL.FRAMEWORK_LLM
- ):
+ if script_file is None and (model_framework == MODEL.FRAMEWORK_PYTHON):
raise ValueError(
- "Predictor scripts are required in deployments for custom Python models and LLMs."
+ "Predictor scripts are required in deployments for custom Python models."
)

@classmethod
@@ -273,6 +272,9 @@ def extract_fields_from_json(cls, json_decamelized):
kwargs["script_file"] = util.extract_field_from_json(
json_decamelized, "predictor"
)
kwargs["config_file"] = util.extract_field_from_json(
json_decamelized, "config_file"
)
kwargs["resources"] = PredictorResources.from_json(json_decamelized)
kwargs["inference_logger"] = InferenceLogger.from_json(json_decamelized)
kwargs["inference_batcher"] = InferenceBatcher.from_json(json_decamelized)
@@ -311,6 +313,7 @@ def to_dict(self):
"modelServer": self._model_server,
"servingTool": self._serving_tool,
"predictor": self._script_file,
"configFile": self._config_file,
"apiProtocol": self._api_protocol,
"projectNamespace": self._project_namespace,
}
@@ -442,6 +445,16 @@ def script_file(self, script_file: str):
self._script_file = script_file
self._artifact_version = ARTIFACT_VERSION.CREATE

+ @property
+ def config_file(self):
+ """Server config file to be passed to the model deployment."""
+ return self._config_file
+
+ @config_file.setter
+ def config_file(self, config_file: str):
+ self._config_file = config_file
+ self._artifact_version = ARTIFACT_VERSION.CREATE
+

[Check failure on line 455 in python/hsml/predictor.py, GitHub Actions / Lint and Stylecheck: Ruff (W291) python/hsml/predictor.py:455:40: Trailing whitespace]

@property
def inference_logger(self):
"""Configuration of the inference logger attached to this predictor."""
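As `to_dict` above shows, the new field is serialized in camelCase for the backend. An abbreviated sketch of the resulting payload; the values are illustrative, not taken from this commit:

```python
# Abbreviated, illustrative payload produced by Predictor.to_dict();
# only the fields relevant to this commit are shown.
payload = {
    "modelServer": "VLLM",
    "servingTool": "KSERVE",
    "predictor": None,  # no predictor script for vLLM-OpenAI
    "configFile": "vllm_config.yaml",
    "apiProtocol": "REST",
}
```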
3 changes: 3 additions & 0 deletions python/tests/fixtures/predictor_fixtures.json
@@ -19,6 +19,7 @@
"artifact_version": 2,
"predictor": "predictor_file",
"transformer": "transformer_file",
"config_file": "config_file",
"requested_instances": 1,
"requested_transformer_instances": 1,
"predictor_resources": {
@@ -74,6 +75,7 @@
"api_protocol": "REST",
"artifact_version": 2,
"predictor": "predictor_file",
"config_file": "config_file",
"transformer": "transformer_file",
"requested_instances": 1,
"requested_transformer_instances": 1,
@@ -117,6 +119,7 @@
"api_protocol": "REST",
"artifact_version": 3,
"predictor": "predictor_file",
"config_file": "config_file",
"transformer": "transformer_file",
"requested_instances": 1,
"requested_transformer_instances": 1,
2 changes: 2 additions & 0 deletions python/tests/test_model.py
@@ -211,6 +211,7 @@ def test_deploy(self, mocker, backend_fixtures):
artifact_version=p_json["artifact_version"],
serving_tool=p_json["serving_tool"],
script_file=p_json["predictor"],
+ config_file=p_json["config_file"],
resources=resources,
inference_logger=inference_logger,
inference_batcher=inference_batcher,
@@ -227,6 +228,7 @@
artifact_version=p_json["artifact_version"],
serving_tool=p_json["serving_tool"],
script_file=p_json["predictor"],
+ config_file=p_json["config_file"],
resources=resources,
inference_logger=inference_logger,
inference_batcher=inference_batcher,
14 changes: 6 additions & 8 deletions python/tests/test_predictor.py
@@ -80,6 +80,7 @@ def test_from_response_json_singleton(self, mocker, backend_fixtures):
assert p.artifact_version == p_json["artifact_version"]
assert p.environment == p_json["environment_dto"]["name"]
assert p.script_file == p_json["predictor"]
+ assert p.config_file == p_json["config_file"]
assert isinstance(p.resources, resources.PredictorResources)
assert isinstance(p.transformer, transformer.Transformer)
assert p.transformer.script_file == p_json["transformer"]
@@ -123,6 +124,7 @@ def test_from_response_json_list(self, mocker, backend_fixtures):
assert p.environment == p_json["environment_dto"]["name"]
assert p.artifact_version == p_json["artifact_version"]
assert p.script_file == p_json["predictor"]
+ assert p.config_file == p_json["config_file"]
assert isinstance(p.resources, resources.PredictorResources)
assert isinstance(p.transformer, transformer.Transformer)
assert p.transformer.script_file == p_json["transformer"]
@@ -161,6 +163,7 @@ def test_from_response_json_single(self, mocker, backend_fixtures):
assert p.environment == p_json["environment_dto"]["name"]
assert p.artifact_version == p_json["artifact_version"]
assert p.script_file == p_json["predictor"]
+ assert p.config_file == p_json["config_file"]
assert isinstance(p.resources, resources.PredictorResources)
assert isinstance(p.transformer, transformer.Transformer)
assert p.transformer.script_file == p_json["transformer"]
@@ -213,6 +216,7 @@ def test_constructor(self, mocker, backend_fixtures):
environment=p_json["environment_dto"]["name"],
artifact_version=p_json["artifact_version"],
script_file=p_json["predictor"],
+ config_file=p_json["config_file"],
resources=p_json["predictor_resources"],
transformer={
"script_file": p_json["transformer"],
@@ -241,6 +245,7 @@ def test_constructor(self, mocker, backend_fixtures):
assert p.environment == p_json["environment_dto"]["name"]
assert p.artifact_version == p_json["artifact_version"]
assert p.script_file == p_json["predictor"]
+ assert p.config_file == p_json["config_file"]
assert isinstance(p.resources, resources.PredictorResources)
assert isinstance(p.transformer, transformer.Transformer)
assert p.transformer.script_file == p_json["transformer"]
@@ -340,14 +345,6 @@ def test_validate_script_file_py_none(self):
# Assert
assert "Predictor scripts are required" in str(e_info.value)

- def test_validate_script_file_llm_none(self):
- # Act
- with pytest.raises(ValueError) as e_info:
- _ = predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_LLM, None)
-
- # Assert
- assert "Predictor scripts are required" in str(e_info.value)
-
def test_validate_script_file_tf_script_file(self):
# Act
predictor.Predictor._validate_script_file(
@@ -659,6 +656,7 @@ def extract_fields_from_json(self, mocker, backend_fixtures):
assert kwargs["model_server"] == p_json["model_server"]
assert kwargs["serving_tool"] == p_json["serving_tool"]
assert kwargs["script_file"] == p_json["predictor"]
assert kwargs["config_file"] == p_json["config_file"]
assert isinstance(kwargs["resources"], resources.PredictorResources)
assert isinstance(kwargs["inference_logger"], inference_logger.InferenceLogger)
assert kwargs["inference_logger"].mode == p_json["inference_logging"]
