From 16145a786413926f6976f2714c175adbe9d5c1a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Javier=20de=20la=20R=C3=BAa=20Mart=C3=ADnez?=
Date: Thu, 9 Jan 2025 09:51:21 +0100
Subject: [PATCH] [HWORKS-1885] Add vllm-openai deployment and support for
 config files

---
 python/hsml/core/serving_api.py               |  4 ----
 python/hsml/deployment.py                     |  9 ++++++++
 python/hsml/engine/serving_engine.py          |  8 ++++---
 python/hsml/model.py                          |  3 +++
 python/hsml/model_serving.py                  |  3 +++
 python/hsml/predictor.py                      | 23 +++++++++++++++----
 python/tests/fixtures/predictor_fixtures.json |  3 +++
 python/tests/test_model.py                    |  2 ++
 python/tests/test_predictor.py                | 14 +++++------
 9 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/python/hsml/core/serving_api.py b/python/hsml/core/serving_api.py
index 9a124465d..22d21925f 100644
--- a/python/hsml/core/serving_api.py
+++ b/python/hsml/core/serving_api.py
@@ -29,7 +29,6 @@
     InferOutput,
     InferRequest,
 )
-from hsml.constants import ARTIFACT_VERSION
 from hsml.constants import INFERENCE_ENDPOINTS as IE
 
 
@@ -419,7 +418,4 @@ def _get_hopsworks_inference_path(self, project_id: int, deployment_instance):
         ]
 
     def _get_istio_inference_path(self, deployment_instance):
-        if deployment_instance.model_server == "VLLM":
-            return ["openai", "v1", "completions"]
-
         return ["v1", "models", deployment_instance.name + ":predict"]
diff --git a/python/hsml/deployment.py b/python/hsml/deployment.py
index f23eec2ed..fea78b359 100644
--- a/python/hsml/deployment.py
+++ b/python/hsml/deployment.py
@@ -415,6 +415,15 @@ def script_file(self):
     def script_file(self, script_file: str):
         self._predictor.script_file = script_file
 
+    @property
+    def config_file(self):
+        """Config file passed to the predictor."""
+        return self._predictor.config_file
+
+    @config_file.setter
+    def config_file(self, config_file: str):
+        self._predictor.config_file = config_file
+
     @property
     def resources(self):
         """Resource configuration for the predictor."""
diff --git a/python/hsml/engine/serving_engine.py b/python/hsml/engine/serving_engine.py
index 164ac7504..6f11616ca 100644
--- a/python/hsml/engine/serving_engine.py
+++ b/python/hsml/engine/serving_engine.py
@@ -563,11 +563,13 @@ def predict(
         inputs: Union[Dict, List[Dict]],
     ):
         # validate user-provided payload
-        if deployment_instance.model_server != "VLLM":
-            self._validate_inference_payload(
-                deployment_instance.api_protocol, data, inputs
+        if deployment_instance.model_server == PREDICTOR.MODEL_SERVER_VLLM:
+            raise ModelServingException(
+                "Inference requests to LLM deployments are not supported by the `predict` method. Please use an OpenAI API-compatible client instead."
             )
 
+        self._validate_inference_payload(deployment_instance.api_protocol, data, inputs)
+
         # build inference payload based on API protocol
         payload = self._build_inference_payload(
             deployment_instance.api_protocol, data, inputs
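
Together, the removed Istio route and the new guard in `predict()` mean LLM
deployments are no longer called through HSML itself; as the exception says,
an OpenAI API-compatible client should be used instead. A minimal client
sketch, where the base URL, API key, and model name are illustrative
assumptions rather than values defined by this patch:

    # Requires the `openai` package; endpoint details below are hypothetical.
    from openai import OpenAI

    client = OpenAI(
        base_url="https://<istio-host>/openai/v1",  # hypothetical endpoint
        api_key="<hopsworks-api-key>",              # hypothetical credential
    )

    completion = client.completions.create(
        model="<deployment-name>",  # hypothetical model identifier
        prompt="What is feature serving?",
    )
    print(completion.choices[0].text)
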
diff --git a/python/hsml/model.py b/python/hsml/model.py
index a545fa5ea..3978df471 100644
--- a/python/hsml/model.py
+++ b/python/hsml/model.py
@@ -171,6 +171,7 @@ def deploy(
         artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
         serving_tool: Optional[str] = None,
         script_file: Optional[str] = None,
+        config_file: Optional[str] = None,
         resources: Optional[Union[PredictorResources, dict]] = None,
         inference_logger: Optional[Union[InferenceLogger, dict]] = None,
         inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -202,6 +203,7 @@ def deploy(
                 or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
             serving_tool: Serving tool used to deploy the model server.
             script_file: Path to a custom predictor script implementing the Predict class.
+            config_file: Server configuration file to be passed to the model deployment.
             resources: Resources to be allocated for the predictor.
             inference_logger: Inference logger configuration.
             inference_batcher: Inference batcher configuration.
@@ -223,6 +225,7 @@ def deploy(
             artifact_version=artifact_version,
             serving_tool=serving_tool,
             script_file=script_file,
+            config_file=config_file,
             resources=resources,
             inference_logger=inference_logger,
             inference_batcher=inference_batcher,
diff --git a/python/hsml/model_serving.py b/python/hsml/model_serving.py
index d448193aa..71e268ffb 100644
--- a/python/hsml/model_serving.py
+++ b/python/hsml/model_serving.py
@@ -160,6 +160,7 @@ def create_predictor(
         artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
         serving_tool: Optional[str] = None,
         script_file: Optional[str] = None,
+        config_file: Optional[str] = None,
         resources: Optional[Union[PredictorResources, dict]] = None,
         inference_logger: Optional[Union[InferenceLogger, dict, str]] = None,
         inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -197,6 +198,7 @@ def create_predictor(
                 or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
             serving_tool: Serving tool used to deploy the model server.
             script_file: Path to a custom predictor script implementing the Predict class.
+            config_file: Server configuration file to be passed to the model deployment.
             resources: Resources to be allocated for the predictor.
             inference_logger: Inference logger configuration.
             inference_batcher: Inference batcher configuration.
@@ -216,6 +218,7 @@ def create_predictor(
             artifact_version=artifact_version,
             serving_tool=serving_tool,
             script_file=script_file,
+            config_file=config_file,
             resources=resources,
             inference_logger=inference_logger,
             inference_batcher=inference_batcher,
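
With `config_file` plumbed through both `Model.deploy()` and
`ModelServing.create_predictor()`, a server configuration file can be attached
when deploying a model. A sketch of the `deploy()` call site, where the model
name, deployment name, and file name are illustrative:

    import hopsworks

    project = hopsworks.login()
    mr = project.get_model_registry()

    model = mr.get_model("llama", version=1)  # hypothetical registered model
    deployment = model.deploy(
        name="llamadeployment",
        config_file="config.yaml",  # server config passed to the deployment
    )
    deployment.start()
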
diff --git a/python/hsml/predictor.py b/python/hsml/predictor.py
index 236b7cb20..88cb8fefc 100644
--- a/python/hsml/predictor.py
+++ b/python/hsml/predictor.py
@@ -48,6 +48,7 @@ def __init__(
         model_server: str,
         serving_tool: Optional[str] = None,
         script_file: Optional[str] = None,
+        config_file: Optional[str] = None,
         resources: Optional[Union[PredictorResources, dict, Default]] = None,  # base
         inference_logger: Optional[
             Union[InferenceLogger, dict, Default]
@@ -87,6 +88,7 @@ def __init__(
         self._artifact_version = artifact_version
         self._serving_tool = serving_tool
         self._model_server = model_server
+        self._config_file = config_file
         self._id = id
         self._description = description
         self._created_at = created_at
@@ -167,12 +169,9 @@ def _validate_serving_tool(cls, serving_tool):
 
     @classmethod
     def _validate_script_file(cls, model_framework, script_file):
-        if script_file is None and (
-            model_framework == MODEL.FRAMEWORK_PYTHON
-            or model_framework == MODEL.FRAMEWORK_LLM
-        ):
+        if script_file is None and model_framework == MODEL.FRAMEWORK_PYTHON:
             raise ValueError(
-                "Predictor scripts are required in deployments for custom Python models and LLMs."
+                "Predictor scripts are required in deployments for custom Python models."
             )
 
     @classmethod
@@ -273,6 +272,9 @@ def extract_fields_from_json(cls, json_decamelized):
         kwargs["script_file"] = util.extract_field_from_json(
             json_decamelized, "predictor"
         )
+        kwargs["config_file"] = util.extract_field_from_json(
+            json_decamelized, "config_file"
+        )
         kwargs["resources"] = PredictorResources.from_json(json_decamelized)
         kwargs["inference_logger"] = InferenceLogger.from_json(json_decamelized)
         kwargs["inference_batcher"] = InferenceBatcher.from_json(json_decamelized)
@@ -311,6 +313,7 @@ def to_dict(self):
             "modelServer": self._model_server,
             "servingTool": self._serving_tool,
             "predictor": self._script_file,
+            "configFile": self._config_file,
             "apiProtocol": self._api_protocol,
             "projectNamespace": self._project_namespace,
         }
@@ -442,6 +445,16 @@ def script_file(self, script_file: str):
         self._script_file = script_file
         self._artifact_version = ARTIFACT_VERSION.CREATE
 
+    @property
+    def config_file(self):
+        """Server config file to be passed to the model deployment."""
+        return self._config_file
+
+    @config_file.setter
+    def config_file(self, config_file: str):
+        self._config_file = config_file
+        self._artifact_version = ARTIFACT_VERSION.CREATE
+
     @property
     def inference_logger(self):
         """Configuration of the inference logger attached to this predictor."""
diff --git a/python/tests/fixtures/predictor_fixtures.json b/python/tests/fixtures/predictor_fixtures.json
index b90eaf6de..702787050 100644
--- a/python/tests/fixtures/predictor_fixtures.json
+++ b/python/tests/fixtures/predictor_fixtures.json
@@ -19,6 +19,7 @@
           "artifact_version": 2,
           "predictor": "predictor_file",
           "transformer": "transformer_file",
+          "config_file": "config_file",
           "requested_instances": 1,
           "requested_transformer_instances": 1,
           "predictor_resources": {
@@ -74,6 +75,7 @@
           "api_protocol": "REST",
           "artifact_version": 2,
           "predictor": "predictor_file",
+          "config_file": "config_file",
           "transformer": "transformer_file",
           "requested_instances": 1,
           "requested_transformer_instances": 1,
@@ -117,6 +119,7 @@
           "api_protocol": "REST",
           "artifact_version": 3,
           "predictor": "predictor_file",
+          "config_file": "config_file",
           "transformer": "transformer_file",
           "requested_instances": 1,
           "requested_transformer_instances": 1,
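
Note that, mirroring `script_file`, assigning `config_file` resets the
artifact version to `ARTIFACT_VERSION.CREATE`, so a fresh artifact is
packaged the next time the deployment is saved. A sketch, where `deployment`
is an existing Deployment object and the file name is illustrative:

    from hsml.constants import ARTIFACT_VERSION

    deployment.config_file = "config_v2.yaml"  # hypothetical updated config
    assert deployment.artifact_version == ARTIFACT_VERSION.CREATE
    deployment.save()  # persists the change; a new artifact version is created
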
diff --git a/python/tests/test_model.py b/python/tests/test_model.py
index b153b5742..c34e5b6f0 100644
--- a/python/tests/test_model.py
+++ b/python/tests/test_model.py
@@ -211,6 +211,7 @@ def test_deploy(self, mocker, backend_fixtures):
             artifact_version=p_json["artifact_version"],
             serving_tool=p_json["serving_tool"],
             script_file=p_json["predictor"],
+            config_file=p_json["config_file"],
             resources=resources,
             inference_logger=inference_logger,
             inference_batcher=inference_batcher,
@@ -227,6 +228,7 @@ def test_deploy(self, mocker, backend_fixtures):
             artifact_version=p_json["artifact_version"],
             serving_tool=p_json["serving_tool"],
             script_file=p_json["predictor"],
+            config_file=p_json["config_file"],
             resources=resources,
             inference_logger=inference_logger,
             inference_batcher=inference_batcher,
diff --git a/python/tests/test_predictor.py b/python/tests/test_predictor.py
index 166666baf..ed3759c2f 100644
--- a/python/tests/test_predictor.py
+++ b/python/tests/test_predictor.py
@@ -80,6 +80,7 @@ def test_from_response_json_singleton(self, mocker, backend_fixtures):
         assert p.artifact_version == p_json["artifact_version"]
         assert p.environment == p_json["environment_dto"]["name"]
         assert p.script_file == p_json["predictor"]
+        assert p.config_file == p_json["config_file"]
         assert isinstance(p.resources, resources.PredictorResources)
         assert isinstance(p.transformer, transformer.Transformer)
         assert p.transformer.script_file == p_json["transformer"]
@@ -123,6 +124,7 @@ def test_from_response_json_list(self, mocker, backend_fixtures):
         assert p.environment == p_json["environment_dto"]["name"]
         assert p.artifact_version == p_json["artifact_version"]
         assert p.script_file == p_json["predictor"]
+        assert p.config_file == p_json["config_file"]
         assert isinstance(p.resources, resources.PredictorResources)
         assert isinstance(p.transformer, transformer.Transformer)
         assert p.transformer.script_file == p_json["transformer"]
@@ -161,6 +163,7 @@ def test_from_response_json_single(self, mocker, backend_fixtures):
         assert p.environment == p_json["environment_dto"]["name"]
         assert p.artifact_version == p_json["artifact_version"]
         assert p.script_file == p_json["predictor"]
+        assert p.config_file == p_json["config_file"]
         assert isinstance(p.resources, resources.PredictorResources)
         assert isinstance(p.transformer, transformer.Transformer)
         assert p.transformer.script_file == p_json["transformer"]
@@ -213,6 +216,7 @@ def test_constructor(self, mocker, backend_fixtures):
             environment=p_json["environment_dto"]["name"],
             artifact_version=p_json["artifact_version"],
             script_file=p_json["predictor"],
+            config_file=p_json["config_file"],
             resources=p_json["predictor_resources"],
             transformer={
                 "script_file": p_json["transformer"],
@@ -241,6 +245,7 @@ def test_constructor(self, mocker, backend_fixtures):
         assert p.environment == p_json["environment_dto"]["name"]
         assert p.artifact_version == p_json["artifact_version"]
         assert p.script_file == p_json["predictor"]
+        assert p.config_file == p_json["config_file"]
         assert isinstance(p.resources, resources.PredictorResources)
         assert isinstance(p.transformer, transformer.Transformer)
         assert p.transformer.script_file == p_json["transformer"]
@@ -340,14 +345,6 @@ def test_validate_script_file_py_none(self):
         # Assert
         assert "Predictor scripts are required" in str(e_info.value)
 
-    def test_validate_script_file_llm_none(self):
-        # Act
-        with pytest.raises(ValueError) as e_info:
-            _ = predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_LLM, None)
-
-        # Assert
-        assert "Predictor scripts are required" in str(e_info.value)
-
     def test_validate_script_file_tf_script_file(self):
         # Act
         predictor.Predictor._validate_script_file(
@@ -659,6 +656,7 @@ def extract_fields_from_json(self, mocker, backend_fixtures):
         assert kwargs["model_server"] == p_json["model_server"]
         assert kwargs["serving_tool"] == p_json["serving_tool"]
         assert kwargs["script_file"] == p_json["predictor"]
+        assert kwargs["config_file"] == p_json["config_file"]
         assert isinstance(kwargs["resources"], resources.PredictorResources)
         assert isinstance(kwargs["inference_logger"], inference_logger.InferenceLogger)
         assert kwargs["inference_logger"].mode == p_json["inference_logging"]
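
The updated tests pin down the serialization contract: `config_file` is read
from the `config_file` field of backend responses and written back under the
camelCase `configFile` key. The same argument is available on the explicit
`create_predictor()` path; a usage sketch with illustrative names:

    import hopsworks

    project = hopsworks.login()
    model = project.get_model_registry().get_model("llama", version=1)  # hypothetical

    ms = project.get_model_serving()
    predictor = ms.create_predictor(
        model,
        config_file="config.yaml",  # hypothetical server config file
    )
    deployment = predictor.deploy()
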