[RAPTOR-14025] Allow overriding NIM server details, release v1.16.10.1 #1539

Merged · 1 commit · Jun 26, 2025

5 changes: 5 additions & 0 deletions custom_model_runner/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


#### [1.16.10.post1] - 2025-06-25
##### Changed
- Allow overriding NIM server details.

#### [1.16.10] - 2025-03-19
##### Changed
- Ensure only TextGen models have chat capability.
2 changes: 1 addition & 1 deletion custom_model_runner/datarobot_drum/drum/description.py
@@ -4,6 +4,6 @@
This is proprietary source code of DataRobot, Inc. and its affiliates.
Released under the terms of DataRobot Tool and Utility Agreement.
"""
-version = "1.16.10"
+version = "1.16.10.post1"
__version__ = version
project_name = "datarobot-drum"
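
The `.post1` suffix is a PEP 440 post-release tag, so the new version sorts after `1.16.10` for installers. A quick sanity check of that ordering (a sketch assuming the third-party `packaging` library, which is not part of this diff):

```python
# Sketch: how PEP 440 orders the post-release tag used in this bump.
# Assumes the third-party "packaging" library is installed.
from packaging.version import Version

assert Version("1.16.10.post1") > Version("1.16.10")
assert Version("1.16.10.post1").is_postrelease
```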
5 changes: 3 additions & 2 deletions custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py
@@ -57,7 +57,7 @@ class BaseOpenAiGpuPredictor(BaseLanguagePredictor):
    NAME = "Generic OpenAI API"
    DEFAULT_MODEL_NAME = "datarobot-deployed-llm"
    MAX_RESTARTS = 10
-    HEALTH_ROUTE = "/"
+    DEFAULT_HEALTH_ROUTE = "/"

    def __init__(self):
        super().__init__()
@@ -69,6 +69,7 @@ def __init__(self):
        self.deployment_id = os.environ.get("MLOPS_DEPLOYMENT_ID", None)

        # server configuration is set in the Drop-in environment
        self.health_route = self.DEFAULT_HEALTH_ROUTE
        self.openai_port = os.environ.get(EnvVarNames.OPENAI_PORT, "9999")
        self.openai_host = os.environ.get(EnvVarNames.OPENAI_HOST, "localhost")
        self.openai_process = None
@@ -369,7 +370,7 @@ def health_check(self) -> typing.Tuple[dict, int]:
return {"message": f"{self.NAME} has crashed."}, HTTP_513_DRUM_PIPELINE_ERROR

try:
health_url = f"http://{self.openai_host}:{self.openai_port}{self.HEALTH_ROUTE}"
health_url = f"http://{self.openai_host}:{self.openai_port}{self.health_route}"
response = requests.get(health_url, timeout=5)
return {"message": response.text}, response.status_code
except Timeout:
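
The rename from `HEALTH_ROUTE` to `DEFAULT_HEALTH_ROUTE`, together with the new `self.health_route` instance attribute, lets each subclass keep supplying its own default while allowing per-deployment overrides (used by `NIMPredictor` below). A minimal sketch of the pattern, with hypothetical class names:

```python
# Minimal sketch of the class-default / instance-attribute pattern
# introduced here. "Server" and "ReadyServer" are hypothetical stand-ins
# for BaseOpenAiGpuPredictor and NIMPredictor.
class Server:
    DEFAULT_HEALTH_ROUTE = "/"  # class-level default

    def __init__(self, route_override=None):
        # The instance attribute starts at the class default and can be
        # replaced by configuration at construction time.
        self.health_route = route_override or self.DEFAULT_HEALTH_ROUTE


class ReadyServer(Server):
    # A subclass swaps the default without touching __init__.
    DEFAULT_HEALTH_ROUTE = "/v1/health/ready"


assert Server().health_route == "/"
assert ReadyServer().health_route == "/v1/health/ready"
assert ReadyServer(route_override="/custom").health_route == "/custom"
```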
13 changes: 12 additions & 1 deletion custom_model_runner/datarobot_drum/drum/gpu_predictors/nim_predictor.py
@@ -29,7 +29,7 @@ class NIMPredictor(BaseOpenAiGpuPredictor):
    ENGINE_CONFIG_FILE = "engine_config.json"
    LEGACY_START_SERVER_SCRIPT = Path("/opt/nim/start-server.sh")
    START_SERVER_SCRIPT = Path("/opt/nim/start_server.sh")
-    HEALTH_ROUTE = "/v1/health/ready"
+    DEFAULT_HEALTH_ROUTE = "/v1/health/ready"

    def __init__(self):
        super().__init__()
@@ -41,6 +41,17 @@ def __init__(self):
        self.max_model_len = self.get_optional_parameter("NIM_MAX_MODEL_LEN")
        self.log_level = self.get_optional_parameter("NIM_LOG_LEVEL")

        # The DR_ prefix marks these as DataRobot-specific parameters, not standard NIM variables.
        self.health_route = self.get_optional_parameter(
            "DR_NIM_HEALTH_ROUTE", default_value=self.DEFAULT_HEALTH_ROUTE
        )
        self.openai_port = self.get_optional_parameter(
            "DR_NIM_SERVER_PORT", default_value=self.openai_port
        )
        self.openai_host = self.get_optional_parameter(
            "DR_NIM_SERVER_HOST", default_value=self.openai_host
        )

    @property
    def num_deployment_stages(self):
        return 3 if self.python_model_adapter.has_custom_hook(CustomHooks.LOAD_MODEL) else 1
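
With the three `DR_NIM_*` parameters wired through `get_optional_parameter`, a deployment can point DRUM at an externally managed NIM server. A sketch of how such overrides could be supplied through DRUM's runtime-parameter environment variables, reusing the payload shape from the tests below (the host and port values are invented for illustration):

```python
# Sketch: seeding DR_NIM_* overrides as runtime parameters before the
# predictor is constructed. The JSON payload shape mirrors the test
# helpers below; the host/port values are invented.
import json
import os

from datarobot_drum import RuntimeParameters
from datarobot_drum.runtime_parameters.runtime_parameters_schema import RuntimeParameterTypes


def set_string_param(name, value):
    os.environ[RuntimeParameters.namespaced_param_name(name)] = json.dumps(
        {"type": RuntimeParameterTypes.STRING.value, "payload": value}
    )


set_string_param("DR_NIM_SERVER_HOST", "nim.internal.example")  # invented host
set_string_param("DR_NIM_SERVER_PORT", "8080")  # invented port
set_string_param("DR_NIM_HEALTH_ROUTE", "/v1/health/ready")
```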
@@ -1,9 +1,17 @@
import json
import os
import typing
from unittest.mock import patch

import pytest

from datarobot_drum import RuntimeParameters
from datarobot_drum.drum.enum import TargetType

from datarobot_drum.drum.gpu_predictors.base import BaseOpenAiGpuPredictor
from datarobot_drum.drum.gpu_predictors.nim_predictor import NIMPredictor
from datarobot_drum.drum.root_predictors.drum_server_utils import DrumServerProcess
from datarobot_drum.runtime_parameters.runtime_parameters_schema import RuntimeParameterTypes


class TestGPUPredictor(BaseOpenAiGpuPredictor):
@@ -28,6 +36,20 @@ def mock_target_name_env_var(monkeypatch):
    monkeypatch.delenv("TARGET_NAME")


@pytest.fixture
def mock_openai_host_env_var(monkeypatch):
    monkeypatch.setenv("OPENAI_HOST", "mocked.openai.host")
    yield
    monkeypatch.delenv("OPENAI_HOST")


@pytest.fixture
def mock_openai_port_env_var(monkeypatch):
    monkeypatch.setenv("OPENAI_PORT", "45678")
    yield
    monkeypatch.delenv("OPENAI_PORT")


@pytest.mark.parametrize("target_type", list(TargetType))
def test_supports_chat(mock_target_name_env_var, target_type):
    predictor = TestGPUPredictor()
@@ -40,3 +62,79 @@ def test_supports_chat(mock_target_name_env_var, target_type):
        assert predictor.supports_chat()
    else:
        assert not predictor.supports_chat()

@pytest.mark.parametrize(
    "runtime_param_type, payload",
    [
        (RuntimeParameterTypes.STRING, "Some string value"),
        (RuntimeParameterTypes.BOOLEAN, True),
        (RuntimeParameterTypes.NUMERIC, 10),
        (
            RuntimeParameterTypes.CREDENTIAL,
            {
                "credentialType": "s3",
                "region": "us-west",
                "awsAccessKeyId": "123aaa",
                "awsSecretAccessKey": "3425sdd",
                "awsSessionToken": "12345abcde",
            },
        ),
    ],
)
def test_valid(runtime_param_type, payload):
    runtime_param_name = "AAA"
    namespaced_runtime_param_name = RuntimeParameters.namespaced_param_name(runtime_param_name)
    runtime_param_env_value = json.dumps({"type": runtime_param_type.value, "payload": payload})
    with patch.dict(os.environ, {namespaced_runtime_param_name: runtime_param_env_value}):
        assert RuntimeParameters.has(runtime_param_name)
        assert RuntimeParameters.get(runtime_param_name) == payload


def rt_param_name(name):
    return RuntimeParameters.namespaced_param_name(name)


def rt_param_value(type, value):
    return json.dumps(
        {
            "type": type,
            "payload": value,
        }
    )


def rt_param_str_value(value):
    return rt_param_value(RuntimeParameterTypes.STRING.value, value)


class TestNIMPredictor:
    def test_nim_predictor_created_with_default_values(self):
        predictor = NIMPredictor()
        assert predictor.health_route == "/v1/health/ready"
        assert predictor.openai_port == "9999"
        assert predictor.openai_host == "localhost"

    @pytest.mark.usefixtures("mock_openai_host_env_var", "mock_openai_port_env_var")
    def test_nim_predictor_created_with_values_from_env_vars(self):
        predictor = NIMPredictor()
        assert predictor.health_route == "/v1/health/ready"
        assert predictor.openai_port == "45678"
        assert predictor.openai_host == "mocked.openai.host"

    @pytest.mark.usefixtures("mock_openai_host_env_var", "mock_openai_port_env_var")
    def test_nim_predictor_created_with_values_from_runtime_params_and_takes_precedence_over_env_vars(
        self,
    ):
        with patch.dict(
            os.environ,
            {
                rt_param_name("DR_NIM_HEALTH_ROUTE"): rt_param_str_value("/overridden/health/route"),
                rt_param_name("DR_NIM_SERVER_HOST"): rt_param_str_value("mocked.host"),
                rt_param_name("DR_NIM_SERVER_PORT"): rt_param_str_value("mocked.port"),
            },
        ):
            predictor = NIMPredictor()

        assert predictor.health_route == "/overridden/health/route"
        assert predictor.openai_port == "mocked.port"
        assert predictor.openai_host == "mocked.host"