[BUZZOK-25116] added new env #1349

Open
wants to merge 2 commits into master
45 changes: 45 additions & 0 deletions public_dropin_gpu_environments/python312_genai/Dockerfile
@@ -0,0 +1,45 @@
FROM datarobotdev/platform-base-python-312-devel:latest-ubi9

COPY requirements.txt requirements.txt

COPY dep.constraints dep.constraints

COPY install_dependencies.sh install_dependencies.sh

ENV JARS_PATH=/opt/jars
ENV DATAROBOT_MLOPS_VERSION=10.2.8

RUN bash install_dependencies.sh

ENV DRUM_JAVA_SHARED_JARS=$JARS_PATH/*
ENV MLOPS_MONITORING_AGENT_JAR_PATH=$JARS_PATH/mlops-agent-${DATAROBOT_MLOPS_VERSION}.jar

RUN python3.12 -c "import nltk; nltk.download('wordnet'); nltk.download('omw-1.4'); nltk.download('punkt_tab'); "

# This ensures that the tiktoken vocabulary artifacts are preloaded to the container's filesystem
# from OpenAI's Azure Blob Storage, and tiktoken does not require egress Internet access to work.
ENV TIKTOKEN_CACHE_DIR=/opt/.tiktoken_cache
RUN python3.12 -c "import tiktoken; tiktoken.get_encoding('cl100k_base').encode('Hello world');" && \
# Check that the directory exists and is not empty.
test -n "$( ls -A /opt/.tiktoken_cache/ )"

# Add the user that will run the model
RUN useradd -m -u 1000 envuser && \
chown -R envuser /opt
USER envuser

# Copy the drop-in environment code into the correct directory
# Code from the custom model tarball can overwrite the code here
ENV HOME=/opt CODE_DIR=/opt/code ADDRESS=0.0.0.0:8080

# This makes print statements show up in the logs API
ENV PYTHONUNBUFFERED=1

WORKDIR ${CODE_DIR}
COPY ./*.sh ${CODE_DIR}/

ENV WITH_ERROR_SERVER=1
# Uncomment the following line to switch from Flask to uwsgi server
#ENV PRODUCTION=1 MAX_WORKERS=1 SHOW_STACKTRACE=1

ENTRYPOINT ["/opt/code/start_server.sh"]
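
For reference, a minimal local build-and-smoke-test sketch (not part of the PR; the image tag is hypothetical, and building assumes access to the datarobotdev base image and the package indexes used by install_dependencies.sh). It checks that the tiktoken cache preloaded above works without egress:

cd public_dropin_gpu_environments/python312_genai
docker build -t python312-genai:local .
# cl100k_base should resolve from /opt/.tiktoken_cache with no network access
docker run --rm --network none --entrypoint python3.12 python312-genai:local \
    -c "import tiktoken; print(tiktoken.get_encoding('cl100k_base').encode('Hello world'))"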
Empty file.
214 changes: 214 additions & 0 deletions public_dropin_gpu_environments/python312_genai/dep.constraints
@@ -0,0 +1,214 @@
absl-py==2.1.0
aioboto3==13.1.1
aiobotocore==2.13.1
aiofiles==23.2.1
aiohappyeyeballs==2.4.6
aiohttp==3.11.12
aioitertools==0.12.0
aiosignal==1.3.2
annotated-types==0.7.0
annoy==1.17.3
anthropic==0.28.1
anyio==4.8.0
argcomplete==3.5.3
asyncio==3.4.3
attrs==25.1.0
awscli==1.37.21
azure-core==1.32.0
azure-identity==1.20.0
azure-storage-blob==12.19.0
backoff==2.2.1
beautifulsoup4==4.13.3
blinker==1.9.0
boto3==1.34.131
botocore==1.34.131
cachetools==5.5.2
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
click==8.1.8
cloudpickle==2.2.1
colorama==0.4.6
coloredlogs==15.0.1
cryptography==44.0.1
dataclasses-json==0.6.7
datarobot==3.6.3
datarobot-drum==1.16.6
datarobot-mlops==10.2.8
datarobot-model-metrics==0.6.13
datarobot-moderations==11.0.2
datarobot-predict==1.11.0
datarobot-storage==2.1.1
Deprecated==1.2.18
dirtyjson==1.0.8
distro==1.9.0
docker==7.1.0
docstring_parser==0.16
docutils==0.16
faiss-cpu==1.8.0
fastapi==0.115.8
fastembed==0.4.0
filechunkio==1.8
filelock==3.17.0
Flask==3.1.0
flatbuffers==25.2.10
frozenlist==1.5.0
fsspec==2025.2.0
fugashi==1.3.2
google-api-core==2.24.1
google-auth==2.38.0
google-cloud-aiplatform==1.67.1
google-cloud-bigquery==3.29.0
google-cloud-core==2.4.2
google-cloud-resource-manager==1.14.1
google-cloud-storage==2.19.0
google-crc32c==1.6.0
google-resumable-media==2.7.2
googleapis-common-protos==1.68.0
greenlet==3.1.1
grpc-google-iam-v1==0.14.0
grpcio==1.70.0
grpcio-status==1.70.0
h11==0.14.0
httpcore==0.17.3
httpx==0.24.1
httpx-sse==0.4.0
huggingface-hub==0.29.1
humanfriendly==10.0
idna==3.10
isodate==0.7.2
itsdangerous==2.2.0
Jinja2==3.1.5
jiter==0.8.2
jmespath==1.0.1
joblib==1.4.2
jsonpatch==1.33
jsonpointer==3.0.0
julia==0.5.7
langchain==0.3.18
langchain-community==0.3.17
langchain-core==0.3.37
langchain-openai==0.3.6
langchain-text-splitters==0.3.6
langsmith==0.3.9
lark==1.1.9
llama-cloud==0.1.13
llama-index==0.10.67.post1
llama-index-agent-openai==0.2.9
llama-index-cli==0.1.13
llama-index-core==0.10.67
llama-index-embeddings-azure-openai==0.1.11
llama-index-embeddings-openai==0.1.11
llama-index-indices-managed-llama-cloud==0.2.7
llama-index-legacy==0.9.48.post4
llama-index-llms-anthropic==0.1.17
llama-index-llms-azure-openai==0.1.10
llama-index-llms-bedrock-converse==0.1.6
llama-index-llms-fireworks==0.1.8
llama-index-llms-langchain==0.3.0
llama-index-llms-openai==0.1.31
llama-index-llms-vertex==0.1.5
llama-index-multi-modal-llms-openai==0.1.9
llama-index-program-openai==0.1.7
llama-index-question-gen-openai==0.1.3
llama-index-readers-file==0.1.33
llama-index-readers-llama-parse==0.1.6
llama-parse==0.4.9
loguru==0.7.2
markdown-it-py==3.0.0
MarkupSafe==3.0.2
marshmallow==3.26.1
mdurl==0.1.2
memory-profiler==0.61.0
mmh3==4.1.0
mpmath==1.3.0
msal==1.31.1
msal-extensions==1.2.0
multidict==6.1.0
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.4.2
nltk==3.9.1
numpy==1.26.4
onnx==1.17.0
onnxruntime==1.18.1
openai==1.58.1
optimum==1.24.0
orjson==3.10.15
packaging==24.2
pandas==2.2.3
pillow==10.4.0
portalocker==2.10.1
progress==1.6
prompt_toolkit==3.0.50
propcache==0.3.0
proto-plus==1.26.0
protobuf==5.29.3
psutil==7.0.0
py4j==0.10.9.9
pyarrow==14.0.1
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
pydantic==2.9.2
pydantic-settings==2.7.1
pydantic_core==2.23.4
Pygments==2.19.1
PyJWT==2.10.1
pypdf==4.3.1
PyStemmer==2.2.0.3
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2025.1
PyYAML==6.0.2
regex==2024.11.6
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.9.4
rouge-score==0.1.2
rsa==4.7.2
ruamel.yaml==0.17.4
s3transfer==0.10.4
safetensors==0.5.2
schema==0.7.7
scikit-learn==1.5.0
scipy==1.12.0
sentence-transformers==3.0.0
setuptools==68.2.2
shapely==2.0.7
shellingham==1.5.4
simpleeval==1.0.3
six==1.17.0
sniffio==1.3.1
snowballstemmer==2.2.0
soupsieve==2.6
SQLAlchemy==2.0.35
starlette==0.45.3
StrEnum==0.4.15
strictyaml==1.4.2
striprtf==0.0.26
sympy==1.13.3
tenacity==8.5.0
termcolor==2.5.0
texttable==1.7.0
threadpoolctl==3.5.0
tiktoken==0.7.0
tokenizers==0.21.0
torch @ https://download.pytorch.org/whl/cu118/torch-2.2.2%2Bcu118-cp312-cp312-linux_x86_64.whl#sha256=c0fa31b79d2c06012422e4ed4ed08a86179615463647ac5c44c8f6abef1d4aec
tqdm==4.67.1
trafaret==2.1.1
transformers==4.48.3
typer==0.15.1
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2025.1
unidic-lite==1.0.8
urllib3==1.26.20
uvicorn==0.34.0
watchdog==6.0.0
wcwidth==0.2.13
Werkzeug==3.1.3
wheel==0.45.1
wrapt==1.17.2
yarl==1.18.3
zstandard==0.23.0
8 changes: 8 additions & 0 deletions public_dropin_gpu_environments/python312_genai/env_info.json
@@ -0,0 +1,8 @@
{
"id": "67d83b8379275f752a4de3ba",
"name": "[GenAI][NVIDIA] Python 3.12 with Moderations",
"description": "Python 3.12 environment for GenAI custom models with Moderation and assessment support. This environment is created to run on machines with NVIDIA GPU's.",
"programmingLanguage": "python",
"environmentVersionId": "67d83be502e20f38b4db2a80",
"isPublic": true
}
43 changes: 43 additions & 0 deletions public_dropin_gpu_environments/python312_genai/fit.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env sh
# Copyright 2021 DataRobot, Inc. and its affiliates.
#
# All rights reserved.
# This is proprietary source code of DataRobot, Inc. and its affiliates.
#
# Released under the terms of DataRobot Tool and Utility Agreement.
# You probably don't want to modify this file
cd "${CODEPATH}" || exit 1
export PYTHONPATH="${CODEPATH}":"${PYTHONPATH}"

export X="${INPUT_DIRECTORY}/X${TRAINING_DATA_EXTENSION:-.csv}"
export weights="${INPUT_DIRECTORY}/weights.csv"
export sparse_colnames="${INPUT_DIRECTORY}/X.colnames"
export parameters="${INPUT_DIRECTORY}/parameters.json"

CMD="drum fit --target-type ${TARGET_TYPE} --input ${X} --num-rows ALL --output ${ARTIFACT_DIRECTORY} \
--code-dir ${CODEPATH} --verbose --enable-fit-metadata "

if [ "${TARGET_TYPE}" != "anomaly" ]; then
CMD="${CMD} --target-csv ${INPUT_DIRECTORY}/y.csv"
fi

if [ -f "${weights}" ]; then
CMD="${CMD} --row-weights-csv ${weights}"
fi

if [ -f "${sparse_colnames}" ]; then
CMD="${CMD} --sparse-column-file ${sparse_colnames}"
fi

if [ -f "${parameters}" ]; then
CMD="${CMD} --parameter-file ${parameters}"
fi

if [ -n "${USER_SECRETS_MOUNT_PATH}" ]; then
CMD="${CMD} --user-secrets-mount-path ${USER_SECRETS_MOUNT_PATH}"
fi

echo "Environment variables:"
env
echo "${CMD}"
sh -c "${CMD}"
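
For illustration, a hedged example (not part of the PR; the target type and paths are hypothetical) of the command the script above assembles for a binary target when a weights file is present:

drum fit --target-type binary --input /opt/data/X.csv --num-rows ALL \
    --output /opt/artifacts --code-dir /opt/code --verbose --enable-fit-metadata \
    --target-csv /opt/data/y.csv --row-weights-csv /opt/data/weights.csv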
38 changes: 38 additions & 0 deletions public_dropin_gpu_environments/python312_genai/install_dependencies.sh
@@ -0,0 +1,38 @@
#!/bin/bash
set -e

ARTIFACTORY_MAVEN_URL="https://artifactory.devinfra.drdev.io/artifactory/maven-central/com/datarobot"
Collaborator:
Maybe we shouldn't have Artifactory references in this env?

  1. Environments in this repo are considered to be public and buildable outside of DataRobot.
  2. We have a branch-cut release job that triggers an HHI-based pipeline to build a release tarball. It doesn't have access to the Artifactory and will fail.

Contributor (author):
Ok, I will look into that, thanks


# A number of packages here are based on the following custom models image:
# datarobot/dropin-env-base-jdk:ubi8.8-py3.11-jdk11.0.22-drum1.10.20-mlops9.2.8
# (https://github.com/datarobot/datarobot-user-models/blob/master/docker/dropin_env_base_jdk_ubi)
# Downloading MLOps jars prior to build is done via Maven, see pom.xml in the dropin image
# if you need to reproduce the process

# TODO: review dependencies https://datarobot.atlassian.net/browse/BUZZOK-24542
microdnf update
microdnf install -y gcc gcc-c++ which \
java-11-openjdk-headless-1:11.0.25.0.9 java-11-openjdk-devel-1:11.0.25.0.9 \
nginx \
tar gzip unzip zip wget vim-minimal nano

chmod -R 707 /var/lib/nginx /var/log/nginx

pip3 install -U pip --no-cache-dir
pip3 install --no-cache-dir wheel setuptools

pip3 install -r requirements.txt \
--no-cache-dir \
--upgrade-strategy eager \
--extra-index-url https://download.pytorch.org/whl/cu118 \
--extra-index-url https://artifactory.devinfra.drdev.io/artifactory/api/pypi/datarobot-python-dev/simple

mkdir -p $JARS_PATH
curl -L ${ARTIFACTORY_MAVEN_URL}/datarobot-mlops/${DATAROBOT_MLOPS_VERSION}/datarobot-mlops-${DATAROBOT_MLOPS_VERSION}.jar --output ${JARS_PATH}/datarobot-mlops-${DATAROBOT_MLOPS_VERSION}.jar && \
curl -L ${ARTIFACTORY_MAVEN_URL}/mlops-agent/${DATAROBOT_MLOPS_VERSION}/mlops-agent-${DATAROBOT_MLOPS_VERSION}.jar --output ${JARS_PATH}/mlops-agent-${DATAROBOT_MLOPS_VERSION}.jar

microdnf upgrade
microdnf clean all

rm -rf dep.constraints
rm -rf requirements.txt
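
Regarding the reviewer's concern above, one way to keep this script buildable outside of DataRobot would be to fetch the jars from a public repository instead of the internal Artifactory. A hedged sketch, assuming the datarobot-mlops and mlops-agent artifacts are published to public Maven Central under com/datarobot (this is an assumption, not verified here):

MAVEN_CENTRAL_URL="https://repo1.maven.org/maven2/com/datarobot"  # assumed public location
curl -L ${MAVEN_CENTRAL_URL}/datarobot-mlops/${DATAROBOT_MLOPS_VERSION}/datarobot-mlops-${DATAROBOT_MLOPS_VERSION}.jar \
    --output ${JARS_PATH}/datarobot-mlops-${DATAROBOT_MLOPS_VERSION}.jar
curl -L ${MAVEN_CENTRAL_URL}/mlops-agent/${DATAROBOT_MLOPS_VERSION}/mlops-agent-${DATAROBOT_MLOPS_VERSION}.jar \
    --output ${JARS_PATH}/mlops-agent-${DATAROBOT_MLOPS_VERSION}.jar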
48 changes: 48 additions & 0 deletions public_dropin_gpu_environments/python312_genai/requirements.txt
@@ -0,0 +1,48 @@
-c dep.constraints

pyarrow>=0.14.1,<=14.0.1
datarobot-drum==1.16.6
datarobot-mlops==10.2.8
cloudpickle==2.2.1
# You can find the available wheels in the PyTorch wheel repository here: https://download.pytorch.org/whl/torch/
torch @ https://download.pytorch.org/whl/cu118/torch-2.2.2%2Bcu118-cp312-cp312-linux_x86_64.whl#sha256=c0fa31b79d2c06012422e4ed4ed08a86179615463647ac5c44c8f6abef1d4aec
transformers==4.48.3
openai==1.58.1
langchain==0.3.18
langchain-community==0.3.17
optimum==1.24.0
onnxruntime==1.18.1
onnx==1.17.0
sentence-transformers==3.0.0
faiss-cpu==1.8.0
numpy==1.26.4
pandas<=2.2.3
scikit-learn==1.5.0
scipy>=1.1,<=1.12
tiktoken==0.7.0
google-cloud-aiplatform>=1.34.0,<=1.67.1
pydantic>=2.7.0,<=2.9.2
pydantic-settings==2.7.1
aiofiles==23.2.1
aioboto3==13.1.1
rouge-score==0.1.2
fugashi==1.3.2
unidic-lite==1.0.8
# llama-index-core is pinned because 0.10.68.post1 breaks the install.
llama-index-core==0.10.67
nltk==3.9.1
SQLAlchemy==2.0.35
# Required for logging done in the model_execution_lib
loguru==0.7.2

# httpx 0.28.0 breaks the openai library
# https://github.com/openai/openai-python/issues/1902
httpx<0.28.0
# Keep this in sync with version in worker/pyproject.toml
datarobot-moderations==11.0.2
# A leaf dependency of moderations. The version is pinned because, starting with 0.4.1, it uses rust-stemmers, which is licensed under GPLv3.
fastembed<0.4.1
# A fork of NeMo Guardrails that supports Python 3.12. Stop using this fork once https://github.com/NVIDIA/NeMo-Guardrails/pull/984 is merged.
nemoguardrails @ git+https://github.com/datarobot-forks/NeMo-Guardrails@v0.11.1+dr2

backoff>=2.2.1
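
For context, the leading "-c dep.constraints" line makes pip treat dep.constraints as a constraints file: packages pulled in transitively are pinned to the versions listed there, but nothing in the constraints file is installed unless something requires it. A minimal local-install sketch (not part of the PR; it omits the internal extra index used in install_dependencies.sh):

pip install -r requirements.txt \
    --no-cache-dir \
    --extra-index-url https://download.pytorch.org/whl/cu118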
18 changes: 18 additions & 0 deletions public_dropin_gpu_environments/python312_genai/start_server.sh
@@ -0,0 +1,18 @@
#!/bin/sh
# Copyright 2021 DataRobot, Inc. and its affiliates.
#
# All rights reserved.
# This is proprietary source code of DataRobot, Inc. and its affiliates.
#
# Released under the terms of DataRobot Tool and Utility Agreement.
echo "Starting Custom Model environment with DRUM prediction server"

if [ "${ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP}" = 1 ]; then
echo "Environment variables:"
env
fi

echo
echo "Executing command: drum server $*"
echo
exec drum server "$@"
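
For reference, a hedged run sketch (not part of the PR; the image tag, mount path, and code directory are hypothetical). The image's entrypoint invokes this script, so any arguments placed after the image name are forwarded to drum server via "$@":

docker run --rm -p 8080:8080 \
    -e ENABLE_CUSTOM_MODEL_RUNTIME_ENV_DUMP=1 \
    -v /path/to/custom_model:/opt/code/custom_model \
    python312-genai:local \
    --code-dir /opt/code/custom_model --address 0.0.0.0:8080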