Skip to content

Commit 3487bce

Browse files
committed
add sagemaker serve executable
1 parent 8ab9e27 commit 3487bce

File tree

2 files changed

+139
-2
lines changed

2 files changed

+139
-2
lines changed
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
2-
FROM nvcr.io/nvidia/merlin/merlin-tensorflow:23.06
1+
FROM nvcr.io/nvidia/merlin/merlin-tensorflow:23.08
32

43
RUN pip3 install sagemaker-training
4+
5+
COPY --chown=1000:1000 serve /usr/bin/serve
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#!/bin/bash
2+
# Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in the
11+
# documentation and/or other materials provided with the distribution.
12+
# * Neither the name of NVIDIA CORPORATION nor the names of its
13+
# contributors may be used to endorse or promote products derived
14+
# from this software without specific prior written permission.
15+
#
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
28+
# SageMaker "serve" entrypoint: translates SAGEMAKER_* environment variables
# into tritonserver command-line flags and then launches tritonserver.
#
# Every environment variable is optional. Flags that have a default in this
# script are always emitted; the remaining flags are emitted only when the
# corresponding variable is set to a non-empty value.

SAGEMAKER_SINGLE_MODEL_REPO=/opt/ml/model/

# Note: in Triton on SageMaker, each model url is registered as a separate repository
# e.g., /opt/ml/models/<hash>/model. Specifying MME model repo path as /opt/ml/models causes Triton
# to treat it as an additional empty repository and changes
# the state of all models to be UNAVAILABLE in the model repository
# https://github.com/triton-inference-server/core/blob/main/src/model_repository_manager.cc#L914,L922
# On Triton, this path will be a dummy path as it's mandatory to specify a model repo when starting triton
SAGEMAKER_MULTI_MODEL_REPO=/tmp/sagemaker

#######################################
# Append one "--flag=value" element to TRITON_ARGS.
# Globals:   TRITON_ARGS (appended to)
# Arguments: $1 - flag text placed before the '='
#            $2 - value taken from the environment (may be empty)
#            $3 - optional default used when $2 is empty; when omitted and
#                 $2 is empty, nothing is appended
#######################################
_add_triton_flag() {
  local flag=$1 value=$2
  if [[ -n "${value}" ]]; then
    TRITON_ARGS+=("${flag}=${value}")
  elif (($# > 2)); then
    TRITON_ARGS+=("${flag}=$3")
  fi
}

#######################################
# Build the tritonserver argument list from the SAGEMAKER_* environment.
# Globals:   reads the SAGEMAKER_* variables referenced below;
#            writes SAGEMAKER_MODEL_REPO, is_mme_mode, TRITON_ARGS (array)
#            and exports SAGEMAKER_TRITON_PING_MODE
# Outputs:   one informational line on stdout when MME mode is selected
#######################################
build_triton_args() {
  local i gpu_count
  local -a extra_args=()

  SAGEMAKER_MODEL_REPO=${SAGEMAKER_SINGLE_MODEL_REPO}
  is_mme_mode=false

  # Use 'ready' for ping check in single-model endpoint mode, and use 'live' for ping check in multi-model endpoint mode
  # https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/rest_predict_v2.yaml#L10-L26
  # SAGEMAKER_TRITON_OVERRIDE_PING_MODE, when non-empty, wins in both modes.
  SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE:-ready}

  if [[ "${SAGEMAKER_MULTI_MODEL:-}" == "true" ]]; then
    mkdir -p "${SAGEMAKER_MULTI_MODEL_REPO}"
    SAGEMAKER_MODEL_REPO=${SAGEMAKER_MULTI_MODEL_REPO}
    SAGEMAKER_TRITON_PING_MODE=${SAGEMAKER_TRITON_OVERRIDE_PING_MODE:-live}
    is_mme_mode=true
    printf 'Triton is running in SageMaker MME mode. Using Triton ping mode: "%s"\n' \
      "${SAGEMAKER_TRITON_PING_MODE}"
  fi

  # A plain assignment stays local to this shell; export it so the value is
  # part of the environment handed to the tritonserver child process.
  # NOTE(review): assumes tritonserver reads SAGEMAKER_TRITON_PING_MODE from
  # its environment - confirm against the Triton SageMaker documentation.
  export SAGEMAKER_TRITON_PING_MODE

  # An array keeps every flag a single argv element even if a value contains
  # whitespace or glob characters.
  TRITON_ARGS=("--model-repository=${SAGEMAKER_MODEL_REPO}")

  # Set model namespacing to true, but allow disabling if required.
  # NOTE: despite its name, the value of SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING
  # is passed through verbatim as the --model-namespacing value.
  _add_triton_flag --model-namespacing "${SAGEMAKER_TRITON_DISABLE_MODEL_NAMESPACING:-}" true
  _add_triton_flag --sagemaker-port "${SAGEMAKER_BIND_TO_PORT:-}"
  _add_triton_flag --sagemaker-safe-port-range "${SAGEMAKER_SAFE_PORT_RANGE:-}"
  _add_triton_flag --allow-grpc "${SAGEMAKER_TRITON_ALLOW_GRPC:-}" false
  _add_triton_flag --allow-metrics "${SAGEMAKER_TRITON_ALLOW_METRICS:-}" false
  _add_triton_flag --metrics-port "${SAGEMAKER_TRITON_METRICS_PORT:-}"
  _add_triton_flag --grpc-port "${SAGEMAKER_TRITON_GRPC_PORT:-}"
  _add_triton_flag --buffer-manager-thread-count "${SAGEMAKER_TRITON_BUFFER_MANAGER_THREAD_COUNT:-}"
  _add_triton_flag --sagemaker-thread-count "${SAGEMAKER_TRITON_THREAD_COUNT:-}"
  # Enable verbose logging by default. If env variable is specified, use value from env variable
  _add_triton_flag --log-verbose "${SAGEMAKER_TRITON_LOG_VERBOSE:-}" true
  _add_triton_flag --log-info "${SAGEMAKER_TRITON_LOG_INFO:-}"
  _add_triton_flag --log-warning "${SAGEMAKER_TRITON_LOG_WARNING:-}"
  _add_triton_flag --log-error "${SAGEMAKER_TRITON_LOG_ERROR:-}"
  # Python backend shared-memory sizing: 16MB default region, 1MB growth step.
  _add_triton_flag --backend-config=python,shm-default-byte-size \
    "${SAGEMAKER_TRITON_SHM_DEFAULT_BYTE_SIZE:-}" 16777216
  _add_triton_flag --backend-config=python,shm-growth-byte-size \
    "${SAGEMAKER_TRITON_SHM_GROWTH_BYTE_SIZE:-}" 1048576
  _add_triton_flag --backend-config=tensorflow,version "${SAGEMAKER_TRITON_TENSORFLOW_VERSION:-}"

  # Apply the same model-load limit to every GPU listed by nvidia-smi
  # (one line of `nvidia-smi -L` output per device).
  if [[ -n "${SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT:-}" ]]; then
    gpu_count=$(nvidia-smi -L | wc -l)
    for ((i = 0; i < gpu_count; i++)); do
      TRITON_ARGS+=(--model-load-gpu-limit "${i}:${SAGEMAKER_TRITON_MODEL_LOAD_GPU_LIMIT}")
    done
  fi

  # This variable carries several flags in one string, so it is the only
  # value that is split on whitespace. `read -a` splits without performing
  # pathname expansion; `-d ''` lets the split span embedded newlines
  # (read then returns non-zero at end of input, which is expected).
  if [[ -n "${SAGEMAKER_TRITON_ADDITIONAL_ARGS:-}" ]]; then
    read -r -d '' -a extra_args <<<"${SAGEMAKER_TRITON_ADDITIONAL_ARGS}" || true
    TRITON_ARGS+=("${extra_args[@]}")
  fi
}

build_triton_args

tritonserver --allow-sagemaker=true --allow-http=false "${TRITON_ARGS[@]}"

0 commit comments

Comments
 (0)