Skip to content

Commit dc07b02

Browse files
TAO 5.5 Release - PyTorch
TAO 5.5 Release - PyTorch
2 parents 9c2d94c + 2fdd6b3 commit dc07b02

File tree

666 files changed

+60730
-26767
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

666 files changed

+60730
-26767
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,4 +218,4 @@ In order to build a new docker, please edit the `deploy.sh` file in `$NV_TAO_PYT
218218
TAO Toolkit PyTorch backend is not accepting contributions as part of the TAO 5.0 release, but will be open in the future.
219219

220220
## <a name='License'></a>License
221-
This project is licensed under the [Apache-2.0](./LICENSE) License.
221+
This project is licensed under the [Apache-2.0](./LICENSE) License.

docker/Dockerfile

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
ARG PYTORCH_BASE_IMAGE=nvcr.io/nvidia/pytorch:23.12-py3
1+
ARG PYTORCH_BASE_IMAGE=nvcr.io/nvidia/pytorch:24.04-py3
22
FROM ${PYTORCH_BASE_IMAGE}
33

44
# Ensure apt-get won't prompt for selecting options
@@ -11,7 +11,6 @@ RUN apt-get upgrade && apt-get update && \
1111

1212
# uninstall stuff from base container
1313
RUN pip uninstall -y sacrebleu torchtext
14-
RUN pip install parametrized ninja
1514
# Installing custom packages in /opt.
1615
WORKDIR /opt
1716

@@ -32,25 +31,31 @@ RUN mkdir trt_oss_src && \
3231
-DCMAKE_CUDA_ARCHITECTURES="53;60;61;70;75;80;86;90" \
3332
-DTRT_LIB_DIR=/usr/lib/x86_64-linux-gnu \
3433
-DTRT_BIN_DIR=`pwd`/out \
35-
-DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.3/bin/nvcc \
36-
-DCUDNN_VERSION=8.9 && \
34+
-DCMAKE_CUDA_COMPILER=/usr/local/cuda-12.4/bin/nvcc \
35+
-DCUDNN_VERSION=9.1.0.70 && \
3736
make -j16 nvinfer_plugin nvinfer_plugin_static && \
3837
cp libnvinfer_plugin.so.8.6.1 /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.8.6.1 && \
3938
cp libnvinfer_plugin_static.a /usr/lib/x86_64-linux-gnu/libnvinfer_plugin_static.a && \
4039
cd ../../../ && \
4140
rm -rf trt_oss_src
4241

43-
4442
COPY docker/requirements-pip.txt requirements-pip.txt
45-
# Forcing cython==0.29.36 for pycocotools-fix with python3.10.
46-
RUN pip install Cython==0.29.36
47-
RUN pip uninstall -y jupyterlab
48-
RUN pip install --upgrade pip
49-
RUN pip install -r requirements-pip.txt \
50-
&& rm requirements-pip.txt
51-
RUN pip install -U openmim && mim install mmengine "mmcv>=2.0.0" "mmpretrain>=1.0.0rc8"
52-
RUN pip install "mmsegmentation>=1.0.0" mmdeploy
53-
RUN pip install --upgrade setuptools
43+
# Remove jupyterlab (presumably inherited from the base image - confirm), refresh
# pip, then install the pinned requirements. requirements-pip.txt pins openmim,
# which supplies the `mim` CLI used at the end of this step.
# --no-cache-dir keeps pip's download cache out of the resulting image layer.
RUN pip uninstall -y jupyterlab \
    && pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements-pip.txt \
    && rm requirements-pip.txt \
    && mim install "mmengine==0.10.4" "mmpretrain>=1.0.0rc8"
48+
49+
# Build MMCV v2.1.0 from source with its compiled ops (MMCV_WITH_OPS=1).
# FORCE_CUDA=1 is exported for the build; presumably this forces the CUDA ops to
# be compiled even though no GPU is visible during `docker build` - confirm.
# NOTE(review): ENV persists into the runtime environment; switch to an inline
# variable on the RUN line if it is only needed at build time.
ENV FORCE_CUDA="1"
# The install is editable (-e), so the checkout has to stay in the image.
# A shallow clone of the tag keeps the git history out of the layer.
RUN mkdir mmcv_src && \
    cd mmcv_src && \
    echo "$PWD Building MMCV..." && \
    git clone --depth 1 -b v2.1.0 https://github.com/open-mmlab/mmcv.git mmcv && \
    cd mmcv && \
    MMCV_WITH_OPS=1 pip install -v -e .
56+
57+
# Install the pinned OpenMMLab packages, then reinstall opencv-python at the
# version pinned in requirements-pip.txt (presumably because the packages above
# can pull in a different OpenCV build - confirm).
# --no-cache-dir keeps pip's download cache out of the resulting image layer.
RUN pip install --no-cache-dir "mmsegmentation==1.2.2" "mmdeploy==1.3.1" "mmdet==3.3.0" "mmdet3d==1.4.0" \
    && pip uninstall -y opencv-python && pip install --no-cache-dir opencv-python==4.8.0.74
5459
COPY docker/requirements-pip-pytorch.txt requirements-pip-pytorch.txt
5560
RUN pip install --ignore-installed --no-deps -r requirements-pip-pytorch.txt \
5661
&& rm requirements-pip-pytorch.txt

docker/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,4 @@ if [ $BUILD_DOCKER = "1" ]; then
6868
# Exit by printing usage.
6969
else
7070
echo "Usage: ./build.sh [--build] [--push] [--force] [--default]"
71-
fi
71+
fi

docker/manifest.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
"registry": "nvcr.io",
33
"repository": "nvidia/tao/tao-toolkit",
4-
"tag": "5.0.0-pyt-base"
4+
"digest": "sha256:ff920080025e2dd12d5740c3d722f0630f1f84630ae6f55e56f872d4914fca03",
5+
"tag": "5.5.0-pyt-base"
56
}

docker/requirements-pip-odise.txt

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
# ODISE
2-
huggingface-hub
3-
fvcore
4-
ftfy
2+
huggingface-hub==0.20.3
3+
fvcore==0.1.5.post20221221
4+
ftfy==6.1.3
55
kornia==0.6
66
diffdist==0.1
77
nltk>=3.6.2
8-
taming-transformers-rom1504
8+
taming-transformers-rom1504==0.0.6
99
importlib-metadata==4.11.3
10-
flake8-comprehensions
10+
flake8-comprehensions==3.14.0
1111
git+https://github.com/facebookresearch/detectron2.git
1212
git+https://github.com/openai/CLIP.git@main#egg=clip
1313
git+https://github.com/cocodataset/panopticapi.git
1414
yacs>=0.1.8
1515
iopath==0.1.9
16-
jmespath
17-
s3transfer
18-
pathspec
19-
black
16+
jmespath==0.10.0
17+
s3transfer==0.10.0
18+
pathspec==0.12.1
19+
black==22.3.0
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
fairscale==0.4.12
22
lpips==0.1.4
3-
lightning-utilities==0.8.0
4-
pytorch-lightning==1.8.5
3+
lightning-utilities==0.10.1
4+
pytorch-lightning==2.2.0
55
pytorch_metric_learning==1.7.1
6-
pytorch-msssim
7-
thop
8-
timm>=0.9.6.dev0
6+
pytorch-msssim==1.0.0
7+
thop==0.1.1.post2209072238
8+
timm==0.9.12
99
torchmetrics==0.10.3
10-
open-clip-torch[training]==2.23.0
10+
open-clip-torch[training]==2.24.0
1111
sentencepiece==0.1.99
12-
ftfy
12+
ftfy==6.1.3
1313
torch-pruning==1.2.2

docker/requirements-pip.txt

Lines changed: 93 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,102 @@
1-
addict
2-
anyconfig
3-
astroid==2.5.2
4-
boto3
5-
botocore
6-
ccimport
7-
click
8-
colored
9-
cumm-cu114
10-
cutex
11-
easydict
12-
einops
13-
faiss-cpu
14-
fire
15-
flake8==6.0.0
16-
gdown
17-
gradio
1+
addict==2.4.0
2+
anyconfig==0.14.0
3+
apispec==6.6.1
4+
apispec_webframeworks==1.1.0
5+
astroid==3.0.3
6+
boto3==1.34.45
7+
botocore==1.34.45
8+
ccimport==0.4.2
9+
click==8.1.7
10+
colored==2.2.4
11+
cumm-cu114==0.4.11
12+
cutex==0.3.8
13+
Cython==0.29.36
14+
easydict==1.12
15+
einops==0.7.0
16+
faiss-cpu==1.7.4
17+
fire==0.5.0
18+
flake8==7.0.0
19+
flask==3.0.3
20+
flask_limiter==3.6.0
21+
gdown==5.1.0
22+
gradio==4.19.2
1823
hydra-core==1.2.0
19-
imgaug
20-
imageio
21-
isort
22-
lark
23-
lazy-import
24-
lazy_object_proxy
25-
lazy_loader
26-
librosa
27-
lmdb
28-
matplotlib
29-
mccabe
30-
mypy
31-
mypy-extensions
32-
natsort
33-
ninja
34-
nltk
35-
nvidia-eff
36-
nvidia-eff-tao-encryption
37-
numpy
38-
omegaconf
24+
imgaug==0.4.0
25+
imageio==2.34.0
26+
isort==4.3.21
27+
jsonlines==4.0.0
28+
lark==1.1.9
29+
lazy-import==0.2.2
30+
lazy-object-proxy==1.10.0
31+
lazy_loader==0.3
32+
librosa==0.10.1
33+
lmdb==1.4.1
34+
marshmallow==3.21.2
35+
marshmallow_enum==1.5.1
36+
matplotlib==3.8.2
37+
mccabe==0.7.0
38+
mypy==1.8.0
39+
mypy-extensions==1.0.0
40+
natsort==8.4.0
41+
ninja==1.11.1.1
42+
nltk==3.8.1
43+
nvidia-eff==0.6.5
44+
nvidia-eff-tao-encryption==0.1.8
45+
numpy==1.24.4
46+
omegaconf==2.3.0
3947
# Install onnx-graphsurgeon with extra-index-url
4048
--extra-index-url https://pypi.ngc.nvidia.com
41-
onnx-graphsurgeon
42-
onnx-simplifier
43-
onnxoptimizer
44-
onnxruntime
45-
onnxsim
49+
# onnx overrides 24.04 stack version (1.16.0)
50+
onnx==1.15.0
51+
onnx-graphsurgeon==0.3.27
52+
onnx-simplifier==0.4.35
53+
onnxoptimizer==0.3.13
54+
onnxruntime==1.17.0
55+
onnxsim==0.4.35
4656
opencv-python==4.8.0.74
47-
pccm
57+
openmim==0.3.9
58+
parameterized==0.9.0
59+
pccm==0.4.11
4860
pillow==9.5.0
49-
Polygon3
50-
protobuf
51-
pyarmor
52-
pyclipper
61+
Polygon3==3.0.9.1
62+
protobuf==3.20.2
63+
pyarmor==7.7.4
64+
pyclipper==1.3.0.post5
5365
pycocotools==2.0.6
54-
pycodestyle==2.10.0
55-
pycuda
56-
pyDeprecate
57-
pydocstyle==3.0.0
58-
pyflakes
59-
pylint==2.2.2
60-
pynini==2.1.5
61-
pyquaternion
62-
pyrr
63-
PyWavelets
64-
PyYAML
65-
rich
66-
scikit-image
67-
shapely
68-
soundfile
69-
spconv-cu114
70-
tabulate
71-
tensorboardX
72-
terminaltables
73-
tifffile
66+
pycodestyle==2.11.1
67+
pycuda==2024.1
68+
pyDeprecate==0.3.2
69+
pydocstyle==6.3.0
70+
pyflakes==3.2.0
71+
pylint==3.0.3
72+
pynini==2.1.6
73+
pyquaternion==0.9.9
74+
pyrr==0.10.3
75+
PyWavelets==1.5.0
76+
PyYAML==6.0.1
77+
rich==13.4.2
78+
scikit-image==0.22.0
79+
setuptools==69.1.0
80+
shapely==2.0.3
81+
soundfile==0.12.1
82+
spconv-cu114==2.3.6
83+
tabulate==0.9.0
84+
tensorboardX==2.6.2.2
85+
terminaltables==3.1.10
86+
tifffile==2024.2.12
7487
# Upgrading transformer due to an error with importlib version checks.
75-
transformers
76-
tokenizers
88+
transformers==4.37.2
89+
tokenizers==0.15.2
7790
# Same issue with tqdm.pip
78-
tqdm
79-
ujson
80-
unidecode
81-
wandb
82-
wget
83-
wrapt
84-
yapf
91+
tqdm==4.65.2
92+
ujson==5.9.0
93+
Unidecode==1.3.8
94+
wandb==0.16.3
95+
wget==3.2
96+
wrapt==1.16.0
97+
yapf==0.40.2
8598
# updating base py packages versions to resolve oss vulnerabilities
86-
starlette
87-
python-multipart
88-
fastapi
89-
pyarrow==14.0.1
99+
starlette==0.36.3
100+
python-multipart==0.0.9
101+
fastapi==0.109.2
102+
pyarrow==14.0.1

nvidia_tao_pytorch/api/__init__.py

Whitespace-only changes.

nvidia_tao_pytorch/api/api_utils/__init__.py

Whitespace-only changes.
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import requests
2+
import json
3+
4+
def invoke_microservices(request_dict):
    """Send one request to the TAO microservices REST API on localhost:8000.

    Args:
        request_dict (dict): Description of the call. ``api_endpoint`` selects the
            operation and must be one of ``get_networks``, ``get_actions``,
            ``list_ptms``, ``get_schema``, ``post_action``, ``get_jobs`` or
            ``get_job_status``. All other keys are optional and are read with
            ``dict.get`` using the defaults visible below.

    Returns:
        The decoded JSON body of the response when the status code is 200 or 201.

    Raises:
        ValueError: If ``api_endpoint`` is missing or not one of the supported
            values, or if the service answers with any other status code. In the
            latter case the message is the body's ``error_desc`` when present,
            otherwise the status code plus the decoded (or raw) body.
    """
    url = "http://localhost:8000/api/v1"
    api_endpoint = request_dict.get('api_endpoint', None)
    neural_network_name = request_dict.get('neural_network_name', None)
    ngc_api_key = request_dict.get('ngc_api_key', None)
    action_name = request_dict.get('action_name', None)
    storage = request_dict.get('storage', None)
    specs = request_dict.get('specs', None)
    job_id = request_dict.get('job_id', None)

    telemetry_opt_out = request_dict.get('telemetry_opt_out', "no")
    use_ngc_staging = request_dict.get('use_ngc_staging', "True")
    tao_api_ui_cookie = request_dict.get('tao_api_ui_cookie', "")
    tao_api_admin_key = request_dict.get('tao_api_admin_key', "")
    tao_api_base_url = request_dict.get('tao_api_base_url', "https://nvidia.com")
    tao_api_status_callback_url = request_dict.get('tao_api_status_callback_url', "https://nvidia.com")
    automl_experiment_number = request_dict.get('automl_experiment_number', "")
    hosted_service_interaction = request_dict.get('hosted_service_interaction', "")

    network_url = f"{url}/neural_networks/{neural_network_name}"

    if api_endpoint == "get_networks":
        response = requests.get(f"{url}/neural_networks")
    elif api_endpoint == "get_actions":
        response = requests.get(f"{network_url}/actions")
    elif api_endpoint == "list_ptms":
        req_obj = {"ngc_api_key": ngc_api_key}
        # NOTE(review): this body is sent form-encoded, while post_action sends a
        # JSON string - confirm that this is what the service expects.
        response = requests.post(f"{network_url}/pretrained_models", data=req_obj)
    elif api_endpoint == "get_schema":
        response = requests.get(f"{network_url}/actions/{action_name}:schema")
    elif api_endpoint == "post_action":
        req_obj = {"specs": specs,
                   "cloud_metadata": storage,
                   "ngc_api_key": ngc_api_key,
                   "job_id": job_id,
                   "telemetry_opt_out": telemetry_opt_out,
                   "use_ngc_staging": use_ngc_staging,
                   "tao_api_ui_cookie": tao_api_ui_cookie,
                   "tao_api_admin_key": tao_api_admin_key,
                   "tao_api_base_url": tao_api_base_url,
                   "tao_api_status_callback_url": tao_api_status_callback_url,
                   "automl_experiment_number": automl_experiment_number,
                   "hosted_service_interaction": hosted_service_interaction,
                   }
        response = requests.post(f"{network_url}/actions/{action_name}", data=json.dumps(req_obj))
    elif api_endpoint == "get_jobs":
        response = requests.get(f"{network_url}/actions/{action_name}:ids")
    elif api_endpoint == "get_job_status":
        response = requests.get(f"{network_url}/actions/{action_name}/{job_id}")
    else:
        # Without this branch `response` is never bound and the status check
        # below fails with an UnboundLocalError instead of a clear message.
        raise ValueError(f"Unsupported api_endpoint: {api_endpoint!r}")

    # A requests.Response is truthy whenever its status is below 400, so the
    # status-code membership test alone is sufficient here.
    if response.status_code in (200, 201):
        return response.json()

    # Decode the error body once. It may not be JSON at all (e.g. an HTML error
    # page), and it may be JSON that is not an object.
    try:
        error_body = response.json()
    except ValueError:
        error_body = None

    if isinstance(error_body, dict) and error_body.get('error_desc'):
        raise ValueError(f"{error_body['error_desc']}")

    detail = error_body if error_body is not None else response.text
    raise ValueError(f"Failed to get execute (Status Code: {response.status_code} : {detail})")

0 commit comments

Comments
 (0)