Skip to content

Commit 3bc5904

Browse files
committed
Merge branch 'main' into main-public
2 parents 12a7365 + 3331716 commit 3bc5904

38 files changed

+1041
-274
lines changed

GetStarted.md

Lines changed: 31 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,8 @@ approaches.
6161
6262
3. **Install Intel Transfer Learning Tool**
6363
64-
Use the Basic Installation instructions unless you plan on making code changes.
64+
Use the Basic Installation instructions unless you plan on making code changes or installing the latest code from the repository.
65+
Please note that mixing basic and advanced installation options within the same virtual environment is not supported.
6566
6667
a. **Basic Installation**
6768
@@ -107,29 +108,6 @@ approaches.
107108
tlt --help
108109
```
109110
110-
6. **Prepare the Dataset**
111-
112-
The Intel Transfer Learning Tool can use datasets from existing dataset catalogs
113-
or custom datasets that you have on your machine. The following CLI and API
114-
examples use the Intel Transfer Learning Tool's custom dataset option
115-
(`--dataset-dir`) with the TensorFlow flowers dataset.
116-
117-
```
118-
# Create a directory for the dataset to be downloaded
119-
DATASET_DIR=/tmp/dataset
120-
mkdir -p ${DATASET_DIR}
121-
122-
# Download and extract the dataset (be sure https_proxy is set if needed)
123-
wget -P ${DATASET_DIR} https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
124-
tar -xzf ${DATASET_DIR}/flower_photos.tgz -C ${DATASET_DIR}
125-
126-
# Set the DATASET_DIR to the extracted images folder
127-
DATASET_DIR=${DATASET_DIR}/flower_photos
128-
```
129-
130-
At this point, you should have a `flower_photos` folder with
131-
subfolders for `daisy`, `dandelion`, `roses`, `sunflower`, and `tulips`.
132-
133111
## ③ Run the Intel Transfer Learning Tool
134112
135113
With the Intel Transfer Learning Tool, you can train AI models with TensorFlow or
@@ -153,20 +131,23 @@ tlt list models --use-case image_classification
153131
154132
**Train a Model**
155133
156-
In this example, we'll use the ``tlt train`` command to use the TensorFlow
157-
ResNet50v1.5 model using the flowers dataset we already prepared and write the
158-
trained model to a folder specified with `--output-dir`.
159-
134+
In this example, we'll use the `tlt train` command to retrain the TensorFlow
135+
ResNet50v1.5 model using a flowers dataset from the
136+
[TensorFlow Datasets catalog](https://www.tensorflow.org/datasets/catalog/tf_flowers).
137+
The `--dataset-dir` and `--output-dir` paths need to point to writable folders on your system.
160138
```
161-
tlt train -f tensorflow --model-name resnet_v1_50 --dataset-dir ${DATASET_DIR} --output-dir /tmp/output
139+
# Use the following environment variable setting to reduce the warnings and log output from TensorFlow
140+
export TF_CPP_MIN_LOG_LEVEL="2"
141+
142+
tlt train -f tensorflow --model-name resnet_v1_50 --dataset-name tf_flowers --dataset-dir "/tmp/data-${USER}" --output-dir "/tmp/output-${USER}"
162143
```
163144
```
164145
Model name: resnet_v1_50
165146
Framework: tensorflow
147+
Dataset name: tf_flowers
166148
Training epochs: 1
167-
Dataset dir: /tmp/dataset/flower_photos
168-
Output directory: /tmp/output
169-
Found 3670 files belonging to 5 classes.
149+
Dataset dir: /tmp/data-user
150+
Output directory: /tmp/output-user
170151
...
171152
Model: "sequential"
172153
_________________________________________________________________
@@ -179,9 +160,9 @@ Total params: 23,571,397
179160
Trainable params: 10,245
180161
Non-trainable params: 23,561,152
181162
_________________________________________________________________
182-
Checkpoint directory: /tmp/output/resnet_v1_50_checkpoints
163+
Checkpoint directory: /tmp/output-user/resnet_v1_50_checkpoints
183164
86/86 [==============================] - 24s 248ms/step - loss: 0.4600 - acc: 0.8438
184-
Saved model directory: /tmp/output/resnet_v1_50/1
165+
Saved model directory: /tmp/output-user/resnet_v1_50/1
185166
```
186167
187168
After training completes, the `tlt train` command evaluates the model. The loss and
@@ -217,22 +198,27 @@ from tlt.datasets import dataset_factory
217198
from tlt.models import model_factory
218199
from tlt.utils.types import FrameworkType, UseCaseType
219200
220-
# Specify the directory where the TensorFlow flowers dataset has been downloaded and extracted
221-
# (https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz)
222-
dataset_dir = os.environ["DATASET_DIR"] if "DATASET_DIR" in os.environ else \
223-
os.path.join(os.environ["HOME"], "dataset")
201+
username = os.getenv('USER', 'user')
202+
203+
# Specify a writable directory for the dataset to be downloaded
204+
dataset_dir = '/tmp/data-{}'.format(username)
205+
if not os.path.exists(dataset_dir):
206+
os.makedirs(dataset_dir)
224207
225-
# Specify a directory for output
226-
output_dir = os.environ["OUTPUT_DIR"] if "OUTPUT_DIR" in os.environ else \
227-
os.path.join(os.environ["HOME"], "output")
208+
# Specify a writable directory for output (such as saved model files)
209+
output_dir = '/tmp/output-{}'.format(username)
210+
if not os.path.exists(output_dir):
211+
os.makedirs(output_dir)
228212
229213
# Get the model
230214
model = model_factory.get_model(model_name="resnet_v1_50", framework=FrameworkType.TENSORFLOW)
231215
232-
# Load and preprocess a dataset
233-
dataset = dataset_factory.load_dataset(dataset_dir = os.path.join(dataset_dir, "flower_photos"),
234-
use_case=UseCaseType.IMAGE_CLASSIFICATION, \
235-
framework=FrameworkType.TENSORFLOW)
216+
# Download and preprocess the flowers dataset from the TensorFlow datasets catalog
217+
dataset = dataset_factory.get_dataset(dataset_dir=dataset_dir,
218+
dataset_name='tf_flowers',
219+
use_case=UseCaseType.IMAGE_CLASSIFICATION,
220+
framework=FrameworkType.TENSORFLOW,
221+
dataset_catalog='tf_datasets')
236222
dataset.preprocess(image_size=model.image_size, batch_size=32)
237223
dataset.shuffle_split(train_pct=.75, val_pct=.25)
238224

README.md

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
1+
*Note: You may find it easier to read about the Intel Transfer Learning Tool, follow the Get
2+
Started guide, and browse the API material from our published documentation site
3+
https://intelai.github.io/transfer-learning.*
4+
15
<!-- SkipBadges -->
6+
27
# Intel® Transfer Learning Tool
38

49
Transfer learning workflows use the knowledge learned by a pre-trained model on
@@ -55,15 +60,15 @@ figure:
5560

5661
## Get Started
5762

58-
The [Get Started](GetStarted.md) guide walks you through the steps to check
59-
system requirements, install, and then run the tool with a couple of examples
60-
showing no-code CLI and low-code API approaches. After that, you can check out
63+
Check out the [Get Started Guide](GetStarted.md) which will walk you through the
64+
steps to check system requirements, install, and then run the tool with a couple of
65+
examples showing no-code CLI and low-code API approaches. After that, you can check out
6166
these additional CLI and API [Examples](examples/README.md).
6267

6368
<!-- ExpandGetStarted-Start -->
64-
As described in the [Get Started](GetStarted.md) guide, once you have a Python
65-
3.9 environment set up, you do a basic install of the Intel Transfer Learning
66-
Tool using:
69+
As described in the [Get Started Guide](GetStarted.md), once you have a Python
70+
environment set up, you do a basic install of the Intel Transfer Learning
71+
Tool. Here are some examples of commands you will find in the [Get Started Guide](GetStarted.md):
6772

6873
```
6974
pip install intel-transfer-learning-tool

docker/README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,6 @@ docker compose build
2424
OR
2525
```bash
2626
docker pull intel/ai-tools:tlt-0.5.0
27-
docker pull intel/ai-tools:tlt-devel-0.5.0
28-
docker pull intel/ai-tools:tlt-dist-0.5.0
29-
docker pull intel/ai-tools:tlt-dist-devel-0.5.0
3027
```
3128

3229
## Use Docker Image
@@ -56,11 +53,19 @@ OR
5653
helm repo add cowboysysop https://cowboysysop.github.io/charts/
5754
helm install <release name> cowboysysop/training-operator
5855
```
56+
57+
### 3. Build Distributed Container
58+
```bash
59+
cd docker
60+
docker compose build
61+
docker push <registry>:tlt-dist-latest
62+
```
63+
5964
### 3. Deploy TLT Distributed Job
6065
For more customization information, see the chart [README](./docker/chart/README.md)
6166
```bash
6267
export NAMESPACE=kubeflow
63-
helm install --namespace ${NAMESPACE} --set ... tlt-distributed ./docker/chart
68+
helm install --namespace ${NAMESPACE} --set imageName=<registry> --set imageTag=tlt-dist-latest --set ... tlt-distributed ./docker/chart
6469
```
6570
### 4. View
6671
To view your workflow progress

examples/cli/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,10 @@ wget -P ${DATASET_DIR} https://storage.googleapis.com/download.tensorflow.org/ex
6060
tar -xzf ${DATASET_DIR}/flower_photos.tgz -C ${DATASET_DIR}
6161
6262
# Set the DATASET_DIR to the extracted images folder
63-
DATASET_DIR=${DATASET_DIR}/flower_photos
63+
export DATASET_DIR=${DATASET_DIR}/flower_photos
6464
6565
# Suppress debug information from TensorFlow 2.12
66-
TF_CPP_MIN_LOG_LEVEL=2
66+
export TF_CPP_MIN_LOG_LEVEL=2
6767
```
6868

6969
After the dataset directory is ready, use the `tlt train` command to train one of the models from

examples/cli/text_classification.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ the label (`ham` or `spam`) and the second column is the text of the SMS message
2020
labels are replaced with numerical values before training.
2121
```bash
2222
# Create dataset and output directories
23-
DATASET_DIR=/tmp/data
24-
OUTPUT_DIR=/tmp/output
23+
export DATASET_DIR=/tmp/data
24+
export OUTPUT_DIR=/tmp/output
2525
mkdir -p ${DATASET_DIR}
2626
mkdir -p ${OUTPUT_DIR}
2727

@@ -71,8 +71,8 @@ and [glue/cola](https://www.tensorflow.org/datasets/catalog/glue#gluecola_defaul
7171

7272
```bash
7373
# Create dataset and output directories
74-
DATASET_DIR=/tmp/data
75-
OUTPUT_DIR=/tmp/output
74+
export DATASET_DIR=/tmp/data
75+
export OUTPUT_DIR=/tmp/output
7676
mkdir -p ${DATASET_DIR}
7777
mkdir -p ${OUTPUT_DIR}
7878

@@ -114,8 +114,8 @@ one epoch using 2 nodes and 2 processes per node.
114114

115115
```bash
116116
# Create dataset and output directories
117-
DATASET_DIR=/tmp/data
118-
OUTPUT_DIR=/tmp/output
117+
export DATASET_DIR=/tmp/data
118+
export OUTPUT_DIR=/tmp/output
119119
mkdir -p ${DATASET_DIR}
120120
mkdir -p ${OUTPUT_DIR}
121121

notebooks/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ gin-config~=0.5.0
66
intel-extension-for-pytorch==1.13.100
77
intel-tensorflow==2.12.0
88
ipython-genutils~=0.2.0
9-
ipython~=8.13.2
9+
ipython~=8.12.2; python_version<'3.9'
10+
ipython~=8.13.2; python_version>='3.9'
1011
ipywidgets~=8.0.6
1112
jmespath~=1.0.1
1213
matplotlib-inline~=0.1.6

notebooks/setup.md

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,44 +2,49 @@
22

33
Use the instructions below to install the dependencies required to run the notebooks.
44

5-
System Requirements:
6-
1. Ubuntu 20.04
5+
Software Requirements:
6+
1. Linux* system (validated on Ubuntu* 20.04/22.04 LTS)
77
2. Python3 (3.8, 3.9, or 3.10), Pip/Conda and Virtualenv
88
3. git
99

1010
## Set Up Notebook Environment
1111

12-
1. Install Intel® Transfer Learning Tool using the Developer Installation option in the [Get Started](/GetStarted.md) Guide.
13-
This is required for the Intel Transfer Learning Tool tutorial notebooks, E2E notebooks, and performance comparison. Follow the
14-
instructions in the [Get Started Guide](/GetStarted.md). You can
15-
skip this step if you are only running the native framework notebooks.
12+
1. Install Intel® Transfer Learning Tool using any of the installation options in the [Get Started Guide](/GetStarted.md).
13+
This is required for the Intel Transfer Learning Tool tutorial notebooks, E2E notebooks, and performance comparison.
14+
You can skip this step if you are only running the native framework notebooks.
1615

17-
2. Activate the virtualenv or conda environment used to install Intel Transfer Learning Tool,
16+
2. Clone the GitHub repo if you haven't done this in step 1
17+
18+
```
19+
git clone https://github.com/IntelAI/transfer-learning.git
20+
cd transfer-learning
21+
```
22+
23+
3. Activate the virtualenv or conda environment used to install Intel Transfer Learning Tool,
1824
then from inside the activated environment, run these steps:
1925
```
2026
pip install --upgrade pip
2127
pip install -r notebooks/requirements.txt
2228
```
2329

24-
3. Set environment variables for the path to the dataset folder and an output directory.
30+
4. Set environment variables for the path to the dataset folder and an output directory.
2531
The dataset and output directories can be empty. The notebook will download the dataset to
2632
the dataset directory, if it is empty. Subsequent runs will reuse the dataset.
2733
If the `DATASET_DIR` and `OUTPUT_DIR` variables are not defined, the notebooks will
2834
default to use `~/dataset` and `~/output`.
2935
```
30-
export DATASET_DIR=<directory to download the dataset>
31-
export OUTPUT_DIR=<output directory for the saved model>
32-
36+
export DATASET_DIR=~/dataset
37+
export OUTPUT_DIR=~/output
3338
mkdir -p $DATASET_DIR
3439
mkdir -p $OUTPUT_DIR
3540
```
36-
4. Navigate to the notebook directory in your clone of the Transfer Learning repo, and then start the
41+
5. Navigate to the notebook directory in your clone of the Transfer Learning repo, and then start the
3742
[notebook server](https://jupyter.readthedocs.io/en/latest/running.html#starting-the-notebook-server):
3843
```
3944
cd notebooks
4045
jupyter notebook --port 8888
4146
```
42-
5. Copy and paste the URL from the terminal to your browser to view and run the notebooks.
47+
6. Copy and paste the URL from the terminal to your browser to view and run the notebooks.
4348

4449
Once you have the environment and dependencies set up, see the list of available
45-
notebook examples.
50+
[notebooks](/notebooks/README.md).

notebooks/text_classification/tlt_api_tf_text_classification/TLT_TF_Text_Classification.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,9 @@
202202
"id": "ccac8980",
203203
"metadata": {},
204204
"source": [
205-
"### Option B: Use the TFDS catalog\n",
205+
"### Option B: Use the TensorFlow datasets catalog\n",
206206
"\n",
207-
"Option B allows for using a dataset from the [TensorFlow datasets catalog](https://www.tensorflow.org/datasets/catalog/overview). The dataset factory currently supports the following TFDS text classification datasets: [imdb_reviews](https://www.tensorflow.org/datasets/catalog/imdb_reviews), [glue/sst2](https://www.tensorflow.org/datasets/catalog/imdb_reviews), [glue/cola](https://www.tensorflow.org/datasets/catalog/glue#gluecola_default_config), and [ag_news_subset](https://www.tensorflow.org/datasets/catalog/ag_news_subset)."
207+
"Option B allows for using a dataset from the [TensorFlow datasets catalog](https://www.tensorflow.org/datasets/catalog/overview). The dataset factory currently supports the following TFDS text classification datasets: [imdb_reviews](https://www.tensorflow.org/datasets/catalog/imdb_reviews), [glue/sst2](https://www.tensorflow.org/datasets/catalog/glue#gluesst2), [glue/cola](https://www.tensorflow.org/datasets/catalog/glue#gluecola_default_config), and [ag_news_subset](https://www.tensorflow.org/datasets/catalog/ag_news_subset)."
208208
]
209209
},
210210
{

tests/tools/cli/test_train_cli.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,8 @@ def test_train_init_checkpoints(mock_load_dataset, mock_get_model, model_name, f
238238
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=2,
239239
initial_checkpoints=init_checkpoints, early_stopping=False,
240240
lr_decay=False, ipex_optimize=False, distributed=False,
241-
hostfile=None, nnodes=1, nproc_per_node=1)
241+
hostfile=None, nnodes=1, nproc_per_node=1, use_horovod=False,
242+
hvd_start_timeout=30)
242243
data_mock.preprocess.assert_called_once_with(batch_size=32)
243244

244245
# Verify that the train command exit code is successful
@@ -314,7 +315,8 @@ def test_train_features(mock_inspect, mock_load_dataset, mock_get_model, model_n
314315
model_mock.train.assert_called_once_with(data_mock, output_dir=output_dir, epochs=15,
315316
initial_checkpoints=None, early_stopping=early_stopping,
316317
lr_decay=lr_decay, ipex_optimize=False, distributed=False,
317-
hostfile=None, nnodes=1, nproc_per_node=1)
318+
hostfile=None, nnodes=1, nproc_per_node=1, use_horovod=False,
319+
hvd_start_timeout=30)
318320

319321
# Verify that the train command exit code is successful
320322
assert result.exit_code == 0

tests/utils/test_file_utils.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import tempfile
2525
from unittest.mock import MagicMock
2626

27-
from tlt.utils.file_utils import validate_model_name, download_file
27+
from tlt.utils.file_utils import download_file, get_model_name_from_path, validate_model_name
2828

2929

3030
@pytest.mark.common
@@ -70,3 +70,17 @@ def test_download():
7070
# Delete the temp output directory
7171
if os.path.exists(output_dir) and os.path.isdir(output_dir):
7272
shutil.rmtree(output_dir)
73+
74+
75+
@pytest.mark.common
76+
@pytest.mark.parametrize('model_dir,expected_model_name',
77+
[['/tmp/user/resnet_v2_50/12/', 'resnet_v2_50'],
78+
['/tmp/user/resnet_v2_50/12', 'resnet_v2_50'],
79+
['/localdisk/folder/google_bert_uncased_L-2_H-128_A-2/8/',
80+
'google_bert_uncased_L-2_H-128_A-2']])
81+
def test_get_model_name_from_path(model_dir, expected_model_name):
82+
"""
83+
Tests the file utils method that returns the model name from a model directory path. Verifies that the model name
84+
returned matches the expected model name.
85+
"""
86+
assert expected_model_name == get_model_name_from_path(model_dir)

0 commit comments

Comments
 (0)