Skip to content

Commit 4f10bc5

Browse files
committed
Update tiledb-ml package to work with python 3.12
1 parent 8af7c75 commit 4f10bc5

File tree

14 files changed

+337
-205
lines changed

14 files changed

+337
-205
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ jobs:
99
strategy:
1010
fail-fast: false
1111
matrix:
12-
python-version: ["3.9"]
13-
ml-deps:
14-
- "torch==2.1.2+cpu torchvision==0.16.2+cpu torchdata==0.7.1 tensorflow-cpu==2.15.1 "
15-
- "torch==2.3.1+cpu torchvision==0.18.1+cpu torchdata==0.8.0 'tensorflow-cpu<2.16'"
12+
python-version: ["3.12"]
13+
# ml-deps:
14+
# - "torch==2.1.2+cpu torchvision==0.16.2+cpu torchdata==0.7.1 tensorflow-cpu==2.15.1 "
15+
# - "torch==2.3.1+cpu torchvision==0.18.1+cpu torchdata==0.8.0 'tensorflow-cpu<2.16'"
1616
env:
1717
run_coverage: ${{ github.ref == 'refs/heads/master' }}
1818

@@ -28,14 +28,13 @@ jobs:
2828
uses: actions/cache@v4
2929
with:
3030
path: ~/.cache/pip
31-
key: ${{ runner.os }}:ml-deps=[${{ matrix.ml-deps }}]
31+
key: ${{ runner.os }} # :ml-deps=[${{ matrix.ml-deps }}]
3232

3333
- name: Install dependencies
3434
run: |
3535
pip install --upgrade pip
36-
pip install --extra-index-url https://download.pytorch.org/whl/cpu ${{ matrix.ml-deps }}
3736
pip install pytest-mock pytest-cov scikit-learn==1.0.2
38-
pip install -e .[cloud]
37+
pip install -e .[full]
3938
4039
- name: Run pre-commit hooks
4140
run: |

examples/cloud/serverless_training/pytorch/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ Firstly, we ingest all training images and labels in TileDB arrays and register
66
and finally, we get predictions serverlessly using the trained model. If you want to run the example, you will need a TileDB-Cloud account as described
77
[here](https://docs.tiledb.com/cloud/tutorials/start-here). After signing up, you should export your username and password
88
as environment variables (**TILEDB_USER_NAME**, **TILEDB_PASSWD**) in order to run the ingestion, model training and prediction UDFs. Moreover,
9-
please add your TileDB namespace and your **S3** bucket in each script.
9+
please set your TileDB workspace and teamspace and your **S3** bucket in each script.
1010

1111
# Steps
1212

examples/cloud/serverless_training/pytorch/data_ingestion.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
TILEDB_USER_NAME = os.environ.get("TILEDB_USER_NAME")
99
TILEDB_PASSWD = os.environ.get("TILEDB_PASSWD")
1010

11-
# Your TileDB namespace
11+
# Your TileDB workspace/teamspace
1212
TILEDB_WORKSPACE = "your_tiledb_WORKSPACE"
1313
TILEDB_TEAMSPACE = "your_tiledb_TEAMSPACE"
1414

examples/cloud/serverless_training/pytorch/model_training.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import os
22

33
import tiledb.client
4+
from tiledb.ml.readers.types import ArrayParams
45

56
# Your TileDB username and password, exported as environment variables
67
TILEDB_USER_NAME = os.environ.get("TILEDB_USER_NAME")
@@ -13,9 +14,9 @@
1314
# Your S3 bucket
1415
S3_BUCKET = "your_s3_bucket"
1516

16-
IMAGES_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_images"
17-
LABELS_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_labels"
18-
MODEL_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/s3://{S3_BUCKET}/mnist_model"
17+
IMAGES_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_images"
18+
LABELS_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_labels"
19+
MODEL_URI = f"tiledb://{TILEDB_WORKSPACE}/{TILEDB_TEAMSPACE}/mnist_model"
1920

2021
# The size of each slice from the image and label TileDB arrays.
2122
IO_BATCH_SIZE = 20000
@@ -50,8 +51,20 @@ def forward(self, x: torch.Tensor) -> Any:
5051
logits = self.linear_relu_stack(x)
5152
return logits
5253

54+
def do_random_noise(img, mag=0.1):
55+
noise = np.random.uniform(-1, 1,img.shape)*mag
56+
img = img + noise
57+
img = np.clip(img,0,1)
58+
return img
59+
5360
with tiledb.open(IMAGES_URI) as x, tiledb.open(LABELS_URI) as y:
54-
train_loader = PyTorchTileDBDataLoader(x, y, batch_size=IO_BATCH_SIZE)
61+
train_loader = PyTorchTileDBDataLoader(
62+
ArrayParams(x, fn=do_random_noise),
63+
ArrayParams(y),
64+
batch_size=IO_BATCH_SIZE,
65+
num_workers=0,
66+
shuffle_buffer_size=256,
67+
)
5568

5669
net = Net(shape=(28, 28))
5770
criterion = nn.CrossEntropyLoss()
@@ -96,7 +109,7 @@ def forward(self, x: torch.Tensor) -> Any:
96109

97110
model = PyTorchTileDBModel(
98111
uri="mnist_model",
99-
namespace=TILEDB_NAMESPACE,
112+
teamspace=TILEDB_TEAMSPACE,
100113
model=net,
101114
optimizer=optimizer,
102115
)

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import setuptools
22

3-
tensorflow = ["tensorflow>=2.6, <2.16.0"]
4-
pytorch = ["torch>=1.11", "torchdata"]
3+
tensorflow = ["tensorflow>=2.6"]
4+
pytorch = ["torch>=1.11", "torchdata>=0.6.0,<0.8.0"]
55
sklearn = ["scikit-learn>=1.0"]
66
cloud = ["tiledb-client"]
77
full = sorted({"torchvision", *tensorflow, *pytorch, *sklearn, *cloud})

tests/models/test_cloud_utils.py

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,10 @@
11
import pytest
22

33
from tiledb.ml.models._cloud_utils import (
4-
get_cloud_uri,
5-
get_s3_prefix,
64
update_file_properties,
75
)
86

9-
107
class TestCloudUtils:
11-
def test_get_s3_prefix(self, mocker):
12-
13-
assert get_s3_prefix(None) is None
14-
15-
profile = mocker.patch(
16-
"tiledb.client.user_profile",
17-
return_value=mocker.Mock(username="foo", default_s3_path="bar"),
18-
)
19-
profile.default_s3_path = "bar"
20-
assert get_s3_prefix("foo") == "bar/ml_models"
21-
22-
profile = mocker.patch(
23-
"tiledb.client.user_profile",
24-
return_value=mocker.Mock(username="foo", default_s3_path=None),
25-
)
26-
profile.default_s3_path = "bar"
27-
assert get_s3_prefix("foo") is None
28-
29-
org = mocker.patch(
30-
"tiledb.client.organization",
31-
return_value=mocker.Mock(default_s3_path="orgbar"),
32-
)
33-
org.default_s3_path = "orgbar"
34-
assert get_s3_prefix("nofoo") == "orgbar/ml_models"
35-
36-
org = mocker.patch(
37-
"tiledb.client.organization",
38-
return_value=mocker.Mock(
39-
default_s3_path=None,
40-
),
41-
)
42-
org.default_s3_path = "orgbar"
43-
assert get_s3_prefix("nofoo") is None
44-
45-
def test_get_cloud_uri(self, mocker):
46-
mocker.patch(
47-
"tiledb.ml.models._cloud_utils.get_s3_prefix", return_value="s3://"
48-
)
49-
50-
assert "tiledb://test_namespace/s3://tiledb_array" == get_cloud_uri(
51-
uri="tiledb_array", namespace="test_namespace"
52-
)
53-
54-
mocker.patch("tiledb.ml.models._cloud_utils.get_s3_prefix", return_value=None)
55-
with pytest.raises(ValueError) as ex:
56-
get_cloud_uri(uri="tiledb_array", namespace="test_namespace")
57-
58-
assert "You must set the default s3 prefix path for ML models" in str(ex.value)
598

609
def test_update_file_properties(self, mocker):
6110
mock_tiledb_cloud_update_file_properties = mocker.patch(

tests/models/test_pytorch_models.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ class TestPyTorchModel:
103103
],
104104
)
105105
def test_save(self, tmpdir, net, optimizer):
106+
if optimizer.__name__ == "Muon":
107+
pytest.skip("Muon optimizer is 2D only")
106108
model = net()
107109
saved_optimizer = optimizer(model.parameters(), lr=0.001)
108110
tiledb_array = os.path.join(tmpdir, "model_array")
@@ -174,6 +176,8 @@ def test_file_properties(self, tmpdir, net):
174176
],
175177
)
176178
def test_tensorboard_callback(self, tmpdir, net, optimizer):
179+
if optimizer.__name__ == "Muon":
180+
pytest.skip("Muon optimizer is 2D only")
177181
model = net()
178182
saved_optimizer = optimizer(model.parameters(), lr=0.001)
179183
tiledb_array = os.path.join(tmpdir, "model_array")
@@ -219,21 +223,21 @@ def test_get_cloud_uri_call_for_models_on_tiledb_cloud(self, tmpdir, mocker):
219223
"tiledb.ml.models._base.get_cloud_uri", return_value=uri
220224
)
221225

222-
_ = PyTorchTileDBModel(uri=uri, namespace="test_namespace", model=model)
226+
_ = PyTorchTileDBModel(uri=uri, teamspace="test_teamspace", model=model)
223227

224-
mock_get_cloud_uri.assert_called_once_with(uri, "test_namespace")
228+
mock_get_cloud_uri.assert_called_once_with(uri, "test_teamspace")
225229

226-
def test_get_s3_prefix_call_for_models_on_tiledb_cloud(self, tmpdir, mocker):
227-
model = Net()
228-
uri = os.path.join(tmpdir, "model_array")
230+
# def test_get_s3_prefix_call_for_models_on_tiledb_cloud(self, tmpdir, mocker):
231+
# model = Net()
232+
# uri = os.path.join(tmpdir, "model_array")
229233

230-
mock_get_s3_prefix = mocker.patch(
231-
"tiledb.ml.models._cloud_utils.get_s3_prefix", return_value="s3 prefix"
232-
)
234+
# mock_get_s3_prefix = mocker.patch(
235+
# "tiledb.ml.models._cloud_utils.get_s3_prefix", return_value="s3 prefix"
236+
# )
233237

234-
_ = PyTorchTileDBModel(uri=uri, namespace="test_namespace", model=model)
238+
# _ = PyTorchTileDBModel(uri=uri, teamspace="test_teamspace", model=model)
235239

236-
mock_get_s3_prefix.assert_called_once_with("test_namespace")
240+
# mock_get_s3_prefix.assert_called_once_with("test_teamspace")
237241

238242
def test_update_file_properties_call(self, tmpdir, mocker):
239243
model = Net()
@@ -242,7 +246,7 @@ def test_update_file_properties_call(self, tmpdir, mocker):
242246
mocker.patch("tiledb.ml.models._base.get_cloud_uri", return_value=uri)
243247

244248
tiledb_obj = PyTorchTileDBModel(
245-
uri=uri, namespace="test_namespace", model=model
249+
uri=uri, teamspace="test_teamspace", model=model
246250
)
247251

248252
mock_update_file_properties = mocker.patch(

tests/models/test_sklearn_models.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -82,21 +82,9 @@ def test_get_cloud_uri_call_for_models_on_tiledb_cloud(self, tmpdir, mocker):
8282
"tiledb.ml.models._base.get_cloud_uri", return_value=uri
8383
)
8484

85-
_ = SklearnTileDBModel(uri=uri, namespace="test_namespace", model=model)
85+
_ = SklearnTileDBModel(uri=uri, teamspace="test_teamspace", model=model)
8686

87-
mock_get_cloud_uri.assert_called_once_with(uri, "test_namespace")
88-
89-
def test_get_s3_prefix_call_for_models_on_tiledb_cloud(self, tmpdir, mocker):
90-
model = sklearn.linear_model.LinearRegression()
91-
uri = os.path.join(tmpdir, "model_array")
92-
93-
mock_get_s3_prefix = mocker.patch(
94-
"tiledb.ml.models._cloud_utils.get_s3_prefix", return_value="s3 prefix"
95-
)
96-
97-
_ = SklearnTileDBModel(uri=uri, namespace="test_namespace", model=model)
98-
99-
mock_get_s3_prefix.assert_called_once_with("test_namespace")
87+
mock_get_cloud_uri.assert_called_once_with(uri, "test_teamspace")
10088

10189
def test_update_file_properties_call(self, tmpdir, mocker):
10290
model = sklearn.linear_model.LinearRegression()
@@ -105,7 +93,7 @@ def test_update_file_properties_call(self, tmpdir, mocker):
10593
mocker.patch("tiledb.ml.models._base.get_cloud_uri", return_value=uri)
10694

10795
tiledb_obj = SklearnTileDBModel(
108-
uri=uri, namespace="test_namespace", model=model
96+
uri=uri, teamspace="test_teamspace", model=model
10997
)
11098

11199
mock_update_file_properties = mocker.patch(

0 commit comments

Comments
 (0)