Commit 6f23e59

Project import generated by Copybara. (#42)
1 parent f0326eb commit 6f23e59

16 files changed: 109 additions, 97 deletions


CHANGELOG.md

Lines changed: 13 additions & 1 deletion
@@ -1,6 +1,18 @@
 # Release History
 
-## 1.0.6
+## 1.0.7
+
+### Behavior Changes
+
+
+### New Features
+
+
+### Bug Fixes
+
+- Model Development & Model Registry: Fix an error related to `pandas.io.json.json_normalize`.
+
+## 1.0.6 (2023-09-01)
 
 ### New Features
 - Model Registry: add `create_if_not_exists` parameter in constructor.
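
The bug fix above is driven by a pandas API rename: `pandas.io.json.json_normalize` was deprecated in pandas 1.0 in favor of the top-level `pandas.json_normalize`, and the old path no longer works on recent pandas. A minimal before/after sketch (the sample records are illustrative, not from this repo):

    import pandas as pd

    # Deprecated spelling, removed in recent pandas:
    #   input_df = pd.io.json.json_normalize(records)
    # Supported spelling, which this commit switches to:
    records = [{"CITY": "Berlin", "TEMP": 21.5}, {"CITY": "Oslo", "TEMP": 14.0}]
    input_df = pd.json_normalize(records)
    print(input_df.columns.tolist())  # ['CITY', 'TEMP']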

bazel/environments/conda-env-snowflake.yml

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@ dependencies:
   - cryptography==39.0.1
   - flask-cors==3.0.10
   - flask==2.1.3
-  - fsspec==2022.11.0
+  - fsspec==2023.3.0
   - httpx==0.23.0
   - inflection==0.5.1
   - joblib==1.1.1
@@ -37,7 +37,7 @@ dependencies:
   - pyyaml==6.0
   - requests==2.29.0
   - ruamel.yaml==0.17.21
-  - s3fs==2022.11.0
+  - s3fs==2023.3.0
   - scikit-learn==1.3.0
   - scipy==1.9.3
   - snowflake-connector-python==3.0.3

bazel/environments/conda-env.yml

Lines changed: 2 additions & 2 deletions
@@ -19,7 +19,7 @@ dependencies:
   - cryptography==39.0.1
   - flask-cors==3.0.10
   - flask==2.1.3
-  - fsspec==2022.11.0
+  - fsspec==2023.3.0
   - httpx==0.23.0
   - inflection==0.5.1
   - joblib==1.1.1
@@ -41,7 +41,7 @@ dependencies:
   - pyyaml==6.0
   - requests==2.29.0
   - ruamel.yaml==0.17.21
-  - s3fs==2022.11.0
+  - s3fs==2023.3.0
   - scikit-learn==1.3.0
   - scipy==1.9.3
   - snowflake-connector-python==3.0.3

ci/conda_recipe/meta.yaml

Lines changed: 4 additions & 3 deletions
@@ -17,7 +17,7 @@ build:
   noarch: python
 package:
   name: snowflake-ml-python
-  version: 1.0.6
+  version: 1.0.7
 requirements:
   build:
     - python
@@ -27,13 +27,14 @@ requirements:
     - aiohttp!=4.0.0a0, !=4.0.0a1
     - anyio>=3.5.0,<4
    - cloudpickle
-    - fsspec>=2022.11,<=2023.1
+    - fsspec>=2022.11,<2024
     - numpy>=1.23,<2
     - packaging>=20.9,<24
     - pandas>=1.0.0,<2
-    - python
+    - python>=3.8.13, <3.11
     - pyyaml>=6.0,<7
     - requests
+    - s3fs>=2022.11,<2024
     - scikit-learn>=1.2.1,<1.4
     - scipy>=1.9,<2
     - snowflake-connector-python>=3.0.3,<4
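
The recipe widens the `fsspec` upper bound from `<=2023.1` to `<2024`, adds a matching `s3fs` constraint, and pins `python` to `>=3.8.13, <3.11`. To sanity-check what a widened specifier admits, here is a sketch using the `packaging` library (versions chosen to match the dev pins in this commit):

    from packaging.specifiers import SpecifierSet

    old_pin = SpecifierSet(">=2022.11,<=2023.1")
    new_pin = SpecifierSet(">=2022.11,<2024")

    # The new dev version 2023.3.0 is rejected by the old upper bound
    # but admitted by the widened one.
    print("2023.3.0" in old_pin)  # False
    print("2023.3.0" in new_pin)  # True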

codegen/sklearn_wrapper_template.py_template

Lines changed: 2 additions & 2 deletions
@@ -476,9 +476,9 @@ class {transform.original_class_name}(BaseTransformer):
             import pandas as pd
             import numpy as np
 
-            input_df = pd.io.json.json_normalize(ds)
+            input_df = pd.json_normalize(ds)
 
-            # pd.io.json.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
+            # pd.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
             # But trained models have unquoted input column names saved in internal state if trained using snowpark_df
             # or quoted input column names saved in internal state if trained using pandas_df.
             # Model expects exact same columns names in the input df for predict call.
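
The surviving comment concerns identifier quoting: `pd.json_normalize()` keeps dict keys verbatim, so Snowflake quoted identifiers come through with their double quotes intact, whereas `snowpark_df.to_pandas()` strips them. A small illustration with hypothetical column names:

    import pandas as pd

    # Keys from a vectorized UDF keep Snowflake's quoting verbatim,
    # so the normalized frame's columns include the double quotes.
    rows = [{'"FEATURE_1"': 1.0, '"FEATURE_2"': 2.0}]
    df = pd.json_normalize(rows)
    print(df.columns.tolist())  # ['"FEATURE_1"', '"FEATURE_2"']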

requirements.yml

Lines changed: 5 additions & 4 deletions
@@ -101,8 +101,8 @@
   dev_version: "2.1.3"
 - name_pypi: fsspec[http]
   name_conda: fsspec
-  dev_version: "2022.11.0"
-  version_requirements: ">=2022.11,<=2023.1"
+  dev_version: "2023.3.0"
+  version_requirements: ">=2022.11,<2024"
 - name: httpx
   dev_version: "0.23.0"
 - name: inflection
@@ -158,7 +158,7 @@
   dev_version: "7.1.2"
 - name_conda: python
   dev_version_conda: "3.8.13"
-  version_requirements_conda: ""
+  version_requirements_conda: ">=3.8.13, <3.11"
 - name_pypi: torch
   name_conda: pytorch
   dev_version: "2.0.1"
@@ -175,7 +175,8 @@
 - name: ruamel.yaml
   dev_version: "0.17.21"
 - name: s3fs
-  dev_version: "2022.11.0"
+  dev_version: "2023.3.0"
+  version_requirements: ">=2022.11,<2024"
 - name: scikit-learn
   dev_version: "1.3.0"
   version_requirements: ">=1.2.1,<1.4"

snowflake/ml/fileset/stage_fs_test.py

Lines changed: 1 addition & 5 deletions
@@ -2,11 +2,7 @@
 from typing import Dict, List
 
 import boto3
-
-# library `requests` has known stubs but is not installed.
-# TODO(zpeng): we may need to install as many mypy stubs as possible. However that
-# would require installing mypy when initializing the bazel conda environment.
-import requests  # type: ignore
+import requests
 import stage_fs
 from absl.testing import absltest
 from moto import server

snowflake/ml/model/_deploy_client/warehouse/infer_template.py

Lines changed: 1 addition & 1 deletion
@@ -52,7 +52,7 @@ def __exit__(self, type, value, traceback):
 # TODO(halu): Avoid per batch async detection branching.
 @vectorized(input=pd.DataFrame, max_batch_size=10)
 def infer(df):
-    input_df = pd.io.json.json_normalize(df[0]).astype(dtype=dtype_map)
+    input_df = pd.json_normalize(df[0]).astype(dtype=dtype_map)
     if inspect.iscoroutinefunction(model.{target_method}):
         predictions_df = anyio.run(model.{target_method}, input_df[input_cols])
     else:
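
The surrounding template dispatches on whether the model's target method is a coroutine function, running it through `anyio` when it is. A self-contained sketch of that branching (the two `predict` functions are stand-ins for the template's `model.{target_method}`):

    import inspect

    import anyio

    async def predict_async(x: int) -> int:
        return x * 2

    def predict_sync(x: int) -> int:
        return x * 2

    def call_model(fn, arg):
        # Coroutine functions go through anyio.run; plain functions are called directly.
        if inspect.iscoroutinefunction(fn):
            return anyio.run(fn, arg)
        return fn(arg)

    print(call_model(predict_async, 21))  # 42
    print(call_model(predict_sync, 21))   # 42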

snowflake/ml/modeling/model_selection/_internal/_grid_search_cv.py

Lines changed: 2 additions & 3 deletions
@@ -493,7 +493,6 @@ def _fit_snowpark(self, dataset: DataFrame) -> None:
         ]
         target_locations = []
         for param_chunk in param_chunks:
-
             param_chunk_dist: Any = defaultdict(set)
             for d in param_chunk:
                 for k, v in d.items():
@@ -675,9 +674,9 @@ def vec_batch_infer(ds: PandasSeries[dict]) -> PandasSeries[dict]:  # type: igno
             import numpy as np
             import pandas as pd
 
-            input_df = pd.io.json.json_normalize(ds)
+            input_df = pd.json_normalize(ds)
 
-            # pd.io.json.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
+            # pd.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
             # But trained models have unquoted input column names saved in internal state if trained using snowpark_df
             # or quoted input column names saved in internal state if trained using pandas_df.
             # Model expects exact same columns names in the input df for predict call.

snowflake/ml/modeling/model_selection/_internal/_randomized_search_cv.py

Lines changed: 2 additions & 3 deletions
@@ -503,7 +503,6 @@ def _fit_snowpark(self, dataset: DataFrame) -> None:
         ]
         target_locations = []
         for param_chunk in param_chunks:
-
             param_chunk_dist: Any = defaultdict(set)
             for d in param_chunk:
                 for k, v in d.items():
@@ -684,9 +683,9 @@ def vec_batch_infer(ds: PandasSeries[dict]) -> PandasSeries[dict]:  # type: igno
             import numpy as np
             import pandas as pd
 
-            input_df = pd.io.json.json_normalize(ds)
+            input_df = pd.json_normalize(ds)
 
-            # pd.io.json.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
+            # pd.json_normalize() doesn't remove quotes around quoted identifiers like snowpakr_df.to_pandas().
             # But trained models have unquoted input column names saved in internal state if trained using snowpark_df
             # or quoted input column names saved in internal state if trained using pandas_df.
             # Model expects exact same columns names in the input df for predict call.

snowflake/ml/requirements.bzl

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 
 EXTRA_REQUIREMENTS={'lightgbm': ['lightgbm==3.3.5'], 'mlflow': ['mlflow>=2.1.0,<2.4'], 'tensorflow': ['tensorflow>=2.9,<3'], 'torch': ['torchdata>=0.4,<1'], 'transformers': ['transformers>=4.29.2,<5'], 'all': ['lightgbm==3.3.5', 'mlflow>=2.1.0,<2.4', 'tensorflow>=2.9,<3', 'torchdata>=0.4,<1', 'transformers>=4.29.2,<5']}
 
-REQUIREMENTS=['absl-py>=0.15,<2', 'anyio>=3.5.0,<4', 'cloudpickle', 'fsspec[http]>=2022.11,<=2023.1', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 'scikit-learn>=1.2.1,<1.4', 'scipy>=1.9,<2', 'snowflake-connector-python[pandas]>=3.0.3,<4', 'snowflake-snowpark-python>=1.5.1,<2', 'sqlparse>=0.4,<1', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<2']
+REQUIREMENTS=['absl-py>=0.15,<2', 'anyio>=3.5.0,<4', 'cloudpickle', 'fsspec[http]>=2022.11,<2024', 'numpy>=1.23,<2', 'packaging>=20.9,<24', 'pandas>=1.0.0,<2', 'pyyaml>=6.0,<7', 's3fs>=2022.11,<2024', 'scikit-learn>=1.2.1,<1.4', 'scipy>=1.9,<2', 'snowflake-connector-python[pandas]>=3.0.3,<4', 'snowflake-snowpark-python>=1.5.1,<2', 'sqlparse>=0.4,<1', 'typing-extensions>=4.1.0,<5', 'xgboost>=1.7.3,<2']

snowflake/ml/version.bzl

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
 # This is parsed by regex in conda reciper meta file. Make sure not to break it.
-VERSION = "1.0.6"
+VERSION = "1.0.7"

tests/integ/snowflake/ml/model/warehouse_snowml_model_integ_test.py

Lines changed: 3 additions & 3 deletions
@@ -77,7 +77,7 @@ def base_test_case(
             test_released_version=test_released_version,
         )
 
-    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.5"])  # type: ignore[misc]
+    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.7"])  # type: ignore[misc]
     def test_snowml_model_deploy_snowml_sklearn(
         self,
         permanent_deploy: Optional[bool] = False,
@@ -110,7 +110,7 @@ def test_snowml_model_deploy_snowml_sklearn(
             test_released_version=test_released_version,
         )
 
-    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.5"])  # type: ignore[misc]
+    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.7"])  # type: ignore[misc]
     def test_snowml_model_deploy_xgboost(
         self,
         permanent_deploy: Optional[bool] = False,
@@ -143,7 +143,7 @@ def test_snowml_model_deploy_xgboost(
             test_released_version=test_released_version,
         )
 
-    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.5"])  # type: ignore[misc]
+    @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.7"])  # type: ignore[misc]
     def test_snowml_model_deploy_lightgbm(
         self,
         permanent_deploy: Optional[bool] = False,
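
For context, absl's `parameterized.product` expands its keyword lists into a full cross product, so each decorated test above runs four times (2 deploy modes x 2 released versions). A minimal sketch:

    from absl.testing import absltest, parameterized

    class ProductDemoTest(parameterized.TestCase):
        # 2 x 2 = 4 generated test cases.
        @parameterized.product(permanent_deploy=[True, False], test_released_version=[None, "1.0.7"])
        def test_combo(self, permanent_deploy, test_released_version):
            self.assertIsInstance(permanent_deploy, bool)

    if __name__ == "__main__":
        absltest.main()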

tests/integ/snowflake/ml/modeling/pipeline/test_pipeline.py

Lines changed: 39 additions & 37 deletions
@@ -124,45 +124,47 @@ def test_serde(self) -> None:
         mms = MinMaxScaler(input_cols=output_cols, output_cols=pipeline_output_cols)
         pipeline = snowml_pipeline.Pipeline([("ss", ss), ("mms", mms)])
         pipeline.fit(df1)
-        filepath = os.path.join(tempfile.gettempdir(), "test_pipeline.pkl")
-        self._to_be_deleted_files.append(filepath)
-        pipeline_dump_cloudpickle = cloudpickle.dumps(pipeline)
-        pipeline_dump_pickle = pickle.dumps(pipeline)
-        joblib.dump(pipeline, filepath)
+        with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as file:
+            self._to_be_deleted_files.append(file.name)
+            pipeline_dump_cloudpickle = cloudpickle.dumps(pipeline)
+            pipeline_dump_pickle = pickle.dumps(pipeline)
+            joblib.dump(pipeline, file.name)
+
+            self._session.close()
+
+            # transform in session 2
+            self._session = Session.builder.configs(SnowflakeLoginOptions()).create()
+            _, df2 = framework_utils.get_df(self._session, data, schema, np.nan)
+            input_cols_extended = input_cols.copy()
+            input_cols_extended.append(id_col)
+
+            importlib.reload(sys.modules["snowflake.ml.modeling.pipeline"])
+
+            # cloudpickle
+            pipeline_load_cloudpickle = cloudpickle.loads(pipeline_dump_cloudpickle)
+            transformed_df_cloudpickle = pipeline_load_cloudpickle.transform(df2[input_cols_extended])
+            actual_arr_cloudpickle = (
+                transformed_df_cloudpickle.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
+            )
 
-        self._session.close()
+            # pickle
+            pipeline_load_pickle = pickle.loads(pipeline_dump_pickle)
+            transformed_df_pickle = pipeline_load_pickle.transform(df2[input_cols_extended])
+            actual_arr_pickle = transformed_df_pickle.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
 
-        # transform in session 2
-        self._session = Session.builder.configs(SnowflakeLoginOptions()).create()
-        _, df2 = framework_utils.get_df(self._session, data, schema, np.nan)
-        input_cols_extended = input_cols.copy()
-        input_cols_extended.append(id_col)
-
-        importlib.reload(sys.modules["snowflake.ml.modeling.pipeline"])
-
-        # cloudpickle
-        pipeline_load_cloudpickle = cloudpickle.loads(pipeline_dump_cloudpickle)
-        transformed_df_cloudpickle = pipeline_load_cloudpickle.transform(df2[input_cols_extended])
-        actual_arr_cloudpickle = transformed_df_cloudpickle.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
-
-        # pickle
-        pipeline_load_pickle = pickle.loads(pipeline_dump_pickle)
-        transformed_df_pickle = pipeline_load_pickle.transform(df2[input_cols_extended])
-        actual_arr_pickle = transformed_df_pickle.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
-
-        # joblib
-        pipeline_load_joblib = joblib.load(filepath)
-        transformed_df_joblib = pipeline_load_joblib.transform(df2[input_cols_extended])
-        actual_arr_joblib = transformed_df_joblib.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
-
-        # sklearn
-        skpipeline = SkPipeline([("ss", SklearnStandardScaler()), ("mms", SklearnMinMaxScaler())])
-        skpipeline.fit(df_pandas[input_cols])
-        sklearn_arr = skpipeline.transform(df_pandas[input_cols])
-
-        assert np.allclose(actual_arr_cloudpickle, sklearn_arr)
-        assert np.allclose(actual_arr_pickle, sklearn_arr)
-        assert np.allclose(actual_arr_joblib, sklearn_arr)
+            # joblib
+            pipeline_load_joblib = joblib.load(file.name)
+            transformed_df_joblib = pipeline_load_joblib.transform(df2[input_cols_extended])
+            actual_arr_joblib = transformed_df_joblib.sort(id_col)[pipeline_output_cols].to_pandas().to_numpy()
+
+            # sklearn
+            skpipeline = SkPipeline([("ss", SklearnStandardScaler()), ("mms", SklearnMinMaxScaler())])
+            skpipeline.fit(df_pandas[input_cols])
+            sklearn_arr = skpipeline.transform(df_pandas[input_cols])
+
+            np.testing.assert_allclose(actual_arr_cloudpickle, sklearn_arr)
+            np.testing.assert_allclose(actual_arr_pickle, sklearn_arr)
+            np.testing.assert_allclose(actual_arr_joblib, sklearn_arr)
 
     def test_pipeline_with_regression_estimators(self) -> None:
         input_df_pandas = load_diabetes(as_frame=True).frame
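
Two details of this rewrite are worth noting: `NamedTemporaryFile(delete=False)` yields a path that stays valid after the with-block (the test appends `file.name` to its own cleanup list), and `np.testing.assert_allclose` replaces bare `assert np.allclose(...)` so a failure reports which elements differ. A short standalone sketch of both:

    import os
    import tempfile

    import numpy as np

    # delete=False keeps the file on disk after the with-block; clean up manually.
    with tempfile.NamedTemporaryFile(suffix=".pkl", delete=False) as file:
        path = file.name

    # Unlike a bare assert, this raises with a per-element mismatch report on failure.
    np.testing.assert_allclose([1.0, 2.0], [1.0, 2.0 + 1e-9], rtol=1e-7)

    os.remove(path)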

tests/integ/snowflake/ml/modeling/preprocessing/BUILD_NATIVE.bzl

Lines changed: 2 additions & 0 deletions
@@ -122,6 +122,8 @@ def get_build_rules_for_native_impl():
     py_test(
         name = "test_drop_input_cols",
         srcs = ["test_drop_input_cols.py"],
+        shard_count = SHARD_COUNT,
+        timeout = TIMEOUT,
         deps = [
             "//snowflake/ml/modeling/impute:simple_imputer",
             "//snowflake/ml/modeling/pipeline:pipeline",

tests/integ/snowflake/ml/registry/model_registry_integ_test_snowservice_merge_gate.py

Lines changed: 29 additions & 29 deletions
@@ -2,40 +2,40 @@
 # Copyright (c) 2012-2022 Snowflake Computing Inc. All rights reserved.
 #
 
-import uuid
+# import uuid
 
-import pandas as pd
-import pytest
+# import pandas as pd
+# import pytest
 from absl.testing import absltest
 
-from snowflake.ml.model import deploy_platforms
-from tests.integ.snowflake.ml.registry.model_registry_integ_test_snowservice_base import (
-    TestModelRegistryIntegSnowServiceBase,
-)
-from tests.integ.snowflake.ml.test_utils import model_factory
+# from snowflake.ml.model import deploy_platforms
+# from tests.integ.snowflake.ml.registry.model_registry_integ_test_snowservice_base import (
+#     TestModelRegistryIntegSnowServiceBase,
+# )
+# from tests.integ.snowflake.ml.test_utils import model_factory
 
 
-class TestModelRegistryIntegWithSnowServiceDeployment(TestModelRegistryIntegSnowServiceBase):
-    @pytest.mark.pip_incompatible
-    def test_snowml_model_deployment_xgboost(self) -> None:
-        self._test_snowservice_deployment(
-            model_name="xgboost_model",
-            model_version=uuid.uuid4().hex,
-            prepare_model_and_feature_fn=model_factory.ModelFactory.prepare_snowml_model_xgb,
-            prediction_assert_fn=lambda local_prediction, remote_prediction: pd.testing.assert_frame_equal(
-                remote_prediction, local_prediction, check_dtype=False
-            ),
-            deployment_options={
-                "platform": deploy_platforms.TargetPlatform.SNOWPARK_CONTAINER_SERVICES,
-                "target_method": "predict",
-                "options": {
-                    "compute_pool": self._TEST_CPU_COMPUTE_POOL,
-                    "image_repo": self._db_manager.get_snowservice_image_repo(repo=self._TEST_IMAGE_REPO),
-                    "enable_remote_image_build": True,
-                },
-            },
-            omit_target_method_when_deploy=True,
-        )
+# class TestModelRegistryIntegWithSnowServiceDeployment(TestModelRegistryIntegSnowServiceBase):
+#     @pytest.mark.pip_incompatible
+#     def test_snowml_model_deployment_xgboost(self) -> None:
+#         self._test_snowservice_deployment(
+#             model_name="xgboost_model",
+#             model_version=uuid.uuid4().hex,
+#             prepare_model_and_feature_fn=model_factory.ModelFactory.prepare_snowml_model_xgb,
+#             prediction_assert_fn=lambda local_prediction, remote_prediction: pd.testing.assert_frame_equal(
+#                 remote_prediction, local_prediction, check_dtype=False
+#             ),
+#             deployment_options={
+#                 "platform": deploy_platforms.TargetPlatform.SNOWPARK_CONTAINER_SERVICES,
+#                 "target_method": "predict",
+#                 "options": {
+#                     "compute_pool": self._TEST_CPU_COMPUTE_POOL,
+#                     "image_repo": self._db_manager.get_snowservice_image_repo(repo=self._TEST_IMAGE_REPO),
+#                     "enable_remote_image_build": True,
+#                 },
+#             },
+#             omit_target_method_when_deploy=True,
+#         )
 
 
 if __name__ == "__main__":
