diff --git a/.github/workflows/testpythonpackage.yml b/.github/workflows/testpythonpackage.yml index 8cfd1d40..06031f9b 100644 --- a/.github/workflows/testpythonpackage.yml +++ b/.github/workflows/testpythonpackage.yml @@ -15,10 +15,10 @@ jobs: runs-on: ubuntu-latest strategy: - max-parallel: 4 + max-parallel: 7 fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v1 diff --git a/.vscode/launch.json b/.vscode/launch.json index 91b002e4..0f71c42c 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -2,17 +2,33 @@ "version": "0.2.0", "configurations": [ { - "name": "Python: Current File with /Neuraxle workdir", + "name": "Python: Current File", "type": "python", - "justMyCode": false, "request": "launch", "program": "${file}", "console": "integratedTerminal", - "cwd": "${workspaceFolder}", - "env": { - "PYTHONPATH": "${cwd}" - }, - "redirectOutput": true, + "justMyCode": false, + "cwd": "${workspaceFolder}" + }, + { + "name": "Debug Unit Test", + "type": "python", + "request": "test", + "justMyCode": false, + "console": "integratedTerminal", + "cwd": "${workspaceFolder}" + }, + { + "name": "pytest", + "purpose": [ + "debug-test" + ], + "type": "python", + "request": "launch", + "program": "${file}", + "console": "internalConsole", + "justMyCode": false, + "cwd": "${workspaceFolder}" } ] -} \ No newline at end of file +} diff --git a/coverage.sh b/coverage.sh index 43549c3f..be8dfb9d 100755 --- a/coverage.sh +++ b/coverage.sh @@ -1,5 +1,6 @@ #!/usr/bin/env bash ./flake8.sh pytest -n 7 --cov-report html --cov-report xml:cov.xml --cov-config=.coveragerc --cov=neuraxle testing_neuraxle +# pytest --cov-report html --cov-report xml:cov.xml --cov=neuraxle testing # pytest --cov-report html --cov=neuraxle testing_neuraxle; open htmlcov/index.html diff --git a/examples/Introduction to Automatic Hyperparameter Tuning.ipynb b/examples/Introduction to Automatic Hyperparameter 
Tuning.ipynb index 2b091e12..dd5ea9ad 100644 --- a/examples/Introduction to Automatic Hyperparameter Tuning.ipynb +++ b/examples/Introduction to Automatic Hyperparameter Tuning.ipynb @@ -16365,7 +16365,6 @@ " SKLearnWrapper(RidgeClassifier(), HyperparameterSpace({\n", " 'alpha': Choice([(0.0, 1.0, 10.0), (0.0, 10.0, 100.0)]),\n", " 'fit_intercept': Boolean(),\n", - " 'normalize': Boolean()\n", " }))\n", " ]).set_name('RidgeClassifier'),\n", " Pipeline([\n", diff --git a/examples/Rest API Serving.ipynb b/examples/Rest API Serving.ipynb index a73c005e..fb51c1e3 100644 --- a/examples/Rest API Serving.ipynb +++ b/examples/Rest API Serving.ipynb @@ -27,7 +27,7 @@ "source": [ "import numpy as np\n", "from sklearn.cluster import KMeans\n", - "from sklearn.datasets import load_boston\n", + "from sklearn.datasets import fetch_california_housing\n", "from sklearn.decomposition import PCA, FastICA\n", "from sklearn.ensemble import GradientBoostingRegressor\n", "from sklearn.metrics import r2_score\n", @@ -46,7 +46,7 @@ "source": [ "## Load your Dataset\n", "\n", - "Here, we'll simply use the Boston Housing Dataset, and do a train test split." + "Here, we'll simply use the Housing Dataset, and do a train test split." 
] }, { @@ -55,8 +55,8 @@ "metadata": {}, "outputs": [], "source": [ - "boston = load_boston()\n", - "X, y = shuffle(boston.data, boston.target, random_state=13)\n", + "cali = fetch_california_housing()\n", + "X, y = shuffle(cali.data, cali.target, random_state=13)\n", "X = X.astype(np.float32)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)" ] diff --git a/examples/auto_ml/plot_automl_loop_clean_kata.py b/examples/auto_ml/plot_automl_loop_clean_kata.py index 04456b0f..5832cabb 100644 --- a/examples/auto_ml/plot_automl_loop_clean_kata.py +++ b/examples/auto_ml/plot_automl_loop_clean_kata.py @@ -72,7 +72,6 @@ def main(tmpdir: str): HyperparameterSpace({ 'alpha': Choice([0.0, 1.0, 10.0, 100.0]), 'fit_intercept': Boolean(), - 'normalize': Boolean() })) ]).set_name('RidgeClassifier') diff --git a/examples/deployment/plot_easy_rest_api_serving.py b/examples/deployment/plot_easy_rest_api_serving.py index 37153a20..e9d2f998 100644 --- a/examples/deployment/plot_easy_rest_api_serving.py +++ b/examples/deployment/plot_easy_rest_api_serving.py @@ -28,7 +28,7 @@ import numpy as np from flask import Flask from sklearn.cluster import KMeans -from sklearn.datasets import load_boston +from sklearn.datasets import fetch_california_housing from sklearn.decomposition import PCA, FastICA from sklearn.ensemble import GradientBoostingRegressor from sklearn.metrics import r2_score @@ -42,8 +42,8 @@ def main(): - boston = load_boston() - X, y = shuffle(boston.data, boston.target, random_state=13) + cali = fetch_california_housing() + X, y = shuffle(cali.data, cali.target, random_state=13) X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) diff --git a/examples/sklearn/plot_boston_housing_meta_optimization.py b/examples/sklearn/plot_boston_housing_meta_optimization.py index 6291e738..e0cec5b2 100644 --- a/examples/sklearn/plot_boston_housing_meta_optimization.py +++ 
b/examples/sklearn/plot_boston_housing_meta_optimization.py @@ -1,10 +1,12 @@ """ -Boston Housing Regression with Meta Optimization +California Housing Regression with Meta Optimization ================================================ This is an automatic machine learning example. It is more sophisticated than the other simple regression example. Not only a pipeline is defined, but also an hyperparameter space is defined for the pipeline. Then, a random search is performed to find the best possible combination of hyperparameters by sampling randomly in the hyperparameter space. +Note the URL may point to Boston housing, although the dataset isn't available anymore, so the California housing +dataset is used instead for the time being without breaking URLs. .. Copyright 2022, Neuraxio Inc. @@ -29,7 +31,7 @@ import numpy as np from sklearn.cluster import KMeans -from sklearn.datasets import load_boston +from sklearn.datasets import fetch_california_housing from sklearn.decomposition import PCA, FastICA from sklearn.ensemble import GradientBoostingRegressor from sklearn.linear_model import Ridge @@ -48,8 +50,8 @@ def main(tmpdir): - boston = load_boston() - X, y = shuffle(boston.data, boston.target, random_state=13) + cali = fetch_california_housing() # The boston dataset isn't available anymore, let's take the california one. 
+ X, y = shuffle(cali.data, cali.target, random_state=13) X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) diff --git a/examples/sklearn/plot_boston_housing_regression_with_model_stacking.py b/examples/sklearn/plot_boston_housing_regression_with_model_stacking.py index a4e40962..eb9bdea4 100644 --- a/examples/sklearn/plot_boston_housing_regression_with_model_stacking.py +++ b/examples/sklearn/plot_boston_housing_regression_with_model_stacking.py @@ -33,7 +33,7 @@ import numpy as np from sklearn.cluster import KMeans -from sklearn.datasets import load_boston +from sklearn.datasets import fetch_california_housing from sklearn.decomposition import PCA, FastICA from sklearn.ensemble import GradientBoostingRegressor from sklearn.metrics import r2_score @@ -47,8 +47,8 @@ def main(): - boston = load_boston() - X, y = shuffle(boston.data, boston.target, random_state=13) + cali = fetch_california_housing() + X, y = shuffle(cali.data, cali.target, random_state=13) X = X.astype(np.float32) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False) @@ -83,8 +83,8 @@ def main(): score_test = r2_score(y_test_predicted, y_test) print('R2 regression score:', score_test) - assert y_train_predicted.shape == (379,) - assert y_test_predicted.shape == (127,) + assert y_train_predicted.shape == (15480,) + assert y_test_predicted.shape == (5160,) assert isinstance(score_train, float) assert isinstance(score_test, float) diff --git a/examples/sklearn/plot_cyclical_feature_engineering.py b/examples/sklearn/plot_cyclical_feature_engineering.py index 42a23d3c..a39f8f36 100644 --- a/examples/sklearn/plot_cyclical_feature_engineering.py +++ b/examples/sklearn/plot_cyclical_feature_engineering.py @@ -68,7 +68,7 @@ fig, ax = plt.subplots(figsize=(12, 4)) -average_week_demand = df.groupby(["weekday", "hour"]).mean()["count"] +average_week_demand = df.groupby(["weekday", "hour"])["count"].mean() 
average_week_demand.plot(ax=ax) _ = ax.set( title="Average hourly bike demand during the week", @@ -299,7 +299,7 @@ def set_params(self, deep=False) -> dict: categorical_one_hot_encoders = [ - (col_name, OneHotEncoder(handle_unknown="ignore", sparse=False)) + (col_name, OneHotEncoder(handle_unknown="ignore", sparse_output=False)) for col_name in categorical_columns ] alphas = np.logspace(-6, 6, 25) @@ -347,7 +347,7 @@ def set_params(self, deep=False) -> dict: non_time_non_categorical_columns = [i for i in non_categorical_columns if i not in hour_weekday_month] time_one_hot_encoders = [ - (col_name, OneHotEncoder(handle_unknown="ignore", sparse=False)) + (col_name, OneHotEncoder(handle_unknown="ignore", sparse_output=False)) for col_name in hour_weekday_month ] diff --git a/neuraxle/__init__.py b/neuraxle/__init__.py index 8088f751..deded324 100644 --- a/neuraxle/__init__.py +++ b/neuraxle/__init__.py @@ -1 +1 @@ -__version__ = "0.8.1" +__version__ = "0.8.2" diff --git a/neuraxle/steps/sklearn.py b/neuraxle/steps/sklearn.py index f60176b4..4644b66c 100644 --- a/neuraxle/steps/sklearn.py +++ b/neuraxle/steps/sklearn.py @@ -24,7 +24,6 @@ """ import functools -import inspect from typing import Any, Tuple from neuraxle.base import BaseStep @@ -83,7 +82,11 @@ def _delete_base_estimator_from_dict(self, params): def fit_transform(self, data_inputs, expected_outputs=None) -> Tuple['BaseStep', Any]: if hasattr(self.wrapped_sklearn_predictor, 'fit_transform'): - if expected_outputs is None or len(inspect.getfullargspec(self.wrapped_sklearn_predictor.fit).args) < 3: + if expected_outputs is None: # or len(inspect.getfullargspec(self.wrapped_sklearn_predictor.fit).args) < 3: + # A change from Neuraxle 0.8.1 to 0.8.2 is to do less checks here, + # since scikit-learn started putting "y=None" in most (?) of their objects. 
+ # If you're using an old version of sklearn (likely somewhere under 1.4.2), + # consider using an old version of Neuraxle, such as 0.8.1 out = self._sklearn_fit_transform_without_expected_outputs(data_inputs) else: out = self._sklearn_fit_transform_with_expected_outputs(data_inputs, expected_outputs) @@ -106,7 +109,7 @@ def _sklearn_fit_transform_without_expected_outputs(self, data_inputs): return out def fit(self, data_inputs, expected_outputs=None) -> 'SKLearnWrapper': - if expected_outputs is None or len(inspect.getfullargspec(self.wrapped_sklearn_predictor.fit).args) < 3: + if expected_outputs is None: # or len(inspect.getfullargspec(self.wrapped_sklearn_predictor.fit).args) < 3: self._sklearn_fit_without_expected_outputs(data_inputs) else: self._sklearn_fit_with_expected_outputs(data_inputs, expected_outputs) diff --git a/requirements.txt b/requirements.txt index df86a9fc..a49f63f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ pytest>=6.0.0 pytest-cov>=2.6.1 numpy>=1.16.2 matplotlib==3.3.4 -scikit-learn>=0.24.1 +scikit-learn>=1.6.1 scipy>=1.4.1 pandas>=1.3.5 joblib>=0.13.2 @@ -11,4 +11,3 @@ flask==1.1.4 flask-restful>=0.3.9 SQLAlchemy==1.4.26 markupsafe==2.0.1 -pytest-timeout>=2.1.0 diff --git a/setup.py b/setup.py index a861146e..ab57e825 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ 'abstractions to both ease research, development, and deployment of your ML applications.', long_description=_README, classifiers=[ - "Development Status :: 4 - Beta", + "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Education", "Intended Audience :: Financial and Insurance Industry", @@ -64,18 +64,14 @@ "Topic :: Scientific/Engineering :: Physics", "Topic :: Software Development", "Topic :: Software Development :: Assemblers", - "Topic :: Software Development :: Build Tools", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", 
"Topic :: Software Development :: Object Brokering", "Topic :: Software Development :: Pre-processors", - "Topic :: Software Development :: Quality Assurance", - "Topic :: Software Development :: Testing", "Topic :: System", "Topic :: System :: Clustering", "Topic :: System :: Distributed Computing", "Topic :: System :: Networking", - # Topic :: System :: Systems Administration, "Topic :: Text Processing", "Topic :: Text Processing :: Filters", "Topic :: Text Processing :: Linguistic", @@ -93,7 +89,7 @@ install_requires=[ 'numpy>=1.16.2', 'scipy>=1.4.1', - 'scikit-learn>=0.24.1', + 'scikit-learn>=1.6.1', 'matplotlib==3.3.4', 'joblib>=0.13.2', 'Flask>=1.1.4', @@ -105,7 +101,7 @@ "pytest", "pytest-cov", "pytest-timeout>=2.1.0", - "scikit-learn>=0.24.1" + "scikit-learn>=1.6.1" ], include_package_data=True, license='Apache 2.0', @@ -127,9 +123,6 @@ Learn more: - https://www.neuraxle.org/stable/index.html - Contribute: - - https://gitter.im/Neuraxle/community - Open issue: - https://github.com/Neuraxio/Neuraxle diff --git a/testing_neuraxle/examples/test_examples.py b/testing_neuraxle/examples/test_examples.py index e6afadb1..86dd6db9 100644 --- a/testing_neuraxle/examples/test_examples.py +++ b/testing_neuraxle/examples/test_examples.py @@ -1,8 +1,7 @@ import pytest -from py._path.local import LocalPath -def test_auto_ml_loop_clean_kata(tmpdir: LocalPath): +def test_auto_ml_loop_clean_kata(tmpdir): from examples.auto_ml.plot_automl_loop_clean_kata import main main(tmpdir) @@ -52,12 +51,12 @@ def test_parallel_streaming(): main() -def test_boston_housing_meta_optimization(tmpdir: LocalPath): +def test_housing_meta_optimization(tmpdir): from examples.sklearn.plot_boston_housing_meta_optimization import main main(tmpdir) -def test_boston_housing_regression_with_model_stacking(): +def test_housing_regression_with_model_stacking(): from examples.sklearn.plot_boston_housing_regression_with_model_stacking import main main() diff --git a/testing_neuraxle/metaopt/test_trial.py 
b/testing_neuraxle/metaopt/test_trial.py index 0a17ec73..bbba6404 100644 --- a/testing_neuraxle/metaopt/test_trial.py +++ b/testing_neuraxle/metaopt/test_trial.py @@ -31,7 +31,7 @@ class TestTrials: - def setup(self): + def setup_method(self): self.hp: HyperparameterSamples = HyperparameterSamples({'a': 2}) round_loc = SOME_FULL_SCOPED_LOCATION[:RoundDataclass] self.cx: AutoMLContext = AutoMLContext.from_context().with_loc(round_loc) diff --git a/testing_neuraxle/test_data_container.py b/testing_neuraxle/test_data_container.py index a7563afc..a608452b 100644 --- a/testing_neuraxle/test_data_container.py +++ b/testing_neuraxle/test_data_container.py @@ -68,11 +68,11 @@ def test_list_data_container_concat(): )) # Then - assert np.array_equal(np.array(data_container.ids), np.array(list(range(0, 200))).astype(np.str)) + assert np.array_equal(np.array(data_container.ids), list(str(i) for i in range(200))) - expected_data_inputs = np.array(list(range(0, 200))).astype(np.int) - actual_data_inputs = np.array(data_container.data_inputs).astype(np.int) + expected_data_inputs = np.array(list(range(0, 200))) + actual_data_inputs = np.array(data_container.data_inputs) assert np.array_equal(actual_data_inputs, expected_data_inputs) - expected_expected_outputs = np.array(list(range(100, 300))).astype(np.int) - assert np.array_equal(np.array(data_container.expected_outputs).astype(np.int), expected_expected_outputs) + expected_expected_outputs = np.array(list(range(100, 300))) + assert np.array_equal(np.array(data_container.expected_outputs), expected_expected_outputs) diff --git a/testing_neuraxle/test_step_saving.py b/testing_neuraxle/test_step_saving.py index 3c3d1455..bef624bc 100644 --- a/testing_neuraxle/test_step_saving.py +++ b/testing_neuraxle/test_step_saving.py @@ -2,7 +2,6 @@ import numpy as np from joblib import dump -from py._path.local import LocalPath from pprint import pprint from neuraxle.hyperparams.space import RecursiveDict @@ -45,7 +44,7 @@ def 
create_root_path(tmpdir, create_dir=False): return path2 -def test_nested_pipeline_fit_transform_should_save_some_fitted_pipeline_steps(tmpdir: LocalPath): +def test_nested_pipeline_fit_transform_should_save_some_fitted_pipeline_steps(tmpdir): p: StepWithContext = create_pipeline(tmpdir) p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10))) @@ -61,7 +60,7 @@ def test_nested_pipeline_fit_transform_should_save_some_fitted_pipeline_steps(tm assert os.path.exists(path), path -def test_pipeline_transform_should_not_save_steps(tmpdir: LocalPath): +def test_pipeline_transform_should_not_save_steps(tmpdir): p: StepWithContext = create_pipeline(tmpdir) outputs = p.transform(np.array(range(10))) @@ -75,7 +74,7 @@ def test_pipeline_transform_should_not_save_steps(tmpdir: LocalPath): assert not os.path.exists(path), path -def test_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath): +def test_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir): p: StepWithContext = create_pipeline(tmpdir) p = p.fit(np.array(range(10)), np.array(range(10))) @@ -90,7 +89,7 @@ def test_pipeline_fit_should_save_all_fitted_pipeline_steps(tmpdir: LocalPath): assert os.path.exists(path), path -def test_pipeline_fit_transform_should_load_all_pipeline_steps(tmpdir: LocalPath): +def test_pipeline_fit_transform_should_load_all_pipeline_steps(tmpdir): p = given_saved_pipeline(tmpdir) p, outputs = p.fit_transform(np.array(range(10)), np.array(range(10))) @@ -98,7 +97,7 @@ def test_pipeline_fit_transform_should_load_all_pipeline_steps(tmpdir: LocalPath assert np.array_equal(outputs, EXPECTED_OUTPUTS) -def test_pipeline_transform_should_load_all_pipeline_steps(tmpdir: LocalPath): +def test_pipeline_transform_should_load_all_pipeline_steps(tmpdir): p = given_saved_pipeline(tmpdir) outputs = p.transform(np.array(range(10))) @@ -106,7 +105,7 @@ def test_pipeline_transform_should_load_all_pipeline_steps(tmpdir: LocalPath): assert np.array_equal(outputs, EXPECTED_OUTPUTS) 
-def test_pipeline_fit_should_load_all_pipeline_steps(tmpdir: LocalPath): +def test_pipeline_fit_should_load_all_pipeline_steps(tmpdir): p = given_saved_pipeline(tmpdir) p = p.fit(np.array(range(10)), np.array(range(10))) @@ -116,7 +115,7 @@ def test_pipeline_fit_should_load_all_pipeline_steps(tmpdir: LocalPath): assert p.wrapped[PIPELINE_2][SOME_STEPS[2]].hyperparams['multiply_by'] == 6 -def given_saved_pipeline(tmpdir: LocalPath) -> Pipeline: +def given_saved_pipeline(tmpdir) -> Pipeline: path = create_root_path(tmpdir, True) p = Pipeline([]).set_name(ROOT).with_context(CX(tmpdir)).with_context(CX(tmpdir)) dump(p, path) diff --git a/testing_neuraxle/test_union.py b/testing_neuraxle/test_union.py index 8d303a0d..ecae2845 100644 --- a/testing_neuraxle/test_union.py +++ b/testing_neuraxle/test_union.py @@ -142,7 +142,7 @@ def test_model_stacking_fit_transform(): ), ) ]) - expected_outputs_shape = (379, 1) + expected_outputs_shape = (379, ) data_inputs_shape = (379, 13) data_inputs = _create_data(data_inputs_shape) expected_outputs = _create_data(expected_outputs_shape) @@ -179,7 +179,7 @@ def test_model_stacking_transform(): ), ) ]) - expected_outputs_shape = (379, 1) + expected_outputs_shape = (379, ) data_inputs_shape = (379, 13) data_inputs = _create_data(data_inputs_shape) expected_outputs = _create_data(expected_outputs_shape)