From 4d4dff1025c38aa95d31bdc025b96f364520a1a1 Mon Sep 17 00:00:00 2001
From: Zach Griffith
Date: Mon, 26 Mar 2018 10:11:39 -0500
Subject: [PATCH 1/5] Adds fit_params to ExhaustiveFeatureSelector fit method

---
 .../exhaustive_feature_selector.py | 38 +++++++++++--------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/mlxtend/feature_selection/exhaustive_feature_selector.py b/mlxtend/feature_selection/exhaustive_feature_selector.py
index e71fe8e02..d7d4123b0 100644
--- a/mlxtend/feature_selection/exhaustive_feature_selector.py
+++ b/mlxtend/feature_selection/exhaustive_feature_selector.py
@@ -25,16 +25,17 @@
 from sklearn.externals.joblib import Parallel, delayed


-def _calc_score(selector, X, y, indices):
+def _calc_score(selector, X, y, indices, **fit_params):
     if selector.cv:
         scores = cross_val_score(selector.est_,
                                  X[:, indices], y,
                                  cv=selector.cv,
                                  scoring=selector.scorer,
                                  n_jobs=1,
-                                 pre_dispatch=selector.pre_dispatch)
+                                 pre_dispatch=selector.pre_dispatch,
+                                 fit_params=fit_params)
     else:
-        selector.est_.fit(X[:, indices], y)
+        selector.est_.fit(X[:, indices], y, **fit_params)
         scores = np.array([selector.scorer(selector.est_, X[:, indices], y)])
     return indices, scores

@@ -127,7 +128,7 @@ def __init__(self, estimator, min_features=1, max_features=1,
         self.est_ = self.estimator
         self.fitted = False

-    def fit(self, X, y):
+    def fit(self, X, y, **fit_params):
         """Perform feature selection and learn model from training data.

         Parameters
         ----------
@@ -137,6 +138,8 @@ def fit(self, X, y):
             n_features is the number of features.
         y : array-like, shape = [n_samples]
             Target values.
+        fit_params : dict of string -> object, optional
+            Parameters to pass to the fit method of the classifier.

         Returns
         -------
@@ -160,41 +163,42 @@ def fit(self, X, y):
             raise AttributeError('min_features must be <= max_features')

         candidates = chain(*((combinations(range(X.shape[1]), r=i))
-                           for i in range(self.min_features,
-                                          self.max_features + 1)))
+                             for i in range(self.min_features,
+                                            self.max_features + 1)))

         self.subsets_ = {}
-
+
         def ncr(n, r):
             """Return the number of combinations of length r from n items.
-
+
             Parameters
             ----------
             n : {integer}
                 Total number of items
             r : {integer}
                 Number of items to select from n
-
+
             Returns
             -------
             Number of combinations, integer
-
+
             """
-
+
             r = min(r, n-r)
             if r == 0:
                 return 1
             numer = reduce(op.mul, range(n, n-r, -1))
             denom = reduce(op.mul, range(1, r+1))
             return numer//denom
-
+
         all_comb = np.sum([ncr(n=X.shape[1], r=i)
                            for i in range(self.min_features,
                                           self.max_features + 1)])
-
+
         n_jobs = min(self.n_jobs, all_comb)
         parallel = Parallel(n_jobs=n_jobs, pre_dispatch=self.pre_dispatch)
-        work = enumerate(parallel(delayed(_calc_score)(self, X, y, c)
+        work = enumerate(parallel(delayed(_calc_score)
+                                  (self, X, y, c, **fit_params)
                                   for c in candidates))

         for iteration, (c, cv_scores) in work:
@@ -239,7 +243,7 @@ def transform(self, X):
         self._check_fitted()
         return X[:, self.best_idx_]

-    def fit_transform(self, X, y):
+    def fit_transform(self, X, y, **fit_params):
         """Fit to training data and return the best selected features from X.

         Parameters
         ----------
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
+        fit_params : dict of string -> object, optional
+            Parameters to pass to the fit method of the classifier.
        Returns
        -------
        Feature subset of X, shape={n_samples, k_features}

        """
-        self.fit(X, y)
+        self.fit(X, y, **fit_params)
        return self.transform(X)

    def get_metric_dict(self, confidence_interval=0.95):

From 2f46ec1898080fde822da17527acf08dad8e8c74 Mon Sep 17 00:00:00 2001
From: Zach Griffith
Date: Mon, 26 Mar 2018 10:12:23 -0500
Subject: [PATCH 2/5] Adds fit_params test for ExhaustiveFeatureSelector

---
 .../tests/test_exhaustive_feature_selector.py | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py b/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
index 82f796d14..d9dc6eecd 100644
--- a/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
+++ b/mlxtend/feature_selection/tests/test_exhaustive_feature_selector.py
@@ -8,6 +8,7 @@
 import numpy as np
 from numpy.testing import assert_almost_equal
 from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
+from sklearn.ensemble import RandomForestClassifier
 from sklearn.neighbors import KNeighborsClassifier
 from mlxtend.classifier import SoftmaxRegression
 from sklearn.datasets import load_iris
@@ -164,6 +165,40 @@ def test_knn_cv3():
     assert round(efs1.best_score_, 4) == 0.9728


+def test_fit_params():
+    iris = load_iris()
+    X = iris.data
+    y = iris.target
+    sample_weight = np.ones(X.shape[0])
+    forest = RandomForestClassifier(n_estimators=100, random_state=123)
+    efs1 = EFS(forest,
+               min_features=3,
+               max_features=3,
+               scoring='accuracy',
+               cv=4,
+               print_progress=False)
+    efs1 = efs1.fit(X, y, sample_weight=sample_weight)
+    expect = {0: {'feature_idx': (0, 1, 2),
+                  'cv_scores': np.array([0.94871795, 0.92307692,
+                                         0.91666667, 0.97222222]),
+                  'avg_score': 0.9401709401709402},
+              1: {'feature_idx': (0, 1, 3),
+                  'cv_scores': np.array([0.92307692, 0.92307692,
+                                         0.88888889, 1.]),
+                  'avg_score': 0.9337606837606838},
+              2: {'feature_idx': (0, 2, 3),
+                  'cv_scores': np.array([0.97435897, 0.94871795,
+                                         0.94444444, 0.97222222]),
+                  'avg_score': 0.9599358974358974},
+              3: {'feature_idx': (1, 2, 3),
+                  'cv_scores': np.array([0.97435897, 0.94871795,
+                                         0.91666667, 1.]),
+                  'avg_score': 0.9599358974358974}}
+    dict_compare_utility(d1=expect, d2=efs1.subsets_)
+    assert efs1.best_idx_ == (0, 2, 3)
+    assert round(efs1.best_score_, 4) == 0.9599
+
+
 def test_regression():
     boston = load_boston()
     X, y = boston.data[:, [1, 2, 6, 8, 12]], boston.target

From 137769de83bb10b1062c36f7f4ce7aaad91b739c Mon Sep 17 00:00:00 2001
From: Zach Griffith
Date: Mon, 26 Mar 2018 10:13:53 -0500
Subject: [PATCH 3/5] Adds missing parameter definition in fit_transform

---
 mlxtend/feature_selection/exhaustive_feature_selector.py | 2 ++
 mlxtend/feature_selection/sequential_feature_selector.py | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/mlxtend/feature_selection/exhaustive_feature_selector.py b/mlxtend/feature_selection/exhaustive_feature_selector.py
index d7d4123b0..41a2f3192 100644
--- a/mlxtend/feature_selection/exhaustive_feature_selector.py
+++ b/mlxtend/feature_selection/exhaustive_feature_selector.py
@@ -251,6 +251,8 @@ def fit_transform(self, X, y, **fit_params):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
+        y : array-like, shape = [n_samples]
+            Target values.
         fit_params : dict of string -> object, optional
             Parameters to pass to the fit method of the classifier.
diff --git a/mlxtend/feature_selection/sequential_feature_selector.py b/mlxtend/feature_selection/sequential_feature_selector.py
index 2f8aed793..3f7fa8903 100644
--- a/mlxtend/feature_selection/sequential_feature_selector.py
+++ b/mlxtend/feature_selection/sequential_feature_selector.py
@@ -481,6 +481,8 @@ def fit_transform(self, X, y, **fit_params):
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
+        y : array-like, shape = [n_samples]
+            Target values.
         fit_params : dict of string -> object, optional
             Parameters to pass to the fit method of the classifier.

From 60da5321e629c4b365c5604ddd62e3c1c3b92457 Mon Sep 17 00:00:00 2001
From: Zach Griffith
Date: Mon, 26 Mar 2018 11:19:38 -0500
Subject: [PATCH 4/5] adding an entry to the Changelog and updating the API docs

---
 docs/sources/CHANGELOG.md | 5 ++++-
 .../feature_selection/SequentialFeatureSelector.ipynb | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
index b9689e306..8c97065a9 100755
--- a/docs/sources/CHANGELOG.md
+++ b/docs/sources/CHANGELOG.md
@@ -18,7 +18,10 @@ The CHANGELOG for the current development version is available at

 ##### New Features

-The fit method of the SequentialFeatureSelector now optionally accepts **fit_params for the estimator that is used for the feature selection. ([#350](https://github.com/rasbt/mlxtend/pull/350) by Zach Griffith)
+- The fit method of the ExhaustiveFeatureSelector now optionally accepts
+  **fit_params for the estimator that is used for the feature selection. ([#354](https://github.com/rasbt/mlxtend/pull/354) by Zach Griffith)
+- The fit method of the SequentialFeatureSelector now optionally accepts
+  **fit_params for the estimator that is used for the feature selection. ([#350](https://github.com/rasbt/mlxtend/pull/350) by Zach Griffith)
 -
 -

diff --git a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
index 107f4fc04..61528a92d 100644
--- a/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
+++ b/docs/sources/user_guide/feature_selection/SequentialFeatureSelector.ipynb
@@ -1592,6 +1592,10 @@
    "    Training vectors, where n_samples is the number of samples and\n",
    "    n_features is the number of features.\n",
    "\n",
    "- `y` : array-like, shape = [n_samples]\n",
    "\n",
    "    Target values.\n",
    "\n",
    "- `fit_params` : dict of string -> object, optional\n",
    "\n",
    "    Parameters to pass to the fit method of the classifier.\n",

From d1ef89f32f9156c07e60b9591cf3aa28034d6458 Mon Sep 17 00:00:00 2001
From: Zach Griffith
Date: Tue, 27 Mar 2018 16:59:29 -0500
Subject: [PATCH 5/5] actually save api changes

---
 .../ExhaustiveFeatureSelector.ipynb | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb b/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
index 4648936c7..51368635e 100644
--- a/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
+++ b/docs/sources/user_guide/feature_selection/ExhaustiveFeatureSelector.ipynb
@@ -1666,7 +1666,7 @@
    "\n",
    "
\n",
    "\n",
    "*fit(X, y, **fit_params)*\n",
    "\n",
    "Perform feature selection and learn model from training data.\n",
    "\n",
    "**Parameters**\n",
    "\n",
    "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
    "\n",
    "    Training vectors, where n_samples is the number of samples and\n",
    "    n_features is the number of features.\n",
    "\n",
    "- `y` : array-like, shape = [n_samples]\n",
    "\n",
    "    Target values.\n",
    "\n",
    "- `fit_params` : dict of string -> object, optional\n",
    "\n",
    "    Parameters to pass to the fit method of the classifier.\n",
    "\n",
    "**Returns**\n",
    "\n",
    "- `self` : object\n",
    "\n",
    "
\n",
    "\n",
    "*fit_transform(X, y, **fit_params)*\n",
    "\n",
    "Fit to training data and return the best selected features from X.\n",
    "\n",
    "**Parameters**\n",
    "\n",
    "- `X` : {array-like, sparse matrix}, shape = [n_samples, n_features]\n",
    "\n",
    "    Training vectors, where n_samples is the number of samples and\n",
    "    n_features is the number of features.\n",
    "\n",
    "- `y` : array-like, shape = [n_samples]\n",
    "\n",
    "    Target values.\n",
    "\n",
    "- `fit_params` : dict of string -> object, optional\n",
    "\n",
    "    Parameters to pass to the fit method of the classifier.\n",
    "\n",
    "**Returns**\n",
    "\n",
    "Feature subset of X, shape={n_samples, k_features}\n",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.1"
+   "version": "3.6.3"
  }
 },
 "nbformat": 4,
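
Taken together, the five patches let callers forward any keyword argument accepted by the underlying estimator's fit method through the selector. A minimal usage sketch, mirroring the test_fit_params test added in PATCH 2/5 (the uniform sample_weight values are illustrative only; any keyword the estimator's fit accepts would work):

    import numpy as np
    from sklearn.datasets import load_iris
    from sklearn.ensemble import RandomForestClassifier
    from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS

    iris = load_iris()
    X, y = iris.data, iris.target

    # Per-sample weights are forwarded to RandomForestClassifier.fit();
    # with cv set, they travel through cross_val_score's fit_params.
    sample_weight = np.ones(X.shape[0])

    forest = RandomForestClassifier(n_estimators=100, random_state=123)
    efs = EFS(forest,
              min_features=3,
              max_features=3,
              scoring='accuracy',
              cv=4,
              print_progress=False)
    efs = efs.fit(X, y, sample_weight=sample_weight)

    print(efs.best_idx_)              # (0, 2, 3) per the test's expected values
    print(round(efs.best_score_, 4))  # 0.9599

Because uniform weights of 1 leave the forest's training objective unchanged, the selected subset should match the unweighted case; the test's value lies in proving that the keyword actually reaches the estimator instead of raising.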