From c6867de31259d687bafb3cf74f46d73e3a07ac20 Mon Sep 17 00:00:00 2001
From: Kota Mori
Date: Mon, 24 Sep 2018 11:42:24 -0500
Subject: [PATCH 1/3] refactor stacking classifier, regressor

---
 mlxtend/classifier/stacking_classification.py |   4 +
 mlxtend/regressor/stacking_regression.py      |   4 +
 mlxtend/stacking/__init__.py                  |  11 +
 mlxtend/stacking/stacking.py                  | 408 +++++++++++
 4 files changed, 427 insertions(+)
 create mode 100644 mlxtend/stacking/__init__.py
 create mode 100644 mlxtend/stacking/stacking.py

diff --git a/mlxtend/classifier/stacking_classification.py b/mlxtend/classifier/stacking_classification.py
index ad84a1dee..fe8c51c29 100644
--- a/mlxtend/classifier/stacking_classification.py
+++ b/mlxtend/classifier/stacking_classification.py
@@ -8,6 +8,9 @@
 #
 # License: BSD 3 clause
 
+from ..stacking import StackingClassifier
+
+'''
 from ..externals.estimator_checks import check_is_fitted
 from ..externals.name_estimators import _name_estimators
 from ..externals import six
@@ -275,3 +278,4 @@ def predict_proba(self, X):
                 )
         else:
             return self.meta_clf_.predict_proba(np.hstack((X, meta_features)))
+'''
\ No newline at end of file
diff --git a/mlxtend/regressor/stacking_regression.py b/mlxtend/regressor/stacking_regression.py
index 8ef75727d..bf43804e4 100644
--- a/mlxtend/regressor/stacking_regression.py
+++ b/mlxtend/regressor/stacking_regression.py
@@ -8,6 +8,9 @@
 #
 # License: BSD 3 clause
 
+from ..stacking import StackingRegressor
+
+'''
 from ..externals.estimator_checks import check_is_fitted
 from ..externals.name_estimators import _name_estimators
 from ..externals import six
@@ -243,3 +246,4 @@ def predict(self, X):
             return self.meta_regr_.predict(sparse.hstack((X, meta_features)))
         else:
             return self.meta_regr_.predict(np.hstack((X, meta_features)))
+'''
diff --git a/mlxtend/stacking/__init__.py b/mlxtend/stacking/__init__.py
new file mode 100644
index 000000000..01fac781f
--- /dev/null
+++ b/mlxtend/stacking/__init__.py
@@ -0,0 +1,11 @@
+# Sebastian Raschka 2014-2018
+# mlxtend Machine Learning Library Extensions
+# Author: Sebastian Raschka
+#
+# License: BSD 3 clause
+
+from .stacking import StackingRegressor
+from .stacking import StackingClassifier
+#from .stacking_cv_regression import StackingCVRegressor
+
+__all__ = ["StackingRegressor", "StackingClassifier"]
diff --git a/mlxtend/stacking/stacking.py b/mlxtend/stacking/stacking.py
new file mode 100644
index 000000000..70acaab7b
--- /dev/null
+++ b/mlxtend/stacking/stacking.py
@@ -0,0 +1,408 @@
+# Stacking estimators
+
+# Sebastian Raschka 2014-2018
+# mlxtend Machine Learning Library Extensions
+#
+# Ensemble-learning meta-estimators for stacking regression and classification
+# Author: Sebastian Raschka
+#
+# License: BSD 3 clause
+
+from ..externals.estimator_checks import check_is_fitted
+from ..externals.name_estimators import _name_estimators
+from ..externals import six
+import numpy as np
+import scipy.sparse as sparse
+from sklearn.base import BaseEstimator
+from sklearn.base import RegressorMixin
+from sklearn.base import ClassifierMixin
+from sklearn.base import TransformerMixin
+from sklearn.base import clone
+
+
+class StackingEstimator(BaseEstimator, TransformerMixin):
+    def __init__(self, estimators, meta_estimator, verbose=0,
+                 use_features_in_secondary=False,
+                 store_train_meta_features=True, use_clones=True):
+        self.estimators = estimators
+        self.meta_estimator = meta_estimator
+        self.named_estimators = {key: value for
+                                 key, value in
+                                 _name_estimators(estimators)}
+        self.named_meta_estimator = {'meta-%s' % key: value for
+                                     key, value in
+                                     _name_estimators([meta_estimator])}
+        self.verbose = verbose
+        self.use_features_in_secondary = use_features_in_secondary
+        self.store_train_meta_features = store_train_meta_features
+        self.use_clones = use_clones
+
+
+    def fit(self, X, y, sample_weight=None):
+        """Learn weight coefficients from training data for each regressor.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+        y : array-like, shape = [n_samples] or [n_samples, n_targets]
+            Target values.
+        sample_weight : array-like, shape = [n_samples], optional
+            Sample weights passed as sample_weights to each estimator
+            in the estimators list as well as the meta_estimator.
+            Raises an error if some estimator does not support
+            sample_weight in its fit() method.
+
+        Returns
+        -------
+        self : object
+
+        """
+        # initialize estimators
+        # make copy or assign references
+        self._initialize_estimators()
+
+        # fit base estimators
+        self._fit_base_estimators(X, y)
+
+
+        meta_features = self.predict_meta_features(X)
+        # save meta-features for training data
+        if self.store_train_meta_features:
+            self.train_meta_features_ = meta_features
+
+        # add variables for meta regression, if needed
+        meta_features = self._augment_meta_features(X, meta_features)
+
+        self._fit_one(self.meta_estimator_, meta_features, y,
+                      sample_weight=sample_weight)
+
+        return self
+
+    def predict(self, X):
+        """ Predict target values for X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        ----------
+        y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
+            Predicted target values.
+        """
+        check_is_fitted(self, 'estimators_')
+
+        meta_features = self.predict_meta_features(X)
+        meta_features = self._augment_meta_features(X, meta_features)
+        return self.meta_estimator_.predict(meta_features)
+
+    def predict_meta_features(self, X):
+        """ Get meta-features of test-data.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Test vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
+            meta-features for test data, where n_samples is the number of
+            samples in test data and len(self.regressors) is the number
+            of regressors.
+ + """ + check_is_fitted(self, 'estimators_') + return np.column_stack([r.predict(X) for r in self.estimators_]) + + def get_params(self, deep=True): + """Return estimator parameter names for GridSearch support.""" + if not deep: + return super(StackingEstimator, self).get_params(deep=False) + else: + out = self.named_estimators.copy() + for name, step in six.iteritems(self.named_estimators): + for key, value in six.iteritems(step.get_params(deep=True)): + out['%s__%s' % (name, key)] = value + + out.update(self.named_meta_estimator.copy()) + for name, step in six.iteritems(self.named_meta_estimator): + for key, value in six.iteritems(step.get_params(deep=True)): + out['%s__%s' % (name, key)] = value + + for key, value in six.iteritems(super(StackingEstimator, + self).get_params(deep=False)): + out['%s' % key] = value + + return out + + def _initialize_estimators(self): + # if use_clones, create copies of base estimators + # otherwise we assign the references + if self.use_clones: + self.estimators_ = [clone(e) for e in self.estimators] + self.meta_estimator_ = clone(self.meta_estimator) + else: + self.estimator_ = self.estimator_ + self.meta_estimator_ = self.meta_estimator_ + + def _fit_one(self, estimator, X, y, sample_weight=None): + if sample_weight is None: + estimator.fit(X, y) + else: + estimator.fit(X, y, sample_weight=sample_weight) + + def _fit_base_estimators(self, X, y, sample_weight=None): + if self.verbose > 0: + print("Fitting %d regressors..." % (len(self.estimators))) + + for estimator in self.estimators_: + + if self.verbose > 0: + i = self.estimators_.index(estimator) + 1 + print("Fitting regressor%d: %s (%d/%d)" % + (i, _name_estimators((estimator,))[0][0], i, len(self.estimators_))) + + if self.verbose > 2: + if hasattr(estimator, 'verbose'): + estimator.set_params(verbose=self.verbose - 2) + + if self.verbose > 1: + print(_name_estimators((estimator,))[0][1]) + + self._fit_one(estimator, X, y, sample_weight=sample_weight) + + def _augment_meta_features(self, X, prediction_features): + if not self.use_features_in_secondary: + # meta model uses the prediction outcomes only + return prediction_features + elif sparse.issparse(X): + return sparse.hstack((X, prediction_features)) + else: + return np.hstack((X, prediction_features)) + + +class StackingRegressor(StackingEstimator, RegressorMixin): + + """A Stacking regressor for scikit-learn estimators for regression. + + Parameters + ---------- + regressors : array-like, shape = [n_regressors] + A list of regressors. + Invoking the `fit` method on the `StackingRegressor` will fit clones + of those original regressors that will + be stored in the class attribute + `self.regr_`. + meta_regressor : object + The meta-regressor to be fitted on the ensemble of + regressors + verbose : int, optional (default=0) + Controls the verbosity of the building process. + - `verbose=0` (default): Prints nothing + - `verbose=1`: Prints the number & name of the regressor being fitted + - `verbose=2`: Prints info about the parameters of the + regressor being fitted + - `verbose>2`: Changes `verbose` param of the underlying regressor to + self.verbose - 2 + use_features_in_secondary : bool (default: False) + If True, the meta-regressor will be trained both on + the predictions of the original regressors and the + original dataset. + If False, the meta-regressor will be trained only on + the predictions of the original regressors. 
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data
+        used for fitting the
+        meta-regressor are stored in the `self.train_meta_features_` array,
+        which can be
+        accessed after calling `fit`.
+    use_clones : bool (default: True)
+        Clones the regressors for stacking regression if True (default)
+        or else uses the original ones, which will be refitted on the dataset
+        upon calling the `fit` method. Setting use_clones=False is
+        recommended if you are working with estimators that support
+        the scikit-learn fit/predict API interface but are not compatible
+        with scikit-learn's `clone` function.
+
+
+    Attributes
+    ----------
+    regr_ : list, shape=[n_regressors]
+        Fitted regressors (clones of the original regressors)
+    meta_regr_ : estimator
+        Fitted meta-regressor (clone of the original meta-estimator)
+    coef_ : array-like, shape = [n_features]
+        Model coefficients of the fitted meta-estimator
+    intercept_ : float
+        Intercept of the fitted meta-estimator
+    train_meta_features_ : numpy array, shape = [n_samples, len(self.regressors)]
+        meta-features for training data, where n_samples is the
+        number of samples
+        in training data and len(self.regressors) is the number of regressors.
+
+    Examples
+    -----------
+    For usage examples, please see
+    http://rasbt.github.io/mlxtend/user_guide/regressor/StackingRegressor/
+
+    """
+    def __init__(self, regressors, meta_regressor, verbose=0,
+                 use_features_in_secondary=False,
+                 store_train_meta_features=False, use_clones=True):
+        super(StackingRegressor, self).__init__(
+            regressors, meta_regressor, verbose=verbose,
+            use_features_in_secondary=use_features_in_secondary,
+            store_train_meta_features=store_train_meta_features,
+            use_clones=use_clones)
+        self.regressors = regressors
+        self.meta_regressor = meta_regressor
+
+    @property
+    def regr_(self):
+        return self.estimators_
+
+    @property
+    def meta_regr_(self):
+        return self.meta_estimator_
+
+    @property
+    def intercept_(self):
+        return self.meta_estimator_.intercept_
+
+    @property
+    def coef_(self):
+        return self.meta_estimator_.coef_
+
+
+class StackingClassifier(StackingEstimator, ClassifierMixin):
+
+    """A Stacking classifier for scikit-learn estimators for classification.
+    Parameters
+    ----------
+    classifiers : array-like, shape = [n_classifiers]
+        A list of classifiers.
+        Invoking the `fit` method on the `StackingClassifier` will fit clones
+        of these original classifiers that will
+        be stored in the class attribute
+        `self.clfs_`.
+    meta_classifier : object
+        The meta-classifier to be fitted on the ensemble of
+        classifiers
+    use_probas : bool (default: False)
+        If True, trains meta-classifier based on predicted probabilities
+        instead of class labels.
+    average_probas : bool (default: False)
+        Averages the probabilities as meta features if True.
+    verbose : int, optional (default=0)
+        Controls the verbosity of the building process.
+        - `verbose=0` (default): Prints nothing
+        - `verbose=1`: Prints the number & name of the classifier being fitted
+        - `verbose=2`: Prints info about the parameters of the
+          classifier being fitted
+        - `verbose>2`: Changes `verbose` param of the underlying classifier to
+          self.verbose - 2
+    use_features_in_secondary : bool (default: False)
+        If True, the meta-classifier will be trained both on the predictions
+        of the original classifiers and the original dataset.
+        If False, the meta-classifier will be trained only on the predictions
+        of the original classifiers.
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data used
+        for fitting the meta-classifier are stored in the
+        `self.train_meta_features_` array, which can be
+        accessed after calling `fit`.
+    use_clones : bool (default: True)
+        Clones the classifiers for stacking classification if True (default)
+        or else uses the original ones, which will be refitted on the dataset
+        upon calling the `fit` method. Hence, if use_clones=True, the original
+        input classifiers will remain unmodified upon using the
+        StackingClassifier's `fit` method.
+        Setting `use_clones=False` is
+        recommended if you are working with estimators that support
+        the scikit-learn fit/predict API interface but are not compatible
+        with scikit-learn's `clone` function.
+    Attributes
+    ----------
+    clfs_ : list, shape=[n_classifiers]
+        Fitted classifiers (clones of the original classifiers)
+    meta_clf_ : estimator
+        Fitted meta-classifier (clone of the original meta-estimator)
+    train_meta_features_ : numpy array, shape = [n_samples, n_classifiers]
+        meta-features for training data, where n_samples is the
+        number of samples
+        in training data and n_classifiers is the number of classifiers.
+    Examples
+    -----------
+    For usage examples, please see
+    http://rasbt.github.io/mlxtend/user_guide/classifier/StackingClassifier/
+    """
+
+    def __init__(self, classifiers, meta_classifier,
+                 use_probas=False, average_probas=False, verbose=0,
+                 use_features_in_secondary=False,
+                 store_train_meta_features=False,
+                 use_clones=True):
+        super(StackingClassifier, self).__init__(
+            classifiers, meta_classifier, verbose=verbose,
+            use_features_in_secondary=use_features_in_secondary,
+            store_train_meta_features=store_train_meta_features,
+            use_clones=use_clones)
+        self.use_probas = use_probas
+        self.average_probas = average_probas
+        self.classifiers = classifiers
+        self.meta_classifier = meta_classifier
+
+    def predict_meta_features(self, X):
+        """ Get meta-features of test-data.
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Test vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, n_classifiers]
+            Returns the meta-features for test data.
+        """
+        check_is_fitted(self, 'clfs_')
+        if self.use_probas:
+            probas = np.asarray([clf.predict_proba(X)
+                                 for clf in self.clfs_])
+            if self.average_probas:
+                vals = np.average(probas, axis=0)
+            else:
+                vals = np.concatenate(probas, axis=1)
+        else:
+            vals = np.column_stack([clf.predict(X) for clf in self.clfs_])
+        return vals
+
+    def predict_proba(self, X):
+        """ Predict class probabilities for X.
+        Parameters
+        ----------
+        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+        Returns
+        ----------
+        proba : array-like, shape = [n_samples, n_classes] or a list of \
+            n_outputs of such arrays if n_outputs > 1.
+            Probability for each class per sample.
+ """ + check_is_fitted(self, 'clfs_') + meta_features = self.predict_meta_features(X) + meta_features = self._augment_meta_features(X, meta_features) + + @property + def clfs_(self): + return self.estimators_ + + @property + def meta_clf_(self): + return self.meta_estimator_ \ No newline at end of file diff --git a/sandbox-dont-push.ipynb b/sandbox-dont-push.ipynb new file mode 100644 index 000000000..193e2b36a --- /dev/null +++ b/sandbox-dont-push.ipynb @@ -0,0 +1,633 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kota/miniconda3/envs/mlxtend/lib/python3.7/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n", + " from numpy.core.umath_tests import inner1d\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from mlxtend.externals.estimator_checks import NotFittedError\n", + "from mlxtend.utils import assert_raises\n", + "from mlxtend.regressor import StackingRegressor, StackingCVRegressor\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.linear_model import Lasso\n", + "from sklearn.neural_network import MLPRegressor\n", + "from sklearn.svm import SVR\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import train_test_split, KFold\n", + "from scipy import sparse\n", + "from numpy.testing import assert_almost_equal\n", + "from nose.tools import raises\n", + "from sklearn.base import clone" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from scipy import sparse\n", + "from mlxtend.externals.estimator_checks import NotFittedError\n", + "from mlxtend.regressor import StackingCVRegressor\n", + "from mlxtend.utils import assert_raises\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.linear_model import Ridge\n", + "from sklearn.svm import SVR\n", + "from sklearn.model_selection import GridSearchCV, train_test_split, KFold\n", + "from sklearn.base import clone\n", + "\n", + "\n", + "# Some test data\n", + "np.random.seed(1)\n", + "X1 = np.sort(5 * np.random.rand(40, 1), axis=0)\n", + "X2 = np.sort(5 * np.random.rand(40, 2), axis=0)\n", + "X3 = np.zeros((40, 3))\n", + "y = np.sin(X1).ravel()\n", + "y[::5] += 3 * (0.5 - np.random.rand(8))\n", + "y2 = np.zeros((40,))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.20166969294437087\n" + ] + } + ], + "source": [ + "import random\n", + "\n", + "random.seed(8)\n", + "w = np.array([random.random() for _ in range(40)])\n", + "\n", + "\n", + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, \n", + " cv=KFold(4, shuffle=True, random_state=7))\n", + "stack.fit(X1, y, sample_weight=np.ones(40)).predict(X1)\n", + "mse = 0.21\n", + "got = 
np.mean((stack.predict(X1) - y) ** 2)\n", + "print(got)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 0.36775818 0.41422082 0.43800099 0.46889041 0.59272485 0.61231241\n", + " 0.62836283 0.7367122 0.7522366 0.80566971 0.84050448 0.86365339\n", + " 0.87532447 0.9710891 0.97014473 0.96966604 0.952006 0.87445258\n", + " 0.82740436 0.82668374 0.82182401 0.81682575 0.40477061 0.37905676\n", + " 0.28624875 -0.25799577 -0.32626711 -0.33209999 -0.35638635 -0.35838507\n", + " -0.47926837 -0.59599037 -0.75945827 -0.84141441 -0.91029104 -0.91237472\n", + " -0.9124045 -0.92923798 -0.94573335 -0.94170385]\n", + "[ 0.36775818 0.41422082 0.43800099 0.46889041 0.59272485 0.61231241\n", + " 0.62836283 0.7367122 0.7522366 0.80566971 0.84050448 0.86365339\n", + " 0.87532447 0.9710891 0.97014473 0.96966604 0.952006 0.87445258\n", + " 0.82740436 0.82668374 0.82182401 0.81682575 0.40477061 0.37905676\n", + " 0.28624875 -0.25799577 -0.32626711 -0.33209999 -0.35638635 -0.35838507\n", + " -0.47926837 -0.59599037 -0.75945827 -0.84141441 -0.91029104 -0.91237472\n", + " -0.9124045 -0.92923798 -0.94573335 -0.94170385]\n" + ] + } + ], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, \n", + " cv=KFold(5, shuffle=True, random_state=5))\n", + "pred1 = stack.fit(X1, y, sample_weight=np.ones(40)).predict(X1)\n", + "\n", + "# lr = LinearRegression()\n", + "# svr_lin = SVR(kernel='linear')\n", + "# ridge = Ridge(random_state=1)\n", + "# svr_rbf = SVR(kernel='rbf')\n", + "# stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + "# meta_regressor=svr_rbf, \n", + "# cv=KFold(5, shuffle=True, random_state=5))\n", + "pred2 = stack.fit(X1, y).predict(X1)\n", + "\n", + "print(pred1)\n", + "print(pred2)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "fit() missing 2 required positional arguments: 'X' and 'y'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensemble\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mRandomForestClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: fit() missing 2 required positional arguments: 'X' and 'y'" + ] + } + ], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "r = RandomForestClassifier()\n", + "r.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "fit() missing 2 required positional arguments: 'X' and 'y'", + "output_type": "error", + "traceback": [ 
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnaive_bayes\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mGaussianNB\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mGaussianNB\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: fit() missing 2 required positional arguments: 'X' and 'y'" + ] + } + ], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "g = GaussianNB()\n", + "g.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "fit() missing 2 required positional arguments: 'X' and 'y'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinear_model\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mLogisticRegression\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0ml\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLogisticRegression\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0ml\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m: fit() missing 2 required positional arguments: 'X' and 'y'" + ] + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "l = LogisticRegression()\n", + "l.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.7\n" + ] + }, + { + "ename": "AssertionError", + "evalue": "0.700000 is wrong", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgot\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mgot\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0.1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgot\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%f is wrong'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mgot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 106\u001b[0;31m \u001b[0mtest_gridsearch_numerate_regr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 107\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mtest_get_params\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m\u001b[0m in \u001b[0;36mtest_gridsearch_numerate_regr\u001b[0;34m()\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[0mgot\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mround\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgrid\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbest_score_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 104\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgot\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 105\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mgot\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0;36m0.1\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgot\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'%f is wrong'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mgot\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 106\u001b[0m \u001b[0mtest_gridsearch_numerate_regr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 107\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAssertionError\u001b[0m: 0.700000 is wrong" + ] + } + ], + "source": [ + "def test_multivariate():\n", + " lr = LinearRegression()\n", + " svr_lin = SVR(kernel='linear')\n", + " ridge = Ridge(random_state=1)\n", + " svr_rbf = SVR(kernel='rbf')\n", + " stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, \n", + " cv=KFold(5, shuffle=True, random_state=3))\n", + " stack.fit(X2, y).predict(X2)\n", + " mse = 0.20\n", + " got = np.mean((stack.predict(X2) - y) ** 2)\n", + " assert round(got, 2) == mse, '%f != %f' % (round(got, 2), mse)\n", + "\n", + "\n", + "def test_internals():\n", + " lr = LinearRegression()\n", + " regressors = [lr, lr, lr, lr, lr]\n", + " #cv = 10\n", + " stack = StackingCVRegressor(regressors=[lr, lr, lr, lr, lr],\n", + " meta_regressor=lr,\n", + " cv=KFold(10, shuffle=True, random_state=4))\n", + " stack.fit(X3, y2)\n", + " assert stack.predict(X3).mean() == y2.mean()\n", + " assert stack.meta_regr_.intercept_ == 0.0\n", + " assert stack.meta_regr_.coef_[0] == 0.0\n", + " assert stack.meta_regr_.coef_[1] == 0.0\n", + " assert stack.meta_regr_.coef_[2] == 0.0\n", + " assert len(stack.regr_) == len(regressors)\n", + "\n", + "\n", + "def test_gridsearch_numerate_regr():\n", + " svr_lin = SVR(kernel='linear')\n", + " ridge = Ridge(random_state=1)\n", + " svr_rbf = SVR(kernel='rbf')\n", + " stack = StackingCVRegressor(regressors=[svr_lin, ridge, ridge],\n", + " meta_regressor=svr_rbf, \n", + " cv=KFold(3, shuffle=True, random_state=4))\n", + "\n", + " params = {'ridge-1__alpha': [0.01, 1.0],\n", + " 'ridge-2__alpha': [0.01, 1.0],\n", + " 'svr__C': [0.01, 1.0],\n", + " 'meta-svr__C': [0.01, 1.0],\n", + " 'use_features_in_secondary': [True, False]}\n", + "\n", + " grid = GridSearchCV(estimator=stack,\n", + " param_grid=params,\n", + " cv=KFold(5, shuffle=True, random_state=5),\n", + " refit=True,\n", + " verbose=0)\n", + " grid = grid.fit(X1, y)\n", + " got = round(grid.best_score_, 1)\n", + " print(got)\n", + " assert got >= 0.1 and got <= 0.2, '%f is wrong' % got\n", + "test_gridsearch_numerate_regr()\n", + "\n", + "def test_get_params():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = 
Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[ridge, lr],\n", + " meta_regressor=svr_rbf, cv=cv3)\n", + "\n", + " got = sorted(list({s.split('__')[0] for s in stregr.get_params().keys()}))\n", + " expect = ['cv',\n", + " 'linearregression',\n", + " 'meta-svr',\n", + " 'meta_regressor',\n", + " 'refit',\n", + " 'regressors',\n", + " 'ridge',\n", + " 'shuffle',\n", + " 'store_train_meta_features',\n", + " 'use_features_in_secondary']\n", + " assert got == expect, got\n", + "\n", + "\n", + "def test_regressor_gridsearch():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[lr],\n", + " meta_regressor=svr_rbf, cv=cv3)\n", + "\n", + " params = {'regressors': [[ridge, lr], [lr, ridge, lr]]}\n", + "\n", + " grid = GridSearchCV(estimator=stregr,\n", + " param_grid=params,\n", + " cv=cv5,\n", + " refit=True)\n", + " grid.fit(X1, y)\n", + "\n", + " assert len(grid.best_params_['regressors']) == 3\n", + "\n", + "\n", + "def test_predict_meta_features():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3)\n", + " X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)\n", + " stregr.fit(X_train, y_train)\n", + " test_meta_features = stregr.predict(X_test)\n", + " assert test_meta_features.shape[0] == X_test.shape[0]\n", + "\n", + "\n", + "def test_train_meta_features_():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3,\n", + " store_train_meta_features=True)\n", + " X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)\n", + " stregr.fit(X_train, y_train)\n", + " train_meta_features = stregr.train_meta_features_\n", + " assert train_meta_features.shape[0] == X_train.shape[0]\n", + "\n", + "\n", + "def test_not_fitted_predict():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3,\n", + " store_train_meta_features=True)\n", + " X_train, X_test, y_train, y_test = train_test_split(X2, y, test_size=0.3)\n", + "\n", + " expect = (\"This StackingCVRegressor instance is not fitted yet. 
Call \"\n", + " \"'fit' with appropriate arguments before using this method.\")\n", + "\n", + " assert_raises(NotFittedError,\n", + " expect,\n", + " stregr.predict,\n", + " X_train)\n", + "\n", + " assert_raises(NotFittedError,\n", + " expect,\n", + " stregr.predict_meta_features,\n", + " X_train)\n", + "\n", + "\n", + "def test_clone():\n", + " lr = LinearRegression()\n", + " svr_rbf = SVR(kernel='rbf')\n", + " ridge = Ridge(random_state=1)\n", + " stregr = StackingCVRegressor(regressors=[lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3,\n", + " store_train_meta_features=True)\n", + " clone(stregr)\n", + "\n", + "\n", + "def test_sparse_matrix_inputs():\n", + " lr = LinearRegression()\n", + " svr_lin = SVR(kernel='linear')\n", + " ridge = Ridge(random_state=1)\n", + " svr_rbf = SVR(kernel='rbf')\n", + " stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3)\n", + "\n", + " # dense\n", + " stack.fit(X1, y).predict(X1)\n", + " mse = 0.20\n", + " got = np.mean((stack.predict(X1) - y) ** 2)\n", + " assert round(got, 2) == mse\n", + "\n", + " # sparse\n", + " stack.fit(sparse.csr_matrix(X1), y)\n", + " mse = 0.20\n", + " got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)\n", + " assert round(got, 2) == mse\n", + "\n", + "\n", + "def test_sparse_matrix_inputs_with_features_in_secondary():\n", + " lr = LinearRegression()\n", + " svr_lin = SVR(kernel='linear')\n", + " ridge = Ridge(random_state=1)\n", + " svr_rbf = SVR(kernel='rbf')\n", + " stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv3,\n", + " use_features_in_secondary=True)\n", + "\n", + " # dense\n", + " stack.fit(X1, y).predict(X1)\n", + " mse = 0.20\n", + " got = np.mean((stack.predict(X1) - y) ** 2)\n", + " assert round(got, 2) == mse\n", + "\n", + " # sparse\n", + " stack.fit(sparse.csr_matrix(X1), y)\n", + " mse = 0.20\n", + " got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)\n", + " assert round(got, 2) == mse" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(1)\n", + "X1 = np.sort(5 * np.random.rand(40, 1), axis=0)\n", + "X2 = np.sort(5 * np.random.rand(40, 2), axis=0)\n", + "y = np.sin(X1).ravel()\n", + "y[::5] += 3 * (0.5 - np.random.rand(8))\n", + "y2 = np.sin(X2)\n", + "\n", + "#w = np.random.random(40)\n", + "import random\n", + "random.seed(1)\n", + "w = np.array([random.random() for _ in range(40)])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stregr = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv)\n", + "stregr.fit(X1, y, sample_weight=w).predict(X1)\n", + "mse = 0.21\n", + "got = np.mean((stregr.predict(X1) - y) ** 2)\n", + "print(got)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stregr = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv)\n", + "stregr.fit(X1, y, sample_weight=np.ones(40)).predict(X1)\n", + "mse = 0.21\n", + "got = np.mean((stregr.predict(X1) - y) ** 2)\n", + "print(got)" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf)\n", + "pred1 = stregr.fit(X1, y).predict(X1)\n", + "\n", + "\n", + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf)\n", + "pred2 = stregr.fit(X1, y, np.ones(40)).predict(X1)\n", + "print(pred1)\n", + "print(pred2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(ridge.fit(X1, y).predict(X1))\n", + "print(ridge.fit(X1, y, 2*np.ones(40)).predict(X1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(ridge.fit(X1, y, ).predict(X1))\n", + "print(ridge.fit(X1, y, np.ones(40)).predict(X1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "\n", + "stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=MLPRegressor())\n", + "stregr.fit(X1, y, w).predict(X1)\n", + "mse = 0.21\n", + "got = np.mean((stregr.predict(X1) - y) ** 2)\n", + "got" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.model_selection import KFold\n", + "import random\n", + "\n", + "np.random.seed(1)\n", + "cv = KFold(2, shuffle=True)\n", + "print(list(cv.split([1,2,3,4,5,6])))\n", + "\n", + "np.random.seed(1)\n", + "random.random()\n", + "cv = KFold(2, shuffle=True)\n", + "print(list(cv.split([1,2,3,4,5,6])))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lr = LinearRegression()\n", + "svr_lin = SVR(kernel='linear')\n", + "ridge = Ridge(random_state=1)\n", + "svr_rbf = SVR(kernel='rbf')\n", + "stack = StackingCVRegressor(regressors=[svr_lin, lr, ridge],\n", + " meta_regressor=svr_rbf, cv=cv)\n", + "stack.fit(X1, y).predict(X1)\n", + "mse = 0.21\n", + "got = np.mean((stack.predict(X1) - y) ** 2)\n", + "got" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From aae40e9d965792f8f53ce8802b21c4ce8f9b0964 Mon Sep 17 00:00:00 2001 From: Kota Mori Date: Mon, 24 Sep 2018 15:06:38 -0500 Subject: [PATCH 2/3] bug fix in inheritance structure --- .../tests/test_stacking_regression.py | 4 +- mlxtend/stacking/stacking.py | 95 +++++++++++++------ 2 files changed, 68 insertions(+), 31 deletions(-) diff --git a/mlxtend/regressor/tests/test_stacking_regression.py b/mlxtend/regressor/tests/test_stacking_regression.py index 
8407f45f1..d0c7d1891 100644 --- a/mlxtend/regressor/tests/test_stacking_regression.py +++ b/mlxtend/regressor/tests/test_stacking_regression.py @@ -240,13 +240,13 @@ def test_get_params(): expect = ['linearregression', 'meta-svr', 'meta_regressor', - 'refit', + 'use_clones', 'regressors', 'ridge', 'store_train_meta_features', 'use_features_in_secondary', 'verbose'] - assert got == expect, got + assert set(got) == set(expect), got def test_regressor_gridsearch(): diff --git a/mlxtend/stacking/stacking.py b/mlxtend/stacking/stacking.py index 70acaab7b..99b8bf969 100644 --- a/mlxtend/stacking/stacking.py +++ b/mlxtend/stacking/stacking.py @@ -26,18 +26,14 @@ def __init__(self, estimators, meta_estimator, verbose=0, store_train_meta_features=True, use_clones=True): self.estimators = estimators self.meta_estimator = meta_estimator - self.named_estimators = {key: value for - key, value in - _name_estimators(estimators)} - self.named_meta_estimator = {'meta-%s' % key: value for - key, value in - _name_estimators([meta_estimator])} self.verbose = verbose self.use_features_in_secondary = use_features_in_secondary self.store_train_meta_features = store_train_meta_features self.use_clones = use_clones - - + + # Placeholders for ests_, meta_est_ + #self.ests_, self.meta_est_ = estimators, meta_estimator + def fit(self, X, y, sample_weight=None): """Learn weight coefficients from training data for each regressor. @@ -64,7 +60,7 @@ def fit(self, X, y, sample_weight=None): self._initialize_estimators() # fit base estimators - self._fit_base_estimators(X, y) + self._fit_base_estimators(X, y, sample_weight=sample_weight) meta_features = self.predict_meta_features(X) @@ -75,7 +71,7 @@ def fit(self, X, y, sample_weight=None): # add variables for meta regression, if needed meta_features = self._augment_meta_features(X, meta_features) - self._fit_one(self.meta_estimator_, meta_features, y, + self._fit_one(self.meta_est_, meta_features, y, sample_weight=sample_weight) return self @@ -94,11 +90,12 @@ def predict(self, X): y_target : array-like, shape = [n_samples] or [n_samples, n_targets] Predicted target values. """ - check_is_fitted(self, 'estimators_') - + check_is_fitted(self, 'ests_') + check_is_fitted(self, 'meta_est_') + meta_features = self.predict_meta_features(X) meta_features = self._augment_meta_features(X, meta_features) - return self.meta_estimator_.predict(meta_features) + return self.meta_est_.predict(meta_features) def predict_meta_features(self, X): """ Get meta-features of test-data. @@ -117,8 +114,8 @@ def predict_meta_features(self, X): of regressors. 
""" - check_is_fitted(self, 'estimators_') - return np.column_stack([r.predict(X) for r in self.estimators_]) + check_is_fitted(self, 'ests_') + return np.column_stack([r.predict(X) for r in self.ests_]) def get_params(self, deep=True): """Return estimator parameter names for GridSearch support.""" @@ -145,11 +142,11 @@ def _initialize_estimators(self): # if use_clones, create copies of base estimators # otherwise we assign the references if self.use_clones: - self.estimators_ = [clone(e) for e in self.estimators] - self.meta_estimator_ = clone(self.meta_estimator) + self.ests_ = [clone(e) for e in self.estimators_] + self.meta_est_ = clone(self.meta_estimator_) else: - self.estimator_ = self.estimator_ - self.meta_estimator_ = self.meta_estimator_ + self.ests_ = self.estimators_ + self.meta_est_ = self.meta_estimator_ def _fit_one(self, estimator, X, y, sample_weight=None): if sample_weight is None: @@ -159,14 +156,15 @@ def _fit_one(self, estimator, X, y, sample_weight=None): def _fit_base_estimators(self, X, y, sample_weight=None): if self.verbose > 0: - print("Fitting %d regressors..." % (len(self.estimators))) + print("Fitting %d regressors..." % len(self.ests_)) - for estimator in self.estimators_: + for estimator in self.ests_: if self.verbose > 0: - i = self.estimators_.index(estimator) + 1 + i = self.ests_.index(estimator) + 1 print("Fitting regressor%d: %s (%d/%d)" % - (i, _name_estimators((estimator,))[0][0], i, len(self.estimators_))) + (i, _name_estimators((estimator,))[0][0], + i, len(self.ests_))) if self.verbose > 2: if hasattr(estimator, 'verbose'): @@ -186,7 +184,24 @@ def _augment_meta_features(self, X, prediction_features): else: return np.hstack((X, prediction_features)) + @property + def estimators_(self): + return self.estimators + @property + def meta_estimator_(self): + return self.meta_estimator + + @property + def named_estimators(self): + return {key: value \ + for key, value in _name_estimators(self.estimators_)} + + @property + def named_meta_estimator(self): + return {'meta-%s' % key: value \ + for key, value in _name_estimators([self.meta_estimator_])} + class StackingRegressor(StackingEstimator, RegressorMixin): """A Stacking regressor for scikit-learn estimators for regression. @@ -262,22 +277,32 @@ def __init__(self, regressors, meta_regressor, verbose=0, use_clones=use_clones) self.regressors = regressors self.meta_regressor = meta_regressor + del self.estimators + del self.meta_estimator + @property + def estimators_(self): + return self.regressors + + @property + def meta_estimator_(self): + return self.meta_regressor + @property def regr_(self): - return self.estimators_ + return self.ests_ @property def meta_regr_(self): - return self.meta_estimator_ + return self.meta_ests_ @property def intercept_(self): - return self.meta_estimator_.intercept_ + return self.meta_est_.intercept_ @property def coef_(self): - return self.meta_estimator_.coef_ + return self.meta_est_.coef_ class StackingClassifier(StackingEstimator, ClassifierMixin): @@ -357,6 +382,8 @@ def __init__(self, classifiers, meta_classifier, self.average_probas = average_probas self.classifiers = classifiers self.meta_classifier = meta_classifier + del self.estimators + del self.meta_estimator def predict_meta_features(self, X): """ Get meta-features of test-data. @@ -396,13 +423,23 @@ def predict_proba(self, X): Probability for each class per sample. 
""" check_is_fitted(self, 'clfs_') + check_is_fitted(self, 'meta_clf_') meta_features = self.predict_meta_features(X) meta_features = self._augment_meta_features(X, meta_features) + return self.meta_clf_.predict_proba(meta_features) + + @property + def estimators_(self): + return self.classifiers + + @property + def meta_estimator_(self): + return self.meta_classifier @property def clfs_(self): - return self.estimators_ + return self.ests_ @property def meta_clf_(self): - return self.meta_estimator_ \ No newline at end of file + return self.meta_est_ \ No newline at end of file From 3c9151e78c61e4ea442f667ea1fa9242f6fbaa27 Mon Sep 17 00:00:00 2001 From: Kota Mori Date: Mon, 24 Sep 2018 19:19:36 -0500 Subject: [PATCH 3/3] wip. stacking cv regressor --- .../classifier/stacking_cv_classification.py | 416 +----------------- mlxtend/regressor/stacking_cv_regression.py | 273 +----------- .../tests/test_stacking_cv_regression.py | 7 +- .../tests/test_stacking_regression.py | 2 +- mlxtend/stacking/__init__.py | 8 +- mlxtend/stacking/stacking.py | 3 +- mlxtend/stacking/stacking_cv.py | 391 ++++++++++++++++ 7 files changed, 404 insertions(+), 696 deletions(-) create mode 100644 mlxtend/stacking/stacking_cv.py diff --git a/mlxtend/classifier/stacking_cv_classification.py b/mlxtend/classifier/stacking_cv_classification.py index b8830213e..5a0ca2df6 100644 --- a/mlxtend/classifier/stacking_cv_classification.py +++ b/mlxtend/classifier/stacking_cv_classification.py @@ -9,418 +9,4 @@ # # License: BSD 3 clause -from ..externals.name_estimators import _name_estimators -from ..externals.estimator_checks import check_is_fitted -import numpy as np -from scipy import sparse -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import TransformerMixin -from sklearn.base import clone -from sklearn.externals import six -from sklearn.model_selection._split import check_cv - - -class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin): - - """A 'Stacking Cross-Validation' classifier for scikit-learn estimators. - - New in mlxtend v0.4.3 - - Notes - ------- - The StackingCVClassifier uses scikit-learn's check_cv - internally, which doesn't support a random seed. Thus - NumPy's random seed need to be specified explicitely for - deterministic behavior, for instance, by setting - np.random.seed(RANDOM_SEED) - prior to fitting the StackingCVClassifier - - Parameters - ---------- - classifiers : array-like, shape = [n_classifiers] - A list of classifiers. - Invoking the `fit` method on the `StackingCVClassifer` will fit clones - of these original classifiers that will - be stored in the class attribute `self.clfs_`. - meta_classifier : object - The meta-classifier to be fitted on the ensemble of - classifiers - use_probas : bool (default: False) - If True, trains meta-classifier based on predicted probabilities - instead of class labels. - cv : int, cross-validation generator or an iterable, optional (default: 2) - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - None, to use the default 2-fold cross validation, - - integer, to specify the number of folds in a `(Stratified)KFold`, - - An object to be used as a cross-validation generator. - - An iterable yielding train, test splits. - For integer/None inputs, it will use either a `KFold` or - `StratifiedKFold` cross validation depending the value of `stratify` - argument. 
- use_features_in_secondary : bool (default: False) - If True, the meta-classifier will be trained both on the predictions - of the original classifiers and the original dataset. - If False, the meta-classifier will be trained only on the predictions - of the original classifiers. - stratify : bool (default: True) - If True, and the `cv` argument is integer it will follow a stratified - K-Fold cross validation technique. If the `cv` argument is a specific - cross validation technique, this argument is omitted. - shuffle : bool (default: True) - If True, and the `cv` argument is integer, the training data will be - shuffled at fitting stage prior to cross-validation. If the `cv` - argument is a specific cross validation technique, this argument is - omitted. - verbose : int, optional (default=0) - Controls the verbosity of the building process. - - `verbose=0` (default): Prints nothing - - `verbose=1`: Prints the number & name of the regressor being fitted - and which fold is currently being used for fitting - - `verbose=2`: Prints info about the parameters of the - regressor being fitted - - `verbose>2`: Changes `verbose` param of the underlying regressor to - self.verbose - 2 - store_train_meta_features : bool (default: False) - If True, the meta-features computed from the training data used - for fitting the meta-classifier stored in the - `self.train_meta_features_` array, which can be - accessed after calling `fit`. - use_clones : bool (default: True) - Clones the classifiers for stacking classification if True (default) - or else uses the original ones, which will be refitted on the dataset - upon calling the `fit` method. Hence, if use_clones=True, the original - input classifiers will remain unmodified upon using the - StackingCVClassifier's `fit` method. - Setting `use_clones=False` is - recommended if you are working with estimators that are supporting - the scikit-learn fit/predict API interface but are not compatible - to scikit-learn's `clone` function. - - - Attributes - ---------- - clfs_ : list, shape=[n_classifiers] - Fitted classifiers (clones of the original classifiers) - meta_clf_ : estimator - Fitted meta-classifier (clone of the original meta-estimator) - train_meta_features : numpy array, shape = [n_samples, n_classifiers] - meta-features for training data, where n_samples is the - number of samples - in training data and n_classifiers is the number of classfiers. - - Examples - ----------- - For usage examples, please see - http://rasbt.github.io/mlxtend/user_guide/classifier/StackingCVClassifier/ - - """ - def __init__(self, classifiers, meta_classifier, - use_probas=False, cv=2, - use_features_in_secondary=False, - stratify=True, - shuffle=True, verbose=0, - store_train_meta_features=False, - use_clones=True): - - self.classifiers = classifiers - self.meta_classifier = meta_classifier - self.named_classifiers = {key: value for - key, value in - _name_estimators(classifiers)} - self.named_meta_classifier = {'meta-%s' % key: value for - key, value in - _name_estimators([meta_classifier])} - self.use_probas = use_probas - self.verbose = verbose - self.cv = cv - self.use_features_in_secondary = use_features_in_secondary - self.stratify = stratify - self.shuffle = shuffle - self.store_train_meta_features = store_train_meta_features - self.use_clones = use_clones - - def fit(self, X, y, groups=None, sample_weight=None): - """ Fit ensemble classifers and the meta-classifier. 
- - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Training vectors, where n_samples is the number of samples and - n_features is the number of features. - - y : numpy array, shape = [n_samples] - Target values. - - groups : numpy array/None, shape = [n_samples] - The group that each sample belongs to. This is used by specific - folding strategies such as GroupKFold() - - sample_weight : array-like, shape = [n_samples], optional - Sample weights passed as sample_weights to each regressor - in the regressors list as well as the meta_regressor. - Raises error if some regressor does not support - sample_weight in the fit() method. - - Returns - ------- - self : object - - """ - if self.use_clones: - self.clfs_ = [clone(clf) for clf in self.classifiers] - self.meta_clf_ = clone(self.meta_classifier) - else: - self.clfs_ = self.classifiers - self.meta_clf_ = self.meta_classifier - if self.verbose > 0: - print("Fitting %d classifiers..." % (len(self.classifiers))) - - final_cv = check_cv(self.cv, y, classifier=self.stratify) - if isinstance(self.cv, int): - # Override shuffle parameter in case of self generated - # cross-validation strategy - final_cv.shuffle = self.shuffle - skf = list(final_cv.split(X, y, groups)) - - all_model_predictions = np.array([]).reshape(len(y), 0) - for model in self.clfs_: - - if self.verbose > 0: - i = self.clfs_.index(model) + 1 - print("Fitting classifier%d: %s (%d/%d)" % - (i, _name_estimators((model,))[0][0], - i, len(self.clfs_))) - - if self.verbose > 2: - if hasattr(model, 'verbose'): - model.set_params(verbose=self.verbose - 2) - - if self.verbose > 1: - print(_name_estimators((model,))[0][1]) - - if not self.use_probas: - single_model_prediction = np.array([]).reshape(0, 1) - else: - single_model_prediction = np.array([]).reshape(0, len(set(y))) - - for num, (train_index, test_index) in enumerate(skf): - - if self.verbose > 0: - print("Training and fitting fold %d of %d..." % - ((num + 1), final_cv.get_n_splits())) - - try: - if sample_weight is None: - model.fit(X[train_index], y[train_index]) - else: - model.fit(X[train_index], y[train_index], - sample_weight=sample_weight[train_index]) - except TypeError as e: - - if str(e).startswith('A sparse matrix was passed,' - ' but dense' - ' data is required'): - sparse_estimator_message = ( - "\nYou are likely getting this error" - " because one of the" - " estimators" - " does not support sparse matrix input.") - else: - sparse_estimator_message = '' - - raise TypeError(str(e) + sparse_estimator_message + - '\nPlease check that X and y' - 'are NumPy arrays. If X and y are lists' - ' of lists,\ntry passing them as' - ' numpy.array(X)' - ' and numpy.array(y).') - except KeyError as e: - - raise KeyError(str(e) + '\nPlease check that X and y' - ' are NumPy arrays. If X and y are pandas' - ' DataFrames,\ntry passing them as' - ' X.values' - ' and y.values.') - - if not self.use_probas: - prediction = model.predict(X[test_index]) - prediction = prediction.reshape(prediction.shape[0], 1) - else: - prediction = model.predict_proba(X[test_index]) - single_model_prediction = np.vstack([single_model_prediction. - astype(prediction.dtype), - prediction]) - - all_model_predictions = np.hstack([all_model_predictions. - astype(single_model_prediction. 
- dtype), - single_model_prediction]) - - if self.store_train_meta_features: - # Store the meta features in the order of the - # original X,y arrays - reodered_indices = np.array([]).astype(y.dtype) - for train_index, test_index in skf: - reodered_indices = np.concatenate((reodered_indices, - test_index)) - self.train_meta_features_ = all_model_predictions[np.argsort( - reodered_indices)] - - # We have to shuffle the labels in the same order as we generated - # predictions during CV (we kinda shuffled them when we did - # Stratified CV). - # We also do the same with the features (we will need this only IF - # use_features_in_secondary is True) - reordered_labels = np.array([]).astype(y.dtype) - reordered_features = np.array([]).reshape((0, X.shape[1]))\ - .astype(X.dtype) - for train_index, test_index in skf: - reordered_labels = np.concatenate((reordered_labels, - y[test_index])) - - if sparse.issparse(X): - reordered_features = sparse.vstack((reordered_features, - X[test_index])) - else: - reordered_features = np.concatenate((reordered_features, - X[test_index])) - - # Fit the base models correctly this time using ALL the training set - for model in self.clfs_: - if sample_weight is None: - model.fit(X, y) - else: - model.fit(X, y, sample_weight=sample_weight) - - # Fit the secondary model - if not self.use_features_in_secondary: - meta_features = all_model_predictions - elif sparse.issparse(X): - meta_features = sparse.hstack((reordered_features, - all_model_predictions)) - else: - meta_features = np.hstack((reordered_features, - all_model_predictions)) - if sample_weight is None: - self.meta_clf_.fit(meta_features, reordered_labels) - else: - self.meta_clf_.fit(meta_features, reordered_labels, - sample_weight=sample_weight) - - return self - - def get_params(self, deep=True): - """Return estimator parameter names for GridSearch support.""" - if not deep: - return super(StackingCVClassifier, self).get_params(deep=False) - else: - out = self.named_classifiers.copy() - for name, step in six.iteritems(self.named_classifiers): - for key, value in six.iteritems(step.get_params(deep=True)): - out['%s__%s' % (name, key)] = value - - out.update(self.named_meta_classifier.copy()) - for name, step in six.iteritems(self.named_meta_classifier): - for key, value in six.iteritems(step.get_params(deep=True)): - out['%s__%s' % (name, key)] = value - - for key, value in six.iteritems(super(StackingCVClassifier, - self).get_params(deep=False)): - out['%s' % key] = value - - return out - - def predict_meta_features(self, X): - """ Get meta-features of test-data. - - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Test vectors, where n_samples is the number of samples and - n_features is the number of features. - - Returns - ------- - meta-features : numpy array, shape = [n_samples, n_classifiers] - Returns the meta-features for test data. - - """ - check_is_fitted(self, 'clfs_') - all_model_predictions = np.array([]).reshape(len(X), 0) - for model in self.clfs_: - if not self.use_probas: - single_model_prediction = model.predict(X) - single_model_prediction = single_model_prediction\ - .reshape(single_model_prediction.shape[0], 1) - else: - single_model_prediction = model.predict_proba(X) - all_model_predictions = np.hstack((all_model_predictions. - astype(single_model_prediction - .dtype), - single_model_prediction)) - return all_model_predictions - - def predict(self, X): - """ Predict target values for X. 
- - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Training vectors, where n_samples is the number of samples and - n_features is the number of features. - - Returns - ---------- - labels : array-like, shape = [n_samples] - Predicted class labels. - - """ - check_is_fitted(self, 'clfs_') - all_model_predictions = self.predict_meta_features(X) - if not self.use_features_in_secondary: - return self.meta_clf_.predict(all_model_predictions) - elif sparse.issparse(X): - return self.meta_clf_.predict( - sparse.hstack((X, all_model_predictions))) - else: - return self.meta_clf_.predict( - np.hstack((X, all_model_predictions))) - - def predict_proba(self, X): - """ Predict class probabilities for X. - - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Training vectors, where n_samples is the number of samples and - n_features is the number of features. - - Returns - ---------- - proba : array-like, shape = [n_samples, n_classes] - Probability for each class per sample. - - """ - check_is_fitted(self, 'clfs_') - all_model_predictions = np.array([]).reshape(len(X), 0) - for model in self.clfs_: - if not self.use_probas: - single_model_prediction = model.predict(X) - single_model_prediction = single_model_prediction\ - .reshape(single_model_prediction.shape[0], 1) - else: - single_model_prediction = model.predict_proba(X) - all_model_predictions = np.hstack((all_model_predictions. - astype(single_model_prediction. - dtype), - single_model_prediction)) - if not self.use_features_in_secondary: - return self.meta_clf_.predict_proba(all_model_predictions) - elif sparse.issparse(X): - self.meta_clf_\ - .predict_proba(sparse.hstack((X, all_model_predictions))) - else: - return self.meta_clf_\ - .predict_proba(np.hstack((X, all_model_predictions))) +from ..stacking import StackingCVClassifier \ No newline at end of file diff --git a/mlxtend/regressor/stacking_cv_regression.py b/mlxtend/regressor/stacking_cv_regression.py index 2a5e23ff5..cb1ff844a 100644 --- a/mlxtend/regressor/stacking_cv_regression.py +++ b/mlxtend/regressor/stacking_cv_regression.py @@ -13,275 +13,4 @@ # # License: BSD 3 clause -from ..externals.estimator_checks import check_is_fitted -from ..externals import six -from ..externals.name_estimators import _name_estimators -from scipy import sparse -from sklearn.base import BaseEstimator -from sklearn.base import RegressorMixin -from sklearn.base import TransformerMixin -from sklearn.base import clone -from sklearn.model_selection._split import check_cv - -import numpy as np - - -class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin): - """A 'Stacking Cross-Validation' regressor for scikit-learn estimators. - - New in mlxtend v0.7.0 - - Notes - ------- - The StackingCVRegressor uses scikit-learn's check_cv - internally, which doesn't support a random seed. Thus - NumPy's random seed need to be specified explicitely for - deterministic behavior, for instance, by setting - np.random.seed(RANDOM_SEED) - prior to fitting the StackingCVRegressor - - Parameters - ---------- - regressors : array-like, shape = [n_regressors] - A list of regressors. - Invoking the `fit` method on the `StackingCVRegressor` will fit clones - of these original regressors that will - be stored in the class attribute `self.regr_`. 
- meta_regressor : object - The meta-regressor to be fitted on the ensemble of - regressor - cv : int, cross-validation generator or iterable, optional (default: 5) - Determines the cross-validation splitting strategy. - Possible inputs for cv are: - - None, to use the default 5-fold cross validation, - - integer, to specify the number of folds in a `KFold`, - - An object to be used as a cross-validation generator. - - An iterable yielding train, test splits. - For integer/None inputs, it will use `KFold` cross-validation - use_features_in_secondary : bool (default: False) - If True, the meta-regressor will be trained both on - the predictions of the original regressors and the - original dataset. - If False, the meta-regressor will be trained only on - the predictions of the original regressors. - shuffle : bool (default: True) - If True, and the `cv` argument is integer, the training data will - be shuffled at fitting stage prior to cross-validation. If the `cv` - argument is a specific cross validation technique, this argument is - omitted. - store_train_meta_features : bool (default: False) - If True, the meta-features computed from the training data - used for fitting the - meta-regressor stored in the `self.train_meta_features_` array, - which can be - accessed after calling `fit`. - refit : bool (default: True) - Clones the regressors for stacking regression if True (default) - or else uses the original ones, which will be refitted on the dataset - upon calling the `fit` method. Setting refit=False is - recommended if you are working with estimators that are supporting - the scikit-learn fit/predict API interface but are not compatible - to scikit-learn's `clone` function. - - Attributes - ---------- - train_meta_features : numpy array, shape = [n_samples, n_regressors] - meta-features for training data, where n_samples is the - number of samples - in training data and len(self.regressors) is the number of regressors. - - Examples - ----------- - For usage examples, please see - http://rasbt.github.io/mlxtend/user_guide/regressor/StackingCVRegressor/ - - """ - def __init__(self, regressors, meta_regressor, cv=5, - shuffle=True, - use_features_in_secondary=False, - store_train_meta_features=False, - refit=True): - - self.regressors = regressors - self.meta_regressor = meta_regressor - self.named_regressors = {key: value for - key, value in - _name_estimators(regressors)} - self.named_meta_regressor = {'meta-%s' % key: value for - key, value in - _name_estimators([meta_regressor])} - self.cv = cv - self.shuffle = shuffle - self.use_features_in_secondary = use_features_in_secondary - self.store_train_meta_features = store_train_meta_features - self.refit = refit - - def fit(self, X, y, groups=None, sample_weight=None): - """ Fit ensemble regressors and the meta-regressor. - - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Training vectors, where n_samples is the number of samples and - n_features is the number of features. - - y : numpy array, shape = [n_samples] - Target values. - - groups : numpy array/None, shape = [n_samples] - The group that each sample belongs to. This is used by specific - folding strategies such as GroupKFold() - - sample_weight : array-like, shape = [n_samples], optional - Sample weights passed as sample_weights to each regressor - in the regressors list as well as the meta_regressor. - Raises error if some regressor does not support - sample_weight in the fit() method. 
- - Returns - ------- - self : object - - """ - if self.refit: - self.regr_ = [clone(clf) for clf in self.regressors] - self.meta_regr_ = clone(self.meta_regressor) - else: - self.regr_ = self.regressors - self.meta_regr_ = self.meta_regressor - - kfold = check_cv(self.cv, y) - if isinstance(self.cv, int): - # Override shuffle parameter in case of self generated - # cross-validation strategy - kfold.shuffle = self.shuffle - - meta_features = np.zeros((X.shape[0], len(self.regressors))) - - # - # The outer loop iterates over the base-regressors. Each regressor - # is trained cv times and makes predictions, after which we train - # the meta-regressor on their combined results. - # - for i, regr in enumerate(self.regressors): - # - # In the inner loop, each model is trained cv times on the - # training-part of this fold of data; and the holdout-part of data - # is used for predictions. This is repeated cv times, so in - # the end we have predictions for each data point. - # - # Advantage of this complex approach is that data points we're - # predicting have not been trained on by the algorithm, so it's - # less susceptible to overfitting. - # - for train_idx, holdout_idx in kfold.split(X, y, groups): - instance = clone(regr) - if sample_weight is None: - instance.fit(X[train_idx], y[train_idx]) - else: - instance.fit(X[train_idx], y[train_idx], - sample_weight=sample_weight[train_idx]) - y_pred = instance.predict(X[holdout_idx]) - meta_features[holdout_idx, i] = y_pred - - # save meta-features for training data - if self.store_train_meta_features: - self.train_meta_features_ = meta_features - - # Train meta-model on the out-of-fold predictions - if not self.use_features_in_secondary: - pass - elif sparse.issparse(X): - meta_features = sparse.hstack((X, meta_features)) - else: - meta_features = np.hstack((X, meta_features)) - - if sample_weight is None: - self.meta_regr_.fit(meta_features, y) - else: - self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight) - - # Retrain base models on all data - for regr in self.regr_: - if sample_weight is None: - regr.fit(X, y) - else: - regr.fit(X, y, sample_weight=sample_weight) - - return self - - def predict(self, X): - """ Predict target values for X. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape = [n_samples, n_features] - Training vectors, where n_samples is the number of samples and - n_features is the number of features. - - Returns - ---------- - y_target : array-like, shape = [n_samples] or [n_samples, n_targets] - Predicted target values. - """ - - # - # First we make predictions with the base-models then we predict with - # the meta-model from that info. - # - - check_is_fitted(self, 'regr_') - - meta_features = np.column_stack([ - regr.predict(X) for regr in self.regr_ - ]) - - if not self.use_features_in_secondary: - return self.meta_regr_.predict(meta_features) - elif sparse.issparse(X): - return self.meta_regr_.predict(sparse.hstack((X, meta_features))) - else: - return self.meta_regr_.predict(np.hstack((X, meta_features))) - - def predict_meta_features(self, X): - """ Get meta-features of test-data. - - Parameters - ---------- - X : numpy array, shape = [n_samples, n_features] - Test vectors, where n_samples is the number of samples and - n_features is the number of features. 
-
-        Returns
-        -------
-        meta-features : numpy array, shape = [n_samples, len(self.regressors)]
-            meta-features for test data, where n_samples is the number of
-            samples in test data and len(self.regressors) is the number
-            of regressors.
-
-        """
-        check_is_fitted(self, 'regr_')
-        return np.column_stack([regr.predict(X) for regr in self.regr_])
-
-    def get_params(self, deep=True):
-        #
-        # Return estimator parameter names for GridSearch support.
-        #
-        if not deep:
-            return super(StackingCVRegressor, self).get_params(deep=False)
-        else:
-            out = self.named_regressors.copy()
-            for name, step in six.iteritems(self.named_regressors):
-                for key, value in six.iteritems(step.get_params(deep=True)):
-                    out['%s__%s' % (name, key)] = value
-
-            out.update(self.named_meta_regressor.copy())
-            for name, step in six.iteritems(self.named_meta_regressor):
-                for key, value in six.iteritems(step.get_params(deep=True)):
-                    out['%s__%s' % (name, key)] = value
-
-            for key, value in six.iteritems(super(StackingCVRegressor,
-                                            self).get_params(deep=False)):
-                out['%s' % key] = value
-
-            return out
+from ..stacking import StackingCVRegressor
\ No newline at end of file
diff --git a/mlxtend/regressor/tests/test_stacking_cv_regression.py b/mlxtend/regressor/tests/test_stacking_cv_regression.py
index 9a011fa5d..17b79e07e 100644
--- a/mlxtend/regressor/tests/test_stacking_cv_regression.py
+++ b/mlxtend/regressor/tests/test_stacking_cv_regression.py
@@ -120,17 +120,18 @@ def test_get_params():
     got = sorted(list({s.split('__')[0] for s in stregr.get_params().keys()}))
     expect = ['cv',
+              'verbose',
               'linearregression',
               'meta-svr',
               'meta_regressor',
-              'refit',
+              'use_clones',
               'regressors',
               'ridge',
               'shuffle',
               'store_train_meta_features',
               'use_features_in_secondary']
-    assert got == expect, got
-
+    assert set(got) == set(expect), "Missing: %s, Redundant: %s" % (
+        set(expect) - set(got), set(got) - set(expect))

 def test_regressor_gridsearch():
     lr = LinearRegression()
diff --git a/mlxtend/regressor/tests/test_stacking_regression.py b/mlxtend/regressor/tests/test_stacking_regression.py
index d0c7d1891..b4235b675 100644
--- a/mlxtend/regressor/tests/test_stacking_regression.py
+++ b/mlxtend/regressor/tests/test_stacking_regression.py
@@ -391,4 +391,4 @@ def test_sparse_matrix_inputs_and_features_in_secondary():
     stack.fit(sparse.csr_matrix(X1), y)
     mse = 0.14
     got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
-    assert round(got, 2) == mse
+    assert round(got, 2) == mse
\ No newline at end of file
diff --git a/mlxtend/stacking/__init__.py b/mlxtend/stacking/__init__.py
index 01fac781f..2ee96dc92 100644
--- a/mlxtend/stacking/__init__.py
+++ b/mlxtend/stacking/__init__.py
@@ -4,8 +4,8 @@
 #
 # License: BSD 3 clause

-from .stacking import StackingRegressor
-from .stacking import StackingClassifier
-#from .stacking_cv_regression import StackingCVRegressor
+from .stacking import StackingRegressor, StackingClassifier
+from .stacking_cv import StackingCVRegressor, StackingCVClassifier

-__all__ = ["StackingRegressor", "StackingClassifier"]
+__all__ = ["StackingRegressor", "StackingClassifier",
+           "StackingCVRegressor", "StackingCVClassifier"]
\ No newline at end of file
diff --git a/mlxtend/stacking/stacking.py b/mlxtend/stacking/stacking.py
index 99b8bf969..e125da67d 100644
--- a/mlxtend/stacking/stacking.py
+++ b/mlxtend/stacking/stacking.py
@@ -18,6 +18,7 @@
 from sklearn.base import ClassifierMixin
 from sklearn.base import TransformerMixin
 from sklearn.base import clone
+from sklearn.model_selection import check_cv


 class StackingEstimator(BaseEstimator, TransformerMixin):
@@ -294,7 +295,7 @@ def regr_(self):
 
     @property
     def meta_regr_(self):
-        return self.meta_ests_
+        return self.meta_est_
 
     @property
     def intercept_(self):
diff --git a/mlxtend/stacking/stacking_cv.py b/mlxtend/stacking/stacking_cv.py
new file mode 100644
index 000000000..98de5c622
--- /dev/null
+++ b/mlxtend/stacking/stacking_cv.py
@@ -0,0 +1,391 @@
+# Stacking CV estimators

+# Sebastian Raschka 2014-2018
+# mlxtend Machine Learning Library Extensions
+#
+# Ensemble-learning meta-estimators for stacking with cross-validation
+# Author: Sebastian Raschka
+#
+# License: BSD 3 clause
+
+import numpy as np
+from sklearn.base import RegressorMixin
+from sklearn.base import ClassifierMixin
+from sklearn.base import clone
+from sklearn.model_selection import check_cv
+
+from ..externals.estimator_checks import check_is_fitted
+from .stacking import StackingEstimator
+from .stacking import StackingRegressor
+from .stacking import StackingClassifier
+
+
+class StackingCVEstimator(StackingEstimator):
+    def __init__(self, estimators, meta_estimator,
+                 cv=5, shuffle=True, stratify=False, verbose=0,
+                 use_features_in_secondary=False,
+                 store_train_meta_features=False,
+                 use_clones=True):
+        super(StackingCVEstimator, self).__init__(
+            estimators, meta_estimator, verbose=verbose,
+            use_features_in_secondary=use_features_in_secondary,
+            store_train_meta_features=store_train_meta_features,
+            use_clones=use_clones)
+        self.cv = cv
+        self.shuffle = shuffle
+        self.stratify = stratify
+
+    def fit(self, X, y, groups=None, sample_weight=None):
+        """ Fit the ensemble of base estimators and the meta-estimator.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Training vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        y : numpy array, shape = [n_samples]
+            Target values.
+
+        groups : numpy array/None, shape = [n_samples]
+            The group that each sample belongs to. This is used by specific
+            folding strategies such as GroupKFold().
+
+        sample_weight : array-like, shape = [n_samples], optional
+            Sample weights passed as sample_weight to each estimator
+            in the estimators list as well as to the meta-estimator.
+            Raises an error if an estimator does not support
+            sample_weight in its fit() method.
+
+        Returns
+        -------
+        self : object
+        """
+        # initialize estimators:
+        # make copies or assign references, depending on use_clones
+        self._initialize_estimators()
+
+        meta_features = self._meta_features_cv(
+            X, y, groups=groups, sample_weight=sample_weight)
+        # save meta-features for training data
+        if self.store_train_meta_features:
+            self.train_meta_features_ = meta_features
+
+        # refit the base estimators on the full training set; these
+        # are the models that will be used at prediction time
+        self._fit_base_estimators(X, y, sample_weight=sample_weight)
+
+        # append the original features for the meta-estimator, if needed
+        meta_features = self._augment_meta_features(X, meta_features)
+
+        self._fit_one(self.meta_est_, meta_features, y,
+                      sample_weight=sample_weight)
+
+        return self
+
+    def _meta_features_cv(self, X, y,
+                          groups=None, sample_weight=None):
+        # In stacking CV models, meta-features are out-of-fold predictions:
+        # each base estimator is temporarily fitted on the training part of
+        # a fold and applied to the corresponding holdout part.
+        # Hence this method requires the same fitting arguments as fit().
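+        # With k base estimators whose prediction widths are w_1..w_k
+        # (w_i = 1 for class labels or regression targets, and n_classes
+        # when use_probas=True), the returned matrix has shape
+        # [n_samples, w_1 + ... + w_k].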
+
+        kfold = check_cv(self.cv, y, classifier=self.stratify)
+        if isinstance(self.cv, int):
+            # Override the shuffle parameter in case of a self-generated
+            # cross-validation strategy
+            kfold.shuffle = self.shuffle
+
+        meta_features = None
+        #
+        # The outer loop iterates over the base estimators. Each estimator
+        # is trained cv times and makes predictions, after which we train
+        # the meta-estimator on their combined results.
+        #
+        for i, estimator in enumerate(self.estimators_):
+            #
+            # In the inner loop, the model is trained on the training part
+            # of each fold and makes predictions for the holdout part.
+            # After cv folds, we have out-of-fold predictions for every
+            # data point.
+            #
+            # The advantage of this approach is that the model never sees
+            # the data points it predicts, so the meta-features are less
+            # susceptible to overfitting.
+            #
+            this_meta_features = None
+            for train_idx, holdout_idx in kfold.split(X, y, groups):
+                instance = clone(estimator)
+                self._fit_one(
+                    instance, X[train_idx], y[train_idx],
+                    sample_weight=None if sample_weight is None
+                    else sample_weight[train_idx])
+
+                y_pred = self._predict_meta_feature_one(
+                    instance, X[holdout_idx])
+
+                # make sure the prediction is two-dimensional so that
+                # folds can be accumulated consistently
+                assert len(y_pred.shape) < 3, \
+                    "predictions must be at most 2-dimensional"
+                if len(y_pred.shape) == 1:
+                    y_pred = np.expand_dims(y_pred, axis=1)
+                # initialize the output on the first fold, once the
+                # width of the prediction is known
+                if this_meta_features is None:
+                    this_meta_features = np.zeros((len(y), y_pred.shape[1]))
+                this_meta_features[holdout_idx] = y_pred
+            if meta_features is None:
+                meta_features = this_meta_features
+            else:
+                meta_features = np.concatenate(
+                    [meta_features, this_meta_features], axis=1)
+        return meta_features
+
+    def _predict_meta_feature_one(self, model, X):
+        return model.predict(X)
+
+
+class StackingCVRegressor(StackingCVEstimator, StackingRegressor):
+    """A 'Stacking Cross-Validation' regressor for scikit-learn estimators.
+
+    New in mlxtend v0.7.0
+
+    Notes
+    -------
+    The StackingCVRegressor uses scikit-learn's check_cv
+    internally, which doesn't support a random seed. Thus
+    NumPy's random seed needs to be specified explicitly for
+    deterministic behavior, for instance, by setting
+    np.random.seed(RANDOM_SEED)
+    prior to fitting the StackingCVRegressor.
+
+    Parameters
+    ----------
+    regressors : array-like, shape = [n_regressors]
+        A list of regressors.
+        Invoking the `fit` method on the `StackingCVRegressor` will fit clones
+        of these original regressors that will
+        be stored in the class attribute `self.regr_`.
+    meta_regressor : object
+        The meta-regressor to be fitted on the ensemble of
+        regressors.
+    cv : int, cross-validation generator or iterable, optional (default: 5)
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+        - None, to use the default 5-fold cross validation,
+        - integer, to specify the number of folds in a `KFold`,
+        - An object to be used as a cross-validation generator.
+        - An iterable yielding train, test splits.
+        For integer/None inputs, it will use `KFold` cross-validation.
+    shuffle : bool (default: True)
+        If True, and the `cv` argument is an integer, the training data will
+        be shuffled at the fitting stage prior to cross-validation. If the
+        `cv` argument is a specific cross-validation technique, this argument
+        is ignored.
+    verbose : int, optional (default=0)
+        Controls the verbosity of the building process.
+        - `verbose=0` (default): Prints nothing
+        - `verbose=1`: Prints the number & name of the regressor being fitted
+        - `verbose=2`: Prints info about the parameters of the
+          regressor being fitted
+        - `verbose>2`: Changes the `verbose` param of the underlying regressor
+          to self.verbose - 2
+    use_features_in_secondary : bool (default: False)
+        If True, the meta-regressor will be trained both on
+        the predictions of the original regressors and the
+        original dataset.
+        If False, the meta-regressor will be trained only on
+        the predictions of the original regressors.
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data used
+        for fitting the meta-regressor are stored in the
+        `self.train_meta_features_` array, which can be
+        accessed after calling `fit`.
+    use_clones : bool (default: True)
+        Clones the regressors for stacking regression if True (default)
+        or else uses the original ones, which will be refitted on the dataset
+        upon calling the `fit` method. Setting use_clones=False is
+        recommended if you are working with estimators that support
+        the scikit-learn fit/predict API interface but are not compatible
+        with scikit-learn's `clone` function.
+
+    Attributes
+    ----------
+    train_meta_features : numpy array, shape = [n_samples, n_regressors]
+        meta-features for training data, where n_samples is the
+        number of samples in the training data and n_regressors is
+        the number of regressors.
+
+    Examples
+    -----------
+    For usage examples, please see
+    http://rasbt.github.io/mlxtend/user_guide/regressor/StackingCVRegressor/
+
+    """
+    def __init__(self, regressors, meta_regressor,
+                 cv=5, shuffle=True, verbose=0,
+                 use_features_in_secondary=False,
+                 store_train_meta_features=False,
+                 use_clones=True):
+        super(StackingCVRegressor, self).__init__(
+            regressors, meta_regressor,
+            cv=cv, shuffle=shuffle, stratify=False, verbose=verbose,
+            use_features_in_secondary=use_features_in_secondary,
+            store_train_meta_features=store_train_meta_features,
+            use_clones=use_clones)
+        self.regressors = regressors
+        self.meta_regressor = meta_regressor
+
+
+class StackingCVClassifier(StackingCVEstimator, StackingClassifier):
+
+    """A 'Stacking Cross-Validation' classifier for scikit-learn estimators.
+
+    New in mlxtend v0.4.3
+
+    Notes
+    -------
+    The StackingCVClassifier uses scikit-learn's check_cv
+    internally, which doesn't support a random seed. Thus
+    NumPy's random seed needs to be specified explicitly for
+    deterministic behavior, for instance, by setting
+    np.random.seed(RANDOM_SEED)
+    prior to fitting the StackingCVClassifier.
+
+    Parameters
+    ----------
+    classifiers : array-like, shape = [n_classifiers]
+        A list of classifiers.
+        Invoking the `fit` method on the `StackingCVClassifier` will fit
+        clones of these original classifiers that will
+        be stored in the class attribute `self.clfs_`.
+    meta_classifier : object
+        The meta-classifier to be fitted on the ensemble of
+        classifiers.
+    use_probas : bool (default: False)
+        If True, trains the meta-classifier based on predicted probabilities
+        instead of class labels.
+    cv : int, cross-validation generator or an iterable, optional (default: 2)
+        Determines the cross-validation splitting strategy.
+        Possible inputs for cv are:
+        - None, to use the default 2-fold cross validation,
+        - integer, to specify the number of folds in a `(Stratified)KFold`,
+        - An object to be used as a cross-validation generator.
+        - An iterable yielding train, test splits.
+        For integer/None inputs, it will use either a `KFold` or
+        `StratifiedKFold` cross validation depending on the value of the
+        `stratify` argument.
+    use_features_in_secondary : bool (default: False)
+        If True, the meta-classifier will be trained both on the predictions
+        of the original classifiers and the original dataset.
+        If False, the meta-classifier will be trained only on the predictions
+        of the original classifiers.
+    stratify : bool (default: True)
+        If True, and the `cv` argument is an integer, it will follow a
+        stratified K-Fold cross-validation technique. If the `cv` argument
+        is a specific cross-validation technique, this argument is ignored.
+    shuffle : bool (default: True)
+        If True, and the `cv` argument is an integer, the training data will
+        be shuffled at the fitting stage prior to cross-validation. If the
+        `cv` argument is a specific cross-validation technique, this argument
+        is ignored.
+    verbose : int, optional (default=0)
+        Controls the verbosity of the building process.
+        - `verbose=0` (default): Prints nothing
+        - `verbose=1`: Prints the number & name of the classifier being
+          fitted and which fold is currently being used for fitting
+        - `verbose=2`: Prints info about the parameters of the
+          classifier being fitted
+        - `verbose>2`: Changes the `verbose` param of the underlying
+          classifier to self.verbose - 2
+    store_train_meta_features : bool (default: False)
+        If True, the meta-features computed from the training data used
+        for fitting the meta-classifier are stored in the
+        `self.train_meta_features_` array, which can be
+        accessed after calling `fit`.
+    use_clones : bool (default: True)
+        Clones the classifiers for stacking classification if True (default)
+        or else uses the original ones, which will be refitted on the dataset
+        upon calling the `fit` method. Hence, if use_clones=True, the original
+        input classifiers will remain unmodified upon using the
+        StackingCVClassifier's `fit` method.
+        Setting `use_clones=False` is
+        recommended if you are working with estimators that support
+        the scikit-learn fit/predict API interface but are not compatible
+        with scikit-learn's `clone` function.
+
+
+    Attributes
+    ----------
+    clfs_ : list, shape=[n_classifiers]
+        Fitted classifiers (clones of the original classifiers)
+    meta_clf_ : estimator
+        Fitted meta-classifier (clone of the original meta-estimator)
+    train_meta_features : numpy array, shape = [n_samples, n_classifiers]
+        meta-features for training data, where n_samples is the
+        number of samples in the training data and n_classifiers is
+        the number of classifiers.
+
+    Examples
+    -----------
+    For usage examples, please see
+    http://rasbt.github.io/mlxtend/user_guide/classifier/StackingCVClassifier/
+
+    """
+    def __init__(self, classifiers, meta_classifier,
+                 use_probas=False, cv=2,
+                 use_features_in_secondary=False,
+                 stratify=True,
+                 shuffle=True, verbose=0,
+                 store_train_meta_features=False,
+                 use_clones=True):
+        super(StackingCVClassifier, self).__init__(
+            classifiers, meta_classifier,
+            cv=cv, shuffle=shuffle, stratify=stratify, verbose=verbose,
+            use_features_in_secondary=use_features_in_secondary,
+            store_train_meta_features=store_train_meta_features,
+            use_clones=use_clones)
+        self.classifiers = classifiers
+        self.meta_classifier = meta_classifier
+        self.use_probas = use_probas
+
+    def predict_meta_features(self, X):
+        """ Get meta-features of test data.
+
+        Parameters
+        ----------
+        X : numpy array, shape = [n_samples, n_features]
+            Test vectors, where n_samples is the number of samples and
+            n_features is the number of features.
+
+        Returns
+        -------
+        meta-features : numpy array, shape = [n_samples, n_classifiers]
+            Returns the meta-features for test data. If use_probas=True,
+            the array has n_classifiers * n_classes columns instead, one
+            block of class probabilities per classifier.
+
+        """
+        check_is_fitted(self, 'clfs_')
+        all_model_predictions = np.array([]).reshape(len(X), 0)
+        for model in self.clfs_:
+            if not self.use_probas:
+                single_model_prediction = model.predict(X)
+                single_model_prediction = single_model_prediction\
+                    .reshape(single_model_prediction.shape[0], 1)
+            else:
+                single_model_prediction = model.predict_proba(X)
+            all_model_predictions = np.hstack(
+                (all_model_predictions.astype(single_model_prediction.dtype),
+                 single_model_prediction))
+        return all_model_predictions
+
+    def _predict_meta_feature_one(self, model, X):
+        # Class labels come back one-dimensional and are expanded to a
+        # column vector by the caller; predict_proba output is already
+        # two-dimensional. Returning the raw predictions avoids the
+        # reshape/squeeze round-trip, whose unconditional squeeze could
+        # wrongly collapse a single-sample holdout fold.
+        if not self.use_probas:
+            return model.predict(X)
+        return model.predict_proba(X)
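
A minimal usage sketch of the refactored API (illustration only, not part of
the patch; it assumes the mlxtend.stacking package added by this series is
importable):

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.naive_bayes import GaussianNB
    from mlxtend.stacking import StackingCVClassifier

    # check_cv has no random_state, so seed NumPy for determinism
    np.random.seed(123)
    X = np.random.randn(90, 4)
    y = np.repeat([0, 1, 2], 30)

    sclf = StackingCVClassifier(
        classifiers=[GaussianNB(),
                     RandomForestClassifier(n_estimators=10)],
        meta_classifier=LogisticRegression(),
        use_probas=True,  # stack class probabilities, not labels
        cv=2, stratify=True, shuffle=True,
        store_train_meta_features=True)
    sclf.fit(X, y)

    # one block of n_classes probability columns per base classifier
    print(sclf.train_meta_features_.shape)  # (90, 6)
    print(sclf.predict(X[:5]))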