[WIP] Rewrite stacking estimators with class hierarchy #445

Open: wants to merge 4 commits into master
4 changes: 4 additions & 0 deletions mlxtend/classifier/stacking_classification.py
@@ -8,6 +8,9 @@
#
# License: BSD 3 clause

from ..stacking import StackingClassifier

'''
from ..externals.estimator_checks import check_is_fitted
from ..externals.name_estimators import _name_estimators
from ..externals import six
@@ -275,3 +278,4 @@ def predict_proba(self, X):
)
else:
return self.meta_clf_.predict_proba(np.hstack((X, meta_features)))
'''
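
The old module body above is wrapped in a docstring rather than deleted, and the class now comes from the new mlxtend.stacking package, so the public import path is preserved. A minimal sketch of the equivalence this re-export is meant to guarantee (assuming the new package exports the same class object):

from mlxtend.classifier import StackingClassifier
from mlxtend.stacking import StackingClassifier as NewStackingClassifier

# After this PR, both import paths should resolve to the same class.
assert StackingClassifier is NewStackingClassifier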
416 changes: 1 addition & 415 deletions mlxtend/classifier/stacking_cv_classification.py

Large diffs are not rendered by default.

273 changes: 1 addition & 272 deletions mlxtend/regressor/stacking_cv_regression.py
@@ -13,275 +13,4 @@
#
# License: BSD 3 clause

from ..externals.estimator_checks import check_is_fitted
from ..externals import six
from ..externals.name_estimators import _name_estimators
from scipy import sparse
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.base import TransformerMixin
from sklearn.base import clone
from sklearn.model_selection._split import check_cv

import numpy as np


class StackingCVRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
"""A 'Stacking Cross-Validation' regressor for scikit-learn estimators.

New in mlxtend v0.7.0

Notes
-------
The StackingCVRegressor uses scikit-learn's check_cv
internally, which doesn't support a random seed. Thus
NumPy's random seed needs to be specified explicitly for
deterministic behavior, for instance, by setting
np.random.seed(RANDOM_SEED)
prior to fitting the StackingCVRegressor

Parameters
----------
regressors : array-like, shape = [n_regressors]
A list of regressors.
Invoking the `fit` method on the `StackingCVRegressor` will fit clones
of these original regressors that will
be stored in the class attribute `self.regr_`.
meta_regressor : object
The meta-regressor to be fitted on the ensemble of
regressors.
cv : int, cross-validation generator or iterable, optional (default: 5)
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 5-fold cross validation,
- integer, to specify the number of folds in a `KFold`,
- An object to be used as a cross-validation generator.
- An iterable yielding train, test splits.
For integer/None inputs, it will use `KFold` cross-validation
use_features_in_secondary : bool (default: False)
If True, the meta-regressor will be trained both on
the predictions of the original regressors and the
original dataset.
If False, the meta-regressor will be trained only on
the predictions of the original regressors.
shuffle : bool (default: True)
If True, and the `cv` argument is integer, the training data will
be shuffled at fitting stage prior to cross-validation. If the `cv`
argument is a specific cross-validation technique, this argument
is ignored.
store_train_meta_features : bool (default: False)
If True, the meta-features computed from the training data used
for fitting the meta-regressor are stored in the
`self.train_meta_features_` array, which can be accessed after
calling `fit`.
refit : bool (default: True)
Clones the regressors for stacking regression if True (default)
or else uses the original ones, which will be refitted on the dataset
upon calling the `fit` method. Setting refit=False is
recommended if you are working with estimators that support
the scikit-learn fit/predict API interface but are not
compatible with scikit-learn's `clone` function.

Attributes
----------
train_meta_features_ : numpy array, shape = [n_samples, n_regressors]
meta-features for training data, where n_samples is the number of
samples in the training data and n_regressors is the number of
regressors (len(self.regressors)).

Examples
-----------
For usage examples, please see
http://rasbt.github.io/mlxtend/user_guide/regressor/StackingCVRegressor/

"""
def __init__(self, regressors, meta_regressor, cv=5,
shuffle=True,
use_features_in_secondary=False,
store_train_meta_features=False,
refit=True):

self.regressors = regressors
self.meta_regressor = meta_regressor
self.named_regressors = {key: value for
key, value in
_name_estimators(regressors)}
self.named_meta_regressor = {'meta-%s' % key: value for
key, value in
_name_estimators([meta_regressor])}
self.cv = cv
self.shuffle = shuffle
self.use_features_in_secondary = use_features_in_secondary
self.store_train_meta_features = store_train_meta_features
self.refit = refit

def fit(self, X, y, groups=None, sample_weight=None):
""" Fit ensemble regressors and the meta-regressor.

Parameters
----------
X : numpy array, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.

y : numpy array, shape = [n_samples]
Target values.

groups : numpy array/None, shape = [n_samples]
The group that each sample belongs to. This is used by specific
folding strategies such as GroupKFold()

sample_weight : array-like, shape = [n_samples], optional
Sample weights passed as `sample_weight` to each regressor
in the regressors list, as well as to the meta_regressor.
Raises an error if a regressor does not support
sample_weight in its fit() method.

Returns
-------
self : object

"""
if self.refit:
self.regr_ = [clone(clf) for clf in self.regressors]
self.meta_regr_ = clone(self.meta_regressor)
else:
self.regr_ = self.regressors
self.meta_regr_ = self.meta_regressor

kfold = check_cv(self.cv, y)
if isinstance(self.cv, int):
# Override the shuffle parameter in case of a self-generated
# cross-validation strategy
kfold.shuffle = self.shuffle

meta_features = np.zeros((X.shape[0], len(self.regressors)))

#
# The outer loop iterates over the base-regressors. Each regressor
# is trained cv times and makes predictions, after which we train
# the meta-regressor on their combined results.
#
for i, regr in enumerate(self.regressors):
#
# In the inner loop, the model is trained on the training part of
# each fold, and the holdout part is used for predictions. Repeating
# this over all cv folds yields a prediction for every data point.
#
# The advantage of this approach is that the meta-regressor is
# trained on predictions for data points the base model has never
# been trained on, so it is less susceptible to overfitting.
#
for train_idx, holdout_idx in kfold.split(X, y, groups):
instance = clone(regr)
if sample_weight is None:
instance.fit(X[train_idx], y[train_idx])
else:
instance.fit(X[train_idx], y[train_idx],
sample_weight=sample_weight[train_idx])
y_pred = instance.predict(X[holdout_idx])
meta_features[holdout_idx, i] = y_pred

# save meta-features for training data
if self.store_train_meta_features:
self.train_meta_features_ = meta_features

# Train meta-model on the out-of-fold predictions
if not self.use_features_in_secondary:
pass
elif sparse.issparse(X):
meta_features = sparse.hstack((X, meta_features))
else:
meta_features = np.hstack((X, meta_features))

if sample_weight is None:
self.meta_regr_.fit(meta_features, y)
else:
self.meta_regr_.fit(meta_features, y, sample_weight=sample_weight)

# Retrain base models on all data
for regr in self.regr_:
if sample_weight is None:
regr.fit(X, y)
else:
regr.fit(X, y, sample_weight=sample_weight)

return self

def predict(self, X):
""" Predict target values for X.

Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.

Returns
----------
y_target : array-like, shape = [n_samples] or [n_samples, n_targets]
Predicted target values.
"""

#
# First we make predictions with the base models, then we predict
# with the meta-model from that information.
#

check_is_fitted(self, 'regr_')

meta_features = np.column_stack([
regr.predict(X) for regr in self.regr_
])

if not self.use_features_in_secondary:
return self.meta_regr_.predict(meta_features)
elif sparse.issparse(X):
return self.meta_regr_.predict(sparse.hstack((X, meta_features)))
else:
return self.meta_regr_.predict(np.hstack((X, meta_features)))

def predict_meta_features(self, X):
""" Get meta-features of test-data.

Parameters
----------
X : numpy array, shape = [n_samples, n_features]
Test vectors, where n_samples is the number of samples and
n_features is the number of features.

Returns
-------
meta-features : numpy array, shape = [n_samples, len(self.regressors)]
meta-features for test data, where n_samples is the number of
samples in test data and len(self.regressors) is the number
of regressors.

"""
check_is_fitted(self, 'regr_')
return np.column_stack([regr.predict(X) for regr in self.regr_])

def get_params(self, deep=True):
#
# Return estimator parameter names for GridSearch support.
#
if not deep:
return super(StackingCVRegressor, self).get_params(deep=False)
else:
out = self.named_regressors.copy()
for name, step in six.iteritems(self.named_regressors):
for key, value in six.iteritems(step.get_params(deep=True)):
out['%s__%s' % (name, key)] = value

out.update(self.named_meta_regressor.copy())
for name, step in six.iteritems(self.named_meta_regressor):
for key, value in six.iteritems(step.get_params(deep=True)):
out['%s__%s' % (name, key)] = value

for key, value in six.iteritems(super(StackingCVRegressor,
self).get_params(deep=False)):
out['%s' % key] = value

return out
from ..stacking import StackingCVRegressor
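
Because scikit-learn's check_cv has no random_state parameter (see the Notes section in the removed docstring above), deterministic fitting still relies on seeding NumPy globally. A minimal usage sketch under that assumption; the estimator choices and data are illustrative:

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingCVRegressor

X = np.random.rand(100, 5)
y = np.random.rand(100)

np.random.seed(42)  # check_cv offers no seed, so seed NumPy before fit()
stack = StackingCVRegressor(regressors=[LinearRegression(), Ridge()],
                            meta_regressor=SVR(),
                            cv=5, shuffle=True)
stack.fit(X, y)
print(stack.predict(X[:3]))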
4 changes: 4 additions & 0 deletions mlxtend/regressor/stacking_regression.py
@@ -8,6 +8,9 @@
#
# License: BSD 3 clause

from ..stacking import StackingRegressor

'''
from ..externals.estimator_checks import check_is_fitted
from ..externals.name_estimators import _name_estimators
from ..externals import six
@@ -243,3 +246,4 @@ def predict(self, X):
return self.meta_regr_.predict(sparse.hstack((X, meta_features)))
else:
return self.meta_regr_.predict(np.hstack((X, meta_features)))
'''
7 changes: 4 additions & 3 deletions mlxtend/regressor/tests/test_stacking_cv_regression.py
@@ -120,17 +120,18 @@ def test_get_params():

got = sorted(list({s.split('__')[0] for s in stregr.get_params().keys()}))
expect = ['cv',
'verbose',
'linearregression',
'meta-svr',
'meta_regressor',
'refit',
'use_clones',
'regressors',
'ridge',
'shuffle',
'store_train_meta_features',
'use_features_in_secondary']
assert got == expect, got

assert set(got) == set(expect), "Missing: %s, Redundant: %s" % (
set(expect) - set(got), set(got) - set(expect))

def test_regressor_gridsearch():
lr = LinearRegression()
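
The keys checked in test_get_params follow scikit-learn's estimatorname__parameter convention, which is what makes grid search across base and meta-estimators possible. A hedged sketch of that usage; the parameter grids are illustrative:

import numpy as np
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
from mlxtend.regressor import StackingCVRegressor

X = np.random.rand(50, 4)
y = np.random.rand(50)

stack = StackingCVRegressor(regressors=[Ridge(), LinearRegression()],
                            meta_regressor=SVR())
# Nested parameter names come from get_params(), e.g. 'ridge' and 'meta-svr'
params = {'ridge__alpha': [0.1, 1.0, 10.0],
          'meta-svr__C': [0.1, 1.0]}
grid = GridSearchCV(estimator=stack, param_grid=params, cv=5)
grid.fit(X, y)
print(grid.best_params_)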
6 changes: 3 additions & 3 deletions mlxtend/regressor/tests/test_stacking_regression.py
@@ -240,13 +240,13 @@ def test_get_params():
expect = ['linearregression',
'meta-svr',
'meta_regressor',
'refit',
'use_clones',
'regressors',
'ridge',
'store_train_meta_features',
'use_features_in_secondary',
'verbose']
assert got == expect, got
assert set(got) == set(expect), got


def test_regressor_gridsearch():
@@ -391,4 +391,4 @@ def test_sparse_matrix_inputs_and_features_in_secondary():
stack.fit(sparse.csr_matrix(X1), y)
mse = 0.14
got = np.mean((stack.predict(sparse.csr_matrix(X1)) - y) ** 2)
assert round(got, 2) == mse
assert round(got, 2) == mse
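
As this test exercises, sparse inputs are routed through scipy.sparse.hstack when use_features_in_secondary=True, so CSR matrices can be passed directly to fit and predict. A minimal sketch of that path; estimator choices and data are illustrative:

import numpy as np
from scipy import sparse
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.svm import SVR
from mlxtend.regressor import StackingRegressor

X = np.random.rand(60, 4)
y = np.random.rand(60)

stack = StackingRegressor(regressors=[Ridge(), LinearRegression()],
                          meta_regressor=SVR(),
                          use_features_in_secondary=True)
stack.fit(sparse.csr_matrix(X), y)  # CSR input exercises sparse.hstack
pred = stack.predict(sparse.csr_matrix(X))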
11 changes: 11 additions & 0 deletions mlxtend/stacking/__init__.py
@@ -0,0 +1,11 @@
# Sebastian Raschka 2014-2018
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from .stacking import StackingRegressor, StackingClassifier
from .stacking_cv import StackingCVRegressor, StackingCVClassifier

__all__ = ["StackingRegressor", "StackingClassifier",
"StackingCVRegressor", "StackingCVClassifier"]