Skip to content

Commit 8e2fbb2

Browse files
CI Fixes (#1018)
* Skip test on older dask versions. Remove once dask 2025.5.0 is required. * Skip hyperband / incremental tests for new dask. This adds a module-wide skip for all hyperband and incremental tests with dask>=2025.3.0. See #1016 for details.
1 parent 6fdd1f4 commit 8e2fbb2

File tree

12 files changed

+49
-20
lines changed

12 files changed

+49
-20
lines changed

dask_ml/_compat.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
DASK_2_28_0 = DASK_VERSION > packaging.version.parse("2.27.0")
2424
DASK_2021_02_0 = DASK_VERSION >= packaging.version.parse("2021.02.0")
2525
DASK_2022_01_0 = DASK_VERSION > packaging.version.parse("2021.12.0")
26+
DASK_2025_3_0 = DASK_VERSION >= packaging.version.parse("2025.3.0")
27+
DASK_2025_5_0 = DASK_VERSION >= packaging.version.parse("2025.5.0")
2628
DISTRIBUTED_2_5_0 = DISTRIBUTED_VERSION > packaging.version.parse("2.5.0")
2729
DISTRIBUTED_2_11_0 = DISTRIBUTED_VERSION > packaging.version.parse("2.10.0") # dev
2830
DISTRIBUTED_2021_02_0 = DISTRIBUTED_VERSION >= packaging.version.parse("2021.02.0")

dask_ml/model_selection/_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1521,7 +1521,7 @@ def fit(self, X, y=None, groups=None, **fit_params):
15211521
The number of cross-validation splits (folds/iterations).
15221522
15231523
Notes
1524-
------
1524+
-----
15251525
The parameters selected are those that maximize the score of the left out
15261526
data, unless an explicit score is passed in which case it is used instead.
15271527
"""

docs/_static/.gitignore

Whitespace-only changes.

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
#
8787
# This is also used if you do content translation via gettext catalogs.
8888
# Usually you set "language" from the command line for these cases.
89-
language = None
89+
language = "en"
9090

9191
# List of patterns, relative to source directory, that match files and
9292
# directories to ignore when looking for source files.

docs/source/hyper-parameter-search.rst

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -419,8 +419,7 @@ Basic use
419419
This section uses :class:`~dask_ml.model_selection.HyperbandSearchCV`, but it can
420420
also be applied to :class:`~dask_ml.model_selection.IncrementalSearchCV`.
421421

422-
.. ipython:: python
423-
:okwarning:
422+
.. code-block:: python
424423
425424
from dask.distributed import Client
426425
from dask_ml.datasets import make_classification
@@ -432,14 +431,14 @@ also be applied to to :class:`~dask_ml.model_selection.IncrementalSearchCV` too.
432431
Our underlying model is an :class:`sklearn.linear_model.SGDClassifier`. We
433432
specify a few parameters common to each clone of the model:
434433

435-
.. ipython:: python
434+
.. code-block:: python
436435
437436
from sklearn.linear_model import SGDClassifier
438437
clf = SGDClassifier(tol=1e-3, penalty='elasticnet', random_state=0)
439438
440439
We also define the distribution of parameters from which we will sample:
441440

442-
.. ipython:: python
441+
.. code-block:: python
443442
444443
from scipy.stats import uniform, loguniform
445444
params = {'alpha': loguniform(1e-2, 1e0), # or np.logspace
@@ -449,7 +448,7 @@ We also define the distribution of parameters from which we will sample:
449448
Finally we create many random models in this parameter space and
450449
train-and-score them until we find the best one.
451450

452-
.. ipython:: python
451+
.. code-block:: python
453452
454453
from dask_ml.model_selection import HyperbandSearchCV
455454
@@ -465,7 +464,7 @@ larger-than-memory Dask Array, you'll exhaust your machine's memory. If you plan
465464
to use post-estimation features like scoring or prediction, we recommend using
466465
:class:`dask_ml.wrappers.ParallelPostFit`.
467466

468-
.. ipython:: python
467+
.. code-block:: python
469468
470469
from dask_ml.wrappers import ParallelPostFit
471470
params = {'estimator__alpha': loguniform(1e-2, 1e0),
@@ -523,22 +522,22 @@ Hyperband parameters: rule-of-thumb
523522
These fall out pretty naturally once it's known how long to train the best
524523
model and very approximately how many parameters to sample:
525524

526-
.. ipython:: python
525+
.. code-block:: python
527526
528527
n_examples = 20 * len(X_train) # 20 passes through dataset for best model
529528
n_params = 94 # sample approximately 100 parameters; more than 94 will be sampled
530529
531530
With this, it's easy to use a rule-of-thumb to compute the inputs to Hyperband:
532531

533-
.. ipython:: python
532+
.. code-block:: python
534533
535534
max_iter = n_params
536535
chunk_size = n_examples // n_params # implicit
537536
538537
Now that we've determined the inputs, let's create our search object and
539538
rechunk the Dask array:
540539

541-
.. ipython:: python
540+
.. code-block:: python
542541
543542
clf = SGDClassifier(tol=1e-3, penalty='elasticnet', random_state=0)
544543
params = {'alpha': loguniform(1e-2, 1e0), # or np.logspace
@@ -567,7 +566,7 @@ rule-of-thumb in the "Notes" section of
567566
However, this does not explicitly mention the amount of computation performed
568567
-- it's only an approximation. The amount of computation can be viewed like so:
569568

570-
.. ipython:: python
569+
.. code-block:: python
571570
572571
search.metadata["partial_fit_calls"] # best model will see `max_iter` chunks
573572
search.metadata["n_models"] # actual number of parameters to sample
@@ -578,7 +577,7 @@ amount of computation. Let's fit
578577
:class:`~dask_ml.model_selection.HyperbandSearchCV` with these different
579578
chunks:
580579

581-
.. ipython:: python
580+
.. code-block:: python
582581
583582
search.fit(X_train, y_train, classes=[0, 1]);
584583
search.best_params_

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ dev = [
5151
"sphinx-gallery",
5252
"sphinx-rtd-theme",
5353
]
54-
docs = ["nbsphinx", "numpydoc", "sphinx", "sphinx-gallery", "sphinx-rtd-theme"]
54+
docs = ["nbsphinx", "numpydoc", "sphinx", "sphinx-gallery", "sphinx-rtd-theme", "dask-sphinx-theme"]
5555
test = [
5656
"black",
5757
"coverage",

tests/linear_model/test_glm.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,19 @@
1010
from sklearn.pipeline import make_pipeline
1111

1212
import dask_ml.linear_model
13+
import dask_ml._compat
1314
from dask_ml.datasets import make_classification, make_counts, make_regression
1415
from dask_ml.linear_model import LinearRegression, LogisticRegression, PoissonRegression
1516
from dask_ml.linear_model.utils import add_intercept
1617
from dask_ml.model_selection import GridSearchCV
1718

1819

20+
pytestmark = pytest.mark.skipif(
21+
dask_ml._compat.DASK_2025_3_0,
22+
reason="https://github.com/dask/dask-ml/issues/1016",
23+
)
24+
25+
1926
@pytest.fixture(params=[r() for r in Regularizer.__subclasses__()])
2027
def solver(request):
2128
"""Parametrized fixture for all the solver names"""

tests/model_selection/test_hyperband.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
)
1616
from sklearn.linear_model import SGDClassifier
1717

18-
from dask_ml._compat import DISTRIBUTED_2_5_0, SK_LOG_LOSS
18+
from dask_ml._compat import SK_LOG_LOSS, DASK_2025_3_0
1919
from dask_ml.datasets import make_classification
2020
from dask_ml.model_selection import (
2121
HyperbandSearchCV,
@@ -26,7 +26,10 @@
2626
from dask_ml.utils import ConstantFunction
2727
from dask_ml.wrappers import Incremental
2828

29-
pytestmark = pytest.mark.skipif(not DISTRIBUTED_2_5_0, reason="hangs")
29+
pytestmark = pytest.mark.skipif(
30+
DASK_2025_3_0,
31+
reason="https://github.com/dask/dask-ml/issues/1016",
32+
)
3033

3134

3235
@pytest.mark.parametrize(

tests/model_selection/test_incremental.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from sklearn.model_selection import ParameterGrid, ParameterSampler
2828
from sklearn.utils import check_random_state
2929

30-
from dask_ml._compat import DISTRIBUTED_2_5_0, SK_LOG_LOSS
30+
from dask_ml._compat import DASK_2025_3_0, SK_LOG_LOSS
3131
from dask_ml.datasets import make_classification
3232
from dask_ml.model_selection import (
3333
HyperbandSearchCV,
@@ -39,7 +39,10 @@
3939
from dask_ml.utils import ConstantFunction
4040

4141
pytestmark = [
42-
pytest.mark.skipif(not DISTRIBUTED_2_5_0, reason="hangs"),
42+
pytest.mark.skipif(
43+
DASK_2025_3_0,
44+
reason="https://github.com/dask/dask-ml/issues/1016",
45+
),
4346
pytest.mark.filterwarnings("ignore:decay_rate"),
4447
] # decay_rate warnings are tested in test_incremental_warns.py
4548

tests/model_selection/test_incremental_warns.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,18 @@
22
import pytest
33
from distributed.utils_test import gen_cluster
44

5+
import dask_ml._compat
56
from dask_ml.datasets import make_classification
67
from dask_ml.model_selection import IncrementalSearchCV, InverseDecaySearchCV
78
from dask_ml.utils import ConstantFunction
89

910

11+
pytestmark = pytest.mark.skipif(
12+
dask_ml._compat.DASK_2025_3_0,
13+
reason="https://github.com/dask/dask-ml/issues/1016",
14+
)
15+
16+
1017
@gen_cluster(client=True)
1118
async def test_warns_decay_rate(c, s, a, b):
1219
X, y = make_classification(n_samples=100, n_features=5, chunks=10)

0 commit comments

Comments
 (0)