Skip to content

Lift xarray pin #437

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ autodoc: clean-docs ## create sphinx-apidoc files:

docs: autodoc ## generate Sphinx HTML documentation, including API docs
$(MAKE) -C docs html
ifndef READTHEDOCS
$(BROWSER) docs/_build/html/index.html
endif

servedocs: docs ## compile the docs watching for changes
watchmedo shell-command -p '*.md' -c '$(MAKE) -C docs html' -R -D .
Expand Down
19 changes: 10 additions & 9 deletions clisops/core/regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

# Try importing xesmf and set to None if not found at correct version
# If set to None, the `require_module` decorator will throw an exception
XESMF_MINIMUM_VERSION = "0.8.2"
XESMF_MINIMUM_VERSION = "0.8.10"
try:
import xesmf as xe

Expand All @@ -41,10 +41,11 @@

# FIXME: Remove this when xarray addresses https://github.com/pydata/xarray/issues/7794
XARRAY_INCOMPATIBLE_VERSION = "2023.3.0"
XARRAY_COMPATIBLE_VERSION = "2025.6.0"
XARRAY_WARNING_MESSAGE = (
f"xarray version >= {XARRAY_INCOMPATIBLE_VERSION} "
f"is not supported for regridding operations with cf-time indexed arrays. "
f"Please use xarray version < {XARRAY_INCOMPATIBLE_VERSION}. "
f"xarray versions between {XARRAY_INCOMPATIBLE_VERSION} and {XARRAY_COMPATIBLE_VERSION} "
f"are not supported for regridding operations with cf-time indexed arrays. "
f"Please use xarray version >= {XARRAY_COMPATIBLE_VERSION}. "
"For more information, see: https://github.com/pydata/xarray/issues/7794."
)

Expand Down Expand Up @@ -76,8 +77,8 @@ def weights_cache_init(
Parameters
----------
weights_dir : str or Path
Directory name to initialize the local weights cache in.
Will be created if it does not exist.
Directory name to initialise the local weights cache in.
It will be created if it does not exist.
Per default, this function is called upon import with weights_dir as defined in roocs.ini.
config : dict
Configuration dictionary as read from top-level.
Expand Down Expand Up @@ -122,7 +123,7 @@ def weights_cache_flush(
----------
weights_dir_init : str, optional
Directory name to reinitialize the local weights cache in.
Will be created if it does not exist.
It will be created if it does not exist.
The default is CONFIG["clisops:grid_weights"]["local_weights_dir"] as defined in roocs.ini
(or as redefined by a manual weights_cache_init call).
dryrun : bool, optional
Expand Down Expand Up @@ -1554,7 +1555,7 @@ def to_netcdf(
):
"""Store a copy of the horizontal Grid as netCDF file on disk.

Define output folder, filename and output format (currently only 'CF' is supported).
Define output folder, file name and output format (currently only 'CF' is supported).
Does not overwrite an existing file.

Parameters
Expand Down Expand Up @@ -1635,7 +1636,7 @@ def to_netcdf(
class Weights:
"""Creates remapping weights out of two Grid objects serving as source and target grid.

Reads weights from cache if possible. Reads weights from disk if specified (not yet implemented).
Reads weights from cache if possible or from disk if specified (not yet implemented).
In the latter case, the weight file format has to be supported, to reformat it to xESMF format.

Parameters
Expand Down
3 changes: 1 addition & 2 deletions clisops/ops/average.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def average_shape(
shape : Union[str, Path, gpd.GeoDataFrame]
Path to shape file, or directly a GeoDataFrame. Supports formats compatible with geopandas.
Will be converted to EPSG:4326 if needed.

variable : Optional[Union[str, Sequence[str], None]]
Variables to average. If None, average over all data variables.
output_dir : Optional[Union[str, Path]]
Expand Down Expand Up @@ -210,7 +209,7 @@ def average_time(
ds : Union[xr.Dataset, str]
Xarray dataset.
freq : str
The frequency to average over. One of "month", "year".
The frequency to average over, either "month" or "year".
output_dir : Optional[Union[str, Path]]
output_type : {"netcdf", "nc", "zarr", "xarray"}
split_method : {"time:auto"}
Expand Down
25 changes: 16 additions & 9 deletions clisops/utils/output_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@
from clisops.utils.dataset_utils import get_main_variable

SUPPORTED_FORMATS = {
"netcdf": {"method": "to_netcdf", "extension": "nc"},
"nc": {"method": "to_netcdf", "extension": "nc"},
"zarr": {"method": "to_zarr", "extension": "zarr"},
"netcdf": {"method": "to_netcdf", "extension": "nc", "engine": "h5netcdf"},
"nc": {"method": "to_netcdf", "extension": "nc", "engine": "h5netcdf"},
"zarr": {"method": "to_zarr", "extension": "zarr", "engine": "zarr"},
"xarray": {"method": None, "extension": None},
}

SUPPORTED_SPLIT_METHODS = ["time:auto"]


def check_format(fmt):
"""Checks requested format exists."""
"""Check that the requested format exists."""
if fmt not in SUPPORTED_FORMATS:
raise KeyError(
f'Format not recognised: "{fmt}". Must be one of: {SUPPORTED_FORMATS}.'
Expand All @@ -47,6 +47,12 @@ def get_format_extension(fmt):
return SUPPORTED_FORMATS[fmt]["extension"]


def get_format_engine(fmt):
    """Return the writer engine configured for the requested output format.

    Parameters
    ----------
    fmt : str
        Output format name; must be a key of ``SUPPORTED_FORMATS``.

    Returns
    -------
    str or None
        The value of the format's ``"engine"`` entry, or None when the format
        declares no engine (the ``"xarray"`` entry has none).

    Raises
    ------
    KeyError
        Raised by ``check_format`` if `fmt` is not a supported format.
    """
    check_format(fmt)
    # Not every supported format declares an engine; use .get() so a valid
    # format does not fail with a bare KeyError that looks like a bad format.
    return SUPPORTED_FORMATS[fmt].get("engine")


def _format_time(tm: Union[str, dt], fmt="%Y-%m-%d"):
"""Convert to datetime if time is a numpy datetime."""
if not hasattr(tm, "strftime"):
Expand Down Expand Up @@ -74,7 +80,7 @@ def filter_times_within(times, start=None, end=None):


def get_da(ds):
"""Returns xr.DataArray when format of ds may be either xr.Dataset or xr.DataArray."""
"""Returns xr.DataArray when the format of ds may be either xr.Dataset or xr.DataArray."""
if isinstance(ds, xr.DataArray):
da = ds
else:
Expand Down Expand Up @@ -119,7 +125,7 @@ def get_time_slices(
f"The split method {split_method} is not implemented."
)

# Use default file size limit if not provided
# Use the default file size limit if not provided
if not file_size_limit:
file_size_limit = parse_size(CONFIG["clisops:write"]["file_size_limit"])

Expand Down Expand Up @@ -164,7 +170,7 @@ def get_time_slices(
def get_chunk_length(da):
"""Calculate the chunk length to use when chunking xarray datasets.

Based on memory limit provided in config and the size of the dataset.
Based on the memory limit provided in config and the size of the dataset.
"""
size = da.nbytes
n_times = len(da.time.values)
Expand Down Expand Up @@ -209,7 +215,7 @@ def get_output(ds, output_type, output_dir, namer):
except AttributeError:
chunked_ds = ds

# If `output_dir` is not set, use current directory
# If `output_dir` is not set, use the current directory
if not output_dir:
output_dir = Path().cwd().expanduser()
else:
Expand All @@ -235,7 +241,8 @@ def get_output(ds, output_type, output_dir, namer):
# - https://docs.dask.org/en/latest/scheduling.html
with dask.config.set(scheduler="synchronous"):
writer = getattr(chunked_ds, format_writer)
delayed_obj = writer(target_path, compute=False)
engine = get_format_engine(output_type)
delayed_obj = writer(target_path, engine=engine, compute=False)
delayed_obj.compute()

# If "output_staging_dir" is set, then pause, move the output file,
Expand Down
8 changes: 4 additions & 4 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies:
- loguru >=0.5.3
- numpy >=1.25.0,<2.3.0
- packaging >=23.2
- pandas >=1.5.3
- pandas >=2.2.0
- platformdirs >=4.0
- pooch >=1.8.0
- pyproj >=3.3.0
Expand All @@ -29,21 +29,21 @@ dependencies:
- s3fs >=2025.5.1
- scipy >=1.11.0
- shapely >=2.0
- xarray >=2025.1.1 # use decode_times=CFDatetimeCoder(use_cftime=True)
- xarray >=2025.6.0
- zarr >=2.13.0,<3.0
# Compression
- zstandard
# Extras
- netcdf4 >=1.4
- xesmf >=0.8.2
- xesmf >=0.8.10
# # Dev tools and testing
- black >=25.1.0
- bump-my-version >=1.0.0
- coverage >=7.5.0
- deptry >=0.23.0
- flake8 >=7.1.0
- h5netcdf >=1.4.0
- isort >=6.0.1
- netCDF4 >=1.4
- pre-commit >=3.5.0
- pytest >=8.0.0
- pytest-cov >=5.0.0
Expand Down
12 changes: 6 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12", # Compatibility issues will persist until https://github.com/pydata/xarray/issues/7794 is resolved
"Programming Language :: Python :: 3.13", # Compatibility issues will persist until https://github.com/pydata/xarray/issues/7794 is resolved
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering",
"Topic :: Scientific/Engineering :: Atmospheric Science",
"Topic :: Scientific/Engineering :: GIS",
Expand All @@ -58,7 +58,7 @@ dependencies = [
"loguru >=0.5.3",
"numpy >=1.25.0,<2.3.0",
"packaging >=23.2",
"pandas >=1.5.3",
"pandas >=2.2.0",
"platformdirs >=4.0",
"pooch >=1.8.0",
"pyproj >=3.3.0",
Expand All @@ -80,8 +80,8 @@ dev = [
"coverage[toml] >=7.5.0",
"deptry >=0.23.0",
"flake8 >=7.2.0",
"h5netcdf >=1.4.0",
"isort >=6.0.1",
"netCDF4 >=1.4",
"pip >=25.0",
"pre-commit >=3.5.0",
"pytest >=8.0.0",
Expand All @@ -107,9 +107,9 @@ docs = [
"sphinx-rtd-theme >=1.0"
]
extras = [
"xesmf >=0.8.2",
"xesmf >=0.8.10",
# See: https://github.com/pydata/xarray/issues/7794
"xarray >=2022.6.0,<2023.3.0"
"xarray >=2025.6.0"
]

[project.urls]
Expand Down
2 changes: 0 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import pytest
import xarray as xr
from _pytest.logging import caplog as _caplog # noqa
from packaging.version import Version

from clisops.core.regrid import XARRAY_INCOMPATIBLE_VERSION
from clisops.utils import testing
from clisops.utils.testing import stratus as _stratus
from clisops.utils.testing import write_roocs_cfg as _write_roocs_cfg
Expand Down
6 changes: 0 additions & 6 deletions tests/test_core_regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,6 @@ def test_grid_init_ds_tos_degenerated_cells(mini_esgf_data):
]


@pytest.mark.xfail(
reason="Needs xarray fix https://github.com/pydata/xarray/issues/7794"
)
def test_grid_init_da_tas_regular(mini_esgf_data):
with xr.open_dataset(
mini_esgf_data["CMIP6_TAS_ONE_TIME_STEP"],
Expand Down Expand Up @@ -1532,9 +1529,6 @@ def test_duplicated_cells_warning_issued(self, tmp_path, mini_esgf_data):
else:
assert len(issuedWarnings) == 1

@pytest.mark.xfail(
reason="Needs xarray fix https://github.com/pydata/xarray/issues/7794"
)
def test_regrid_dataarray(self, tmp_path, mini_esgf_data):
with xr.open_dataset(
mini_esgf_data[self.c6tots],
Expand Down
22 changes: 9 additions & 13 deletions tests/test_core_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,6 @@ class TestSubsetBbox:
lonGCM = [-70.0, -60.0]
latGCM = [43.0, 59.0]

@pytest.mark.xfail(
reason="NetCDF4 has thread safety issues. Consider using h5netcdf in the future",
strict=False,
)
def test_dataset(self, nimbus):
da = xr.open_mfdataset(
[nimbus.fetch(self.nc_tasmax_file), nimbus.fetch(self.nc_tasmin_file)],
Expand Down Expand Up @@ -838,11 +834,8 @@ def test_mask_multiregions(self, nimbus, clisops_test_data):
np.testing.assert_array_equal(vals, [0, 1, 2])
np.testing.assert_array_equal(counts, [58, 250, 22])

@pytest.mark.skipif(
xesmf is None, reason="xESMF >= 0.6.2 is needed for average_shape."
)
@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_weight_masks_multiregions(self, nimbus, clisops_test_data):
# rename is due to a small limitation of xESMF 0.5.2
ds = xr.open_dataset(nimbus.fetch(self.nc_file)).rename(bnds="bounds")
regions = gpd.read_file(clisops_test_data["multi_regions_geojson"]).set_index(
"id"
Expand Down Expand Up @@ -1064,8 +1057,9 @@ def test_nudge_levels(self, nimbus):


class TestGridPolygon:

@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_rectilinear(self):
pytest.importorskip("xesmf", "0.6.2")
# CF-Compliant with bounds
ds = xesmf.util.cf_grid_2d(-200, -100, 20, -60, 60, 10)
poly = subset._rectilinear_grid_exterior_polygon(ds)
Expand Down Expand Up @@ -1101,9 +1095,10 @@ def test_curvilinear(self, mode, nimbus):


class TestShapeBboxIndexer:

@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_rectilinear(self):
# Create small polygon fitting in one cell.
pytest.importorskip("xesmf", "0.6.2")
x, y = -150, 35
p = Point(x, y)
ds = xesmf.util.cf_grid_2d(-200, 0, 20, -60, 60, 10)
Expand All @@ -1114,9 +1109,9 @@ def test_rectilinear(self):
inds = subset.shape_bbox_indexer(ds, gpd.GeoDataFrame(geometry=[pb]))
assert pb.within(subset.grid_exterior_polygon(ds.isel(inds)))

@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_complex_geometries(self):
# Test with geometries that cannot be simplified to a single polygon using `unary_union`.
pytest.importorskip("xesmf", "0.6.2")
import shapely.wkt

p1 = shapely.wkt.loads(
Expand Down Expand Up @@ -1149,9 +1144,9 @@ def test_complex_geometries(self):
inds = subset.shape_bbox_indexer(ds, gpd.GeoDataFrame(geometry=[p1, p2]))
assert inds == {}

@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_curvilinear(self):
# Check that grid along lon/lat and a rotated grid are indexed identically for geometry and rotated geometry.
pytest.importorskip("xesmf", "0.6.2")
from shapely.affinity import rotate

ds = xesmf.util.grid_2d(0, 100, 10, 0, 60, 6)
Expand All @@ -1165,9 +1160,9 @@ def test_curvilinear(self):
ri = subset.shape_bbox_indexer(rds, gpd.GeoSeries([rgeom]))
assert ri == i

@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def test_multipoints(self):
# Test with a MultiPoint geometry.
pytest.importorskip("xesmf", "0.6.2")
from shapely.geometry import MultiPoint, Point

ds = xesmf.util.cf_grid_2d(-200, 0, 20, -60, 60, 10)
Expand All @@ -1189,6 +1184,7 @@ def test_multipoints(self):
assert geom.within(subset.grid_exterior_polygon(ds.isel(inds)))


@pytest.mark.skipif(xesmf is None, reason="xESMF required.")
def rotated_grid_2d(lon0_b, lon1_b, d_lon, lat0_b, lat1_b, d_lat, angle):
# Rotate lat lon by degree.
ds = xesmf.util.grid_2d(lon0_b, lon1_b, d_lon, lat0_b, lat1_b, d_lat)
Expand Down
Loading