Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.bumpversion]
current_version = "0.7.3"
current_version = "0.7.4"
parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
serialize = ["{major}.{minor}.{patch}"]
search = "{current_version}"
Expand Down
7 changes: 3 additions & 4 deletions .github/actions/setup/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ runs:
- name: Install pixi
uses: prefix-dev/[email protected]
with:
pixi-version: "v0.49.0"
cache: true
# Frozen is needed as the ref git dependency was not playing nice with a fully locked environment
frozen: true
pixi-version: "v0.54.0"
cache: false # The self-hosted runners have a local cache
locked: true
log-level: "v"
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ from the examples given in that link.

<!-- towncrier release notes start -->

## ref-sample-data 0.7.4 (2025-09-26)

### Improvements

- Removes the ERA5 ta data from Obs4REF in preference to fetching it from Obs4MIPs directly.
This also removes the dependency on the climate-ref package. ([#50](https://github.com/CMIP-REF/ref-sample-data/pulls/50))


## ref-sample-data 0.7.3 (2025-09-12)

### Improvements
Expand Down
1 change: 1 addition & 0 deletions changelog/354.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Removes the ERA5 ta dataset from obs4REF in preference to fetching it from obs4MIPs.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
1,098 changes: 44 additions & 1,054 deletions pixi.lock

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "ref-sample-data"
version = "0.7.3"
version = "0.7.4"
description = "CMIP Rapid Evaluation Framework Sample Data"
readme = "README.md"
authors = [
Expand All @@ -18,18 +18,20 @@ platforms = ["osx-arm64", "linux-64"]
[tool.pixi.dependencies]
python = ">=3.10"
pooch = ">=1.8.2,<2"
intake-esgf = ">=2025.7.16,<2026"
intake-esgf = ">=2025.9.26,<2026"
matplotlib = ">=3.10.3,<4"
scipy = ">=1.16.0,<2"
xarray = ">=2025.7.1,<2026"
typer = ">=0.16.0,<0.17"
xcdat = ">=0.9.1,<0.10"
xesmf = ">=0.8.10,<0.9"
joblib = ">=1.5.2,<2"
loguru = ">=0.7,<0.8"
requests = ">=2.32.5,<3"

[tool.pixi.pypi-dependencies]
# Add any dependencies that aren't available on conda-forge here
ref_sample_data = { path = ".", editable = true }
climate-ref = ">=0.6.5,<0.7"

[tool.pixi.feature.dev.dependencies]
ruff = "*"
Expand Down
9 changes: 0 additions & 9 deletions registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,6 @@ obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_RE
obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc 14bdeae9e0b4b7bfe849c97dbdd29eae87f27d9464e8b3795d815369b13ffd0c
obs4REF/obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc 53262d8f9076f233399d149810a644464d3bb36ae0f131fd55f164bc623b78da
obs4REF/obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc 95bf8da4b8a071464688b527e822724c33c2794d100052eb12eb2804219ddb94
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc 36bd5cbda06258fb6aafd9fb2ccb79b4d08574116a6ebe8ccc48b6462bdb6419
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc 9b7802f845ca67f6b4d4bd0a73e0bce1c5042ecf3e7b209a5e470fd084ead238
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc 208a988bc440699beda1738342e7571c28dd2c3b2d169e0770c1764996bd41a4
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc 3bfb4dec6966cea160af4ce872302af4d84ee2bd8bd3bba91468a424e17d9eae
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc da16b7d20e764e25af3c6b834376bed5041872a0b11fab59234eca5cf1124495
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc 08ae50141a576dfcbba0a9cf15a32653f48fa88d58406b60d21383e50dd309f0
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc 488e55c4f6c858301abb957a5fb7de866e93fa54b234dbce08df652fad634649
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc 9c5c4656b929d1c6dba5d83d5459db61d7d543182e58e29168eacdb7f151b125
obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc 98e254f10b15c4d90dd258f66b8352f6e8b758f9bd64f435c90cb3bdd99c7086
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc 50d2b48789dcd642641b30ab52cc0f3ad161c057220cda52788080b2be2b927e
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc 72f15a671e88cb0ec239af9e8c1a608bdf2837c884efde9721213481bcfa02a0
obs4REF/obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc 54c939a1a461930230a1ae1423856c1929d5dd6bab72cbdad1fe24c5da579908
Expand Down
12 changes: 0 additions & 12 deletions scripts/fetch_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,18 +485,6 @@ def process_sample_data_request(
remove_ensembles=False,
time_span=("2000", "2025"),
),
# # Obs4MIPs AIRS data
# Obs4MIPsRequest(
# facets=dict(
# project="obs4MIPs",
# institution_id="NASA-JPL",
# frequency="mon",
# source_id="AIRS-2-1",
# variable_id="ta",
# ),
# remove_ensembles=False,
# time_span=("2002", "2016"),
# ),
# All unpublished obs4mips datasets
Obs4REFRequest(),
]
Expand Down
38 changes: 35 additions & 3 deletions src/ref_sample_data/data_request/obs4ref.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
import pathlib
import tempfile
from pathlib import Path

import climate_ref # noqa
import pandas as pd
import pooch
import requests
import xarray as xr
from climate_ref_core.dataset_registry import dataset_registry_manager
from loguru import logger

from ref_sample_data.data_request.base import DecimateMixin

Expand All @@ -22,15 +25,44 @@ class Obs4REFRequest(DecimateMixin):
id = "obs4ref"
source_type = "obs4REF"
time_span = None
branch_or_tag: str = "main"
"""
The branch or tag to use for fetching the dataset registry

This defaults to `main` but can be set to a specific tag or branch name to pin a different
version of the datasets.
"""

def fetch_datasets(self) -> pd.DataFrame:
"""
Fetch the datasets from the source

Returns a dataframe of the metadata and paths to the fetched datasets.
"""
registry = dataset_registry_manager["obs4ref"]
# This mimics how a registry is set up in climate_ref_core.dataset_registry
DATASET_URL = os.environ.get("REF_DATASET_URL", default="https://obs4ref.climate-ref.org")

registry = pooch.create(
path=pooch.os_cache("climate_ref"),
base_url=DATASET_URL,
retry_if_failed=10,
env="REF_DATASET_CACHE_DIR",
)
registry_url = (
f"https://raw.githubusercontent.com/Climate-REF/climate-ref/refs/heads/{self.branch_or_tag}"
f"/packages/climate-ref/src/climate_ref/dataset_registry/obs4ref_reference.txt"
)
response = requests.get(registry_url, timeout=30)
response.raise_for_status()
with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmpfile:
# Write the registry to a temporary file
tmpfile.write(response.text)
tmpfile.flush()
logger.debug(f"Downloaded {tmpfile.name}: {response.text.count(os.linesep)} lines")

# Load the registry from the temporary file
registry.load_registry(tmpfile.name)
logger.info(f"Loaded dataset registry from {registry_url}: {len(registry.registry)} entries")
datasets = []
for key in registry.registry.keys():
dataset_path = registry.fetch(key)
Expand Down