Climate-REF · lewisjared · Sep 30, 2025 · Sep 26, 2025 · Sep 26, 2025 · Sep 26, 2025
diff --git a/.bumpversion.toml b/.bumpversion.toml
@@ -1,5 +1,5 @@
 [tool.bumpversion]
-current_version = "0.7.3"
+current_version = "0.7.4"
 parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
 serialize = ["{major}.{minor}.{patch}"]
 search = "{current_version}"

diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml
@@ -6,8 +6,7 @@ runs:
     - name: Install pixi
       uses: prefix-dev/[email protected]
       with:
-        pixi-version: "v0.49.0"
-        cache: true
-        # Frozen is needed as the ref git dependency was not playing nice with a fully locked environment
-        frozen: true
+        pixi-version: "v0.54.0"
+        cache: false # The self-hosted runners have a local cache
+        locked: true
         log-level: "v"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,14 @@ from the examples given in that link.
 
 <!-- towncrier release notes start -->
 
+## ref-sample-data 0.7.4 (2025-09-26)
+
+### Improvements
+
+- Removes the ERA5 ta data from obs4REF in favour of fetching it from obs4MIPs directly.
+  This also removes the dependency on the climate-ref package. ([#50](https://github.com/CMIP-REF/ref-sample-data/pulls/50))
+
+
 ## ref-sample-data 0.7.3 (2025-09-12)
 
 ### Improvements

diff --git a/changelog/354.fix.md b/changelog/354.fix.md
@@ -0,0 +1 @@
+Removes the ERA5 ta dataset from obs4REF in favour of fetching it from obs4MIPs.
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc
diff --git a/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc b/data/obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "ref-sample-data"
-version = "0.7.3"
+version = "0.7.4"
 description = "CMIP Rapid Evaluation Framework Sample Data"
 readme = "README.md"
 authors = [
@@ -18,18 +18,20 @@ platforms = ["osx-arm64", "linux-64"]
 [tool.pixi.dependencies]
 python = ">=3.10"
 pooch = ">=1.8.2,<2"
-intake-esgf = ">=2025.7.16,<2026"
+intake-esgf = ">=2025.9.26,<2026"
 matplotlib = ">=3.10.3,<4"
 scipy = ">=1.16.0,<2"
 xarray = ">=2025.7.1,<2026"
 typer = ">=0.16.0,<0.17"
 xcdat = ">=0.9.1,<0.10"
 xesmf = ">=0.8.10,<0.9"
+joblib = ">=1.5.2,<2"
+loguru = ">=0.7,<0.8"
+requests = ">=2.32.5,<3"
 
 [tool.pixi.pypi-dependencies]
 # Add any dependencies that aren't available on conda-forge here
 ref_sample_data = { path = ".", editable = true }
-climate-ref = ">=0.6.5,<0.7"
 
 [tool.pixi.feature.dev.dependencies]
 ruff = "*"

diff --git a/registry.txt b/registry.txt
@@ -216,15 +216,6 @@ obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_RE
 obs4REF/obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc 14bdeae9e0b4b7bfe849c97dbdd29eae87f27d9464e8b3795d815369b13ffd0c
 obs4REF/obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc 53262d8f9076f233399d149810a644464d3bb36ae0f131fd55f164bc623b78da
 obs4REF/obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc 95bf8da4b8a071464688b527e822724c33c2794d100052eb12eb2804219ddb94
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc 36bd5cbda06258fb6aafd9fb2ccb79b4d08574116a6ebe8ccc48b6462bdb6419
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc 9b7802f845ca67f6b4d4bd0a73e0bce1c5042ecf3e7b209a5e470fd084ead238
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc 208a988bc440699beda1738342e7571c28dd2c3b2d169e0770c1764996bd41a4
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc 3bfb4dec6966cea160af4ce872302af4d84ee2bd8bd3bba91468a424e17d9eae
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc da16b7d20e764e25af3c6b834376bed5041872a0b11fab59234eca5cf1124495
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc 08ae50141a576dfcbba0a9cf15a32653f48fa88d58406b60d21383e50dd309f0
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc 488e55c4f6c858301abb957a5fb7de866e93fa54b234dbce08df652fad634649
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc 9c5c4656b929d1c6dba5d83d5459db61d7d543182e58e29168eacdb7f151b125
-obs4REF/obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc 98e254f10b15c4d90dd258f66b8352f6e8b758f9bd64f435c90cb3bdd99c7086
 obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc 50d2b48789dcd642641b30ab52cc0f3ad161c057220cda52788080b2be2b927e
 obs4REF/obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc 72f15a671e88cb0ec239af9e8c1a608bdf2837c884efde9721213481bcfa02a0
 obs4REF/obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc 54c939a1a461930230a1ae1423856c1929d5dd6bab72cbdad1fe24c5da579908

diff --git a/scripts/fetch_test_data.py b/scripts/fetch_test_data.py
@@ -485,18 +485,6 @@ def process_sample_data_request(
         remove_ensembles=False,
         time_span=("2000", "2025"),
     ),
-    # # Obs4MIPs AIRS data
-    # Obs4MIPsRequest(
-    #     facets=dict(
-    #         project="obs4MIPs",
-    #         institution_id="NASA-JPL",
-    #         frequency="mon",
-    #         source_id="AIRS-2-1",
-    #         variable_id="ta",
-    #     ),
-    #     remove_ensembles=False,
-    #     time_span=("2002", "2016"),
-    # ),
     # All unpublished obs4mips datasets
     Obs4REFRequest(),
 ]

diff --git a/src/ref_sample_data/data_request/obs4ref.py b/src/ref_sample_data/data_request/obs4ref.py
@@ -1,10 +1,13 @@
+import os
 import pathlib
+import tempfile
 from pathlib import Path
 
-import climate_ref  # noqa
 import pandas as pd
+import pooch
+import requests
 import xarray as xr
-from climate_ref_core.dataset_registry import dataset_registry_manager
+from loguru import logger
 
 from ref_sample_data.data_request.base import DecimateMixin
 
@@ -22,15 +25,44 @@ class Obs4REFRequest(DecimateMixin):
     id = "obs4ref"
     source_type = "obs4REF"
     time_span = None
+    branch_or_tag: str = "main"
+    """
+    The branch or tag to use for fetching the dataset registry
+
+    This defaults to `main` but can be set to a specific tag or branch name to pin a different
+    version of the datasets.
+    """
 
     def fetch_datasets(self) -> pd.DataFrame:
         """
         Fetch the datasets from the source
 
         Returns a dataframe of the metadata and paths to the fetched datasets.
         """
-        registry = dataset_registry_manager["obs4ref"]
+        # This mimics how a registry is set up in climate_ref_core.dataset_registry
+        DATASET_URL = os.environ.get("REF_DATASET_URL", default="https://obs4ref.climate-ref.org")
+
+        registry = pooch.create(
+            path=pooch.os_cache("climate_ref"),
+            base_url=DATASET_URL,
+            retry_if_failed=10,
+            env="REF_DATASET_CACHE_DIR",
+        )
+        registry_url = (
+            f"https://raw.githubusercontent.com/Climate-REF/climate-ref/refs/heads/{self.branch_or_tag}"
+            f"/packages/climate-ref/src/climate_ref/dataset_registry/obs4ref_reference.txt"
+        )
+        response = requests.get(registry_url, timeout=30)
+        response.raise_for_status()
+        with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmpfile:
+            # Write the registry to a temporary file
+            tmpfile.write(response.text)
+            tmpfile.flush()
+            logger.debug(f"Downloaded {tmpfile.name}: {response.text.count(os.linesep)} lines")
 
+            # Load the registry from the temporary file
+            registry.load_registry(tmpfile.name)
+        logger.info(f"Loaded dataset registry from {registry_url}: {len(registry.registry)} entries")
         datasets = []
         for key in registry.registry.keys():
             dataset_path = registry.fetch(key)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Removes the ERA5 ta dataset from obs4REF in favour of fetching it from obs4MIPs.