Skip to content

Commit c10571b

Browse files
committed
feat: add reference datasets
1 parent ddfe20e commit c10571b

File tree

13 files changed

+2679
-3
lines changed

13 files changed

+2679
-3
lines changed

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"sentry-sdk[fastapi]>=1.40.6,<2.0.0",
1414
"climate-ref[aft-providers,postgres]>=0.6.3",
1515
"loguru",
16+
"pyyaml>=6.0",
1617
# Not exactly sure why we need to pin this
1718
"lz4==4.4.4",
1819
"fastapi-sqlalchemy-monitor>=1.1.3",

backend/src/ref_backend/core/config.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import functools
22
import secrets
33
import warnings
4+
from pathlib import Path
45
from typing import Annotated, Any, Literal, Self
56

67
from pydantic import (
@@ -60,6 +61,28 @@ def all_cors_origins(self) -> list[str]:
6061
This is useful for local development and testing, but should not be used in production.
6162
"""
6263

64+
DIAGNOSTIC_METADATA_PATH: Path | None = None
65+
"""
66+
Path to the diagnostic metadata YAML file.
67+
68+
This file provides additional metadata for diagnostics that can override or supplement
69+
the default values from diagnostic implementations. If not provided, defaults to
70+
'static/diagnostics/metadata.yaml' relative to the backend directory.
71+
"""
72+
73+
@computed_field  # type: ignore[prop-decorator]
@property
def diagnostic_metadata_path_resolved(self) -> Path:
    """
    Path of the diagnostic metadata file that should be loaded.

    An explicitly configured ``DIAGNOSTIC_METADATA_PATH`` is returned unchanged.
    Otherwise the path falls back to ``static/diagnostics/metadata.yaml`` under
    the directory four levels above this module.
    """
    configured_path = self.DIAGNOSTIC_METADATA_PATH
    if configured_path is None:
        # Four levels above this file is the backend directory
        backend_dir = Path(__file__).parent.parent.parent.parent
        return backend_dir.joinpath("static", "diagnostics", "metadata.yaml")
    return configured_path
85+
6386
def _check_default_secret(self, var_name: str, value: str | None) -> None:
6487
if value == "changethis":
6588
message = (
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""
2+
Diagnostic metadata loading and override system.
3+
4+
This module provides functionality for loading diagnostic metadata from a YAML file
5+
that can override or supplement the default metadata from diagnostic implementations.
6+
It's particularly useful for exposing reference dataset information and other metadata
7+
that may not be directly available from the diagnostic provider code.
8+
"""
9+
10+
from pathlib import Path
11+
from typing import Literal
12+
13+
import yaml
14+
from loguru import logger
15+
from pydantic import BaseModel, Field
16+
17+
18+
class ReferenceDatasetLink(BaseModel):
    """
    A reference dataset that a diagnostic makes use of.

    Reference datasets are the observational or reanalysis datasets that model
    output is compared against. Each link names the dataset by slug, optionally
    describes how the diagnostic uses it, and classifies its role in the analysis.
    """

    slug: str = Field(description="Unique identifier for the dataset(e.g., 'obs4mips.CERES-EBAF.v4.2')")
    description: str | None = Field(
        default=None,
        description="Description of how this dataset is used in the diagnostic",
    )
    type: Literal["primary", "secondary", "comparison"] = Field(
        description="\n".join(
            [
                "Role of this reference dataset:",
                "- 'primary': Main reference dataset for the diagnostic",
                "- 'secondary': Additional reference for comparison or validation",
                "- 'comparison': Used for comparative analysis",
            ]
        ),
    )
39+
40+
41+
class DiagnosticMetadata(BaseModel):
    """
    Optional per-diagnostic metadata read from the YAML overrides file.

    Every field defaults to ``None`` so that an entry can supply only the values
    it wants to override or add.
    """

    reference_datasets: list[ReferenceDatasetLink] | None = Field(
        default=None,
        description="Reference datasets used by this diagnostic",
    )
    display_name: str | None = Field(
        default=None,
        description="Display name override for the diagnostic",
    )
    tags: list[str] | None = Field(
        default=None,
        description="Tags for categorizing the diagnostic",
    )
54+
55+
56+
def load_diagnostic_metadata(yaml_path: Path) -> dict[str, DiagnosticMetadata]:
    """
    Load diagnostic metadata overrides from a YAML file.

    The file maps diagnostic keys of the form "provider-slug/diagnostic-slug"
    to entries following the structure of DiagnosticMetadata. Loading is
    best-effort: problems are logged and never raised, and an entry that fails
    validation is skipped without affecting the other entries.

    Parameters
    ----------
    yaml_path
        Path to the YAML metadata file

    Returns
    -------
    :
        Dictionary mapping diagnostic keys (provider/diagnostic) to their metadata.
        Returns an empty dict if the file doesn't exist, is empty, cannot be read or
        parsed, or does not contain a mapping at the top level.

    Examples
    --------
    Example YAML structure:

    ```yaml
    pmp/annual-cycle:
      reference_datasets:
        - slug: "obs4mips.CERES-EBAF.v4.2"
          description: "CERES Energy Balanced and Filled"
          type: "primary"
      display_name: "Annual Cycle Analysis"
      tags: ["atmosphere", "seasonal-cycle"]
    ```
    """
    if not yaml_path.exists():
        logger.warning(
            f"Diagnostic metadata file not found at {yaml_path}. "
            "No metadata overrides will be applied. This is expected if you haven't "
            "created the metadata file yet."
        )
        return {}

    # Only reading and parsing the file is guarded here, so that errors from the
    # per-entry validation below are not misreported as file-level failures.
    try:
        # Decode explicitly as UTF-8 rather than relying on the platform's locale encoding
        with open(yaml_path, encoding="utf-8") as f:
            raw_data = yaml.safe_load(f)
    except yaml.YAMLError as e:
        logger.error(f"Failed to parse YAML file at {yaml_path}: {e}")
        return {}
    except Exception as e:
        logger.error(f"Unexpected error loading diagnostic metadata from {yaml_path}: {e}")
        return {}

    if raw_data is None:
        logger.info(f"Diagnostic metadata file at {yaml_path} is empty.")
        return {}

    # A top-level list or scalar is valid YAML but not a valid metadata file
    if not isinstance(raw_data, dict):
        logger.error(
            f"Diagnostic metadata file at {yaml_path} must contain a mapping of diagnostic keys "
            f"to metadata, but found {type(raw_data).__name__}."
        )
        return {}

    # Parse each diagnostic's metadata; a bad entry is logged and skipped
    metadata_dict: dict[str, DiagnosticMetadata] = {}
    for diagnostic_key, metadata_raw in raw_data.items():
        if not isinstance(metadata_raw, dict):
            # e.g. a key with no value; report it clearly instead of an opaque `**` TypeError
            logger.error(
                f"Failed to parse metadata for diagnostic '{diagnostic_key}': "
                f"expected a mapping, got {type(metadata_raw).__name__}"
            )
            continue

        try:
            metadata = DiagnosticMetadata(**metadata_raw)
        except Exception as e:
            logger.error(f"Failed to parse metadata for diagnostic '{diagnostic_key}': {e}")
            continue

        metadata_dict[diagnostic_key] = metadata
        logger.debug(f"Loaded metadata for diagnostic: {diagnostic_key}")

    logger.info(f"Successfully loaded metadata for {len(metadata_dict)} diagnostics from {yaml_path}")
    return metadata_dict

backend/src/ref_backend/models.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections.abc import Sequence
22
from datetime import datetime
3-
from typing import TYPE_CHECKING, Generic, Literal, TypeVar, Union
3+
from typing import TYPE_CHECKING, ClassVar, Generic, Literal, TypeVar, Union
44

55
from attr import define
66
from loguru import logger
@@ -11,6 +11,11 @@
1111
from climate_ref.models.dataset import CMIP6Dataset
1212
from climate_ref.models.execution import ResultOutputType
1313
from climate_ref_core.metric_values import ScalarMetricValue
14+
from ref_backend.core.diagnostic_metadata import (
15+
DiagnosticMetadata,
16+
ReferenceDatasetLink,
17+
load_diagnostic_metadata,
18+
)
1419
from ref_backend.core.json_utils import sanitize_float_list, sanitize_float_value
1520

1621
if TYPE_CHECKING:
@@ -124,12 +129,33 @@ class DiagnosticSummary(BaseModel):
124129
"""
125130
Associated AFT diagnostics
126131
"""
132+
reference_datasets: list[ReferenceDatasetLink] | None = None
133+
"""
134+
Reference datasets used by this diagnostic (from metadata overrides)
135+
136+
These are manually curated and may not be complete at this time.
137+
"""
138+
tags: list[str] | None = None
139+
"""
140+
Tags for categorizing the diagnostic (from metadata overrides)
141+
"""
142+
143+
# Cache for loaded diagnostic metadata (class variable)
144+
_metadata_cache: ClassVar[dict[str, DiagnosticMetadata] | None] = None
127145

128146
@staticmethod
129147
def build(diagnostic: models.Diagnostic, app_context: "AppContext") -> "DiagnosticSummary":
130148
# Import here to avoid circular import issues
131149
from ref_backend.core.aft import get_aft_diagnostic_by_id, get_aft_for_ref_diagnostic
132150

151+
# Load metadata YAML on first use (cached)
152+
if DiagnosticSummary._metadata_cache is None:
153+
metadata_path = app_context.settings.diagnostic_metadata_path_resolved
154+
DiagnosticSummary._metadata_cache = load_diagnostic_metadata(metadata_path)
155+
logger.debug(
156+
f"Loaded diagnostic metadata cache with {len(DiagnosticSummary._metadata_cache)} entries"
157+
)
158+
133159
concrete_diagnostic = app_context.provider_registry.get_metric(
134160
diagnostic.provider.slug, diagnostic.slug
135161
)
@@ -234,7 +260,8 @@ def build(diagnostic: models.Diagnostic, app_context: "AppContext") -> "Diagnost
234260
logger.warning(f"No AFT found for diagnostic {diagnostic.provider.slug}/{diagnostic.slug}")
235261
aft = None
236262

237-
return DiagnosticSummary(
263+
# Build the base diagnostic summary
264+
summary = DiagnosticSummary(
238265
id=diagnostic.id,
239266
provider=ProviderSummary.build(diagnostic.provider),
240267
slug=diagnostic.slug,
@@ -252,6 +279,23 @@ def build(diagnostic: models.Diagnostic, app_context: "AppContext") -> "Diagnost
252279
aft_link=aft,
253280
)
254281

282+
# Apply metadata overrides from YAML if available
283+
diagnostic_key = f"{diagnostic.provider.slug}/{diagnostic.slug}"
284+
if diagnostic_key in DiagnosticSummary._metadata_cache:
285+
metadata = DiagnosticSummary._metadata_cache[diagnostic_key]
286+
287+
# Apply overrides: YAML values override database values
288+
if metadata.display_name is not None:
289+
summary.name = metadata.display_name
290+
if metadata.reference_datasets is not None:
291+
summary.reference_datasets = metadata.reference_datasets
292+
if metadata.tags is not None:
293+
summary.tags = metadata.tags
294+
295+
logger.debug(f"Applied metadata overrides for diagnostic {diagnostic_key}")
296+
297+
return summary
298+
255299

256300
class ExecutionOutput(BaseModel):
257301
id: int
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Diagnostic Metadata Overrides
2+
#
3+
# This file provides additional metadata for diagnostics that supplements or overrides
4+
# the default values from the diagnostic implementations. It's particularly useful for:
5+
# - Exposing which reference datasets are used by each diagnostic
6+
# - Providing display names (and descriptions of how each reference dataset is used)
7+
# - Adding tags for categorization
8+
#
9+
# Structure:
10+
# provider-slug/diagnostic-slug:
11+
# reference_datasets:
12+
# - slug: "dataset-slug"
13+
# description: "Description of how this dataset is used"
14+
# type: "primary|secondary|comparison"
15+
# display_name: "Display Name (overrides default)"
16+
# tags: ["category1", "category2"]
17+
#
18+
# Example diagnostics (uncomment and populate with real diagnostics as needed):
19+
20+
# pmp/annual-cycle:
21+
# reference_datasets:
22+
# - slug: "obs4mips.CERES-EBAF.v4.2"
23+
# description: "CERES Energy Balanced and Filled - Primary reference for TOA radiation"
24+
# type: "primary"
25+
# - slug: "obs4mips.GPCP.v2.3"
26+
# description: "Global Precipitation Climatology Project - Precipitation comparison"
27+
# type: "secondary"
28+
# display_name: "Annual Cycle Analysis"
29+
# tags: ["atmosphere", "seasonal-cycle", "radiation"]
30+
31+
# pmp/mean-state:
32+
# reference_datasets:
33+
# - slug: "obs4mips.ERA5.v1"
34+
# description: "ERA5 Reanalysis - Primary climatology reference"
35+
# type: "primary"
36+
# display_name: "Mean State Climatology"
37+
# tags: ["atmosphere", "climatology"]
38+
39+
# Add your diagnostic metadata here
40+
41+
# Real diagnostic from production database
42+
pmp/annual-cycle:
43+
reference_datasets:
44+
- slug: "obs4mips.CERES-EBAF.v4.2"
45+
description: "CERES Energy Balanced and Filled - Primary reference for TOA radiation"
46+
type: "primary"
47+
# - slug: "obs4mips.GPCP.v2.3"
48+
# description: "Global Precipitation Climatology Project - Precipitation comparison"
49+
# type: "secondary"
50+
# - slug: "ERA5.monthly.single-levels"
51+
# description: "ERA5 Reanalysis - Temperature and pressure fields"
52+
# type: "comparison"
53+
display_name: "Annual Cycle Analysis"

backend/uv.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)