Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/11.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added obs4MIPs sample data to the fetching script.
1 change: 1 addition & 0 deletions changelog/12.feature.md
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was for the next PR, but I'll leave it in for now

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Restructured code into classes so that additional sample datasets can be fetched easily in the future.
Binary file not shown.
1 change: 1 addition & 0 deletions registry.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/rsut/gn/v20210318/rsu
CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Amon/tas/gn/v20210318/tas_Amon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-202512.nc 3124671936cb2554af0a1f48b814fa8bb186a0ee2af6bcc86b5cb126b107d7a2
CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/Omon/tos/gn/v20210318/tos_Omon_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn_201501-202512.nc 10d13b1250f5483e5d6105b0dd811658849324c03f27539b83642062a1151b93
CMIP6/ScenarioMIP/CSIRO/ACCESS-ESM1-5/ssp126/r1i1p1f1/fx/areacella/gn/v20210318/areacella_fx_ACCESS-ESM1-5_ssp126_r1i1p1f1_gn.nc 064b48e5b2971cb4e8edad95b27fbbfc2f6dcdc2de99e2df2944d9c2b0db4910
obs4MIPs/NASA-JPL/AIRS-2-1/ta/gn/v20201110/ta_AIRS-2-1_gn_200209-201609.nc 689d9f175fab93428f1431f6625c8fd7f66700eaf332053485faf31992980b2a
43 changes: 36 additions & 7 deletions scripts/fetch_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ def fetch_datasets(
cat.remove_ensembles()

path_dict = cat.to_path_dict(prefer_streaming=False, minimal_keys=False)

merged_df = cat.df.merge(pd.Series(path_dict, name="files"), left_on="key", right_index=True)
if time_span:
merged_df["time_start"] = time_span[0]
Expand Down Expand Up @@ -152,10 +151,30 @@ def create_out_filename(metadata: pd.Series, ds: xr.Dataset) -> pathlib.Path:
"grid_label",
]

output_path = (
Path(os.path.join(*[metadata[item] for item in cmip6_path_items])) / f"v{metadata['version']}"
)
filename_prefix = "_".join([metadata[item] for item in cmip6_filename_paths])
obs4mips_path_items = [
"activity_id",
"institution_id",
"source_id",
"variable_id",
"grid_label",
]

obs4mips_filename_paths = [
"variable_id",
"source_id",
"grid_label",
]

if metadata.project == "obs4MIPs":
output_path = (
Path(os.path.join(*[metadata[item] for item in obs4mips_path_items])) / f"v{metadata['version']}"
)
filename_prefix = "_".join([metadata[item] for item in obs4mips_filename_paths])
else:
output_path = (
Path(os.path.join(*[metadata[item] for item in cmip6_path_items])) / f"v{metadata['version']}"
)
filename_prefix = "_".join([metadata[item] for item in cmip6_filename_paths])

if "time" in ds.dims:
time_range = f"{ds.time.min().dt.strftime('%Y%m').item()}-{ds.time.max().dt.strftime('%Y%m').item()}"
Expand Down Expand Up @@ -194,6 +213,16 @@ def create_out_filename(metadata: pd.Series, ds: xr.Dataset) -> pathlib.Path:
remove_ensembles=True,
time_span=("0101", "0180"),
),
# obs4MIPs sample data: monthly `ta` from AIRS-2-1 (NASA-JPL)
dict(
project="obs4MIPs",
institution_id="NASA-JPL",
frequency="mon",
source_id="AIRS-2-1",
variable_id="ta",
remove_ensembles=False,
time_span=("2002", "2016"),
),
]

dataset_metadata_collection: list[pd.DataFrame] = []
Expand All @@ -210,10 +239,10 @@ def create_out_filename(metadata: pd.Series, ds: xr.Dataset) -> pathlib.Path:
datasets = deduplicate_datasets(pd.concat(dataset_metadata_collection))

for _, dataset in datasets.iterrows():
print(dataset.key)
for ds_filename in dataset["files"]:
if ds_filename.name.split("_")[0] != dataset.variable_id:
continue
ds_orig = xr.open_dataset(ds_filename)

ds_decimated = decimate_dataset(ds_orig, time_span=(dataset["time_start"], dataset["time_end"]))
if ds_decimated is None:
continue
Expand Down