From 089d82f4f5ce822aa6142ac124703e36946d8c80 Mon Sep 17 00:00:00 2001 From: maryam abdi-oskouei Date: Mon, 13 Apr 2026 13:54:55 -0600 Subject: [PATCH 01/18] save --- .claude/settings.local.json | 19 ++ .../geos_cf/convert_observations/omps_nm.yaml | 23 ++ .../download_observations/omps_nm.yaml | 18 ++ .../geos_cf/ingest_observations/omps_nm.yaml | 6 + src/swell/suites/ingest_obs/flow.cylc | 14 + src/swell/suites/ingest_obs/suite_config.py | 19 ++ src/swell/tasks/convert_obs_to_ioda.py | 157 +++++++++++ src/swell/tasks/download_obs.py | 256 ++++++++++++++++++ src/swell/tasks/ingest_obs.py | 64 +++-- src/swell/utilities/question_defaults.py | 27 ++ 10 files changed, 574 insertions(+), 29 deletions(-) create mode 100644 .claude/settings.local.json create mode 100644 src/swell/configuration/jedi/interfaces/geos_cf/convert_observations/omps_nm.yaml create mode 100644 src/swell/configuration/jedi/interfaces/geos_cf/download_observations/omps_nm.yaml create mode 100644 src/swell/configuration/jedi/interfaces/geos_cf/ingest_observations/omps_nm.yaml create mode 100644 src/swell/tasks/convert_obs_to_ioda.py create mode 100644 src/swell/tasks/download_obs.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 000000000..47d08f5a5 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,19 @@ +{ + "permissions": { + "allow": [ + "Read(//Users/maryamabdi/Work/JEDI/playground/skylab/models/geos_cf/tasks/**)", + "Bash(git ls-tree -r --name-only develop)", + "Bash(git checkout develop -- docs/examples/soca/3dfgat_marine_cycle.md docs/examples/soca/3dvar_marine.md docs/examples/soca/3dvar_marine_cycle_tier2.md src/swell/configuration/jedi/interfaces/geos_atmosphere/model/r2d2.py src/swell/configuration/jedi/interfaces/geos_cf/model/r2d2.py src/swell/configuration/jedi/interfaces/geos_cf/model/stage_cycle.py src/swell/configuration/jedi/interfaces/geos_cf/task_questions.yaml 
src/swell/configuration/jedi/interfaces/geos_marine/model/r2d2.py src/swell/configuration/jedi/oops/variational3D.py src/swell/deployment/platforms/generic/r2d2_config.yaml src/swell/deployment/platforms/generic/task_questions.yaml src/swell/deployment/platforms/nccs_discover_cascade/r2d2_config.yaml src/swell/deployment/platforms/nccs_discover_cascade/task_questions.yaml src/swell/deployment/platforms/nccs_discover_sles15/r2d2_config.yaml src/swell/deployment/platforms/nccs_discover_sles15/suite_questions.yaml src/swell/deployment/platforms/nccs_discover_sles15/task_questions.yaml src/swell/suites/hofx_cf/suite_config.py src/swell/tasks/get_background.py src/swell/tasks/get_geovals.py src/swell/tasks/get_ncdiags.py src/swell/tasks/get_observations.py src/swell/tasks/get_restart.py src/swell/tasks/ingest_obs.py src/swell/tasks/run_jedi_variational_executable.py src/swell/tasks/save_obs_diags.py src/swell/tasks/save_restart.py src/swell/utilities/r2d2.py)", + "Bash(git checkout develop -- docs/examples/soca/3dfgat_marine_cycle.md docs/examples/soca/3dvar_marine.md docs/examples/soca/3dvar_marine_cycle_tier2.md src/swell/configuration/jedi/interfaces/geos_atmosphere/model/r2d2.py src/swell/configuration/jedi/interfaces/geos_cf/model/r2d2.py src/swell/configuration/jedi/interfaces/geos_cf/model/stage_cycle.py src/swell/configuration/jedi/interfaces/geos_cf/task_questions.yaml src/swell/configuration/jedi/interfaces/geos_marine/model/r2d2.py src/swell/configuration/jedi/oops/variational3D.py src/swell/deployment/platforms/generic/r2d2_config.yaml src/swell/deployment/platforms/generic/task_questions.yaml src/swell/deployment/platforms/nccs_discover_cascade/r2d2_config.yaml src/swell/deployment/platforms/nccs_discover_cascade/task_questions.yaml src/swell/deployment/platforms/nccs_discover_sles15/r2d2_config.yaml src/swell/deployment/platforms/nccs_discover_sles15/suite_questions.yaml src/swell/deployment/platforms/nccs_discover_sles15/task_questions.yaml 
src/swell/suites/hofx_cf/suite_config.py src/swell/tasks/get_background.py src/swell/tasks/get_geovals.py src/swell/tasks/get_ncdiags.py src/swell/tasks/get_observations.py src/swell/tasks/ingest_obs.py src/swell/tasks/run_jedi_variational_executable.py src/swell/tasks/save_obs_diags.py src/swell/tasks/save_restart.py src/swell/utilities/r2d2.py)", + "Bash(grep -l \"RunForecast\\\\|run_forecast\" /Users/maryamabdi/Work/JEDI/swell/src/swell/suites/*/flow.cylc)", + "Bash(gh pr:*)", + "Bash(python3 -c \"import swell.suites.base.suite_attributes\")", + "Bash(grep -E \"\\\\.\\(yaml|yml\\)$\")", + "Bash(grep -n \"def render_oops_file\" /Users/maryamabdi/Work/JEDI/swell/src/swell/utilities/*.py)", + "Bash(ls /Users/maryamabdi/Work/JEDI/swell/src/swell/*.py)", + "Read(//Users/maryamabdi/Downloads/**)", + "Read(//Users/maryamabdi/Work/JEDI/playground/**)", + "Bash(grep -r \"class.*Task\" /Users/maryamabdi/Work/JEDI/swell/src/swell/tasks/*.py)" + ] + } +} diff --git a/src/swell/configuration/jedi/interfaces/geos_cf/convert_observations/omps_nm.yaml b/src/swell/configuration/jedi/interfaces/geos_cf/convert_observations/omps_nm.yaml new file mode 100644 index 000000000..8cabc423a --- /dev/null +++ b/src/swell/configuration/jedi/interfaces/geos_cf/convert_observations/omps_nm.yaml @@ -0,0 +1,23 @@ +# OMPS Nadir Mapper Total Ozone L2 — IODA converter configuration +# Converter: omps_o3_nm_h52ioda.py (installed by ioda-converters into jedi_bundle/build/bin/) +# +# Invocation (one call per cycle, all granules passed together): +# python3 /build/bin/omps_o3_nm_h52ioda.py +# -i ... 
+# -o <cycle_dir>/ioda/omps_nm/omps_nm_YYYYMMDDHH.nc +# -q 128 +# -e atbd + +# Python script name installed in the JEDI bundle bin directory +converter_script: omps_o3_nm_h52ioda.py + +# Glob pattern for selecting input files from <cycle_dir>/download/omps_nm/ +input_glob: "*.h5" + +# Output filename template (strftime placeholders: %Y %m %d %H) +output_filename_template: "omps_nm_%Y%m%d%H.nc" + +# Additional CLI flags passed verbatim after -i and -o +extra_flags: + -q: 128 # quality flag threshold + -e: atbd # error estimation method diff --git a/src/swell/configuration/jedi/interfaces/geos_cf/download_observations/omps_nm.yaml b/src/swell/configuration/jedi/interfaces/geos_cf/download_observations/omps_nm.yaml new file mode 100644 index 000000000..b375b7883 --- /dev/null +++ b/src/swell/configuration/jedi/interfaces/geos_cf/download_observations/omps_nm.yaml @@ -0,0 +1,18 @@ +# OMPS Nadir Mapper Total Ozone L2 — download configuration +# Instrument: OMPS-NM on Suomi NPP +# Product: OMPS_NPP_NMTO3_L2 v2.1 +# Source: NASA GES DISC (Earthdata authentication required) +# +# Files are named: OMPS-NPP_NMTO3-L2_v2.1_YYYYmMMDDtHH*.h5 +# where HH is the UTC hour at the start of the orbit granule. + +remote_host: https://snpp-omps.gesdisc.eosdis.nasa.gov +remote_path_template: /data/SNPP_OMPS_Level2/OMPS_NPP_NMTO3_L2.2/YYYY/JJJ/ +filename_pattern: OMPS-NPP_NMTO3-L2_v2.1_YYYYmMMDDtHH*.h5 +auth_type: earthdata_token + +# Maximum duration of a single orbit granule. Used to extend the file +# search window backwards so that orbits starting before window_begin +# but containing data within the DA window are not missed. +# OMPS-NM orbits are approximately 101 minutes; PT2H provides a safe margin. 
+max_orbit_duration: PT2H diff --git a/src/swell/configuration/jedi/interfaces/geos_cf/ingest_observations/omps_nm.yaml b/src/swell/configuration/jedi/interfaces/geos_cf/ingest_observations/omps_nm.yaml new file mode 100644 index 000000000..5354528a9 --- /dev/null +++ b/src/swell/configuration/jedi/interfaces/geos_cf/ingest_observations/omps_nm.yaml @@ -0,0 +1,6 @@ +# OMPS Nadir Mapper Total Ozone L2 — R2D2 ingestion configuration +# Source: output of ConvertObsToIoda, relative to the cycle work directory. +# Path uses strftime placeholders (%Y, %m, %d, %H). + +retrieval_method: local +source: ioda/omps_nm/omps_nm_%Y%m%d%H.nc # in CYCLE_DIR diff --git a/src/swell/suites/ingest_obs/flow.cylc b/src/swell/suites/ingest_obs/flow.cylc index dc999598f..e0399943e 100644 --- a/src/swell/suites/ingest_obs/flow.cylc +++ b/src/swell/suites/ingest_obs/flow.cylc @@ -11,7 +11,11 @@ {% for cycle_time in cycle_times %} {{cycle_time.cycle_time}} = """ {% for model_component in model_components %} + {% if obs_to_download is defined and obs_to_download %} + DownloadObs-{{model_component}} => ConvertObsToIoda-{{model_component}} => IngestObs-{{model_component}} + {% else %} IngestObs-{{model_component}} + {% endif %} {% endfor %} """ {% endfor %} @@ -25,6 +29,16 @@ config = $CYLC_SUITE_DEF_PATH/experiment.yaml {% for model_component in model_components %} + {% if obs_to_download is defined and obs_to_download %} + [[DownloadObs-{{model_component}}]] + script = "swell task DownloadObs $config -d $datetime -m {{model_component}}" + execution time limit = PT30M + + [[ConvertObsToIoda-{{model_component}}]] + script = "swell task ConvertObsToIoda $config -d $datetime -m {{model_component}}" + execution time limit = PT15M + + {% endif %} [[IngestObs-{{model_component}}]] script = "swell task IngestObs $config -d $datetime -m {{model_component}}" execution time limit = PT10M diff --git a/src/swell/suites/ingest_obs/suite_config.py b/src/swell/suites/ingest_obs/suite_config.py index 
6d9f1655f..d542d7058 100644 --- a/src/swell/suites/ingest_obs/suite_config.py +++ b/src/swell/suites/ingest_obs/suite_config.py @@ -38,3 +38,22 @@ class SuiteConfig(QuestionContainer, Enum): qd.dry_run(True), ] ) + + ingest_obs_cf = QuestionList( + list_name="ingest_obs_cf", + questions=[ + ingest_obs, + qd.start_cycle_point("2024-01-01T00:00:00Z"), + qd.final_cycle_point("2024-01-02T00:00:00Z"), + qd.model_components(['geos_cf']), + qd.runahead_limit("P5"), + ], + geos_cf=[ + qd.window_length("PT6H"), + qd.cycle_times(['T00', 'T06', 'T12', 'T18']), + qd.obs_to_download(['omps_nm']), + qd.earthdata_token_path(''), + qd.obs_to_ingest(['omps_nm']), + qd.dry_run(True), + ] + ) diff --git a/src/swell/tasks/convert_obs_to_ioda.py b/src/swell/tasks/convert_obs_to_ioda.py new file mode 100644 index 000000000..dc9ca994a --- /dev/null +++ b/src/swell/tasks/convert_obs_to_ioda.py @@ -0,0 +1,157 @@ +# (C) Copyright 2021- United States Government as represented by the Administrator of the +# National Aeronautics and Space Administration. All Rights Reserved. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. + +""" +Task for converting downloaded native observation files to IODA format. + +Runs an ioda-converters Python script installed in the JEDI bundle's bin +directory against the raw files produced by DownloadObs, writing a single +IODA-formatted NetCDF file per cycle into the cycle's ioda/ directory. +""" + +import glob +import os +import subprocess +import sys +import yaml +from datetime import datetime + +from swell.tasks.base.task_base import taskBase + + +class ConvertObsToIoda(taskBase): + """Convert downloaded native observation files to IODA format. + + For each observation in ``obs_to_download``, this task: + + 1. Reads a per-obs converter config from + ``convert_observations/.yaml`` in the experiment's + configuration directory. + 2. 
Collects all raw files from ``/download//``. + 3. Runs the ioda-converters Python script from the JEDI bundle's bin + directory, passing all input files in a single invocation. + 4. Writes the converted IODA file to + ``/ioda//``. + + The converter script is invoked as:: + + python3 /build/bin/ + -i ... + -o + [additional flags from converter config] + + Args: + config: Inherited from ``taskBase``. Relevant keys: + + - ``obs_to_download``: list of obs names — reuses the same list + set for ``DownloadObs`` so no extra config key is needed. + - ``dry_run``: if ``True``, log the command but do not run it. + + Example: + In a Cylc suite:: + + swell task ConvertObsToIoda experiment.yaml -d 2024-01-01T00:00:00Z -m geos_cf + """ + + def execute(self) -> None: + + obs_to_convert = self.config.obs_to_download([]) + dry_run = self.config.dry_run(True) + + if dry_run: + self.logger.info('DRY RUN MODE - No converters will be run') + + jedi_bin = os.path.join( + self.experiment_path(), 'jedi_bundle', 'build', 'bin') + + cycle_time_dto = self.cycle_time_dto() + + for obs_name in obs_to_convert: + self.logger.info(f'Converting: {obs_name}') + + config_path = os.path.join( + self.experiment_path(), + 'configuration', 'jedi', 'interfaces', + self.get_model(), + 'convert_observations', + f'{obs_name}.yaml') + + if not os.path.exists(config_path): + self.logger.error( + f'Converter config not found for {obs_name} at {config_path}') + continue + + with open(config_path, 'r') as fh: + conv_config = yaml.safe_load(fh) + + self._run_converter( + obs_name, conv_config, jedi_bin, cycle_time_dto, dry_run) + + # ------------------------------------------------------------------ + # Private helpers + # ------------------------------------------------------------------ + + def _run_converter( + self, + obs_name: str, + conv_config: dict, + jedi_bin: str, + cycle_time_dto: datetime, + dry_run: bool, + ) -> None: + """Build and run the ioda-converter command for one observation type.""" + + # 
Collect all downloaded input files + download_dir = os.path.join(self.cycle_dir(), 'download', obs_name) + input_pattern = os.path.join(download_dir, conv_config.get('input_glob', '*.h5')) + input_files = sorted(glob.glob(input_pattern)) + + if not input_files: + self.logger.warning( + f'No input files found for {obs_name} in {download_dir}') + return + + self.logger.info(f' Found {len(input_files)} input file(s)') + + # Build output path + ioda_dir = os.path.join(self.cycle_dir(), 'ioda', obs_name) + output_filename = cycle_time_dto.strftime( + conv_config.get('output_filename_template', f'{obs_name}_%Y%m%d%H.nc')) + output_file = os.path.join(ioda_dir, output_filename) + + if not dry_run: + os.makedirs(ioda_dir, exist_ok=True) + + # Locate the converter script in the JEDI bundle bin directory + script_name = conv_config['converter_script'] + script_path = os.path.join(jedi_bin, script_name) + + if not dry_run and not os.path.exists(script_path): + self.logger.error(f'Converter script not found: {script_path}') + return + + # Build command: python3