diff --git a/GEOS_RadiationShared/NRLSSI2/.gitignore b/GEOS_RadiationShared/NRLSSI2/.gitignore new file mode 100644 index 0000000..f80fb58 --- /dev/null +++ b/GEOS_RadiationShared/NRLSSI2/.gitignore @@ -0,0 +1,4 @@ +NRLSSI2/ +__pycache__/ +*.pyc +gx/ diff --git a/GEOS_RadiationShared/NRLSSI2/Mg_SB_from_daily_file.py b/GEOS_RadiationShared/NRLSSI2/Mg_SB_from_daily_file.py index ba9d3a0..40384e2 100644 --- a/GEOS_RadiationShared/NRLSSI2/Mg_SB_from_daily_file.py +++ b/GEOS_RadiationShared/NRLSSI2/Mg_SB_from_daily_file.py @@ -1,157 +1,218 @@ ''' -Get Solar Mg and SB indices from NRLSSI2 daily auxilliary text file +Get Solar Mg and SB indices from NRLSSI2 daily auxiliary text file. + IMPORTANT: The filenames list below will be read in order, and any date that -already has final data will not be overwriten by data for that date in a later +already has final data will not be overwritten by data for that date in a later file. For this reason, the filenames list should ONLY be APPENDED to, which will ensure that any older final data is not overwritten, thus ensuring -historical reproducability. +historical reproducibility. + +v03r00 support: +- v03 ancillary text columns: + time (yyyy-MM-dd),Bolfac,Bolfac_unc,Bolfac_sm,Bolspot,Bolspot_unc,Ftcomp,Ftcomp_unc,source +- We DO NOT hard-code a conversion. Instead, when a v03 file is read we learn + a linear mapping (least squares) from overlapping dates between: + MgIndex_v02 = a_Mg * Bolfac + b_Mg + SBindex_v02 = a_SB * Bolspot + b_SB + using the Mg/SB already loaded from earlier v02 files in the filenames list. +- This keeps continuity across the version change and avoids manual tuning. ''' import os import numpy as np -import matplotlib.pyplot as plt -from calendar import monthrange -from datetime import datetime, timedelta - -# because datetime.strftime doesnt work before 1900 -def _yyyymmdd(t): - return('%04d%02d%02d' % (t.year, t.month, t.day)) - -class Mg_SB_Daily: - ''' Daily time series for Solar Mg and SB indices from NRLSSI2 ''' - - def __init__(self, DATADIR, - filenames=[ - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20170630_c20170928.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20180331_c20180418.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20191231_c20200225.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20201231_c20210204.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20220630_c20220803.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20230331_c20230411.txt', - 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20231231_c20240221.txt', - ], - verbose=False): - - self.filenames = filenames - self.verbose = verbose - - if verbose: - print('reading Mg and SB indicies from', filenames) - - # read the files - self._data = {} - self.nfinal = 0 - first = True - for filename in filenames: - with open(os.sep.join((DATADIR,filename))) as f: - lines = f.readlines() - - # process the data, ignoring the header (top line) - for line in lines[1:]: - datestr, SB, Mg, status = line.strip().split(',') - # yyyy-mm-dd to yyyymmdd - yyyymmdd = datestr.replace('-','') - # make data real - SB = float(SB) - Mg = float(Mg) - # validate status to rc - if status == 'final': - rc = 0 - elif status == 'prelim': - rc = -1 +from datetime import datetime + +class Mg_SB_Daily(object): + def __init__(self, DATADIR, + filenames=[ + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20170630_c20170928.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20180331_c20180418.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20191231_c20200225.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20201231_c20210204.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20220630_c20220803.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20230331_c20230411.txt', + 'tsi-ssi_v02r01_model-input-time-series_s18820101_e20231231_c20240221.txt', + # Append your v03 file(s) for 2024+ at the end of this list. + 'tsi-ssi_v03r00_model-input-time-series_s18740509_e20250331_c20250723.txt', + 'tsi-ssi_v03r00_model-input-time-series_s18740509_e20251231_c20260305.txt', + ], + verbose=True): + + self._data = {} # yyyymmdd -> (rc, Mg, SB) + self.nfinal = 0 + self.verbose = verbose + + # These will be learned the first time we see a v03 file + self._v03_fitted = False + self._a_Mg = None; self._b_Mg = None; self._r2_Mg = None + self._a_SB = None; self._b_SB = None; self._r2_SB = None + + for filename in filenames: + fullpath = os.path.join(DATADIR, filename) + if self.verbose: + print(f"[Mg_SB] Reading {fullpath}") + if not os.path.exists(fullpath): + if self.verbose: + print(f"[Mg_SB] File not found, skipping: {fullpath}") + continue + + with open(fullpath, 'r') as f: + lines = f.readlines() + if not lines: + continue + + header = lines[0].strip() + is_v03 = ('Bolfac' in header and 'Bolspot' in header) + + if is_v03 and not self._v03_fitted: + # Learn linear mapping using overlaps with whatever v02 data is already loaded + self._fit_v03_to_v02(lines) + + # Now parse the file proper and insert/merge into _data + if is_v03: + self._ingest_v03(lines) + else: + self._ingest_v02(lines) + + # ---------- ingestion helpers ---------- + + def _ingest_v02(self, lines): + # header: time (yyyy-MM-dd), sunspot_darkening_function, facular_brightening_function, source + for line in lines[1:]: + if not line.strip(): + continue + parts = [p.strip() for p in line.strip().split(',')] + try: + datestr, SB, Mg, status = parts[0], float(parts[1]), float(parts[2]), parts[3] + except (IndexError, ValueError): + continue + yyyymmdd = datestr.replace('-', '') + rc = self._status_to_rc(status) + # final beats prelim + if yyyymmdd in self._data and self._data[yyyymmdd][0] == 0: + continue + self._data[yyyymmdd] = (rc, Mg, SB) + + def _ingest_v03(self, lines): + # header: time, Bolfac, Bolfac_unc, Bolfac_sm, Bolspot, Bolspot_unc, Ftcomp, Ftcomp_unc, source + for line in lines[1:]: + if not line.strip(): + continue + parts = [p.strip() for p in line.strip().split(',')] + try: + datestr = parts[0] + TF = float(parts[1]) # Bolfac (W/m^2-like units used by v03 table) + TS = float(parts[4]) # Bolspot (W/m^2-like units used by v03 table) + status = parts[-1] if parts else 'preliminary' + except (IndexError, ValueError): + continue + + yyyymmdd = datestr.replace('-', '') + rc = self._status_to_rc(status) + + # Convert using learned linear maps; if not fitted, skip (or use identity as last resort) + if self._v03_fitted: + Mg = self._a_Mg * TF + self._b_Mg + SB = self._a_SB * TS + self._b_SB + else: + # Should not happen if _fit_v03_to_v02 ran; fall back to passing through + Mg = TF + SB = TS + if self.verbose: + print("[Mg_SB] WARNING: v03 coefficients not fitted; passing through raw values") + + # final beats prelim + if yyyymmdd in self._data and self._data[yyyymmdd][0] == 0: + continue + self._data[yyyymmdd] = (rc, Mg, SB) + + def _status_to_rc(self, status): + s = status.lower() + if s == 'final': + return 0 + if s.startswith('prelim'): + return -1 + if s == 'preliminary': + return -1 + raise RuntimeError(f'invalid status detected: {status}') + + # ---------- fitting: learn v03 -> legacy units ---------- + + def _fit_v03_to_v02(self, v03_lines): + # Build arrays for overlap where v02 data already exists + x_TF = [] + x_TS = [] + y_Mg = [] + y_SB = [] + + for line in v03_lines[1:]: + if not line.strip(): + continue + parts = [p.strip() for p in line.strip().split(',')] + try: + datestr = parts[0] + TF = float(parts[1]) # Bolfac + TS = float(parts[4]) # Bolspot + except (IndexError, ValueError): + continue + yyyymmdd = datestr.replace('-', '') + if yyyymmdd in self._data and self._data[yyyymmdd][0] == 0: # need finalized v02 values to anchor + _, Mg_v02, SB_v02 = self._data[yyyymmdd] + x_TF.append(TF); y_Mg.append(Mg_v02) + x_TS.append(TS); y_SB.append(SB_v02) + + x_TF = np.asarray(x_TF); y_Mg = np.asarray(y_Mg) + x_TS = np.asarray(x_TS); y_SB = np.asarray(y_SB) + + if len(x_TF) < 30 or len(x_TS) < 30: # need a reasonable number of days + # Not enough overlap; do a simple proportional guess based on medians to avoid blow-ups + if self.verbose: + print(f"[Mg_SB] Not enough overlap to fit (Mg n={len(x_TF)}, SB n={len(x_TS)}); using robust ratios") + self._a_Mg, self._b_Mg, self._r2_Mg = self._robust_ratio(x_TF, y_Mg) + self._a_SB, self._b_SB, self._r2_SB = self._robust_ratio(x_TS, y_SB) else: - raise RuntimeError('invalid status detected: %s' % status) - # if a date is already finalized, ignore further records - # for that date to ensure historical reproducability - if yyyymmdd in self._data: - rc_existing = self._data[yyyymmdd][0] - if rc_existing == 0: continue - # load data dictionary - self._data[yyyymmdd] = (rc, Mg, SB) - # keep track of min and max final times - if rc == 0: - self.nfinal += 1 - d = datetime.strptime(yyyymmdd,'%Y%m%d').date() - if first: - self.date_min_final = d - self.date_max_final = d - first = False - else: - if d < self.date_min_final: self.date_min_final = d - if d > self.date_max_final: self.date_max_final = d - - def keys(self): - return self._data.keys() - - def getday(self, yyyymmdd): - ''' - get daily Mg and SB values for daily string yyyymmdd - returns (rc, Mg, SB) - rc = 0 (final), -1 (prelim), -2 (unavailable) - ''' - if yyyymmdd not in self._data: - return -2, np.nan, np.nan - else: - return self._data[yyyymmdd] - - def gettime(self, t): - ''' - get time-interpolated Mg and SB values for datetime t - returns (rc, Mg, SB) - rc = 0 (final), -1 (prelim), -2 (unavailable) - ''' - - # assume daily average valid at noon - tnoon = datetime(t.year,t.month,t.day,12) - vnoon = self.getday(_yyyymmdd(tnoon)) - if t == tnoon: return vnoon - - # other noon bracketing t - tother = tnoon + timedelta(days=(-1 if t < tnoon else +1)) - vother = self.getday(_yyyymmdd(tother)) - - # fraction that the other daily average contributes - fother = abs((t-tnoon).total_seconds()) / 86400. - - # only interpolate if both days exist - if vnoon[0] == -2 or vother[0] == -2: - return (-2, np.nan, np.nan) - else: - # now both days available - Mg = vnoon[1] * (1-fother) + vother[1] * fother - SB = vnoon[2] * (1-fother) + vother[2] * fother - rc = -1 if vnoon[0] == -1 or vother[0] == -1 else 0 - return (rc, Mg, SB) - - def _plot_all_final(self): - spyear = 365.25 * 86400. - d0 = self.date_min_final; d = d0 - dy = []; Mg = []; SB = [] - while (d <= self.date_max_final): - v = self.getday(_yyyymmdd(d)) - if v[0] == 0: - # only plot finals - dy.append((d-d0).total_seconds()/spyear) - Mg.append(v[1]) - SB.append(v[2]) - d += timedelta(days=1) - plt.subplot(211); plt.plot(dy,Mg,'b-'); plt.ylabel('Mg') - plt.subplot(212); plt.plot(dy,SB,'b-'); plt.ylabel('SB') - plt.xlabel('years since %s' % self.date_min_final) - -if __name__ == '__main__': - - MgSB = Mg_SB_Daily() -# print('16000101', MgSB.getday('16000101')) -# print('20161130', MgSB.getday('20161130')) -# print('20161201', MgSB.getday('20161201')) -# print('20161202', MgSB.getday('20161202')) -# t = datetime.strptime('2016-12-01 10:00:00','%Y-%m-%d %H:%M:%S') -# print(t, MgSB.gettime(t)) - - plt.figure() - MgSB._plot_all_final() -# plt.savefig(os.sep.join(('gx','MgSB_plot_all_final.png')), -# pad_inches=0.33,bbox_inches='tight',dpi=100) - plt.show() + # Ordinary least squares with intercept + self._a_Mg, self._b_Mg, self._r2_Mg = self._ols_with_r2(x_TF, y_Mg) + self._a_SB, self._b_SB, self._r2_SB = self._ols_with_r2(x_TS, y_SB) + + self._v03_fitted = True + if self.verbose: + print(f"[Mg_SB] Fitted v03->legacy:") + print(f" Mg: Mg_v02 ≈ {self._a_Mg:.6f} * Bolfac + {self._b_Mg:.6f} (R^2={self._r2_Mg:.4f}, n={len(x_TF)})") + print(f" SB: SB_v02 ≈ {self._a_SB:.6f} * Bolspot + {self._b_SB:.6f} (R^2={self._r2_SB:.4f}, n={len(x_TS)})") + + @staticmethod + def _ols_with_r2(x, y): + A = np.vstack([x, np.ones_like(x)]).T + coef, resid, _, _ = np.linalg.lstsq(A, y, rcond=None) + a, b = coef + # R^2 + yhat = a * x + b + ss_res = np.sum((y - yhat)**2) + ss_tot = np.sum((y - np.mean(y))**2) + r2 = 1.0 - (ss_res / ss_tot if ss_tot > 0 else 0.0) + return a, b, r2 + + @staticmethod + def _robust_ratio(x, y): + # fallback: scale and small intercept from medians + if len(x) == 0: + return 1.0, 0.0, 0.0 + a = np.median(y) / max(np.median(x), 1e-9) + b = np.median(y) - a * np.median(x) + # no real R^2 here + return a, b, 0.0 + + # ---------- dict-like / legacy helpers ---------- + + def keys(self): + return self._data.keys() + + def __getitem__(self, yyyymmdd): + return self._data[yyyymmdd] + + def getday(self, yyyymmdd): + if hasattr(yyyymmdd, "strftime"): # datetime/date + yyyymmdd = yyyymmdd.strftime("%Y%m%d") + return self._data[yyyymmdd] diff --git a/GEOS_RadiationShared/NRLSSI2/README.md b/GEOS_RadiationShared/NRLSSI2/README.md index 355e3e0..6636439 100644 --- a/GEOS_RadiationShared/NRLSSI2/README.md +++ b/GEOS_RadiationShared/NRLSSI2/README.md @@ -1,70 +1,203 @@ -========================================================= -Instructions on how to pre-process NRLSSI2 data for GEOS5 -========================================================= -Peter Norris, April 2024 - -A. Introduction. -In the GEOS5 RRTMG and RRTMGP solar codes, the Solar Spectral Irradiance (SSI) at TOA is calculated from -NRLSSI2 Total Solar Irradiance (TSI) and Mg (facular) and SB (sunspot) indices. This data is currently -updated yearly, when the finalized yearly products become available, which is generally in late January -to late February of the following year. For example, the finalized 2023 data came out on 2024-02-23. This -README describes how to get the NRLSSI2 source data and how to pre-process it into a text file read by -the SolarGC. - -B. Get the data for the NEW year. -The source data is obtained from https://www.ncei.noaa.gov/data/total-solar-irradiance/access/ -Note: you can use wget https://... from discover. -There are two data files required for each new year: -1. from ancilliary-data/, e.g., tsi-ssi_v02r01_model-input-time-series_s18820101_e20180331_c20180418.txt - !!! NOTE: Please read comments in docstring of Mg_SB_from_daily_file.py BEFORE downloading !!! - In particular, - -- the list in that python code should ONLY be APPENDED to, - which will ensure that any existing final data is not overwritten, - thus ensuring historical reproducibility. -2. from daily/, e.g., tsi_v02r01_daily_s20170101_e20171231_c20180227.nc - !!! NOTE: Please read comments in docstring of TSI_from_daily_files.py BEFORE downloading !!! - In particular, - -- there should be no files with overlapping time periods, and - -- you should NEVER overwrite any existing non-preliminary file, - since this may cause historical non-reproducibility. -NOTE: Only two files are needed for the new year. Existing data has already been obtained -for previous years. There is no need to get historical data for past years. Even though the -file in part 1 above contains historical data, only the data for the new year inside it will -be used if you follow these instructions, in order to maintain historical reproducibility. - -C. Decide where to put the source data. -The source data is put in the directory DATADIR, where DATADIR is currently - /discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar/NRLSSI2/data -This directory is managed by the GMAO Operations Team, so if you is not they, you will need to download -the data somewhere else first and then work with "Ops" to add it to that directory. DATADIR here is NOT -an environmental variable, just a shorthand. But whatever DATADIR is used must also be explicity set in -the "main" of the preprocessing driver: TSI_Mg_SB_merged_from_daily.py. - -D. Preparing the preprocessor. -1. Add the filename from B.1. to the filenames list in Mg_SB_from_daily_file.py. Specifically APPEND -it to the END of the list in the default argument of the __init__ of class Mg_SB_Daily -in Mg_SB_from_daily_file.py. -2. Set the OUTDIR in the "main" of TSI_Mg_SB_merged_from_daily.py to where you want to store the -pre-processed file for the neaw year. - -E. Running the preprocessor. -python TSI_Mg_SB_merged_from_daily.py -After you exit the program, rename the NRLSSI2.VYYYY.txt in OUTDIR to the correct year. - -F. Storing the Output file -Get "Ops" to put the new NRLSSI2.VYYYY.txt from your OUTDIR into their directory: - /discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar - -G. Symlinks -There is a symlink in later "Ops solar" directory named NRLSSI2.txt. -It will typically be pointed to the file NRLSSI2.vYYYY.txt for the new year, but that decision must -be approved by Ops and by the Modelling Team and Data Assimilation Team heads. - -H. Using the data. -The GEOS5 run should have the following lines in its ASGCM.rc: +# Pre-processing NRLSSI2 Data for GEOS5 + +*Peter Norris, April 2024 — Matt Thompson, April 2025* + +> **Note:** Starting with 2024 data, NRL released version v03r00 of the ancillary text files, +> which use a different column format than the legacy v02r01 files. See [Section I](#i-handling-nrlssi2-v03r00-input-data) for details. + +--- + +## A. Introduction + +In the GEOS5 RRTMG and RRTMGP solar codes, the Solar Spectral Irradiance (SSI) at TOA is +calculated from NRLSSI2 Total Solar Irradiance (TSI) and Mg (facular) and SB (sunspot) indices. +This data is currently updated yearly, when the finalized yearly products become available, which +is generally in late January to late February of the following year. For example, the finalized +2023 data came out on 2024-02-23. This README describes how to get the NRLSSI2 source data and +how to pre-process it into a text file read by the SolarGC. + +--- + +## B. Get the Data for the New Year + +The source data is obtained from . +You can use `wget https://...` from Discover. + +Two data files are required for each new year: + +### B.1. Ancillary file (from `ancillary-data/`) + +> **Before downloading, read the docstring in `Mg_SB_from_daily_file.py`.** + +Example filenames: +- v02r01 (through 2023): `tsi-ssi_v02r01_model-input-time-series_s18820101_e20231231_c20240221.txt` +- v03r00 (2024 onward): `tsi-ssi_v03r00_model-input-time-series_s18740509_e20251231_c20260305.txt` + +Key rules: +- The `filenames` list in the code must **only ever be appended to** — never reordered or + shortened — to ensure existing final data is never overwritten (historical reproducibility). +- v02r01 files are **no longer updated past 2023**. For 2024 onward, download v03r00 files. +- v02r01 files **must appear before** any v03r00 files in the list (see [Section I](#i-handling-nrlssi2-v03r00-input-data)). + +### B.2. Daily TSI file (from `daily/`) + +> **Before downloading, read the docstring in `TSI_from_daily_files.py`.** + +Example filename: `tsi_v02r01_daily_s20170101_e20171231_c20180227.nc` + +Key rules: +- There must be **no files with overlapping time periods**. +- **Never overwrite** any existing non-preliminary file, as this may cause historical + non-reproducibility. + +> **Note:** Only two files are needed for the new year. Existing data has already been obtained +> for previous years. Even though the ancillary file contains historical data, only the data for +> the new year will be used if you follow these instructions. + +--- + +## C. Decide Where to Put the Source Data + +The source data goes in `DATADIR`, which is currently: + +``` +/discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar/NRLSSI2/data +``` + +This directory is managed by the GMAO Operations Team. If you are not they, download the data +somewhere else first and then work with "Ops" to add it to that directory. `DATADIR` is not an +environment variable — just a shorthand. Whatever path is used must also be explicitly set in +the `__main__` block of `TSI_Mg_SB_merged_from_daily.py`. + +--- + +## D. Preparing the Preprocessor + +1. **Append** the new ancillary filename (from B.1) to the end of the `filenames` list in the + `__init__` of class `Mg_SB_Daily` in `Mg_SB_from_daily_file.py`. + + > **Important:** All v02r01 files must appear before any v03r00 files in the list. The v03 + > conversion relies on learning a mapping from the overlap period, which requires v02 data + > to already be loaded. + +2. Set `DATADIR` and `OUTDIR` in the `__main__` block of `TSI_Mg_SB_merged_from_daily.py` to + where you want to read the source data and write the pre-processed output. + +--- + +## E. Running the Preprocessor + +```bash +python3 TSI_Mg_SB_merged_from_daily.py +``` + +After the script completes, rename `NRLSSI2.vYYYY.txt` in `OUTDIR` to reflect the correct year. + +--- + +## F. Storing the Output File + +Ask "Ops" to place the new `NRLSSI2.vYYYY.txt` from your `OUTDIR` into: + +``` +/discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar +``` + +--- + +## G. Symlinks + +There is a symlink in the "Ops solar" directory named `NRLSSI2.txt`. It will typically be +pointed to `NRLSSI2.vYYYY.txt` for the new year, but that decision must be approved by Ops +and by the Modelling Team and Data Assimilation Team heads. + +--- + +## H. Using the Data + +The GEOS5 run should have the following lines in its `ASGCM.rc`: + +``` USE_NRLSSI2: .TRUE. SOLAR_CYCLE_FILE_NAME: ExtData/g5gcm/solar/NRLSSI2.txt -(Note: These will only be used if the solar code being run is RRTMG or RRTMGP. Chou-Suarez runs do -not use NRLSSI2 data). -PS: You are free to change the use of the symlink NRLSSI2.txt to any specific NRLSSI2.vYYYY.txt -if you want reproducibility to a specific historical run. +``` + +> These settings are only used if the solar code is RRTMG or RRTMGP. Chou-Suarez runs do not +> use NRLSSI2 data. + +You may replace the symlink `NRLSSI2.txt` with a specific `NRLSSI2.vYYYY.txt` if you need +reproducibility to a particular historical run. + +--- + +## I. Handling NRLSSI2 v03r00 Input Data + +### Background + +The GEOS radiation code expects solar variability inputs in the legacy NRLSSI2 v02r01 format: + +| Variable | Description | +|------------|------------------------------------------------------| +| `MgIndex` | Facular brightening proxy (unitless) | +| `SBindex` | Sunspot darkening proxy (millionths of a hemisphere) | + +The newer NRLSSI2 v03r00 ancillary text file instead provides: + +| Variable | Description | +|------------|--------------------------------------| +| `Bolfac` | Bolometric facular contribution | +| `Bolspot` | Bolometric sunspot contribution | + +These are physically related to the older indices but use different scaling. The v03r00 file +also extends the historical record back to 1874-05-09 (vs. 1882-01-01 for v02r01), though +only the 2024+ data is new. + +### Approach + +Rather than modifying the upstream NRLSSI2 files or the GEOS radiation code, the conversion +is performed in `Mg_SB_from_daily_file.py` at read time using an empirical linear mapping: + +1. Read the historical v02r01 files first (they must come first in the `filenames` list). +2. When a v03r00 file is encountered, identify the overlapping dates between v02 and v03. +3. Fit linear least-squares relationships mapping v03 quantities to the legacy format: + ``` + MgIndex_v02 = a_Mg * Bolfac + b_Mg + SBindex_v02 = a_SB * Bolspot + b_SB + ``` +4. Apply those fitted relationships to convert all v03-only dates into legacy Mg/SB values. + +### Fitted Coefficients + +Computed from the 1882–2023 overlap period: + +``` +Mg: Mg_v02 ≈ 0.007506 * Bolfac + 0.150331 (R² = 0.9049, n=51864) +SB: SB_v02 ≈ 1941.389784 * Bolspot + 42.906433 (R² = 0.9940, n=51864) +``` + +The coefficients are fitted automatically at runtime and printed to stdout. They will update +if NOAA/NCEI revises the underlying datasets. + +### Interpretation + +- **SB mapping** (R² = 0.994): Very tight fit. `SBindex` is well represented by a linear + scaling of `Bolspot`. +- **Mg mapping** (R² = 0.905): Clear linear relationship, but with more scatter. Post-2023 + `MgIndex` values carry more uncertainty than pre-2024 values. + +### Output Format + +The final GEOS-facing output file remains in the same legacy format as before: + +``` +# yyyy doy TSI:W/m2 MgIndex SBindex +``` + +### Notes and Caveats + +- This conversion is **empirical**, not a first-principles physical derivation. +- Post-2023 Mg/SB values are approximations derived from v03 `Bolfac`/`Bolspot` quantities. +- The diagnostic plot produced by the preprocessor shades the post-2024 region to indicate + where v03-derived mapped values begin. +- If NOAA/NCEI revises either the v02 or v03 datasets, the fitted coefficients should be + recomputed and verified. diff --git a/GEOS_RadiationShared/NRLSSI2/TSI_Mg_SB_merged_from_daily.py b/GEOS_RadiationShared/NRLSSI2/TSI_Mg_SB_merged_from_daily.py index 04aa28d..3d83129 100644 --- a/GEOS_RadiationShared/NRLSSI2/TSI_Mg_SB_merged_from_daily.py +++ b/GEOS_RadiationShared/NRLSSI2/TSI_Mg_SB_merged_from_daily.py @@ -1,6 +1,6 @@ -''' +""" Get TSI and Solar Mg and SB indices from NRLSSI2 daily data -''' +""" import os import numpy as np @@ -10,150 +10,269 @@ from TSI_from_daily_files import TSI_Daily from Mg_SB_from_daily_file import Mg_SB_Daily + # because datetime.strftime doesnt work before 1900 def _yyyymmdd(t): - return('%04d%02d%02d' % (t.year, t.month, t.day)) + return "%04d%02d%02d" % (t.year, t.month, t.day) + class TSI_Mg_SB_Merged_Daily: - ''' Daily time series for TSI and Solar Mg and SB indices from NRLSSI2 ''' - - def __init__(self, DATADIR, verbose=False): - - self.verbose = verbose - - # get each data set - zM = Mg_SB_Daily(DATADIR, verbose=verbose) - zT = TSI_Daily(DATADIR, verbose=verbose) - - # form the INTERSECTION: - # (each set is indexed by yyyymmdd, unique in each set) - self._data = {} - self.nfinal = 0 - first = True - for yyyymmdd in (zM.keys() & zT.keys()): - - # load data dictionary - rc1, Mg, SB = zM.getday(yyyymmdd) - rc2, TSI, TSI_UNC = zT.getday(yyyymmdd) - rc = -1 if rc1 == -1 or rc2 == -1 else 0 - self._data[yyyymmdd] = (rc, TSI, Mg, SB, TSI_UNC) - - # keep track of min and max final times - if rc == 0: - self.nfinal += 1 - d = datetime.strptime(yyyymmdd,'%Y%m%d').date() - if first: - self.date_min_final = d - self.date_max_final = d - first = False - else: - if d < self.date_min_final: self.date_min_final = d - if d > self.date_max_final: self.date_max_final = d - - def getday(self, yyyymmdd): - ''' - get daily TSI, Mg and SB values for daily string yyyymmdd - returns (rc, TSI, Mg, SB, TSI_UNC) - rc = 0 (final), -1 (prelim), -2 (unavailable) - ''' - if yyyymmdd not in self._data: - return -2, np.nan, np.nan, np.nan - else: - return self._data[yyyymmdd] - - def gettime(self, t): - ''' - get time-interpolated TSI, Mg, and SB values for datetime t - returns (rc, TSI, Mg, SB, TSI_UNC) - rc = 0 (final), -1 (prelim), -2 (unavailable) - ''' - - # assume daily average valid at noon - tnoon = datetime(t.year,t.month,t.day,12) - vnoon = self.getday(_yyyymmdd(tnoon)) - if t == tnoon: return vnoon - - # other noon bracketing t - tother = tnoon + timedelta(days=(-1 if t < tnoon else +1)) - vother = self.getday(_yyyymmdd(tother)) - - # fraction that the other daily average contributes - fother = abs((t-tnoon).total_seconds()) / 86400. - - # only interpolate if both days exist - if vnoon[0] == -2 or vother[0] == -2: - return (-2, np.nan, np.nan, np.nan) - else: - # now both days available - TSI = vnoon[1] * (1-fother) + vother[1] * fother - Mg = vnoon[2] * (1-fother) + vother[2] * fother - SB = vnoon[3] * (1-fother) + vother[3] * fother - TSI_UNC = vnoon[4] * (1-fother) + vother[4] * fother - rc = -1 if vnoon[0] == -1 or vother[0] == -1 else 0 - return (rc, TSI, Mg, SB, TSI_UNC) - - def final_date_range(self, - yyyymmdd0, yyyymmdd1, - no_gaps=True): - ''' final only lists over date range ''' - - d0 = datetime.strptime(yyyymmdd0,'%Y%m%d').date() - d1 = datetime.strptime(yyyymmdd1,'%Y%m%d').date() - d = d0; dates = []; TSI = []; Mg = []; SB = [] - while (d <= d1): - v = self.getday(_yyyymmdd(d)) - if v[0] == 0: - # only include finals - dates.append(d) - TSI.append(v[1]) - Mg.append( v[2]) - SB.append( v[3]) - elif no_gaps: - raise RuntimeError('no_gaps: intervening non-final date found') - d += timedelta(days=1) - return dates, TSI, Mg, SB - - def _plot_all_final(self): - dates, TSI, Mg, SB = self.final_date_range( - _yyyymmdd(self.date_min_final), - _yyyymmdd(self.date_max_final), - no_gaps=False) - spyear = 365.25 * 86400. - dy = [(d-self.date_min_final).total_seconds()/spyear for d in dates] - plt.subplot(311); plt.plot(dy,TSI,'b-'); plt.ylabel('TSI') - plt.subplot(312); plt.plot(dy, Mg,'b-'); plt.ylabel('Mg') - plt.subplot(313); plt.plot(dy, SB,'b-'); plt.ylabel('SB') - plt.xlabel('years since %s' % self.date_min_final) - - def output_final_textfile(self, filename): - f = open(filename,'w') - f.write('# NRLSSI2 daily input\n') - f.write('# treat daily values as valid at 12:00 GMT\n') - f.write('# yyyy doy TSI:W/m2 MgIndex SBindex\n') - for d, TSI, Mg, SB in zip(*self.final_date_range( - _yyyymmdd(self.date_min_final), _yyyymmdd(self.date_max_final))): - f.write(' %04d %03d %8.3f %8.6f %9.4f\n' % ( - d.year, d.timetuple().tm_yday, TSI, Mg, SB)) - f.close() - -if __name__ == '__main__': - - DATADIR = '/discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar/NRLSSI2/data' - OUTDIR = os.sep.join((os.environ['NOBACKUP'],'NRLSSI2','output')) - - z = TSI_Mg_SB_Merged_Daily(DATADIR) -# print('16000101', z.getday('16000101')) -# print('20161130', z.getday('20161130')) -# print('20161201', z.getday('20161201')) -# print('20161202', z.getday('20161202')) -# t = datetime.strptime('2016-12-01 10:00:00','%Y-%m-%d %H:%M:%S') -# print(t, z.gettime(t)) - - z.output_final_textfile(os.sep.join((OUTDIR,'NRLSSI2.vYYYY.txt'))) - - plt.figure(figsize=(6,12)) - z._plot_all_final() -# plt.savefig(os.sep.join(('gx','TSInMgSB_plot_all_final.png')), -# pad_inches=0.33,bbox_inches='tight',dpi=100) - plt.show() + """Daily time series for TSI and Solar Mg and SB indices from NRLSSI2""" + + def __init__(self, DATADIR, verbose=True): + + self.verbose = verbose + + # get each data set + zM = Mg_SB_Daily(DATADIR, verbose=verbose) + zT = TSI_Daily(DATADIR, verbose=verbose) + + # form the INTERSECTION: + # (each set is indexed by yyyymmdd, unique in each set) + self._data = {} + self.nfinal = 0 + first = True + for yyyymmdd in zM.keys() & zT.keys(): + # load data dictionary + rc1, Mg, SB = zM.getday(yyyymmdd) + rc2, TSI, TSI_UNC = zT.getday(yyyymmdd) + rc = -1 if rc1 == -1 or rc2 == -1 else 0 + self._data[yyyymmdd] = (rc, TSI, Mg, SB, TSI_UNC) + + # keep track of min and max final times + if rc == 0: + self.nfinal += 1 + d = datetime.strptime(yyyymmdd, "%Y%m%d").date() + if first: + self.date_min_final = d + self.date_max_final = d + first = False + else: + if d < self.date_min_final: + self.date_min_final = d + if d > self.date_max_final: + self.date_max_final = d + + def getday(self, yyyymmdd): + """ + get daily TSI, Mg and SB values for daily string yyyymmdd + returns (rc, TSI, Mg, SB, TSI_UNC) + rc = 0 (final), -1 (prelim), -2 (unavailable) + """ + if yyyymmdd not in self._data: + return -2, np.nan, np.nan, np.nan + else: + return self._data[yyyymmdd] + + def gettime(self, t): + """ + get time-interpolated TSI, Mg, and SB values for datetime t + returns (rc, TSI, Mg, SB, TSI_UNC) + rc = 0 (final), -1 (prelim), -2 (unavailable) + """ + + # assume daily average valid at noon + tnoon = datetime(t.year, t.month, t.day, 12) + vnoon = self.getday(_yyyymmdd(tnoon)) + if t == tnoon: + return vnoon + + # other noon bracketing t + tother = tnoon + timedelta(days=(-1 if t < tnoon else +1)) + vother = self.getday(_yyyymmdd(tother)) + + # fraction that the other daily average contributes + fother = abs((t - tnoon).total_seconds()) / 86400.0 + + # only interpolate if both days exist + if vnoon[0] == -2 or vother[0] == -2: + return (-2, np.nan, np.nan, np.nan) + else: + # now both days available + TSI = vnoon[1] * (1 - fother) + vother[1] * fother + Mg = vnoon[2] * (1 - fother) + vother[2] * fother + SB = vnoon[3] * (1 - fother) + vother[3] * fother + TSI_UNC = vnoon[4] * (1 - fother) + vother[4] * fother + rc = -1 if vnoon[0] == -1 or vother[0] == -1 else 0 + return (rc, TSI, Mg, SB, TSI_UNC) + + def final_date_range(self, yyyymmdd0, yyyymmdd1, no_gaps=True): + """final only lists over date range""" + + d0 = datetime.strptime(yyyymmdd0, "%Y%m%d").date() + d1 = datetime.strptime(yyyymmdd1, "%Y%m%d").date() + d = d0 + dates = [] + TSI = [] + Mg = [] + SB = [] + while d <= d1: + v = self.getday(_yyyymmdd(d)) + if v[0] == 0: + # only include finals + dates.append(d) + TSI.append(v[1]) + Mg.append(v[2]) + SB.append(v[3]) + elif no_gaps: + raise RuntimeError("no_gaps: intervening non-final date found") + d += timedelta(days=1) + return dates, TSI, Mg, SB + + def _plot_all_final(self): + """ + Recreates the original 3-panel plot (TSI, Mg, SB) but computes + the x-axis from the class's final date range. It then shades the + post-2024 region using _shade_v03_block. + Draws on the CURRENT figure (3 rows, 1 column). + """ + import numpy as np + import matplotlib.pyplot as plt + from datetime import datetime, date + + # Pull the final date/series from the class API you already use in the writer + start = _yyyymmdd(self.date_min_final) + end = _yyyymmdd(self.date_max_final) + dates, TSI, Mg, SB = self.final_date_range(start, end) + + # Normalize date_min_final -> date + d0 = self.date_min_final + if isinstance(d0, str): + d0 = datetime.strptime(d0, "%Y-%m-%d").date() + elif hasattr(d0, "date"): # datetime -> date + d0 = d0.date() + + # x-axis: years since first final date + dy = np.array([(dd - d0).days / 365.25 for dd in dates], dtype=float) + TSI = np.asarray(TSI, dtype=float) + Mg = np.asarray(Mg, dtype=float) + SB = np.asarray(SB, dtype=float) + + # Plot on 3 subplots, like the original + plt.subplot(311) + plt.plot(dy, TSI, "-") + plt.ylabel("TSI") + plt.subplot(312) + plt.plot(dy, Mg, "-") + plt.ylabel("Mg") + plt.subplot(313) + plt.plot(dy, SB, "-") + plt.ylabel("SB") + plt.xlabel(f"years since {d0.isoformat()}") + + # Shade post-2024 (v03-mapped) region + self._shade_v03_block(dy) + + def _shade_v03_block(self, dy): + """ + Decorate the current 3-panel figure by shading the post-2024 region and + adding a small label. Uses ONLY the provided dy (years since start) and + self.date_min_final to compute the boundary position. + Call this AFTER you have already plotted TSI, Mg, SB on 3 subplots. + """ + from datetime import datetime, date + from matplotlib.patches import Patch + from matplotlib.transforms import blended_transform_factory + import numpy as np + import matplotlib.pyplot as plt + + # Normalize date_min_final -> date + d0 = self.date_min_final + if isinstance(d0, str): + d0 = datetime.strptime(d0, "%Y-%m-%d").date() + elif hasattr(d0, "date"): # datetime -> date + d0 = d0.date() + elif not hasattr(d0, "toordinal"): + # Very defensive fallback (won't affect data, only label position) + d0 = date(1882, 1, 1) + + boundary = date(2024, 1, 1) + bdy_years = (boundary - d0).days / 365.25 + + fig = plt.gcf() + axes = fig.axes + if not axes: + return + + x_end = float(np.asarray(dy)[-1]) + for ax in axes: + # shade post-2024 region + ax.axvspan( + bdy_years, x_end, facecolor="lightgoldenrodyellow", alpha=0.6, zorder=0 + ) + # vertical boundary line + ax.axvline(bdy_years, color="k", linestyle="--", lw=1.1, zorder=3) + # thin top ribbon + centered label above data + trans = blended_transform_factory(ax.transData, ax.transAxes) + ax.axvspan( + bdy_years, + x_end, + ymin=0.965, + ymax=0.985, + facecolor="#ffd54f", + alpha=0.9, + zorder=4, + ) + ax.text( + (bdy_years + x_end) / 2.0, + 0.982, + "v03r00 mapped to legacy units", + transform=trans, + ha="center", + va="top", + fontsize=8, + zorder=5, + ) + + # legend in bottom panel + legend_patch = Patch( + facecolor="lightgoldenrodyellow", + edgecolor="none", + label="v03r00 mapped (post-2024)", + ) + axes[-1].legend(handles=[legend_patch], loc="upper left", frameon=False) + + def output_final_textfile(self, filename): + f = open(filename, "w") + f.write("# NRLSSI2 daily input\n") + f.write("# treat daily values as valid at 12:00 GMT\n") + f.write("# yyyy doy TSI:W/m2 MgIndex SBindex\n") + for d, TSI, Mg, SB in zip( + *self.final_date_range( + _yyyymmdd(self.date_min_final), _yyyymmdd(self.date_max_final) + ) + ): + f.write( + " %04d %03d %8.3f %8.6f %9.4f\n" + % (d.year, d.timetuple().tm_yday, TSI, Mg, SB) + ) + f.close() + + +if __name__ == "__main__": + DATADIR = "/discover/nobackup/projects/gmao/share/gmao_ops/fvInput/g5gcm/solar/NRLSSI2/data" + OUTDIR = os.sep.join((os.environ["NOBACKUP"], "NRLSSI2", "output")) + + # mathomp4 personal paths (for reference): + # DATADIR = '/discover/nobackup/mathomp4/NRLSSI2-Construct/GEOSradiation_GridComp/GEOS_RadiationShared/NRLSSI2/NRLSSI2/data' + # OUTDIR = '/discover/nobackup/mathomp4/NRLSSI2-Construct/GEOSradiation_GridComp/GEOS_RadiationShared/NRLSSI2/NRLSSI2/output' + + z = TSI_Mg_SB_Merged_Daily(DATADIR) + # print('16000101', z.getday('16000101')) + # print('20161130', z.getday('20161130')) + # print('20161201', z.getday('20161201')) + # print('20161202', z.getday('20161202')) + # t = datetime.strptime('2016-12-01 10:00:00','%Y-%m-%d %H:%M:%S') + # print(t, z.gettime(t)) + + z.output_final_textfile(os.sep.join((OUTDIR, "NRLSSI2.vYYYY.txt"))) + plt.figure(figsize=(6, 12)) + z._plot_all_final() + plt.savefig( + os.sep.join(("gx", "TSInMgSB_plot_all_final.png")), + pad_inches=0.33, + bbox_inches="tight", + dpi=100, + ) + plt.show()