From 452b12c69242ad0b85eb0150ce5e09eb71795434 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Wed, 8 Apr 2026 15:29:07 -0700 Subject: [PATCH 1/3] Fix GNSS data retrieval and downloading functions Updated the get_stats_by_llh function to prioritize IGS20 coordinates over legacy data. Enhanced the download_UNR function to include a fallback to the legacy archive when downloading tropospheric delays. Context: There appears to be ongoing migration of data in the UNR archive such that data from much of the eastern hemisphere is disrupted after 2023. This PR is intended to circumvent this. --- tools/RAiDER/gnss/downloadGNSSDelays.py | 77 +++++++++++++++++++------ 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/tools/RAiDER/gnss/downloadGNSSDelays.py b/tools/RAiDER/gnss/downloadGNSSDelays.py index 07ee4ccd..92b34b70 100755 --- a/tools/RAiDER/gnss/downloadGNSSDelays.py +++ b/tools/RAiDER/gnss/downloadGNSSDelays.py @@ -69,19 +69,35 @@ def get_station_list( def get_stats_by_llh(llhBox=None, baseURL=_UNR_URL): """ - Function to pull lat, lon, height, beginning date, end date, and number of solutions for stations inside the bounding box llhBox. - llhBox should be a tuple in SNWE format. + Pull lat, lon, height for stations inside the bounding box from + both legacy and IGS20 UNR holdings. Prioritizes IGS20 coordinates. 
""" if llhBox is None: llhBox = [-90, 90, -180, 180] - S, N, W, E = llhBox - stationHoldings = f'{baseURL}NGLStationPages/llh.out' - # it's a file like object and works just like a file + url_legacy = f'{baseURL}NGLStationPages/llh.out' + url_igs20 = f'{baseURL}gps_timeseries/IGS20/llh/llh.out' + col_names = ['ID', 'Lat', 'Lon', 'Hgt_m'] + + # Read legacy list + try: + stat_leg = pd.read_csv(url_legacy, sep=r'\s+', names=col_names) + except Exception as e: + logger.warning("Failed to fetch legacy llh.out: %s", e) + stat_leg = pd.DataFrame(columns=col_names) + + # Read IGS20 list + try: + stat_igs = pd.read_csv(url_igs20, sep=r'\s+', names=col_names) + except Exception as e: + logger.warning("Failed to fetch IGS20 llh.out: %s", e) + stat_igs = pd.DataFrame(columns=col_names) - stations = pd.read_csv(stationHoldings, sep=r'\s+', names=['ID', 'Lat', 'Lon', 'Hgt_m']) + # Combine prioritizing IGS20 (placing it first and keeping 'first') + stations = pd.concat([stat_igs, stat_leg], ignore_index=True) + stations.drop_duplicates(subset=['ID'], keep='first', inplace=True) - # convert lons from [-360, 0] to [-180, 180] + # Convert lons from [-360, 0] to [-180, 180] stations['Lon'] = ((stations['Lon'].values + 180) % 360) - 180 stations = filterToBBox(stations, llhBox) @@ -142,7 +158,7 @@ def download_tropo_delays( def download_UNR(statID, year, writeDir=".", download=False, baseURL=_UNR_URL): """ Download a zip file containing tropospheric delays for a given - station and year. + station and year, with a fallback to the legacy archive. The URL format is: http://geodesy.unr.edu/gps_timeseries/IGS20/trop// @@ -168,27 +184,50 @@ def download_UNR(statID, year, writeDir=".", download=False, baseURL=_UNR_URL): dict Dictionary with keys 'ID', 'year', and 'path'. 
""" - if baseURL not in [_UNR_URL]: +if baseURL not in [_UNR_URL]: raise NotImplementedError( f"Data repository {baseURL} has not yet been implemented" ) - URL = ( + stat_upper = statID.upper() + + # First attempt: IGS20 framework + url_igs20 = ( f"{baseURL}gps_timeseries/IGS20/trop/" - f"{statID.upper()}/{statID.upper()}.{year}.trop.zip" + f"{stat_upper}/{stat_upper}.{year}.trop.zip" + ) + + # Fallback: Legacy operational framework + url_legacy = ( + f"{baseURL}gps_timeseries/trop/" + f"{stat_upper}/{stat_upper}.{year}.trop.zip" ) - logger.debug("Currently checking station %s in %s", statID, year) + logger.debug("Checking station %s in %s", statID, year) if download: - saveLoc = os.path.abspath( - os.path.join(writeDir, f"{statID.upper()}.{year}.trop.zip") - ) - filepath = download_url(URL, saveLoc) - if filepath == "": - raise ValueError("Year or station ID does not exist") + filename = f"{stat_upper}.{year}.trop.zip" + save_loc = os.path.abspath(os.path.join(writeDir, filename)) + + # Try IGS20 first + filepath = download_url(url_igs20, save_loc) + + # If IGS20 is missing, try legacy + if not filepath: + logger.debug( + "IGS20 not found for %s in %s. 
Trying legacy.", + statID, year + ) + filepath = download_url(url_legacy, save_loc) + + if not filepath: + raise ValueError( + "Year or station ID does not exist in either archive" + ) else: - filepath = check_url(URL) + filepath = check_url(url_igs20) + if not filepath: + filepath = check_url(url_legacy) return {"ID": statID, "year": year, "path": filepath} From 624483ddf0f1f82c9893d3511943853920e63bf1 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Wed, 8 Apr 2026 15:41:29 -0700 Subject: [PATCH 2/3] Fix indentation for baseURL check in downloadGNSSDelays.py --- tools/RAiDER/gnss/downloadGNSSDelays.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/RAiDER/gnss/downloadGNSSDelays.py b/tools/RAiDER/gnss/downloadGNSSDelays.py index 92b34b70..23769dd1 100755 --- a/tools/RAiDER/gnss/downloadGNSSDelays.py +++ b/tools/RAiDER/gnss/downloadGNSSDelays.py @@ -184,7 +184,7 @@ def download_UNR(statID, year, writeDir=".", download=False, baseURL=_UNR_URL): dict Dictionary with keys 'ID', 'year', and 'path'. """ -if baseURL not in [_UNR_URL]: + if baseURL not in [_UNR_URL]: raise NotImplementedError( f"Data repository {baseURL} has not yet been implemented" ) From ba9b1b8614b8066715143c1257f86a13a82b6348 Mon Sep 17 00:00:00 2001 From: Simran S Sangha Date: Thu, 9 Apr 2026 14:48:04 -0700 Subject: [PATCH 3/3] Update CHANGELOG.md for recent fixes and changes Added entry for PR #794 to address data migration issues in the UNR archive affecting the eastern hemisphere. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34cfa6a1..72741bf2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html). * [743](https://github.com/dbekaert/RAiDER/pull/743) - Switched from HTTPS to DAP4 for retrieving MERRA2 data, and suppressed a warning for using DAP4 for GMAO data where doing so is not possible. 
### Fixed +* [794](https://github.com/dbekaert/RAiDER/pull/794) - Work around the ongoing data migration in the UNR archive, which has disrupted data for much of the eastern hemisphere after 2023, by prioritizing IGS20 station coordinates and falling back to the legacy archive for tropospheric delay downloads. * [787](https://github.com/dbekaert/RAiDER/pull/787) - Updated weather model uncertainty estimation and included error thresholds to discard unreliable observations. * [782](https://github.com/dbekaert/RAiDER/pull/782) - Fixed bug with handling corrupted or non-existent UNR hosted GNSS ZIP files. * [781](https://github.com/dbekaert/RAiDER/pull/781) - In the combine workflow, accurately pass and write out matching midnight datetimes.