diff --git a/xbpch/bpch.py b/xbpch/bpch.py index ee85195..a98e067 100644 --- a/xbpch/bpch.py +++ b/xbpch/bpch.py @@ -100,7 +100,7 @@ class BPCHFile(object): """ def __init__(self, filename, mode='rb', endian='>', - diaginfo_file='', tracerinfo_file='', eager=False, + diaginfo_file='', tracerinfo_file='', legacy=False, eager=False, use_mmap=False, dask_delayed=False): """ Load a BPCHFile @@ -116,6 +116,8 @@ def __init__(self, filename, mode='rb', endian='>', {tracerinfo, diaginfo}_file : str Path to the tracerinfo.dat and diaginfo.dat files containing metadata pertaining to the output in the bpch file being read. + legacy : bool + Flag indicating that this data was generated prior to GEOS-Chem v12.2.0 eager : bool Flag to immediately read variable data; if "False", then nothing will be read from the file and you'll need to do so manually @@ -155,7 +157,7 @@ def __init__(self, filename, mode='rb', endian='>', # Don't necessarily need to save diag/tracer_dict yet self.diaginfo_df, _ = get_diaginfo(self.diaginfo_file) - self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file) + self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file, legacy) # Container for bundles contained in the output file. self.var_data = {} diff --git a/xbpch/core.py b/xbpch/core.py index 8826668..704ed5b 100644 --- a/xbpch/core.py +++ b/xbpch/core.py @@ -27,6 +27,7 @@ def open_bpchdataset(filename, fields=[], categories=[], tracerinfo_file='tracerinfo.dat', diaginfo_file='diaginfo.dat', + legacy=False, endian=">", decode_cf=True, memmap=True, dask=True, return_store=False): """ Open a GEOS-Chem BPCH file output as an xarray Dataset. @@ -40,6 +41,8 @@ def open_bpchdataset(filename, fields=[], categories=[], the metadata corresponding to each variable in the output dataset. If not provided, will look for them in the current directory or fall back on a generic set. 
+ legacy : bool, optional + Flag indicating that this data was generated prior to GEOS-Chem v12.2.0 fields : list, optional List of a subset of variable names to return. This can substantially improve read performance. Note that the field here is just the tracer @@ -76,7 +79,7 @@ def open_bpchdataset(filename, fields=[], categories=[], store = BPCHDataStore( filename, fields=fields, categories=categories, - tracerinfo_file=tracerinfo_file, + tracerinfo_file=tracerinfo_file, legacy=legacy, diaginfo_file=diaginfo_file, endian=endian, use_mmap=memmap, dask_delayed=dask ) @@ -231,7 +234,7 @@ class BPCHDataStore(AbstractDataStore): def __init__(self, filename, fields=[], categories=[], fix_cf=False, mode='r', endian='>', - diaginfo_file='', tracerinfo_file='', + diaginfo_file='', tracerinfo_file='', legacy=False, use_mmap=False, dask_delayed=False): # Track the metadata accompanying this dataset. @@ -266,6 +269,7 @@ def __init__(self, filename, fields=[], categories=[], fix_cf=False, self._bpch = BPCHFile(self.filename, self.mode, self.endian, tracerinfo_file=tracerinfo_file, diaginfo_file=diaginfo_file, + legacy=legacy, eager=False, use_mmap=self._mmap, dask_delayed=self._dask) self.fields = fields diff --git a/xbpch/util/diaginfo.py b/xbpch/util/diaginfo.py index 1a03766..e3e3416 100644 --- a/xbpch/util/diaginfo.py +++ b/xbpch/util/diaginfo.py @@ -5,7 +5,7 @@ import os import pandas as pd -from .. common import C_MOLECULAR_WEIGHT +# from .. 
common import C_MOLECULAR_WEIGHT #: Info for parsing diagnostic records diag_rec = namedtuple("diag_rec", @@ -22,17 +22,18 @@ ] #: Info for parsing tracer records +_LEGACY_NAME_WIDTH = 8 tracer_rec = diag_rec tracer_recs = [ - tracer_rec('name', 8, str, None, True, "Tracer name"), - tracer_rec("-0", 1, str, ' ', True, None), + tracer_rec('name', 31, str, None, True, "Tracer name"), + # tracer_rec("-0", 1, str, ' ', True, None), tracer_rec('full_name', 30, str, None, True, "Full tracer name"), tracer_rec('molwt', 10, float, 1., True, "Molecular weight (kg/mole)"), tracer_rec('C', 3, int, 1, True, "# moles C/moles tracer for HCs"), tracer_rec('tracer', 9, int, None, True, "Tracer number"), tracer_rec('scale', 10, float, 1e9, True, "Standard scale factor to convert to" " given units"), - tracer_rec("-1", 1, str, ' ', True, None), + # tracer_rec("-1", 1, str, ' ', True, None), tracer_rec('unit', 40, str, 'ppbv', True, "Unit string"), ] @@ -66,7 +67,7 @@ def get_diaginfo(diaginfo_file): return diag_df, diag_desc -def get_tracerinfo(tracerinfo_file): +def get_tracerinfo(tracerinfo_file, legacy=False): """ Read an output's tracerinfo.dat file and parse into a DataFrame for use in selecting and parsing categories. 
@@ -75,6 +76,9 @@ def get_tracerinfo(tracerinfo_file): ---------- tracerinfo_file : str Path to tracerinfo.dat + legacy : logical + Flag to indicate that the tracerinfo.dat file was generated *before* GC + v12.2.0 Returns ------- @@ -85,11 +89,21 @@ def get_tracerinfo(tracerinfo_file): widths = [rec.width for rec in tracer_recs] col_names = [rec.name for rec in tracer_recs] dtypes = [rec.type for rec in tracer_recs] - usecols = [name for name in col_names if not name.startswith('-')] + dtypes = {name: dtype for name, dtype in zip(col_names, dtypes)} + # usecols = [name for name in col_names if not name.startswith('-')] + + # This isn't a great kluge, but it's a simple way to handle the backwards- + # incompatible change in the width specification of the "name" column in + # `tracerinfo.dat`s generated with GC >= v12.2.0 + # if legacy: + # widths[0] = _LEGACY_NAME_WIDTH + + # tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names, + # dtypes=dtypes, comment="#", header=None, + # usecols=usecols) - tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names, - dtypes=dtypes, comment="#", header=None, - usecols=usecols) + tracer_df = pd.read_csv(tracerinfo_file, names=col_names, sep=r'\s\s+?', + comment="#", header=None, index_col=False) # Check an edge case related to a bug in GEOS-Chem v12.0.3 which # erroneously dropped short/long tracer names in certain tracerinfo.dat outputs. @@ -111,7 +125,7 @@ def get_tracerinfo(tracerinfo_file): def _assign_hydrocarbon(row): if row['C'] != 1: row['hydrocarbon'] = True - row['molwt'] = C_MOLECULAR_WEIGHT + row['molwt'] = 1. # C_MOLECULAR_WEIGHT else: row['hydrocarbon'] = False return row