darothen · darothen · May 19, 2019 · May 19, 2019 · May 19, 2019
diff --git a/xbpch/bpch.py b/xbpch/bpch.py
@@ -100,7 +100,7 @@ class BPCHFile(object):
     """
 
     def __init__(self, filename, mode='rb', endian='>',
-                 diaginfo_file='', tracerinfo_file='', eager=False,
+                 diaginfo_file='', tracerinfo_file='', legacy=False, eager=False,
                  use_mmap=False, dask_delayed=False):
         """ Load a BPCHFile
 
@@ -116,6 +116,8 @@ def __init__(self, filename, mode='rb', endian='>',
         {tracerinfo, diaginfo}_file : str
             Path to the tracerinfo.dat and diaginfo.dat files containing
             metadata pertaining to the output in the bpch file being read.
+        legacy : bool
+            Flag indicating that this data was generated prior to GEOS-Chem v12.2.0
         eager : bool
             Flag to immediately read variable data; if "False", then nothing
             will be read from the file and you'll need to do so manually
@@ -155,7 +157,7 @@ def __init__(self, filename, mode='rb', endian='>',
 
         # Don't necessarily need to save diag/tracer_dict yet
         self.diaginfo_df, _ = get_diaginfo(self.diaginfo_file)
-        self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file)
+        self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file, legacy)
 
         # Container for bundles contained in the output file.
         self.var_data = {}

diff --git a/xbpch/core.py b/xbpch/core.py
@@ -27,6 +27,7 @@
 def open_bpchdataset(filename, fields=[], categories=[],
                      tracerinfo_file='tracerinfo.dat',
                      diaginfo_file='diaginfo.dat',
+                     legacy=False,
                      endian=">", decode_cf=True,
                      memmap=True, dask=True, return_store=False):
     """ Open a GEOS-Chem BPCH file output as an xarray Dataset.
@@ -40,6 +41,8 @@ def open_bpchdataset(filename, fields=[], categories=[],
         the metadata corresponding to each variable in the output dataset.
         If not provided, will look for them in the current directory or
         fall back on a generic set.
+    legacy : bool, optional
+        Flag indicating that this data was generated prior to GEOS-Chem v12.2.0
     fields : list, optional
         List of a subset of variable names to return. This can substantially
         improve read performance. Note that the field here is just the tracer
@@ -76,7 +79,7 @@ def open_bpchdataset(filename, fields=[], categories=[],
 
     store = BPCHDataStore(
         filename, fields=fields, categories=categories,
-        tracerinfo_file=tracerinfo_file,
+        tracerinfo_file=tracerinfo_file, legacy=legacy,
         diaginfo_file=diaginfo_file, endian=endian,
         use_mmap=memmap, dask_delayed=dask
     )
@@ -231,7 +234,7 @@ class BPCHDataStore(AbstractDataStore):
 
     def __init__(self, filename, fields=[], categories=[], fix_cf=False,
                  mode='r', endian='>',
-                 diaginfo_file='', tracerinfo_file='',
+                 diaginfo_file='', tracerinfo_file='', legacy=False,
                  use_mmap=False, dask_delayed=False):
 
         # Track the metadata accompanying this dataset.
@@ -266,6 +269,7 @@ def __init__(self, filename, fields=[], categories=[], fix_cf=False,
         self._bpch = BPCHFile(self.filename, self.mode, self.endian,
                               tracerinfo_file=tracerinfo_file,
                               diaginfo_file=diaginfo_file,
+                              legacy=legacy,
                               eager=False, use_mmap=self._mmap,
                               dask_delayed=self._dask)
         self.fields = fields

diff --git a/xbpch/util/diaginfo.py b/xbpch/util/diaginfo.py
@@ -5,7 +5,7 @@
 import os
 import pandas as pd
 
-from .. common import C_MOLECULAR_WEIGHT
+# from .. common import C_MOLECULAR_WEIGHT
 
 #: Info for parsing diagnostic records
 diag_rec = namedtuple("diag_rec",
@@ -22,17 +22,18 @@
 ]
 
 #: Info for parsing tracer records
+_LEGACY_NAME_WIDTH = 8
 tracer_rec = diag_rec
 tracer_recs = [
-    tracer_rec('name', 8, str, None, True, "Tracer name"),
-    tracer_rec("-0", 1, str, ' ', True, None),
+    tracer_rec('name', 31, str, None, True, "Tracer name"),
+    # tracer_rec("-0", 1, str, ' ', True, None),
     tracer_rec('full_name', 30, str, None, True, "Full tracer name"),
     tracer_rec('molwt', 10, float, 1., True, "Molecular weight (kg/mole)"),
     tracer_rec('C', 3, int, 1, True, "# moles C/moles tracer for HCs"),
     tracer_rec('tracer', 9, int, None, True, "Tracer number"),
     tracer_rec('scale', 10, float, 1e9, True, "Standard scale factor to convert to"
                                               " given units"),
-    tracer_rec("-1", 1, str, ' ', True, None),
+    # tracer_rec("-1", 1, str, ' ', True, None),
     tracer_rec('unit', 40, str, 'ppbv', True, "Unit string"),
 ]
 
@@ -66,7 +67,7 @@ def get_diaginfo(diaginfo_file):
     return diag_df, diag_desc
 
 
-def get_tracerinfo(tracerinfo_file):
+def get_tracerinfo(tracerinfo_file, legacy=False):
     """
     Read an output's tracerinfo.dat file and parse into a DataFrame for
     use in selecting and parsing categories.
@@ -75,6 +76,9 @@ def get_tracerinfo(tracerinfo_file):
     ----------
     tracerinfo_file : str
         Path to tracerinfo.dat
+    legacy : bool, optional
+        Flag to indicate that the tracerinfo.dat file was generated *before* GC
+        v12.2.0.
 
     Returns
     -------
@@ -85,11 +89,21 @@ def get_tracerinfo(tracerinfo_file):
     widths = [rec.width for rec in tracer_recs]
     col_names = [rec.name for rec in tracer_recs]
     dtypes = [rec.type for rec in tracer_recs]
-    usecols = [name for name in col_names if not name.startswith('-')]
+    dtypes = {name: dtype for name, dtype in zip(col_names, dtypes)}
+    # usecols = [name for name in col_names if not name.startswith('-')]
+
+    # This isn't a great kluge, but it's a simple way to handle the backwards-
+    # incompatible change in the width specification of the "name" column in
+    # `tracerinfo.dat`s generated with GC >= v12.2.0
+    # if legacy:
+    #     widths[0] = _LEGACY_NAME_WIDTH
+
+    # tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names,
+    #                         dtypes=dtypes, comment="#", header=None,
+    #                         usecols=usecols)
 
-    tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names,
-                            dtypes=dtypes, comment="#", header=None,
-                            usecols=usecols)
+    tracer_df = pd.read_csv(tracerinfo_file, names=col_names, sep=r'\s\s+?',
+                              comment="#", header=None, index_col=False)
 
     # Check an edge case related to a bug in GEOS-Chem v12.0.3 which 
     # erroneously dropped short/long tracer names in certain tracerinfo.dat outputs.
@@ -111,7 +125,7 @@ def get_tracerinfo(tracerinfo_file):
     def _assign_hydrocarbon(row):
         if row['C'] != 1:
             row['hydrocarbon'] = True
-            row['molwt'] = C_MOLECULAR_WEIGHT
+            row['molwt'] = 1. # C_MOLECULAR_WEIGHT
         else:
             row['hydrocarbon'] = False
         return row