Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions xbpch/bpch.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class BPCHFile(object):
"""

def __init__(self, filename, mode='rb', endian='>',
diaginfo_file='', tracerinfo_file='', eager=False,
diaginfo_file='', tracerinfo_file='', legacy=False, eager=False,
use_mmap=False, dask_delayed=False):
""" Load a BPCHFile

Expand All @@ -116,6 +116,8 @@ def __init__(self, filename, mode='rb', endian='>',
{tracerinfo, diaginfo}_file : str
Path to the tracerinfo.dat and diaginfo.dat files containing
metadata pertaining to the output in the bpch file being read.
legacy : bool
Flag indicating that this data (and its accompanying tracerinfo.dat) was
generated prior to GEOS-Chem v12.2.0; forwarded to `get_tracerinfo`.
eager : bool
Flag to immediately read variable data; if "False", then nothing
will be read from the file and you'll need to do so manually
Expand Down Expand Up @@ -155,7 +157,7 @@ def __init__(self, filename, mode='rb', endian='>',

# Don't necessarily need to save diag/tracer_dict yet
self.diaginfo_df, _ = get_diaginfo(self.diaginfo_file)
self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file)
self.tracerinfo_df, _ = get_tracerinfo(self.tracerinfo_file, legacy)

# Container for bundles contained in the output file.
self.var_data = {}
Expand Down
8 changes: 6 additions & 2 deletions xbpch/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
def open_bpchdataset(filename, fields=[], categories=[],
tracerinfo_file='tracerinfo.dat',
diaginfo_file='diaginfo.dat',
legacy=False,
endian=">", decode_cf=True,
memmap=True, dask=True, return_store=False):
""" Open a GEOS-Chem BPCH file output as an xarray Dataset.
Expand All @@ -40,6 +41,8 @@ def open_bpchdataset(filename, fields=[], categories=[],
the metadata corresponding to each variable in the output dataset.
If not provided, will look for them in the current directory or
fall back on a generic set.
legacy : bool, optional
Flag indicating that this data (and its accompanying tracerinfo.dat) was
generated prior to GEOS-Chem v12.2.0.
fields : list, optional
List of a subset of variable names to return. This can substantially
improve read performance. Note that the field here is just the tracer
Expand Down Expand Up @@ -76,7 +79,7 @@ def open_bpchdataset(filename, fields=[], categories=[],

store = BPCHDataStore(
filename, fields=fields, categories=categories,
tracerinfo_file=tracerinfo_file,
tracerinfo_file=tracerinfo_file, legacy=legacy,
diaginfo_file=diaginfo_file, endian=endian,
use_mmap=memmap, dask_delayed=dask
)
Expand Down Expand Up @@ -231,7 +234,7 @@ class BPCHDataStore(AbstractDataStore):

def __init__(self, filename, fields=[], categories=[], fix_cf=False,
mode='r', endian='>',
diaginfo_file='', tracerinfo_file='',
diaginfo_file='', tracerinfo_file='', legacy=False,
use_mmap=False, dask_delayed=False):

# Track the metadata accompanying this dataset.
Expand Down Expand Up @@ -266,6 +269,7 @@ def __init__(self, filename, fields=[], categories=[], fix_cf=False,
self._bpch = BPCHFile(self.filename, self.mode, self.endian,
tracerinfo_file=tracerinfo_file,
diaginfo_file=diaginfo_file,
legacy=legacy,
eager=False, use_mmap=self._mmap,
dask_delayed=self._dask)
self.fields = fields
Expand Down
34 changes: 24 additions & 10 deletions xbpch/util/diaginfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os
import pandas as pd

from .. common import C_MOLECULAR_WEIGHT
# from .. common import C_MOLECULAR_WEIGHT

#: Info for parsing diagnostic records
diag_rec = namedtuple("diag_rec",
Expand All @@ -22,17 +22,18 @@
]

#: Info for parsing tracer records
_LEGACY_NAME_WIDTH = 8
tracer_rec = diag_rec
tracer_recs = [
tracer_rec('name', 8, str, None, True, "Tracer name"),
tracer_rec("-0", 1, str, ' ', True, None),
tracer_rec('name', 31, str, None, True, "Tracer name"),
# tracer_rec("-0", 1, str, ' ', True, None),
tracer_rec('full_name', 30, str, None, True, "Full tracer name"),
tracer_rec('molwt', 10, float, 1., True, "Molecular weight (kg/mole)"),
tracer_rec('C', 3, int, 1, True, "# moles C/moles tracer for HCs"),
tracer_rec('tracer', 9, int, None, True, "Tracer number"),
tracer_rec('scale', 10, float, 1e9, True, "Standard scale factor to convert to"
" given units"),
tracer_rec("-1", 1, str, ' ', True, None),
# tracer_rec("-1", 1, str, ' ', True, None),
tracer_rec('unit', 40, str, 'ppbv', True, "Unit string"),
]

Expand Down Expand Up @@ -66,7 +67,7 @@ def get_diaginfo(diaginfo_file):
return diag_df, diag_desc


def get_tracerinfo(tracerinfo_file):
def get_tracerinfo(tracerinfo_file, legacy=False):
"""
Read an output's tracerinfo.dat file and parse into a DataFrame for
use in selecting and parsing categories.
Expand All @@ -75,6 +76,9 @@ def get_tracerinfo(tracerinfo_file):
----------
tracerinfo_file : str
Path to tracerinfo.dat
legacy : bool, optional
Flag to indicate that the tracerinfo.dat file was generated *before*
GEOS-Chem v12.2.0

Returns
-------
Expand All @@ -85,11 +89,21 @@ def get_tracerinfo(tracerinfo_file):
widths = [rec.width for rec in tracer_recs]
col_names = [rec.name for rec in tracer_recs]
dtypes = [rec.type for rec in tracer_recs]
usecols = [name for name in col_names if not name.startswith('-')]
dtypes = {name: dtype for name, dtype in zip(col_names, dtypes)}
# usecols = [name for name in col_names if not name.startswith('-')]

# This isn't a great kluge, but it's a simple way to handle the backwards-
# incompatible change in the width specification of the "name" column in
# `tracerinfo.dat`s generated with GC >= v12.2.0
# if legacy:
# widths[0] = _LEGACY_NAME_WIDTH

# tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names,
# dtypes=dtypes, comment="#", header=None,
# usecols=usecols)

tracer_df = pd.read_fwf(tracerinfo_file, widths=widths, names=col_names,
dtypes=dtypes, comment="#", header=None,
usecols=usecols)
tracer_df = pd.read_csv(tracerinfo_file, names=col_names, sep=r'\s\s+?',
comment="#", header=None, index_col=False)

# Check an edge case related to a bug in GEOS-Chem v12.0.3 which
# erroneously dropped short/long tracer names in certain tracerinfo.dat outputs.
Expand All @@ -111,7 +125,7 @@ def get_tracerinfo(tracerinfo_file):
def _assign_hydrocarbon(row):
if row['C'] != 1:
row['hydrocarbon'] = True
row['molwt'] = C_MOLECULAR_WEIGHT
row['molwt'] = 1. # C_MOLECULAR_WEIGHT
else:
row['hydrocarbon'] = False
return row
Expand Down