Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions battdat/consistency/time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Check for problems across the columns which describe time"""
from dataclasses import dataclass
from datetime import datetime
from typing import List

import numpy as np

from .base import ConsistencyChecker
from ..data import BatteryDataset


@dataclass
class TestTimeVsTimeChecker(ConsistencyChecker):
    """Ensure that the test time and timestamp columns agree

    Verify that the difference between the first and current row
    for the ``test_time`` (time elapsed since the beginning of cycling)
    and ``time`` (clock datetime) columns agree.
    """

    max_inconsistency: float = 0.1
    """Maximum inconsistency between timestamp and test time (s)"""

    def check(self, dataset: BatteryDataset) -> List[str]:
        output = []
        for name, subset in dataset.tables.items():
            # Only applicable to tables which carry both time columns
            if 'time' not in subset.columns or 'test_time' not in subset.columns:
                continue

            # Ensure that the elapsed time measured by each column agrees:
            #  normalize both to their own minimum so they share a zero point
            test_time_normed = subset['test_time'] - subset['test_time'].min()
            timestamp_normed = subset['time'] - subset['time'].min()
            diffs = np.abs(test_time_normed - timestamp_normed)
            max_diff = diffs.max()
            if max_diff > self.max_inconsistency:
                # Report the row with the worst disagreement
                idx_max = int(np.argmax(diffs))
                date_max = datetime.fromtimestamp(subset['time'].iloc[idx_max])
                time_max = subset['test_time'].iloc[idx_max]
                output.append(f'Test times and timestamps in dataset "{name}" differ by {max_diff:.1e} seconds in row {idx_max}.'
                              f' test_time={int(time_max)} s, time={date_max}')

        return output
8 changes: 3 additions & 5 deletions battdat/io/arbin.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ def group(self, files: Union[str, List[str]], directories: List[str] = None,
if file.lower().endswith('.csv'):
yield file

def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
start_time: float = 0) -> pd.DataFrame:
def read_file(self, file: str) -> pd.DataFrame:

# Read the file and rename the file
df = pd.read_csv(file)
Expand All @@ -32,10 +31,9 @@ def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
df_out = pd.DataFrame()

# Convert the column names
df_out['cycle_number'] = df['Cycle_Index'] + start_cycle - df['Cycle_Index'].min()
df_out['cycle_number'] = df['Cycle_Index'] - df['Cycle_Index'].min()
df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
df_out['file_number'] = file_number # df_out['cycle_number']*0
df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0] + start_time, dtype=float)
df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0], dtype=float)
df_out['current'] = df['Current'] # TODO (wardlt): Check this!?
df_out['temperature'] = df['Temperature']
df_out['internal_resistance'] = df['Internal_Resistance']
Expand Down
62 changes: 44 additions & 18 deletions battdat/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from battdat.data import BatteryDataset
from battdat.schemas import BatteryMetadata
from battdat.schemas.column import ChargingState

PathLike = Union[str, Path]

Expand Down Expand Up @@ -96,20 +97,13 @@ class CycleTestReader(DatasetFileReader):
Adds logic for reading cycling time series from a list of files.
"""

def read_file(self,
file: str,
file_number: int = 0,
start_cycle: int = 0,
start_time: int = 0) -> pd.DataFrame:
def read_file(self, file: str) -> pd.DataFrame:
"""Generate a DataFrame containing the data in this file

The dataframe will be in our standard format

Args:
file: Path to the file
file_number: Number of file, in case the test is spread across multiple files
start_cycle: Index to use for the first cycle, in case test is spread across multiple files
start_time: Test time to use for the start of the test, in case test is spread across multiple files

Returns:
Dataframe containing the battery data in a standard format
Expand All @@ -127,21 +121,53 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
DataFrame containing the information from all files
"""

# Initialize counters for the cycle numbers, etc., Used to determine offsets for the files read
start_cycle = 0
start_time = 0

# Read the data for each file
# Keep track of the ending index and ending time
output_dfs = []
for file_number, file in enumerate(group):
# Read the file
df_out = self.read_file(file, file_number, start_cycle, start_time)
output_dfs.append(df_out)
df_out = self.read_file(file)
df_out['file_number'] = file_number

# Adjust the test time and cycle for subsequent files
if len(output_dfs) > 0:
last_row = output_dfs[-1].iloc[-1]

# Determine the length of rest between last file and current
rest_between_files = 0 # Assume duplicate points if no data are available
if 'time' in last_row and 'time' in df_out:
rest_between_files = max(df_out['time'].iloc[0] - last_row['time'], 0)

# Increment the test time such that it continues from the last file
df_out['test_time'] += last_row['test_time'] + rest_between_files

# Ensure current is zero if the rest between files is nonzero
if rest_between_files != 0 and (last_row['current'] != 0 or df_out['current'].iloc[0] != 0):
# Assume the rest occurs a millisecond later
new_last_row = output_dfs[-1].iloc[-1:].copy()
new_last_row['test_time'] += 1e-3
new_last_row['current'] = 0
if 'time' in new_last_row:
new_last_row['time'] += 1e-3
if 'state' in new_last_row:
new_last_row['state'] = ChargingState.hold
output_dfs[-1] = pd.concat([output_dfs[-1], new_last_row], ignore_index=True)

# Assume the rest ends a millisecond before the new cycle starts
new_first_row = df_out.iloc[:1].copy()
new_first_row['test_time'] -= 1e-3
new_first_row['current'] = 0.
if 'time' in new_first_row:
new_first_row['time'] -= 1e-3
if 'state' in new_first_row:
df_out['state'] = ChargingState.hold
df_out = pd.concat([new_first_row, df_out], ignore_index=True)

# Adjust the cycle number, if included
# Assume the new file starts a new cycle
if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
df_out['cycle_number'] += 1 + int(last_row['cycle_number'])

# Increment the start cycle and time to determine starting point of next file
start_cycle += df_out['cycle_number'].max() - df_out['cycle_number'].min() + 1
start_time = df_out['test_time'].max()
output_dfs.append(df_out)

# Combine the data from all files
df_out = pd.concat(output_dfs, ignore_index=True)
Expand Down
44 changes: 26 additions & 18 deletions battdat/io/maccor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Extractor for MACCOR"""
import re
import logging
import itertools
from dataclasses import dataclass
from datetime import datetime
Expand All @@ -17,6 +18,8 @@

_test_date_re = re.compile(r'Date of Test:\s+(\d{2}/\d{2}/\d{4})')

logger = logging.getLogger(__name__)


@dataclass
class MACCORReader(CycleTestReader, DatasetFileReader):
Expand All @@ -26,10 +29,16 @@ class MACCORReader(CycleTestReader, DatasetFileReader):
The :meth:`group` operation will consolidate files such that all with
the same prefix (i.e., everything except the numerals in the extension)
are treated as part of the same experiment.
"""

ignore_time: bool = False
"""Ignore the the time column, which can be problematic."""
MACCOR files include both a test time relative to the start of testing
and a timestamp following the clock time.
This parser only assumes the test time to be correct because the timestamps
are nontrivial to rely upon, as they may be non-monotonic due to
changes to the computer's clock.
Test times are always monotonic.
The timestamps are generated based on the timestamp of the first row and
the change in test time.
"""

def group(self, files: Union[str, List[str]], directories: List[str] = None,
context: dict = None) -> Iterator[Tuple[str, ...]]:
Expand All @@ -50,7 +59,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
# Verify the cells are ordered by test date
start_dates = []
for file in group:
with open(file, 'r') as fp:
with open(file, 'r', encoding='latin1') as fp:
header = fp.readline()
test_date = _test_date_re.findall(header)[0]
start_dates.append(datetime.strptime(test_date, '%m/%d/%Y'))
Expand All @@ -62,11 +71,10 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter

return super().read_dataset(group, metadata)

def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
start_time: int = 0) -> pd.DataFrame:
def read_file(self, file: PathLike) -> pd.DataFrame:

# Pull the test date from the first line of the file
with open(file, 'r') as fp:
with open(file, 'r', encoding='latin1') as fp:
header = fp.readline()
test_date = _test_date_re.findall(header)[0]

Expand All @@ -78,30 +86,30 @@ def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
df_out = pd.DataFrame()

# fill in new dataframe
df_out['cycle_number'] = df['Cyc#'] + start_cycle - df['Cyc#'].min()
df_out['cycle_number'] = df['Cyc#'] - df['Cyc#'].min()
df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
df_out['file_number'] = file_number # df_out['cycle_number']*0
df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60 + start_time
df_out['test_time'] = (df['Test (Min)'] - df['Test (Min)'].iloc[0]) * 60
df_out['state'] = df['State']
df_out['current'] = df['Amps']
df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current'])
df_out['voltage'] = df['Volts']

if not self.ignore_time:
def _parse_time(time: str) -> float:
if '/' in time:
return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
else:
return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()
# Parse the timestamps
def _parse_time(time: str) -> float:
if '/' in time:
return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
else:
return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()

df_out['time'] = df['DPt Time'].apply(_parse_time)
start_time = _parse_time(df['DPt Time'].iloc[0])
df_out['time'] = start_time + df_out['test_time']

# 0 is rest, 1 is charge, -1 is discharge
df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.hold
df_out.loc[df_out['state'] == 'C', 'state'] = ChargingState.charging
df_out.loc[df_out['state'] == 'D', 'state'] = ChargingState.discharging
df_out.loc[df_out['state'].apply(lambda x: x not in {'R', 'C', 'D'}), 'state'] = ChargingState.unknown

df_out['voltage'] = df['Volts']
df_out = drop_cycles(df_out)
AddSteps().enhance(df_out)
AddMethod().enhance(df_out)
Expand Down
2 changes: 1 addition & 1 deletion dev/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Conda environment file
name: batdata
name: battdat
channels:
- defaults
dependencies:
Expand Down
8 changes: 8 additions & 0 deletions docs/source/consistency.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ Current (``b.consistency.current``)
:members:
:undoc-members:
:show-inheritance:

Time (``b.consistency.time``)
------------------------------------

.. automodule:: battdat.consistency.time
:members:
:undoc-members:
:show-inheritance:
13 changes: 11 additions & 2 deletions docs/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,17 @@ find files:
group = next(extractor.identify_files('./example-path/'))
dataset = extractor.read_dataset(group)

The :ref:`type of output dataset <type-table>` is defined by the :attr:`~battdat.io.base.DatasetFileReader.output_class` attribute.
Most uses of readers do not require modifying this attribute.

Reading Data from Multiple Files
++++++++++++++++++++++++++++++++

The MACCOR and Arbin readers can combine test data from multiple files into the same, contiguous dataset.
Combining is built on two key assumptions:

1. The files passed to ``read_dataset`` are in chronological order.
2. The battery is at rest in any period between testing files.
The dataset reader will insert rows with zero current
if the current in the first or last measurement of a file is nonzero.

Writing Data
------------
Expand Down
33 changes: 33 additions & 0 deletions tests/consistency/test_times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Test for inconsistencies in time columns"""
from datetime import datetime

import numpy as np
import pandas as pd
from pytest import fixture

from battdat.consistency.time import TestTimeVsTimeChecker
from battdat.data import BatteryDataset


@fixture()
def example_dataset():
    """Build a small cell dataset whose ``time`` and ``test_time`` columns agree exactly"""
    n_rows = 8
    test_time = np.arange(n_rows, dtype=float)
    start = datetime.now().timestamp()
    df = pd.DataFrame({
        'voltage': np.ones(n_rows),
        'current': np.zeros(n_rows),
        'test_time': test_time,
    })
    # Timestamps track the test time from "now", so the two columns are perfectly consistent
    df['time'] = start + df['test_time']
    data = BatteryDataset.make_cell_dataset(raw_data=df, cycle_stats=pd.DataFrame({'cycle_number': [0]}))
    data.validate()
    return data


def test_correct_inter(example_dataset):
    """No error for consistent columns; one error after shifting part of the timestamps"""
    checker = TestTimeVsTimeChecker()
    assert len(checker.check(example_dataset)) == 0

    # Shift the latter half of the timestamps so the columns disagree by 0.2 s.
    # Write through .loc rather than chained `['time'].iloc[4:] +=`, which does not
    # propagate back to the DataFrame under pandas copy-on-write (default in pandas 3.x)
    example_dataset.raw_data.loc[4:, 'time'] += 0.2
    errors = checker.check(example_dataset)
    assert len(errors) == 1
    assert '2.0e-01 seconds' in errors[0]
    assert 'row 4. test_time=4 s' in errors[0]
16 changes: 8 additions & 8 deletions tests/files/maccor_example.002
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Today's Date 04/04/2016 Date of Test: 04/01/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 16:05:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 16:06:41
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 23:59:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 23:59:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 23:59:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 00:00:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 00:00:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 00:00:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 00:00:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 00:00:41
10 changes: 10 additions & 0 deletions tests/files/maccor_example.charge.001
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Today's Date 04/04/2016 Date of Test: 03/31/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.1000000000 3.30678264 C 0 03/31/2016 16:05:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.1000000000 3.30571450 C 1 03/31/2016 16:05:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.1000000000 3.30571450 C 1 03/31/2016 16:05:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.1000000000 3.30586709 C 1 03/31/2016 16:06:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.1000000000 3.30601968 C 1 03/31/2016 16:06:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.1000000000 3.30601968 C 1 03/31/2016 16:06:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.1000000000 3.30586709 C 1 03/31/2016 16:06:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.1000000000 3.30617227 C 1 03/31/2016 16:06:41
Loading