From 7e3bfdf1de39fde4215cdd85155990738ee92ed1 Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Tue, 27 May 2025 15:19:04 -0400
Subject: [PATCH 01/10] Add check for time consistency

---
 battdat/consistency/time.py     | 40 +++++++++++++++++++++++++++++++++
 docs/source/consistency.rst     |  8 +++++++
 tests/consistency/test_times.py | 33 +++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)
 create mode 100644 battdat/consistency/time.py
 create mode 100644 tests/consistency/test_times.py

diff --git a/battdat/consistency/time.py b/battdat/consistency/time.py
new file mode 100644
index 0000000..3bc89b5
--- /dev/null
+++ b/battdat/consistency/time.py
@@ -0,0 +1,40 @@
+"""Check for problems across the columns which describe time"""
+from dataclasses import dataclass
+from datetime import datetime
+from typing import List
+
+import numpy as np
+
+from .base import ConsistencyChecker
+from ..data import BatteryDataset
+
+
+@dataclass
+class TestTimeVsTimeChecker(ConsistencyChecker):
+    """Ensure that the test time and timestamp columns agree
+
+    Verify that the difference between the test_time
+    """
+
+    max_inconsistency: float = 0.1
+    """Maximum inconsistency between timestamp and test time (s)"""
+
+    def check(self, dataset: BatteryDataset) -> List[str]:
+        output = []
+        for name, subset in dataset.tables.items():
+            if 'time' not in subset.columns or 'test_time' not in subset.columns:
+                continue
+
+            # Compare the time elapsed since the earliest row according to each column
+            test_time_normed = subset['test_time'] - subset['test_time'].min()
+            timestamp_normed = subset['time'] - subset['time'].min()
+            diffs = np.abs(test_time_normed - timestamp_normed)
+            max_diff = diffs.max()
+            if max_diff > self.max_inconsistency:
+                idx_max = np.argmax(diffs)
+                date_max = datetime.fromtimestamp(subset['time'].iloc[idx_max])
+                time_max = subset['test_time'].iloc[idx_max]
+                output.append(f'Test times and timestep in dataset "{name}" differ by {max_diff:.1e} seconds in row {idx_max}.'
+                              f' test_time={int(time_max)} s, time={date_max}')
+
+        return output
diff --git a/docs/source/consistency.rst b/docs/source/consistency.rst
index 8619125..914c0df 100644
--- a/docs/source/consistency.rst
+++ b/docs/source/consistency.rst
@@ -22,3 +22,11 @@ Current (``b.consistency.current``)
    :members:
    :undoc-members:
    :show-inheritance:
+
+Time (``b.consistency.time``)
+------------------------------------
+
+.. automodule:: battdat.consistency.time
+   :members:
+   :undoc-members:
+   :show-inheritance:
diff --git a/tests/consistency/test_times.py b/tests/consistency/test_times.py
new file mode 100644
index 0000000..7ee005c
--- /dev/null
+++ b/tests/consistency/test_times.py
@@ -0,0 +1,33 @@
+"""Test for inconsistencies in time columns"""
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+from pytest import fixture
+
+from battdat.consistency.time import TestTimeVsTimeChecker
+from battdat.data import BatteryDataset
+
+
+@fixture()
+def example_dataset():
+    df = pd.DataFrame({
+        'voltage': [1.] * 8,
+        'current': [0.] * 8,
+        'test_time': np.arange(8, dtype=float)
+    })
+    df['time'] = datetime.now().timestamp() + df['test_time']
+    data = BatteryDataset.make_cell_dataset(raw_data=df)
+    data.validate()
+    return data
+
+
+def test_correct_inter(example_dataset):
+    checker = TestTimeVsTimeChecker()
+    assert len(checker.check(example_dataset)) == 0
+
+    example_dataset.raw_data['time'].iloc[4:] += 0.2
+    errors = checker.check(example_dataset)
+    assert len(errors) == 1
+    assert '2.0e-01 seconds' in errors[0]
+    assert 'row 4. test_time=4 s' in errors[0]

From 3895869d0abae8413af09646d1d8efcda08d7cba Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Tue, 27 May 2025 15:24:19 -0400
Subject: [PATCH 02/10] Test skipping irrelevant tables

---
 tests/consistency/test_times.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/consistency/test_times.py b/tests/consistency/test_times.py
index 7ee005c..53357ad 100644
--- a/tests/consistency/test_times.py
+++ b/tests/consistency/test_times.py
@@ -17,7 +17,7 @@ def example_dataset():
         'test_time': np.arange(8, dtype=float)
     })
     df['time'] = datetime.now().timestamp() + df['test_time']
-    data = BatteryDataset.make_cell_dataset(raw_data=df)
+    data = BatteryDataset.make_cell_dataset(raw_data=df, cycle_stats=pd.DataFrame({'cycle_number': [0]}))
     data.validate()
     return data
 

From f503f9ec2bcc9776e1dee31d8f2583810e24d52d Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Tue, 27 May 2025 16:26:36 -0400
Subject: [PATCH 03/10] Add tool for correcting the offsets

---
 battdat/io/maccor.py           | 62 ++++++++++++++++++++++++++++------
 tests/files/maccor_example.002 | 16 ++++-----
 tests/io/test_maccor.py        | 29 ++++++++++++----
 3 files changed, 83 insertions(+), 24 deletions(-)

diff --git a/battdat/io/maccor.py b/battdat/io/maccor.py
index 0eff245..356fe0a 100644
--- a/battdat/io/maccor.py
+++ b/battdat/io/maccor.py
@@ -1,5 +1,6 @@
 """Extractor for MACCOR"""
 import re
+import logging
 import itertools
 from dataclasses import dataclass
 from datetime import datetime
@@ -17,6 +18,48 @@
 
 _test_date_re = re.compile(r'Date of Test:\s+(\d{2}/\d{2}/\d{4})')
 
+logger = logging.getLogger(__name__)
+
+
+def correct_time_offsets(raw_data: pd.DataFrame, desync_tol: float = 0.01) -> int:
+    """Correct errors in the timestamp column that result
+    from the day not being listed with timestamp.
+
+    Day rollovers are detected by desynchronization between the test time
+    and timestamps, which are corrected by moving the test_time forward
+    to meet the date time.
+
+    Will warn if the desynchronization is not a multiple of a day,
+    an hour (daylight savings time), or a second (leap seconds).
+
+    Args:
+        raw_data: Raw data signal to be corrected
+        desync_tol: Tolerance of desynchronization between time columns
+    Returns:
+        Number of day rollovers that were detected
+    """
+
+    test_time = raw_data['test_time'] - raw_data['test_time'].iloc[0]
+
+    def _get_differences():
+        timestamp_diff = raw_data['time'] - raw_data['time'].iloc[0]
+        return timestamp_diff - test_time
+
+    while np.abs(diffs := _get_differences()).max() > desync_tol:
+        # Get the amount of offset detected
+        first_bad_ix = np.argmax(np.abs(diffs) > desync_tol)
+        offset = diffs[first_bad_ix].item()
+
+        # Check if it's consistent with a date rollover, daylight savings time, or leap second
+        if np.isclose(offset % 86400, 0, atol=1e-1) or \
+                np.isclose(np.abs(offset), [3600, 1], atol=1e-1).any():
+            pass  # Nothing of concern
+        else:
+            logger.warning(f'Detected an offset inconsistent with a day: {offset} s')
+
+        # Correct the offset
+        raw_data['time'].iloc[first_bad_ix:] -= offset
+
 
 @dataclass
 class MACCORReader(CycleTestReader, DatasetFileReader):
@@ -28,9 +71,6 @@ class MACCORReader(CycleTestReader, DatasetFileReader):
     are treated as part of the same experiment.
     """
 
-    ignore_time: bool = False
-    """Ignore the the time column, which can be problematic."""
-
     def group(self, files: Union[str, List[str]], directories: List[str] = None,
               context: dict = None) -> Iterator[Tuple[str, ...]]:
         if isinstance(files, str):
@@ -86,14 +126,16 @@ def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
         df_out['current'] = df['Amps']
         df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current'])
 
-        if not self.ignore_time:
-            def _parse_time(time: str) -> float:
-                if '/' in time:
-                    return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
-                else:
-                    return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()
+        # Parse the timestamps
+        def _parse_time(time: str) -> float:
+            if '/' in time:
+                return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
+            else:
+                return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()
+
+        df_out['time'] = df['DPt Time'].apply(_parse_time)
 
-            df_out['time'] = df['DPt Time'].apply(_parse_time)
+        correct_time_offsets(df_out)
 
         #   0 is rest, 1 is charge, -1 is discharge
         df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.hold
diff --git a/tests/files/maccor_example.002 b/tests/files/maccor_example.002
index 2f45b7b..baf1a9b 100644
--- a/tests/files/maccor_example.002
+++ b/tests/files/maccor_example.002
@@ -1,10 +1,10 @@
 Today's Date 04/04/2016  Date of Test:	04/01/2016	 Filename:	C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V	Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
 Rec#	Cyc#	Step	Test (Min)	Step (Min)	Amp-hr	Watt-hr	Amps	Volts	State	ES	DPt Time
-1	0	1	0.0000	0.0000	0.0000000000	0.0000000000	0.0000000000	3.30678264	R	0	16:05:31
-2	0	1	0.1667	0.1667	0.0000000000	0.0000000000	0.0000000000	3.30571450	R	1	16:05:41
-3	0	1	0.3333	0.3333	0.0000000000	0.0000000000	0.0000000000	3.30571450	R	1	16:05:51
-4	0	1	0.5000	0.5000	0.0000000000	0.0000000000	0.0000000000	3.30586709	R	1	16:06:01
-5	0	1	0.6667	0.6667	0.0000000000	0.0000000000	0.0000000000	3.30601968	R	1	16:06:11
-6	0	1	0.8333	0.8333	0.0000000000	0.0000000000	0.0000000000	3.30601968	R	1	16:06:21
-7	0	1	1.0000	1.0000	0.0000000000	0.0000000000	0.0000000000	3.30586709	R	1	16:06:31
-8	0	1	1.1667	1.1667	0.0000000000	0.0000000000	0.0000000000	3.30617227	R	1	16:06:41
+1	0	1	0.0000	0.0000	0.0000000000	0.0000000000	0.0000000000	3.30678264	R	0	23:59:31
+2	0	1	0.1667	0.1667	0.0000000000	0.0000000000	0.0000000000	3.30571450	R	1	23:59:41
+3	0	1	0.3333	0.3333	0.0000000000	0.0000000000	0.0000000000	3.30571450	R	1	23:59:51
+4	0	1	0.5000	0.5000	0.0000000000	0.0000000000	0.0000000000	3.30586709	R	1	00:00:01
+5	0	1	0.6667	0.6667	0.0000000000	0.0000000000	0.0000000000	3.30601968	R	1	00:00:11
+6	0	1	0.8333	0.8333	0.0000000000	0.0000000000	0.0000000000	3.30601968	R	1	00:00:21
+7	0	1	1.0000	1.0000	0.0000000000	0.0000000000	0.0000000000	3.30586709	R	1	00:00:31
+8	0	1	1.1667	1.1667	0.0000000000	0.0000000000	0.0000000000	3.30617227	R	1	00:00:41
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index 5ca412c..0e06ab3 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -1,8 +1,10 @@
 """Tests related to the MACCOR parser"""
+import numpy as np
+import pandas as pd
 from datetime import datetime
 from pytest import fixture, raises
 
-from battdat.io.maccor import MACCORReader
+from battdat.io.maccor import MACCORReader, correct_time_offsets
 
 
 @fixture()
@@ -21,6 +23,26 @@ def test_validation(extractor, test_file):
     data.validate_columns(allow_extra_columns=False)
 
 
+def test_check_offset_correct(caplog):
+    df = pd.DataFrame({
+        'test_time': np.arange(3, dtype=float),
+    })
+
+    # Test the OK offsets
+    for off in [86400, 1, -3600]:
+        df['time'] = df['test_time'] + datetime.now().timestamp()
+        df['time'].iloc[1:] += off
+        correct_time_offsets(df)
+        assert np.allclose(df['time'] - df['time'].iloc[0], np.arange(3.))
+        assert len(caplog.messages) == 0
+
+    # Test an offset which yields a warning
+    df['time'].iloc[1:] += 25
+    correct_time_offsets(df)
+    assert len(caplog.messages) == 1
+    assert '25' in caplog.messages[-1]
+
+
 def test_grouping(extractor, tmp_path):
     # Make a file structure with two sets of experiments and a nonsense file
     for f in ['README', 'testA.002', 'testA.001', 'testB.001']:
@@ -51,8 +73,3 @@ def test_time_parser(extractor, test_file):
     # With only the time in the time column
     df = extractor.read_file(test_file.with_suffix('.002'))
     assert datetime.fromtimestamp(df['time'].iloc[0]).month == 4
-
-    # Ignoring datetime
-    extractor.ignore_time = True
-    df = extractor.read_file(test_file)
-    assert 'time' not in df.columns

From 1c44ab339f2bfe45b6dfb83230fd5bccefa682a3 Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Tue, 27 May 2025 17:07:51 -0400
Subject: [PATCH 04/10] Refactor logic for multi files into superclass

We were repeating ourselves, and that became a problem with complex
strategies for combining files
---
 battdat/io/arbin.py     |  8 +++-----
 battdat/io/base.py      | 39 +++++++++++++++++++++------------------
 battdat/io/maccor.py    |  8 +++-----
 dev/environment.yml     |  2 +-
 docs/user-guide/io.rst  |  3 ---
 tests/io/test_maccor.py | 13 +++++++++++++
 6 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/battdat/io/arbin.py b/battdat/io/arbin.py
index 5c30303..38af3e4 100644
--- a/battdat/io/arbin.py
+++ b/battdat/io/arbin.py
@@ -21,8 +21,7 @@ def group(self, files: Union[str, List[str]], directories: List[str] = None,
             if file.lower().endswith('.csv'):
                 yield file
 
-    def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
-                  start_time: float = 0) -> pd.DataFrame:
+    def read_file(self, file: str) -> pd.DataFrame:
 
         # Read the file and rename the file
         df = pd.read_csv(file)
@@ -32,10 +31,9 @@ def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
         df_out = pd.DataFrame()
 
         # Convert the column names
-        df_out['cycle_number'] = df['Cycle_Index'] + start_cycle - df['Cycle_Index'].min()
+        df_out['cycle_number'] = df['Cycle_Index'] - df['Cycle_Index'].min()
         df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
-        df_out['file_number'] = file_number  # df_out['cycle_number']*0
-        df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0] + start_time, dtype=float)
+        df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0], dtype=float)
         df_out['current'] = df['Current']  # TODO (wardlt): Check this!?
         df_out['temperature'] = df['Temperature']
         df_out['internal_resistance'] = df['Internal_Resistance']
diff --git a/battdat/io/base.py b/battdat/io/base.py
index c231557..b3d0809 100644
--- a/battdat/io/base.py
+++ b/battdat/io/base.py
@@ -96,20 +96,13 @@ class CycleTestReader(DatasetFileReader):
     Adds logic for reading cycling time series from a list of files.
     """
 
-    def read_file(self,
-                  file: str,
-                  file_number: int = 0,
-                  start_cycle: int = 0,
-                  start_time: int = 0) -> pd.DataFrame:
+    def read_file(self, file: str) -> pd.DataFrame:
         """Generate a DataFrame containing the data in this file
 
         The dataframe will be in our standard format
 
         Args:
             file: Path to the file
-            file_number: Number of file, in case the test is spread across multiple files
-            start_cycle: Index to use for the first cycle, in case test is spread across multiple files
-            start_time: Test time to use for the start of the test, in case test is spread across multiple files
 
         Returns:
             Dataframe containing the battery data in a standard format
@@ -127,21 +120,31 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
             DataFrame containing the information from all files
         """
 
-        # Initialize counters for the cycle numbers, etc., Used to determine offsets for the files read
-        start_cycle = 0
-        start_time = 0
-
         # Read the data for each file
         #  Keep track of the ending index and ending time
         output_dfs = []
         for file_number, file in enumerate(group):
-            # Read the file
-            df_out = self.read_file(file, file_number, start_cycle, start_time)
-            output_dfs.append(df_out)
+            df_out = self.read_file(file)
+            df_out['file_number'] = file_number
+
+            # Adjust the test time and cycle for subsequent files
+            if len(output_dfs) > 0:
+                last_row = output_dfs[-1]
+
+                # Determine the length of rest between last file and current
+                rest_between_files = 0  # Assume duplicate points if no data are available
+                if 'time' in last_row and 'time' in df_out:
+                    rest_between_files = df_out['time'].iloc[0] - last_row['time']
 
-            # Increment the start cycle and time to determine starting point of next file
-            start_cycle += df_out['cycle_number'].max() - df_out['cycle_number'].min() + 1
-            start_time = df_out['test_time'].max()
+                # Increment the test time such that it continues from the last file
+                df_out['test_time'] += last_row['test_time'] + rest_between_files
+
+                # Adjust the cycle number, if included
+                #  Assume the new file starts a new cycle
+                if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
+                    df_out['cycle_number'] += 1 + last_row['cycle_number']
+
+            output_dfs.append(df_out)
 
         # Combine the data from all files
         df_out = pd.concat(output_dfs, ignore_index=True)
diff --git a/battdat/io/maccor.py b/battdat/io/maccor.py
index 356fe0a..6a6c3ef 100644
--- a/battdat/io/maccor.py
+++ b/battdat/io/maccor.py
@@ -102,8 +102,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
 
         return super().read_dataset(group, metadata)
 
-    def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
-                  start_time: int = 0) -> pd.DataFrame:
+    def read_file(self, file: PathLike) -> pd.DataFrame:
 
         # Pull the test date from the first line of the file
         with open(file, 'r') as fp:
@@ -118,10 +117,9 @@ def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
         df_out = pd.DataFrame()
 
         # fill in new dataframe
-        df_out['cycle_number'] = df['Cyc#'] + start_cycle - df['Cyc#'].min()
+        df_out['cycle_number'] = df['Cyc#'] - df['Cyc#'].min()
         df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
-        df_out['file_number'] = file_number  # df_out['cycle_number']*0
-        df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60 + start_time
+        df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60
         df_out['state'] = df['State']
         df_out['current'] = df['Amps']
         df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current'])
diff --git a/dev/environment.yml b/dev/environment.yml
index dfb1fa5..49a5a12 100644
--- a/dev/environment.yml
+++ b/dev/environment.yml
@@ -1,5 +1,5 @@
 # Conda environment file
-name: batdata
+name: battdat
 channels:
   - defaults
 dependencies:
diff --git a/docs/user-guide/io.rst b/docs/user-guide/io.rst
index 4225b31..448a6e5 100644
--- a/docs/user-guide/io.rst
+++ b/docs/user-guide/io.rst
@@ -61,9 +61,6 @@ find files:
     group = next(extractor.identify_files('./example-path/'))
     dataset = extractor.read_dataset(group)
 
-The :ref:`type of output dataset <type-table>` is defined by the :attr:`~battdat.io.base.DatasetFileReader.output_class` attribute.
-Most uses of readers do not require modifying this attribute.
-
 Writing Data
 ------------
 
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index 0e06ab3..6abc26d 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from pytest import fixture, raises
 
+from battdat.consistency.time import TestTimeVsTimeChecker
 from battdat.io.maccor import MACCORReader, correct_time_offsets
 
 
@@ -55,7 +56,17 @@ def test_grouping(extractor, tmp_path):
     assert (str(tmp_path / 'testB.001'),) in groups
 
 
+def test_test_time_multifile(extractor, test_file):
+    """Ensure we get the time between starting files correctly"""
+    files = [test_file, test_file.with_suffix('.002')]
+    data = extractor.read_dataset(files)
+    data.validate()
+
+    assert len(TestTimeVsTimeChecker().check(data)) == 0  # That the test times and date columns are correct
+
+
 def test_date_check(extractor, test_file):
+    """Test detecting out-of-order files"""
     files = [test_file, test_file.with_suffix('.002')]
     data = extractor.read_dataset(files)
     data.validate()
@@ -73,3 +84,5 @@ def test_time_parser(extractor, test_file):
     # With only the time in the time column
     df = extractor.read_file(test_file.with_suffix('.002'))
     assert datetime.fromtimestamp(df['time'].iloc[0]).month == 4
+    assert datetime.fromtimestamp(df['time'].iloc[0]).day == 1
+    assert datetime.fromtimestamp(df['time'].iloc[-1]).day == 2

From 971c6f1c9e1679faa23d038e3dd32f578052e785 Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Wed, 28 May 2025 10:41:56 -0400
Subject: [PATCH 05/10] Stop relying on the timestamps in a MACCOR file

Assume that the first one is correct, infer the remainder
---
 battdat/io/base.py      |  6 ++---
 battdat/io/maccor.py    | 58 +++++++++--------------------------------
 tests/io/test_maccor.py | 26 +++---------------
 3 files changed, 19 insertions(+), 71 deletions(-)

diff --git a/battdat/io/base.py b/battdat/io/base.py
index b3d0809..be38e1a 100644
--- a/battdat/io/base.py
+++ b/battdat/io/base.py
@@ -129,12 +129,12 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
 
             # Adjust the test time and cycle for subsequent files
             if len(output_dfs) > 0:
-                last_row = output_dfs[-1]
+                last_row = output_dfs[-1].iloc[-1]
 
                 # Determine the length of rest between last file and current
                 rest_between_files = 0  # Assume duplicate points if no data are available
                 if 'time' in last_row and 'time' in df_out:
-                    rest_between_files = df_out['time'].iloc[0] - last_row['time']
+                    rest_between_files = max(df_out['time'].iloc[0] - last_row['time'], 0)
 
                 # Increment the test time such that it continues from the last file
                 df_out['test_time'] += last_row['test_time'] + rest_between_files
@@ -142,7 +142,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
                 # Adjust the cycle number, if included
                 #  Assume the new file starts a new cycle
                 if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
-                    df_out['cycle_number'] += 1 + last_row['cycle_number']
+                    df_out['cycle_number'] += 1 + int(last_row['cycle_number'])
 
             output_dfs.append(df_out)
 
diff --git a/battdat/io/maccor.py b/battdat/io/maccor.py
index 6a6c3ef..4aab905 100644
--- a/battdat/io/maccor.py
+++ b/battdat/io/maccor.py
@@ -21,46 +21,6 @@
 logger = logging.getLogger(__name__)
 
 
-def correct_time_offsets(raw_data: pd.DataFrame, desync_tol: float = 0.01) -> int:
-    """Correct errors in the timestamp column that result
-    from the day not being listed with timestamp.
-
-    Day rollovers are detected by desynchronization between the test time
-    and timestamps, which are corrected by moving the test_time forward
-    to meet the date time.
-
-    Will warn if the desynchronization is not a multiple of a day,
-    an hour (daylight savings time), or a second (leap seconds).
-
-    Args:
-        raw_data: Raw data signal to be corrected
-        desync_tol: Tolerance of desynchronization between time columns
-    Returns:
-        Number of day rollovers that were detected
-    """
-
-    test_time = raw_data['test_time'] - raw_data['test_time'].iloc[0]
-
-    def _get_differences():
-        timestamp_diff = raw_data['time'] - raw_data['time'].iloc[0]
-        return timestamp_diff - test_time
-
-    while np.abs(diffs := _get_differences()).max() > desync_tol:
-        # Get the amount of offset detected
-        first_bad_ix = np.argmax(np.abs(diffs) > desync_tol)
-        offset = diffs[first_bad_ix].item()
-
-        # Check if it's consistent with a date rollover, daylight savings time, or leap second
-        if np.isclose(offset % 86400, 0, atol=1e-1) or \
-                np.isclose(np.abs(offset), [3600, 1], atol=1e-1).any():
-            pass  # Nothing of concern
-        else:
-            logger.warning(f'Detected an offset inconsistent with a day: {offset} s')
-
-        # Correct the offset
-        raw_data['time'].iloc[first_bad_ix:] -= offset
-
-
 @dataclass
 class MACCORReader(CycleTestReader, DatasetFileReader):
     """Parser for reading from MACCOR-format files
@@ -69,6 +29,15 @@ class MACCORReader(CycleTestReader, DatasetFileReader):
     The :meth:`group` operation will consolidate files such that all with
     the same prefix (i.e., everything except the numerals in the extension)
     are treated as part of the same experiment.
+
+    MACCOR files include both a test time relative to the start of testing
+    and a timestamp following the clock time.
+    This parser only assumes the test time to be correct because the timestamps
+    are nontrivial to rely upon, as they may be non-monotonic due to
+    changes to the computer's clock.
+    Test times are always monotonic.
+    The timestamps are generated based on the timestamp of the first row and
+    the change in test time.
     """
 
     def group(self, files: Union[str, List[str]], directories: List[str] = None,
@@ -119,10 +88,11 @@ def read_file(self, file: PathLike) -> pd.DataFrame:
         # fill in new dataframe
         df_out['cycle_number'] = df['Cyc#'] - df['Cyc#'].min()
         df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
-        df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60
+        df_out['test_time'] = (df['Test (Min)'] - df['Test (Min)'].iloc[0]) * 60
         df_out['state'] = df['State']
         df_out['current'] = df['Amps']
         df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current'])
+        df_out['voltage'] = df['Volts']
 
         # Parse the timestamps
         def _parse_time(time: str) -> float:
@@ -131,9 +101,8 @@ def _parse_time(time: str) -> float:
             else:
                 return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()
 
-        df_out['time'] = df['DPt Time'].apply(_parse_time)
-
-        correct_time_offsets(df_out)
+        start_time = _parse_time(df['DPt Time'].iloc[0])
+        df_out['time'] = start_time + df_out['test_time']
 
         #   0 is rest, 1 is charge, -1 is discharge
         df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.hold
@@ -141,7 +110,6 @@ def _parse_time(time: str) -> float:
         df_out.loc[df_out['state'] == 'D', 'state'] = ChargingState.discharging
         df_out.loc[df_out['state'].apply(lambda x: x not in {'R', 'C', 'D'}), 'state'] = ChargingState.unknown
 
-        df_out['voltage'] = df['Volts']
         df_out = drop_cycles(df_out)
         AddSteps().enhance(df_out)
         AddMethod().enhance(df_out)
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index 6abc26d..5aa41c5 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -1,11 +1,9 @@
 """Tests related to the MACCOR parser"""
-import numpy as np
-import pandas as pd
 from datetime import datetime
 from pytest import fixture, raises
 
 from battdat.consistency.time import TestTimeVsTimeChecker
-from battdat.io.maccor import MACCORReader, correct_time_offsets
+from battdat.io.maccor import MACCORReader
 
 
 @fixture()
@@ -24,26 +22,6 @@ def test_validation(extractor, test_file):
     data.validate_columns(allow_extra_columns=False)
 
 
-def test_check_offset_correct(caplog):
-    df = pd.DataFrame({
-        'test_time': np.arange(3, dtype=float),
-    })
-
-    # Test the OK offsets
-    for off in [86400, 1, -3600]:
-        df['time'] = df['test_time'] + datetime.now().timestamp()
-        df['time'].iloc[1:] += off
-        correct_time_offsets(df)
-        assert np.allclose(df['time'] - df['time'].iloc[0], np.arange(3.))
-        assert len(caplog.messages) == 0
-
-    # Test an offset which yields a warning
-    df['time'].iloc[1:] += 25
-    correct_time_offsets(df)
-    assert len(caplog.messages) == 1
-    assert '25' in caplog.messages[-1]
-
-
 def test_grouping(extractor, tmp_path):
     # Make a file structure with two sets of experiments and a nonsense file
     for f in ['README', 'testA.002', 'testA.001', 'testB.001']:
@@ -63,6 +41,8 @@ def test_test_time_multifile(extractor, test_file):
     data.validate()
 
     assert len(TestTimeVsTimeChecker().check(data)) == 0  # That the test times and date columns are correct
+    assert data.raw_data['test_time'].max() > 86400
+    assert data.raw_data['cycle_number'].max() == 1
 
 
 def test_date_check(extractor, test_file):

From 762dbae0ccaa926006a6d82f5f146fb4ab2e22a5 Mon Sep 17 00:00:00 2001
From: lward <lward@anl.gov>
Date: Wed, 28 May 2025 13:53:55 -0400
Subject: [PATCH 06/10] Clarify the documentation

---
 battdat/consistency/time.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/battdat/consistency/time.py b/battdat/consistency/time.py
index 3bc89b5..63da827 100644
--- a/battdat/consistency/time.py
+++ b/battdat/consistency/time.py
@@ -13,7 +13,9 @@
 class TestTimeVsTimeChecker(ConsistencyChecker):
     """Ensure that the test time and timestamp columns agree
 
-    Verify that the difference between the test_time
+    Verify that the difference between the first and current row
+    for the ``test_time`` (time elapsed since the beginning of cycling)
+    and ``time`` (clock datetime) columns agree.
     """
 
     max_inconsistency: float = 0.1

From c5bd1ea5cd008149c3262f74408936bd37e9711a Mon Sep 17 00:00:00 2001
From: Logan Ward <ward.logan.t@gmail.com>
Date: Mon, 2 Jun 2025 08:48:10 -0400
Subject: [PATCH 07/10] Add a rest at the end of a file

---
 battdat/io/base.py                    |  9 +++++++++
 tests/files/maccor_example.charge.001 | 10 ++++++++++
 tests/io/test_maccor.py               | 12 ++++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 tests/files/maccor_example.charge.001

diff --git a/battdat/io/base.py b/battdat/io/base.py
index be38e1a..c05dd9f 100644
--- a/battdat/io/base.py
+++ b/battdat/io/base.py
@@ -139,6 +139,15 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
                 # Increment the test time such that it continues from the last file
                 df_out['test_time'] += last_row['test_time'] + rest_between_files
 
+                # Ensure current is zero if the rest between files is nonzero
+                if rest_between_files != 0 and last_row['current'] != 0:
+                    new_last_row = output_dfs[-1].iloc[-1:].copy()
+                    new_last_row['test_time'] += 1e-3  # Assume the rest occurs a millisecond later
+                    new_last_row['current'] = 0
+                    if 'time' in new_last_row:
+                        new_last_row['time'] += 1e-3
+                    output_dfs[-1] = pd.concat([output_dfs[-1], new_last_row], ignore_index=True)
+
                 # Adjust the cycle number, if included
                 #  Assume the new file starts a new cycle
                 if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
diff --git a/tests/files/maccor_example.charge.001 b/tests/files/maccor_example.charge.001
new file mode 100644
index 0000000..31f4e01
--- /dev/null
+++ b/tests/files/maccor_example.charge.001
@@ -0,0 +1,10 @@
+Today's Date 04/04/2016  Date of Test:	03/31/2016	 Filename:	C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V	Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
+Rec#	Cyc#	Step	Test (Min)	Step (Min)	Amp-hr	Watt-hr	Amps	Volts	State	ES	DPt Time
+1	0	1	0.0000	0.0000	0.0000000000	0.0000000000	0.1000000000	3.30678264	C	0	03/31/2016 16:05:31
+2	0	1	0.1667	0.1667	0.0000000000	0.0000000000	0.1000000000	3.30571450	C	1	03/31/2016 16:05:41
+3	0	1	0.3333	0.3333	0.0000000000	0.0000000000	0.1000000000	3.30571450	C	1	03/31/2016 16:05:51
+4	0	1	0.5000	0.5000	0.0000000000	0.0000000000	0.1000000000	3.30586709	C	1	03/31/2016 16:06:01
+5	0	1	0.6667	0.6667	0.0000000000	0.0000000000	0.1000000000	3.30601968	C	1	03/31/2016 16:06:11
+6	0	1	0.8333	0.8333	0.0000000000	0.0000000000	0.1000000000	3.30601968	C	1	03/31/2016 16:06:21
+7	0	1	1.0000	1.0000	0.0000000000	0.0000000000	0.1000000000	3.30586709	C	1	03/31/2016 16:06:31
+8	0	1	1.1667	1.1667	0.0000000000	0.0000000000	0.1000000000	3.30617227	C	1	03/31/2016 16:06:41
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index 5aa41c5..976ca6f 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -45,6 +45,18 @@ def test_test_time_multifile(extractor, test_file):
     assert data.raw_data['cycle_number'].max() == 1
 
 
+def test_add_zero_current(extractor, test_file):
+    """Ensure that we add a zero-current row between files"""
+    data = extractor.read_dataset([test_file.with_suffix('.charge.001')])
+    orig_len = len(data.raw_data)
+    assert data.raw_data['current'].iloc[-1] != 0
+
+    # Append a second test file, ensure nonzero current
+    data = extractor.read_dataset([test_file.with_suffix('.charge.001'), test_file.with_suffix('.002')])
+    assert data.raw_data['current'].iloc[orig_len] == 0
+
+
+
 def test_date_check(extractor, test_file):
     """Test detecting out-of-order files"""
     files = [test_file, test_file.with_suffix('.002')]

From 7f76f66e95245bd9c47dea105d2f3456813d4c7a Mon Sep 17 00:00:00 2001
From: Logan Ward <ward.logan.t@gmail.com>
Date: Mon, 2 Jun 2025 09:10:02 -0400
Subject: [PATCH 08/10] Set the character encoding to latin-1

It's probably https://en.wikipedia.org/wiki/Windows-1252,
but latin-1 should work
---
 battdat/io/maccor.py    | 4 ++--
 tests/io/test_maccor.py | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/battdat/io/maccor.py b/battdat/io/maccor.py
index 4aab905..510080f 100644
--- a/battdat/io/maccor.py
+++ b/battdat/io/maccor.py
@@ -59,7 +59,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
         # Verify the cells are ordered by test date
         start_dates = []
         for file in group:
-            with open(file, 'r') as fp:
+            with open(file, 'r', encoding='latin1') as fp:
                 header = fp.readline()
                 test_date = _test_date_re.findall(header)[0]
                 start_dates.append(datetime.strptime(test_date, '%m/%d/%Y'))
@@ -74,7 +74,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
     def read_file(self, file: PathLike) -> pd.DataFrame:
 
         # Pull the test date from the first line of the file
-        with open(file, 'r') as fp:
+        with open(file, 'r', encoding='latin1') as fp:
             header = fp.readline()
         test_date = _test_date_re.findall(header)[0]
 
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index 976ca6f..cc095dc 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -56,7 +56,6 @@ def test_add_zero_current(extractor, test_file):
     assert data.raw_data['current'].iloc[orig_len] == 0
 
 
-
 def test_date_check(extractor, test_file):
     """Test detecting out-of-order files"""
     files = [test_file, test_file.with_suffix('.002')]

From a1b7a22063d2cd29e07427285f90f8ca863468ee Mon Sep 17 00:00:00 2001
From: Logan Ward <ward.logan.t@gmail.com>
Date: Mon, 2 Jun 2025 11:14:54 -0400
Subject: [PATCH 09/10] Add a rest to the beginning of next cycle too

---
 battdat/io/base.py      | 16 +++++++++++++++-
 tests/io/test_maccor.py |  5 ++++-
 2 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/battdat/io/base.py b/battdat/io/base.py
index c05dd9f..ea7d949 100644
--- a/battdat/io/base.py
+++ b/battdat/io/base.py
@@ -7,6 +7,7 @@
 
 from battdat.data import BatteryDataset
 from battdat.schemas import BatteryMetadata
+from battdat.schemas.column import ChargingState
 
 PathLike = Union[str, Path]
 
@@ -141,13 +142,26 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
 
                 # Ensure current is zero if the rest between files is nonzero
                 if rest_between_files != 0 and last_row['current'] != 0:
+                    # Assume the rest occurs a millisecond later
                     new_last_row = output_dfs[-1].iloc[-1:].copy()
-                    new_last_row['test_time'] += 1e-3  # Assume the rest occurs a millisecond later
+                    new_last_row['test_time'] += 1e-3
                     new_last_row['current'] = 0
                     if 'time' in new_last_row:
                         new_last_row['time'] += 1e-3
+                    if 'state' in new_last_row:
+                        new_last_row['state'] = ChargingState.hold
                     output_dfs[-1] = pd.concat([output_dfs[-1], new_last_row], ignore_index=True)
 
+                    # Assume the rest ends a millisecond before the new file starts: prepend a zero-current copy of its first row
+                    new_first_row = df_out.iloc[:1].copy()  # One-row DataFrame, copied so that df_out is untouched
+                    new_first_row['test_time'] -= 1e-3
+                    new_first_row['current'] = 0.
+                    if 'time' in new_first_row:
+                        new_first_row['time'] -= 1e-3
+                    if 'state' in new_first_row:
+                        new_first_row['state'] = ChargingState.hold  # Only the inserted row is a rest, not the whole file
+                    df_out = pd.concat([new_first_row, df_out], ignore_index=True)
+
                 # Adjust the cycle number, if included
                 #  Assume the new file starts a new cycle
                 if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
diff --git a/tests/io/test_maccor.py b/tests/io/test_maccor.py
index cc095dc..3dd1f9f 100644
--- a/tests/io/test_maccor.py
+++ b/tests/io/test_maccor.py
@@ -1,5 +1,7 @@
 """Tests related to the MACCOR parser"""
 from datetime import datetime
+
+import numpy as np
 from pytest import fixture, raises
 
 from battdat.consistency.time import TestTimeVsTimeChecker
@@ -53,7 +55,8 @@ def test_add_zero_current(extractor, test_file):
 
     # Append a second test file, ensure nonzero current
     data = extractor.read_dataset([test_file.with_suffix('.charge.001'), test_file.with_suffix('.002')])
-    assert data.raw_data['current'].iloc[orig_len] == 0
+    assert np.allclose(data.raw_data['current'].iloc[orig_len:orig_len + 2], 0)
+    assert np.allclose(data.raw_data['file_number'].iloc[orig_len:orig_len + 2], [0, 1])
 
 
 def test_date_check(extractor, test_file):

From 6cb1bd287334236c35ebcce2e649d6ed14d3a682 Mon Sep 17 00:00:00 2001
From: Logan Ward <ward.logan.t@gmail.com>
Date: Mon, 2 Jun 2025 11:23:17 -0400
Subject: [PATCH 10/10] Document the inserted rows

---
 battdat/io/base.py     |  2 +-
 docs/user-guide/io.rst | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/battdat/io/base.py b/battdat/io/base.py
index ea7d949..0cf2306 100644
--- a/battdat/io/base.py
+++ b/battdat/io/base.py
@@ -141,7 +141,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
                 df_out['test_time'] += last_row['test_time'] + rest_between_files
 
                 # Ensure current is zero if the rest between files is nonzero
-                if rest_between_files != 0 and last_row['current'] != 0:
+                if rest_between_files != 0 and (last_row['current'] != 0 or df_out['current'].iloc[0] != 0):
                     # Assume the rest occurs a millisecond later
                     new_last_row = output_dfs[-1].iloc[-1:].copy()
                     new_last_row['test_time'] += 1e-3
diff --git a/docs/user-guide/io.rst b/docs/user-guide/io.rst
index 448a6e5..c846928 100644
--- a/docs/user-guide/io.rst
+++ b/docs/user-guide/io.rst
@@ -61,6 +61,18 @@ find files:
     group = next(extractor.identify_files('./example-path/'))
     dataset = extractor.read_dataset(group)
 
+
+Reading Data from Multiple Files
+++++++++++++++++++++++++++++++++
+
+The MACCOR and Arbin readers can combine test data from multiple files into a single, contiguous dataset.
+Combining is built on two key assumptions:
+
+1. The files passed to ``read_dataset`` are in chronological order.
+2. The battery is at rest in any period between testing files.
+   The dataset reader will insert rows with zero current
+   if the current in the first or last measurement of a file is nonzero.
+
 Writing Data
 ------------