Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions battdat/consistency/time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Check for problems across the columns which describe time"""
from dataclasses import dataclass
from datetime import datetime
from typing import List

import numpy as np

from .base import ConsistencyChecker
from ..data import BatteryDataset


@dataclass
class TestTimeVsTimeChecker(ConsistencyChecker):
    """Ensure that the test time and timestamp columns agree

    Verify that the difference between the first and current row
    for the ``test_time`` (time elapsed since the beginning of cycling)
    and ``time`` (clock datetime) columns agree.
    """

    max_inconsistency: float = 0.1
    """Maximum inconsistency between timestamp and test time (s)"""

    def check(self, dataset: BatteryDataset) -> List[str]:
        output = []
        for name, subset in dataset.tables.items():
            # Only applicable to tables which carry both time columns
            if 'time' not in subset.columns or 'test_time' not in subset.columns:
                continue

            # Ensure that the elapsed time measured by each column agrees:
            #  normalize both to their own minimum so they share a zero point
            test_time_normed = subset['test_time'] - subset['test_time'].min()
            timestamp_normed = subset['time'] - subset['time'].min()
            diffs = np.abs(test_time_normed - timestamp_normed)
            max_diff = diffs.max()
            if max_diff > self.max_inconsistency:
                # Report the row with the worst disagreement
                idx_max = int(np.argmax(diffs))
                date_max = datetime.fromtimestamp(subset['time'].iloc[idx_max])
                time_max = subset['test_time'].iloc[idx_max]
                output.append(f'Test times and timestamps in dataset "{name}" differ by {max_diff:.1e} seconds in row {idx_max}.'
                              f' test_time={int(time_max)} s, time={date_max}')

        return output
8 changes: 3 additions & 5 deletions battdat/io/arbin.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ def group(self, files: Union[str, List[str]], directories: List[str] = None,
if file.lower().endswith('.csv'):
yield file

def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
start_time: float = 0) -> pd.DataFrame:
def read_file(self, file: str) -> pd.DataFrame:

# Read the file and rename the file
df = pd.read_csv(file)
Expand All @@ -32,10 +31,9 @@ def read_file(self, file: str, file_number: int = 0, start_cycle: int = 0,
df_out = pd.DataFrame()

# Convert the column names
df_out['cycle_number'] = df['Cycle_Index'] + start_cycle - df['Cycle_Index'].min()
df_out['cycle_number'] = df['Cycle_Index'] - df['Cycle_Index'].min()
df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
df_out['file_number'] = file_number # df_out['cycle_number']*0
df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0] + start_time, dtype=float)
df_out['test_time'] = np.array(df['test_time'] - df['test_time'][0], dtype=float)
df_out['current'] = df['Current'] # TODO (wardlt): Check this!?
df_out['temperature'] = df['Temperature']
df_out['internal_resistance'] = df['Internal_Resistance']
Expand Down
62 changes: 44 additions & 18 deletions battdat/io/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from battdat.data import BatteryDataset
from battdat.schemas import BatteryMetadata
from battdat.schemas.column import ChargingState

PathLike = Union[str, Path]

Expand Down Expand Up @@ -96,20 +97,13 @@ class CycleTestReader(DatasetFileReader):
Adds logic for reading cycling time series from a list of files.
"""

def read_file(self,
file: str,
file_number: int = 0,
start_cycle: int = 0,
start_time: int = 0) -> pd.DataFrame:
def read_file(self, file: str) -> pd.DataFrame:
"""Generate a DataFrame containing the data in this file

The dataframe will be in our standard format

Args:
file: Path to the file
file_number: Number of file, in case the test is spread across multiple files
start_cycle: Index to use for the first cycle, in case test is spread across multiple files
start_time: Test time to use for the start of the test, in case test is spread across multiple files

Returns:
Dataframe containing the battery data in a standard format
Expand All @@ -127,21 +121,53 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
DataFrame containing the information from all files
"""

# Initialize counters for the cycle numbers, etc., Used to determine offsets for the files read
start_cycle = 0
start_time = 0

# Read the data for each file
# Keep track of the ending index and ending time
output_dfs = []
for file_number, file in enumerate(group):
# Read the file
df_out = self.read_file(file, file_number, start_cycle, start_time)
output_dfs.append(df_out)
df_out = self.read_file(file)
df_out['file_number'] = file_number

# Adjust the test time and cycle for subsequent files
if len(output_dfs) > 0:
last_row = output_dfs[-1].iloc[-1]

# Determine the length of rest between last file and current
rest_between_files = 0 # Assume duplicate points if no data are available
if 'time' in last_row and 'time' in df_out:
rest_between_files = max(df_out['time'].iloc[0] - last_row['time'], 0)

# Increment the test time such that it continues from the last file
df_out['test_time'] += last_row['test_time'] + rest_between_files

# Ensure current is zero if the rest between files is nonzero
if rest_between_files != 0 and (last_row['current'] != 0 or df_out['current'].iloc[0] != 0):
# Assume the rest occurs a millisecond later
new_last_row = output_dfs[-1].iloc[-1:].copy()
new_last_row['test_time'] += 1e-3
new_last_row['current'] = 0
if 'time' in new_last_row:
new_last_row['time'] += 1e-3
if 'state' in new_last_row:
new_last_row['state'] = ChargingState.hold
output_dfs[-1] = pd.concat([output_dfs[-1], new_last_row], ignore_index=True)

# Assume the rest ends a millisecond before the new cycle starts
new_first_row = df_out.iloc[:1].copy()
new_first_row['test_time'] -= 1e-3
new_first_row['current'] = 0.
if 'time' in new_first_row:
new_first_row['time'] -= 1e-3
if 'state' in new_first_row:
df_out['state'] = ChargingState.hold
df_out = pd.concat([new_first_row, df_out], ignore_index=True)

# Adjust the cycle number, if included
# Assume the new file starts a new cycle
if 'cycle_number' in df_out.columns and 'cycle_number' in last_row:
df_out['cycle_number'] += 1 + int(last_row['cycle_number'])

# Increment the start cycle and time to determine starting point of next file
start_cycle += df_out['cycle_number'].max() - df_out['cycle_number'].min() + 1
start_time = df_out['test_time'].max()
output_dfs.append(df_out)

# Combine the data from all files
df_out = pd.concat(output_dfs, ignore_index=True)
Expand Down
44 changes: 26 additions & 18 deletions battdat/io/maccor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Extractor for MACCOR"""
import re
import logging
import itertools
from dataclasses import dataclass
from datetime import datetime
Expand All @@ -17,6 +18,8 @@

_test_date_re = re.compile(r'Date of Test:\s+(\d{2}/\d{2}/\d{4})')

logger = logging.getLogger(__name__)


@dataclass
class MACCORReader(CycleTestReader, DatasetFileReader):
Expand All @@ -26,10 +29,16 @@ class MACCORReader(CycleTestReader, DatasetFileReader):
The :meth:`group` operation will consolidate files such that all with
the same prefix (i.e., everything except the numerals in the extension)
are treated as part of the same experiment.
"""

ignore_time: bool = False
"""Ignore the the time column, which can be problematic."""
MACCOR files include both a test time relative to the start of testing
and a timestamp following the clock time.
This parser only assumes the test time to be correct because the timestamps
are nontrivial to rely upon, as they may be non-monotonic due to
changes to the computer's clock.
Test times are always monotonic.
The timestamps are generated based on the timestamp of the first row and
the change in test time.
"""

def group(self, files: Union[str, List[str]], directories: List[str] = None,
context: dict = None) -> Iterator[Tuple[str, ...]]:
Expand All @@ -50,7 +59,7 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter
# Verify the cells are ordered by test date
start_dates = []
for file in group:
with open(file, 'r') as fp:
with open(file, 'r', encoding='latin1') as fp:
header = fp.readline()
test_date = _test_date_re.findall(header)[0]
start_dates.append(datetime.strptime(test_date, '%m/%d/%Y'))
Expand All @@ -62,11 +71,10 @@ def read_dataset(self, group: Sequence[PathLike] = (), metadata: Optional[Batter

return super().read_dataset(group, metadata)

def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
start_time: int = 0) -> pd.DataFrame:
def read_file(self, file: PathLike) -> pd.DataFrame:

# Pull the test date from the first line of the file
with open(file, 'r') as fp:
with open(file, 'r', encoding='latin1') as fp:
header = fp.readline()
test_date = _test_date_re.findall(header)[0]

Expand All @@ -78,30 +86,30 @@ def read_file(self, file: PathLike, file_number: int = 0, start_cycle: int = 0,
df_out = pd.DataFrame()

# fill in new dataframe
df_out['cycle_number'] = df['Cyc#'] + start_cycle - df['Cyc#'].min()
df_out['cycle_number'] = df['Cyc#'] - df['Cyc#'].min()
df_out['cycle_number'] = df_out['cycle_number'].astype('int64')
df_out['file_number'] = file_number # df_out['cycle_number']*0
df_out['test_time'] = df['Test (Min)'] * 60 - df['Test (Min)'].iloc[0] * 60 + start_time
df_out['test_time'] = (df['Test (Min)'] - df['Test (Min)'].iloc[0]) * 60
df_out['state'] = df['State']
df_out['current'] = df['Amps']
df_out['current'] = np.where(df['State'] == 'D', -1 * df_out['current'], df_out['current'])
df_out['voltage'] = df['Volts']

if not self.ignore_time:
def _parse_time(time: str) -> float:
if '/' in time:
return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
else:
return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()
# Parse the timestamps
def _parse_time(time: str) -> float:
if '/' in time:
return datetime.strptime(time, '%m/%d/%Y %H:%M:%S').timestamp()
else:
return datetime.strptime(f'{test_date} {time}', '%m/%d/%Y %H:%M:%S').timestamp()

df_out['time'] = df['DPt Time'].apply(_parse_time)
start_time = _parse_time(df['DPt Time'].iloc[0])
df_out['time'] = start_time + df_out['test_time']

# 0 is rest, 1 is charge, -1 is discharge
df_out.loc[df_out['state'] == 'R', 'state'] = ChargingState.hold
df_out.loc[df_out['state'] == 'C', 'state'] = ChargingState.charging
df_out.loc[df_out['state'] == 'D', 'state'] = ChargingState.discharging
df_out.loc[df_out['state'].apply(lambda x: x not in {'R', 'C', 'D'}), 'state'] = ChargingState.unknown

df_out['voltage'] = df['Volts']
df_out = drop_cycles(df_out)
AddSteps().enhance(df_out)
AddMethod().enhance(df_out)
Expand Down
2 changes: 1 addition & 1 deletion dev/environment.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Conda environment file
name: batdata
name: battdat
channels:
- defaults
dependencies:
Expand Down
8 changes: 8 additions & 0 deletions docs/source/consistency.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,11 @@ Current (``b.consistency.current``)
:members:
:undoc-members:
:show-inheritance:

Time (``b.consistency.time``)
------------------------------------

.. automodule:: battdat.consistency.time
:members:
:undoc-members:
:show-inheritance:
13 changes: 11 additions & 2 deletions docs/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,17 @@ find files:
group = next(extractor.identify_files('./example-path/'))
dataset = extractor.read_dataset(group)

The :ref:`type of output dataset <type-table>` is defined by the :attr:`~battdat.io.base.DatasetFileReader.output_class` attribute.
Most uses of readers do not require modifying this attribute.

Reading Data from Multiple Files
++++++++++++++++++++++++++++++++

The MACCOR and Arbin readers can combine test data from multiple files into the same, contiguous dataset.
Combining is built on two key assumptions:

1. The files passed to ``read_dataset`` are in chronological order.
2. The battery is at rest in any period between testing files.
The dataset reader will insert rows with zero current
if the current in the first or last measurement of a file is nonzero.

Writing Data
------------
Expand Down
33 changes: 33 additions & 0 deletions tests/consistency/test_times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Test for inconsistencies in time columns"""
from datetime import datetime

import numpy as np
import pandas as pd
from pytest import fixture

from battdat.consistency.time import TestTimeVsTimeChecker
from battdat.data import BatteryDataset


@fixture()
def example_dataset():
    """Build a small cell dataset whose ``time`` and ``test_time`` columns agree exactly"""
    n_rows = 8
    test_time = np.arange(n_rows, dtype=float)
    start = datetime.now().timestamp()
    df = pd.DataFrame({
        'voltage': np.ones(n_rows),
        'current': np.zeros(n_rows),
        'test_time': test_time,
    })
    # Timestamps track the test time from "now", so the two columns are perfectly consistent
    df['time'] = start + df['test_time']
    data = BatteryDataset.make_cell_dataset(raw_data=df, cycle_stats=pd.DataFrame({'cycle_number': [0]}))
    data.validate()
    return data


def test_correct_inter(example_dataset):
    """No error for consistent columns; one error after shifting part of the timestamps"""
    checker = TestTimeVsTimeChecker()
    assert len(checker.check(example_dataset)) == 0

    # Shift the latter half of the timestamps so the columns disagree by 0.2 s.
    # Write through .loc rather than chained `['time'].iloc[4:] +=`, which does not
    # propagate back to the DataFrame under pandas copy-on-write (default in pandas 3.x)
    example_dataset.raw_data.loc[4:, 'time'] += 0.2
    errors = checker.check(example_dataset)
    assert len(errors) == 1
    assert '2.0e-01 seconds' in errors[0]
    assert 'row 4. test_time=4 s' in errors[0]
16 changes: 8 additions & 8 deletions tests/files/maccor_example.002
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Today's Date 04/04/2016 Date of Test: 04/01/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 16:05:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 16:05:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 16:06:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 16:06:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 16:06:41
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.0000000000 3.30678264 R 0 23:59:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 23:59:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.0000000000 3.30571450 R 1 23:59:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 00:00:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 00:00:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.0000000000 3.30601968 R 1 00:00:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.0000000000 3.30586709 R 1 00:00:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.0000000000 3.30617227 R 1 00:00:41
10 changes: 10 additions & 0 deletions tests/files/maccor_example.charge.001
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Today's Date 04/04/2016 Date of Test: 03/31/2016 Filename: C:\Data\MIMS\Backup\ARGONNE #20\SET-LN3024-104-1a.001 Procedure: ABRHV-NCM523-Form-4p1.000NCM 523, Formation Test at 0.1C; from 3.0 to 4.1V Comment/Barcode: SET-LN3024-104, Targray NCM811 [LN2086-32-4] vs. Li metal, 3.0 to 4.3V, Formation, C-rate= 2.4 mAh, Data collected for electrode matching (HEHV)
Rec# Cyc# Step Test (Min) Step (Min) Amp-hr Watt-hr Amps Volts State ES DPt Time
1 0 1 0.0000 0.0000 0.0000000000 0.0000000000 0.1000000000 3.30678264 C 0 03/31/2016 16:05:31
2 0 1 0.1667 0.1667 0.0000000000 0.0000000000 0.1000000000 3.30571450 C 1 03/31/2016 16:05:41
3 0 1 0.3333 0.3333 0.0000000000 0.0000000000 0.1000000000 3.30571450 C 1 03/31/2016 16:05:51
4 0 1 0.5000 0.5000 0.0000000000 0.0000000000 0.1000000000 3.30586709 C 1 03/31/2016 16:06:01
5 0 1 0.6667 0.6667 0.0000000000 0.0000000000 0.1000000000 3.30601968 C 1 03/31/2016 16:06:11
6 0 1 0.8333 0.8333 0.0000000000 0.0000000000 0.1000000000 3.30601968 C 1 03/31/2016 16:06:21
7 0 1 1.0000 1.0000 0.0000000000 0.0000000000 0.1000000000 3.30586709 C 1 03/31/2016 16:06:31
8 0 1 1.1667 1.1667 0.0000000000 0.0000000000 0.1000000000 3.30617227 C 1 03/31/2016 16:06:41
Loading