From 0591afa42de6ffb5e2ab5572ae13d789c1d59994 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 1 May 2025 16:20:44 -0400 Subject: [PATCH 01/14] Add support for reading .wfdb archive files. --- wfdb/io/_signal.py | 20 ++++++++--- wfdb/io/archive.py | 85 ++++++++++++++++++++++++++++++++++++++++++++++ wfdb/io/record.py | 51 ++++++++++++++++++---------- 3 files changed, 134 insertions(+), 22 deletions(-) create mode 100644 wfdb/io/archive.py diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 6bfafdb5..3bbfabf2 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1120,6 +1120,7 @@ def _rd_segment( no_file=False, sig_data=None, return_res=64, + wfdb_archive=None, ): """ Read the digital samples from a single segment record's associated @@ -1264,6 +1265,7 @@ def _rd_segment( sampto=sampto, no_file=no_file, sig_data=sig_data, + wfdb_archive=wfdb_archive, ) # Copy over the wanted signals @@ -1288,6 +1290,7 @@ def _rd_dat_signals( sampto, no_file=False, sig_data=None, + wfdb_archive=None, ): """ Read all signals from a WFDB dat file. @@ -1390,7 +1393,8 @@ def _rd_dat_signals( ) else: data_to_read = _rd_dat_file( - file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples + file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, + wfdb_archive=wfdb_archive ) if extra_flat_samples: @@ -1630,7 +1634,8 @@ def _required_byte_num(mode, fmt, n_samp): return int(n_bytes) -def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp): +def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, + wfdb_archive=None): """ Read data from a dat file, either local or remote, into a 1d numpy array. @@ -1688,14 +1693,19 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp): element_count = n_samp byte_count = n_samp * BYTES_PER_SAMPLE[fmt] - # Local or cloud dat file - if pn_dir is None: + # Local file or .wfdb archive + if wfdb_archive is not None: + with wfdb_archive.open(file_name, "rb") as fp: + fp.seek(start_byte) + sig_data = util.fromfile( + fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count + ) + elif pn_dir is None: with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp: fp.seek(start_byte) sig_data = util.fromfile( fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count ) - # Stream dat file from PhysioNet else: # check to make sure a cloud path isn't being passed under pn_dir diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py new file mode 100644 index 00000000..dd37d383 --- /dev/null +++ b/wfdb/io/archive.py @@ -0,0 +1,85 @@ +import os +import zipfile +from contextlib import contextmanager + +_archive_cache = {} + + +class WFDBArchive: + """ + Helper class for working with WFDB .wfdb ZIP archives. + + Used only if: + - .wfdb is included in the record_name explicitly, or + - .wfdb is passed directly to the file loading function. + """ + def __init__(self, record_name): + """ + Initialize a WFDBArchive for a given record name (without extension). + + record_name : str + The base name of the archive, without the .wfdb extension. + """ + self.record_name = record_name + self.archive_path = f"{record_name}.wfdb" + + if not os.path.exists(self.archive_path): + raise FileNotFoundError(f"Archive not found: {self.archive_path}") + if not zipfile.is_zipfile(self.archive_path): + raise ValueError(f"Invalid WFDB archive: {self.archive_path}") + self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") + + def exists(self, filename): + """ + Check if a file exists in the archive. + """ + return self.zipfile and filename in self.zipfile.namelist() + + @contextmanager + def open(self, filename, mode="r"): + """ + Open a file, either from disk or from the archive. + Mode 'r' (text) or 'rb' (binary) supported. + """ + if self.zipfile and filename in self.zipfile.namelist(): + with self.zipfile.open(filename, 'r') as f: + if "b" in mode: + yield f + else: + import io + yield io.TextIOWrapper(f) + else: + raise FileNotFoundError( + f"Could not find '{filename}' as loose file or inside '{self.archive_path}'." + ) + + def close(self): + """ + Close the archive if open. + """ + if self.zipfile: + self.zipfile.close() + + def create_archive(self, file_list, output_path=None): + """ + Create a .wfdb archive containing the specified list of files. + If output_path is not specified, uses self.archive_path. + """ + output_path = output_path or self.archive_path + with zipfile.ZipFile(output_path, mode="w") as zf: + for file in file_list: + compress = ( + zipfile.ZIP_STORED + if file.endswith((".hea", ".hea.json", ".hea.yml")) + else zipfile.ZIP_DEFLATED + ) + zf.write(file, arcname=os.path.basename(file), compress_type=compress) + + +def get_archive(record_base_name): + """ + Get or create a WFDBArchive for the given record base name. + """ + if record_base_name not in _archive_cache: + _archive_cache[record_base_name] = WFDBArchive(record_base_name) + return _archive_cache[record_base_name] diff --git a/wfdb/io/record.py b/wfdb/io/record.py index e611f364..29aa5639 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -11,6 +11,7 @@ from wfdb.io import _header from wfdb.io import _signal from wfdb.io import _url +from wfdb.io.archive import get_archive from wfdb.io import download from wfdb.io import header from wfdb.io import util @@ -2030,25 +2031,41 @@ def rdrecord( channels=[1, 3]) """ - dir_name, base_record_name = os.path.split(record_name) - # Update the dir_name using abspath unless it is a cloud path - if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): - dir_name = os.path.abspath(dir_name) + is_wfdb_archive = record_name.endswith(".wfdb") - # Read the header fields - if pn_dir is not None: - # check to make sure a cloud path isn't being passed under pn_dir - if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS): - raise ValueError( - "Cloud paths should be passed under record_name, not under pn_dir" - ) - if "." not in pn_dir: - dir_list = pn_dir.split("/") - pn_dir = posixpath.join( - dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] - ) + if is_wfdb_archive: + record_base = record_name[:-5] # remove ".wfdb" + archive = get_archive(record_base) + hea_file = os.path.basename(record_base) + ".hea" + + with archive.open(hea_file, "r") as f: + record = Record() + record.wfdb_archive = archive + record._read_header(f.read()) + + # Set dir_name to the archive base (needed for _rd_segment) + dir_name = os.path.dirname(record_base) + + else: + dir_name, base_record_name = os.path.split(record_name) + # Update the dir_name using abspath unless it is a cloud path + if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): + dir_name = os.path.abspath(dir_name) + + # Read the header fields + if pn_dir is not None: + # check to make sure a cloud path isn't being passed under pn_dir + if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS): + raise ValueError( + "Cloud paths should be passed under record_name, not under pn_dir" + ) + if "." not in pn_dir: + dir_list = pn_dir.split("/") + pn_dir = posixpath.join( + dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] + ) - record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False) + record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False) # Set defaults for sampto and channels input variables if sampto is None: From a08561c22da773e530159ee69dd9dcdde2397060 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Fri, 2 May 2025 23:07:29 -0400 Subject: [PATCH 02/14] write file --- wfdb/io/_signal.py | 58 ++++++++++++++++++++++++++++++++++------------ wfdb/io/archive.py | 18 ++++++++++++++ wfdb/io/record.py | 13 ++++++++--- 3 files changed, 71 insertions(+), 18 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 3bbfabf2..e7c1dfd8 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1,4 +1,5 @@ import math +import io import os import posixpath import sys @@ -120,7 +121,7 @@ class SignalMixin(object): """ - def wr_dats(self, expanded, write_dir): + def wr_dats(self, expanded, write_dir, wfdb_archive=None): """ Write all dat files associated with a record expanded=True to use e_d_signal instead of d_signal. @@ -132,6 +133,8 @@ def wr_dats(self, expanded, write_dir): the `d_signal` attribute (False). write_dir : str The directory to write the output file to. + wfdb_archive : WFDBArchive, optional + If set, writes to a .wfdb archive instead of the filesystem. Returns ------- @@ -160,7 +163,8 @@ def wr_dats(self, expanded, write_dir): self.check_sig_cohesion([], expanded) # Write each of the specified dat files - self.wr_dat_files(expanded=expanded, write_dir=write_dir) + self.wr_dat_files(expanded=expanded, write_dir=write_dir, + wfdb_archive=wfdb_archive) def check_sig_cohesion(self, write_fields, expanded): """ @@ -958,7 +962,7 @@ def calc_checksum(self, expanded=False): cs = [int(c) for c in cs] return cs - def wr_dat_files(self, expanded=False, write_dir=""): + def wr_dat_files(self, expanded=False, write_dir="", wfdb_archive=None): """ Write each of the specified dat files. @@ -969,6 +973,8 @@ def wr_dat_files(self, expanded=False, write_dir=""): the `d_signal` attribute (False). write_dir : str, optional The directory to write the output file to. + wfdb_archive : WFDBArchive, optional + If set, writes to a .wfdb archive instead of the local filesystem. Returns ------- @@ -1003,6 +1009,7 @@ def wr_dat_files(self, expanded=False, write_dir=""): [self.e_d_signal[ch] for ch in dat_channels[fn]], [self.samps_per_frame[ch] for ch in dat_channels[fn]], write_dir=write_dir, + wfdb_archive=wfdb_archive, ) else: dsig = self.d_signal @@ -1013,6 +1020,7 @@ def wr_dat_files(self, expanded=False, write_dir=""): dsig[:, dat_channels[fn][0] : dat_channels[fn][-1] + 1], dat_offsets[fn], write_dir=write_dir, + wfdb_archive=wfdb_archive, ) def smooth_frames(self, sigtype="physical"): @@ -2322,6 +2330,7 @@ def wr_dat_file( e_d_signal=None, samps_per_frame=None, write_dir="", + wfdb_archive=None, ): """ Write a dat file. All bytes are written one at a time to avoid @@ -2519,16 +2528,30 @@ def wr_dat_file( else: raise ValueError(f"unknown format ({fmt})") - sf = soundfile.SoundFile( - file_path, - mode="w", - samplerate=96000, - channels=n_sig, - subtype=subtype, - format="FLAC", - ) - with sf: - sf.write(d_signal) + if wfdb_archive: + with io.BytesIO() as f: + with soundfile.SoundFile( + f, + mode="w", + samplerate=96000, + channels=n_sig, + subtype=subtype, + format="FLAC", # required for file-like + ) as sf: + sf.write(d_signal) + wfdb_archive.write(os.path.basename(file_name), f.getvalue()) + return + else: + sf = soundfile.SoundFile( + file_path, + mode="w", + samplerate=96000, + channels=n_sig, + subtype=subtype, + format="FLAC", + ) + with sf: + sf.write(d_signal) return else: @@ -2549,8 +2572,13 @@ def wr_dat_file( b_write = np.append(np.zeros(byte_offset, dtype="uint8"), b_write) # Write the bytes to the file - with open(file_path, "wb") as f: - b_write.tofile(f) + if wfdb_archive: + with io.BytesIO() as f: + b_write.tofile(f) + wfdb_archive.write(os.path.basename(file_name), f.getvalue()) + else: + with open(file_path, "wb") as f: + b_write.tofile(f) def describe_list_indices(full_list): diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index dd37d383..ac915384 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -1,4 +1,5 @@ import os +import shutil import zipfile from contextlib import contextmanager @@ -60,6 +61,23 @@ def close(self): if self.zipfile: self.zipfile.close() + def write(self, filename, data): + """ + Write binary data to the archive (replaces if already exists). + """ + # Write to a new temporary archive + tmp_path = self.archive_path + ".tmp" + with zipfile.ZipFile(self.archive_path, mode="r") as zin: + with zipfile.ZipFile(tmp_path, mode="w") as zout: + for item in zin.infolist(): + if item.filename != filename: + zout.writestr(item, zin.read(item.filename)) + zout.writestr(filename, data) + + # Replace the original archive + shutil.move(tmp_path, self.archive_path) + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") + def create_archive(self, file_list, output_path=None): """ Create a .wfdb archive containing the specified list of files. diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 29aa5639..707d0d95 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -905,7 +905,7 @@ def __eq__(self, other, verbose=False): return True - def wrsamp(self, expanded=False, write_dir=""): + def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): """ Write a WFDB header file and any associated dat files from this object. @@ -939,7 +939,8 @@ def wrsamp(self, expanded=False, write_dir=""): if self.n_sig > 0: # Perform signal validity and cohesion checks, and write the # associated dat files. - self.wr_dats(expanded=expanded, write_dir=write_dir) + self.wr_dats(expanded=expanded, write_dir=write_dir, + wfdb_archive=wfdb_archive) def _arrange_fields(self, channels, sampfrom, smooth_frames): """ @@ -2878,6 +2879,7 @@ def wrsamp( base_date=None, base_datetime=None, write_dir="", + archive=False, ): """ Write a single segment WFDB record, creating a WFDB header file and any @@ -2966,6 +2968,7 @@ def wrsamp( # Check for valid record name if "." in record_name: raise Exception("Record name must not contain '.'") + # Check input field combinations signal_list = [p_signal, d_signal, e_p_signal, e_d_signal] signals_set = sum(1 for var in signal_list if var is not None) @@ -3064,8 +3067,12 @@ def wrsamp( else: expanded = False + wfdb_archive = None + if archive: + wfdb_archive = get_archive(os.path.join(write_dir, record_name)) + # Write the record files - header and associated dat - record.wrsamp(write_dir=write_dir, expanded=expanded) + record.wrsamp(write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive) def dl_database( From a4608771c208151be86408a2c3a1620af59b9636 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Sat, 3 May 2025 11:42:47 -0400 Subject: [PATCH 03/14] Add support for writing .wfdb files. --- wfdb/io/_header.py | 35 +++++++++++++++++++++++++++-------- wfdb/io/_signal.py | 2 +- wfdb/io/archive.py | 45 +++++++++++++++++++++++++++++++++------------ wfdb/io/record.py | 42 +++++++++++++++++++++++++++--------------- 4 files changed, 88 insertions(+), 36 deletions(-) diff --git a/wfdb/io/_header.py b/wfdb/io/_header.py index 0d420521..97224c37 100644 --- a/wfdb/io/_header.py +++ b/wfdb/io/_header.py @@ -1,4 +1,5 @@ import datetime +import os from typing import Any, Dict, List, Optional, Sequence, Tuple import numpy as np @@ -278,7 +279,7 @@ def set_defaults(self): for f in sfields: self.set_default(f) - def wrheader(self, write_dir="", expanded=True): + def wrheader(self, write_dir="", expanded=True, wfdb_archive=None): """ Write a WFDB header file. The signals are not used. Before writing: @@ -325,7 +326,8 @@ def wrheader(self, write_dir="", expanded=True): self.check_field_cohesion(rec_write_fields, list(sig_write_fields)) # Write the header file using the specified fields - self.wr_header_file(rec_write_fields, sig_write_fields, write_dir) + self.wr_header_file(rec_write_fields, sig_write_fields, write_dir, + wfdb_archive=wfdb_archive) def get_write_fields(self): """ @@ -508,7 +510,8 @@ def check_field_cohesion(self, rec_write_fields, sig_write_fields): "Each file_name (dat file) specified must have the same byte offset" ) - def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir): + def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir, + wfdb_archive=None): """ Write a header file using the specified fields. Converts Record attributes into appropriate WFDB format strings. @@ -522,6 +525,8 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir): being equal to a list of channels to write for each field. write_dir : str The directory in which to write the header file. + wfdb_archive : WFDBArchive, optional + If provided, write the header into this archive instead of to disk. Returns ------- @@ -583,7 +588,13 @@ def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir): comment_lines = ["# " + comment for comment in self.comments] header_lines += comment_lines - util.lines_to_file(self.record_name + ".hea", write_dir, header_lines) + header_str = "\n".join(header_lines) + "\n" + hea_filename = os.path.basename(self.record_name) + ".hea" + + if wfdb_archive: + wfdb_archive.write(hea_filename, header_str.encode("utf-8")) + else: + util.lines_to_file(hea_filename, write_dir, header_lines) class MultiHeaderMixin(BaseHeaderMixin): @@ -621,7 +632,7 @@ def set_defaults(self): for field in self.get_write_fields(): self.set_default(field) - def wrheader(self, write_dir=""): + def wrheader(self, write_dir="", wfdb_archive=None): """ Write a multi-segment WFDB header file. The signals or segments are not used. Before writing: @@ -655,7 +666,7 @@ def wrheader(self, write_dir=""): self.check_field_cohesion() # Write the header file using the specified fields - self.wr_header_file(write_fields, write_dir) + self.wr_header_file(write_fields, write_dir, wfdb_archive=wfdb_archive) def get_write_fields(self): """ @@ -733,7 +744,7 @@ def check_field_cohesion(self): "The sum of the 'seg_len' fields do not match the 'sig_len' field" ) - def wr_header_file(self, write_fields, write_dir): + def wr_header_file(self, write_fields, write_dir, wfdb_archive=None): """ Write a header file using the specified fields. @@ -744,6 +755,8 @@ def wr_header_file(self, write_fields, write_dir): and their dependencies. write_dir : str The output directory in which the header is written. + wfdb_archive : WFDBArchive, optional + If provided, write the header into this archive instead of to disk. Returns ------- @@ -779,7 +792,13 @@ def wr_header_file(self, write_fields, write_dir): comment_lines = ["# " + comment for comment in self.comments] header_lines += comment_lines - util.lines_to_file(self.record_name + ".hea", write_dir, header_lines) + header_str = "\n".join(header_lines) + "\n" + hea_filename = os.path.basename(self.record_name) + ".hea" + + if wfdb_archive: + wfdb_archive.write(hea_filename, header_str.encode("utf-8")) + else: + util.lines_to_file(hea_filename, write_dir, header_lines) def get_sig_segments(self, sig_name=None): """ diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index e7c1dfd8..f6c14cb4 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -2574,7 +2574,7 @@ def wr_dat_file( # Write the bytes to the file if wfdb_archive: with io.BytesIO() as f: - b_write.tofile(f) + f.write(b_write.tobytes()) wfdb_archive.write(os.path.basename(file_name), f.getvalue()) else: with open(file_path, "wb") as f: diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index ac915384..8fc22eb0 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -10,25 +10,42 @@ class WFDBArchive: """ Helper class for working with WFDB .wfdb ZIP archives. + If used for reading, the archive must already exist. + If used for writing, use mode='w' and call `write(...)` or `create_archive(...)`. + Used only if: - .wfdb is included in the record_name explicitly, or - .wfdb is passed directly to the file loading function. """ - def __init__(self, record_name): + def __init__(self, record_name, mode="r"): """ Initialize a WFDBArchive for a given record name (without extension). + Parameters + ---------- record_name : str - The base name of the archive, without the .wfdb extension. + The base name of the archive, without the .wfdb extension. + mode : str + 'r' for read (default), 'w' for write. """ self.record_name = record_name self.archive_path = f"{record_name}.wfdb" + self.zipfile = None + self.mode = mode + + if mode == "r": + if not os.path.exists(self.archive_path): + raise FileNotFoundError(f"Archive not found: {self.archive_path}") + if not zipfile.is_zipfile(self.archive_path): + raise ValueError(f"Invalid WFDB archive: {self.archive_path}") + self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") - if not os.path.exists(self.archive_path): - raise FileNotFoundError(f"Archive not found: {self.archive_path}") - if not zipfile.is_zipfile(self.archive_path): - raise ValueError(f"Invalid WFDB archive: {self.archive_path}") - self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") + elif mode == "w": + # Initialize an empty archive on disk + if not os.path.exists(self.archive_path): + with zipfile.ZipFile(self.archive_path, mode="w"): + pass # Just create the file + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") def exists(self, filename): """ @@ -65,7 +82,12 @@ def write(self, filename, data): """ Write binary data to the archive (replaces if already exists). """ - # Write to a new temporary archive + if self.zipfile is None: + self.zipfile = zipfile.ZipFile(self.archive_path, mode="w") + self.zipfile.writestr(filename, data) + return + + # If already opened in read or append mode, use the replace-then-move trick tmp_path = self.archive_path + ".tmp" with zipfile.ZipFile(self.archive_path, mode="r") as zin: with zipfile.ZipFile(tmp_path, mode="w") as zout: @@ -73,8 +95,6 @@ def write(self, filename, data): if item.filename != filename: zout.writestr(item, zin.read(item.filename)) zout.writestr(filename, data) - - # Replace the original archive shutil.move(tmp_path, self.archive_path) self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") @@ -94,10 +114,11 @@ def create_archive(self, file_list, output_path=None): zf.write(file, arcname=os.path.basename(file), compress_type=compress) -def get_archive(record_base_name): +def get_archive(record_base_name, mode="r"): """ Get or create a WFDBArchive for the given record base name. """ if record_base_name not in _archive_cache: - _archive_cache[record_base_name] = WFDBArchive(record_base_name) + _archive_cache[record_base_name] = WFDBArchive(record_base_name, + mode=mode) return _archive_cache[record_base_name] diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 707d0d95..1c286e68 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -935,11 +935,12 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): # Perform field validity and cohesion checks, and write the # header file. - self.wrheader(write_dir=write_dir, expanded=expanded) + self.wrheader(write_dir=write_dir, expanded=expanded, + wfdb_archive=wfdb_archive) if self.n_sig > 0: # Perform signal validity and cohesion checks, and write the # associated dat files. - self.wr_dats(expanded=expanded, write_dir=write_dir, + self.wr_dats(expanded=expanded, write_dir=write_dir, wfdb_archive=wfdb_archive) def _arrange_fields(self, channels, sampfrom, smooth_frames): @@ -1162,7 +1163,7 @@ def __init__( if not seg_len: self.seg_len = [segment.sig_len for segment in segments] - def wrsamp(self, write_dir=""): + def wrsamp(self, write_dir="", wfdb_archive=None): """ Write a multi-segment header, along with headers and dat files for all segments, from this object. @@ -1179,11 +1180,11 @@ def wrsamp(self, write_dir=""): """ # Perform field validity and cohesion checks, and write the # header file. - self.wrheader(write_dir=write_dir) + self.wrheader(write_dir=write_dir, wfdb_archive=wfdb_archive) # Perform record validity and cohesion checks, and write the # associated segments. for seg in self.segments: - seg.wrsamp(write_dir=write_dir) + seg.wrsamp(write_dir=write_dir, wfdb_archive=wfdb_archive) def _check_segment_cohesion(self): """ @@ -1828,7 +1829,11 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): """ dir_name, base_record_name = os.path.split(record_name) - file_name = f"{base_record_name}.hea" + + if not base_record_name.endswith(".hea"): + file_name = f"{base_record_name}.hea" + else: + file_name = base_record_name # If this is a cloud path, use posixpath to construct the path and fsspec to open file if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): @@ -2032,17 +2037,23 @@ def rdrecord( channels=[1, 3]) """ + wfdb_archive = None is_wfdb_archive = record_name.endswith(".wfdb") if is_wfdb_archive: record_base = record_name[:-5] # remove ".wfdb" - archive = get_archive(record_base) + wfdb_archive = get_archive(record_base) hea_file = os.path.basename(record_base) + ".hea" - with archive.open(hea_file, "r") as f: - record = Record() - record.wfdb_archive = archive - record._read_header(f.read()) + import tempfile + with wfdb_archive.open(hea_file, "r") as f: + header_str = f.read() + + with tempfile.NamedTemporaryFile("w+", suffix=".hea", delete=False) as tmpf: + tmpf.write(header_str) + tmpf.flush() + record = rdheader(tmpf.name) + record.wfdb_archive = wfdb_archive # Set dir_name to the archive base (needed for _rd_segment) dir_name = os.path.dirname(record_base) @@ -2168,6 +2179,7 @@ def rdrecord( no_file=no_file, sig_data=sig_data, return_res=return_res, + wfdb_archive=wfdb_archive, ) # Only 1 sample/frame, or frames are smoothed. Return uniform numpy array @@ -2879,7 +2891,7 @@ def wrsamp( base_date=None, base_datetime=None, write_dir="", - archive=False, + wfdb_archive=None, ): """ Write a single segment WFDB record, creating a WFDB header file and any @@ -3067,9 +3079,9 @@ def wrsamp( else: expanded = False - wfdb_archive = None - if archive: - wfdb_archive = get_archive(os.path.join(write_dir, record_name)) + if wfdb_archive: + wfdb_archive = get_archive(os.path.join(write_dir, record_name), + mode="w") # Write the record files - header and associated dat record.wrsamp(write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive) From 86ac211f0f085a80962fd7c7f569199a4ee1431e Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Sat, 3 May 2025 11:54:28 -0400 Subject: [PATCH 04/14] Add tests. --- tests/test_archive.py | 189 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 tests/test_archive.py diff --git a/tests/test_archive.py b/tests/test_archive.py new file mode 100644 index 00000000..67edf890 --- /dev/null +++ b/tests/test_archive.py @@ -0,0 +1,189 @@ +import os +import numpy as np +import pytest +import tempfile +import zipfile + +from wfdb import rdrecord, wrsamp +from wfdb.io.archive import WFDBArchive + + +np.random.seed(1234) + + +@pytest.fixture +def temp_record(): + """ + Create a temporary WFDB record and archive for testing. + + This fixture generates a synthetic 2-channel signal, writes it to a temporary + directory using `wrsamp`, then creates an uncompressed `.wfdb` archive (ZIP container) + containing the `.hea` and `.dat` files. The archive is used to test read/write + round-trip support for WFDB archives. + + Yields + ------ + dict + A dictionary containing: + - 'record_name': Path to the record base name (without extension). + - 'archive_path': Full path to the created `.wfdb` archive. + - 'original_signal': The original NumPy array of the signal. + - 'fs': The sampling frequency. + """ + with tempfile.TemporaryDirectory() as tmpdir: + record_basename = "testrecord" + fs = 250 + sig_len = 1000 + sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32) + + # Write into tmpdir with record name only + wrsamp( + record_name=record_basename, + fs=fs, + units=["mV", "mV"], + sig_name=["I", "II"], + p_signal=sig, + fmt=["24", "24"], + adc_gain=[200.0, 200.0], + baseline=[0, 0], + write_dir=tmpdir, + ) + + # Construct full paths for archive creation + hea_path = os.path.join(tmpdir, record_basename + ".hea") + dat_path = os.path.join(tmpdir, record_basename + ".dat") + archive_path = os.path.join(tmpdir, record_basename + ".wfdb") + + WFDBArchive.create_archive( + None, + file_list=[hea_path, dat_path], + output_path=archive_path, + ) + + yield { + "record_name": os.path.join(tmpdir, record_basename), + "archive_path": archive_path, + "original_signal": sig, + "fs": fs, + } + + +def test_wfdb_archive_inline_round_trip(): + """ + There are two ways of creating an archive: + + 1. Inline archive creation via wrsamp(..., wfdb_archive=...) + This creates the .hea and .dat files directly inside the archive as part of the record writing step. + + 2. Two-step creation via wrsamp(...) followed by WFDBArchive.create_archive(...) + This writes regular WFDB files to disk, which are then added to an archive container afterward. + + Test round-trip read/write using inline archive creation via `wrsamp(..., wfdb_archive=...)`. + """ + with tempfile.TemporaryDirectory() as tmpdir: + record_basename = "testrecord" + record_path = os.path.join(tmpdir, record_basename) + archive_path = record_path + ".wfdb" + fs = 250 + sig_len = 1000 + sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32) + + # Create archive inline + wfdb_archive = WFDBArchive(record_basename, mode="w") + wrsamp( + record_name=record_basename, + fs=fs, + units=["mV", "mV"], + sig_name=["I", "II"], + p_signal=sig, + fmt=["24", "24"], + adc_gain=[200.0, 200.0], + baseline=[0, 0], + write_dir=tmpdir, + wfdb_archive=wfdb_archive, + ) + wfdb_archive.close() + + assert os.path.exists(archive_path), "Archive was not created" + + # Read back from archive + record = rdrecord(archive_path) + + assert record.fs == fs + assert record.n_sig == 2 + assert record.p_signal.shape == sig.shape + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose(record.p_signal, sig, rtol=1e-2, atol=3e-3) + + +def test_wfdb_archive_round_trip(temp_record): + record_name = temp_record["record_name"] + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + fs = temp_record["fs"] + + assert os.path.exists(archive_path), "Archive was not created" + + record = rdrecord(archive_path) + + assert record.fs == fs + assert record.n_sig == 2 + assert record.p_signal.shape == original_signal.shape + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose(record.p_signal, original_signal, rtol=1e-2, + atol=3e-3) + + +def test_archive_read_subset_channels(temp_record): + """ + Test reading a subset of channels from an archive. + """ + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + + record = rdrecord(archive_path, channels=[1]) + + assert record.n_sig == 1 + assert record.p_signal.shape[0] == original_signal.shape[0] + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose(record.p_signal[:, 0], original_signal[:, 1], + rtol=1e-2, atol=3e-3) + + +def test_archive_read_partial_samples(temp_record): + """ + Test reading a sample range from the archive. + """ + archive_path = temp_record["archive_path"] + original_signal = temp_record["original_signal"] + + start, stop = 100, 200 + record = rdrecord(archive_path, sampfrom=start, sampto=stop) + + assert record.p_signal.shape == (stop - start, original_signal.shape[1]) + np.testing.assert_allclose(record.p_signal, original_signal[start:stop], rtol=1e-2, atol=1e-3) + + +def test_archive_missing_file_error(temp_record): + """ + Ensure appropriate error is raised when expected files are missing from the archive. + """ + archive_path = temp_record["archive_path"] + + # Remove one file from archive (e.g. the .dat file) + with zipfile.ZipFile(archive_path, "a") as zf: + zf_name = [name for name in zf.namelist() if name.endswith(".dat")][0] + zf.fp = None # Prevent auto-close bug in some zipfile implementations + os.rename(archive_path, archive_path + ".bak") + with zipfile.ZipFile(archive_path + ".bak", "r") as zin, \ + zipfile.ZipFile(archive_path, "w") as zout: + for item in zin.infolist(): + if not item.filename.endswith(".dat"): + zout.writestr(item, zin.read(item.filename)) + os.remove(archive_path + ".bak") + + with pytest.raises(FileNotFoundError, match=".*\.dat.*"): + rdrecord(archive_path) From 76f4794dff07bd9639f03f9d32f60a92ce0e7a9f Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Sat, 3 May 2025 15:42:59 -0400 Subject: [PATCH 05/14] Fix style. --- tests/test_archive.py | 25 +++++++++++++---------- wfdb/io/_header.py | 16 +++++++++------ wfdb/io/_signal.py | 23 ++++++++++++++-------- wfdb/io/archive.py | 19 ++++++++++++------ wfdb/io/record.py | 46 +++++++++++++++++++++++++------------------ 5 files changed, 80 insertions(+), 49 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index 67edf890..a5880a58 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -1,13 +1,13 @@ import os -import numpy as np -import pytest import tempfile import zipfile +import numpy as np +import pytest + from wfdb import rdrecord, wrsamp from wfdb.io.archive import WFDBArchive - np.random.seed(1234) @@ -132,8 +132,9 @@ def test_wfdb_archive_round_trip(temp_record): assert record.p_signal.shape == original_signal.shape # Add tolerance to account for loss of precision during archive round-trip - np.testing.assert_allclose(record.p_signal, original_signal, rtol=1e-2, - atol=3e-3) + np.testing.assert_allclose( + record.p_signal, original_signal, rtol=1e-2, atol=3e-3 + ) def test_archive_read_subset_channels(temp_record): @@ -149,8 +150,9 @@ def test_archive_read_subset_channels(temp_record): assert record.p_signal.shape[0] == original_signal.shape[0] # Add tolerance to account for loss of precision during archive round-trip - np.testing.assert_allclose(record.p_signal[:, 0], original_signal[:, 1], - rtol=1e-2, atol=3e-3) + np.testing.assert_allclose( + record.p_signal[:, 0], original_signal[:, 1], rtol=1e-2, atol=3e-3 + ) def test_archive_read_partial_samples(temp_record): @@ -164,7 +166,9 @@ def test_archive_read_partial_samples(temp_record): record = rdrecord(archive_path, sampfrom=start, sampto=stop) assert record.p_signal.shape == (stop - start, original_signal.shape[1]) - np.testing.assert_allclose(record.p_signal, original_signal[start:stop], rtol=1e-2, atol=1e-3) + np.testing.assert_allclose( + record.p_signal, original_signal[start:stop], rtol=1e-2, atol=1e-3 + ) def test_archive_missing_file_error(temp_record): @@ -178,8 +182,9 @@ def test_archive_missing_file_error(temp_record): zf_name = [name for name in zf.namelist() if name.endswith(".dat")][0] zf.fp = None # Prevent auto-close bug in some zipfile implementations os.rename(archive_path, archive_path + ".bak") - with zipfile.ZipFile(archive_path + ".bak", "r") as zin, \ - zipfile.ZipFile(archive_path, "w") as zout: + with zipfile.ZipFile(archive_path + ".bak", "r") as zin, zipfile.ZipFile( + archive_path, "w" + ) as zout: for item in zin.infolist(): if not item.filename.endswith(".dat"): zout.writestr(item, zin.read(item.filename)) diff --git a/wfdb/io/_header.py b/wfdb/io/_header.py index 97224c37..d4d69e2c 100644 --- a/wfdb/io/_header.py +++ b/wfdb/io/_header.py @@ -5,8 +5,7 @@ import numpy as np import pandas as pd -from wfdb.io import _signal -from wfdb.io import util +from wfdb.io import _signal, util from wfdb.io.header import HeaderSyntaxError, rx_record, rx_segment, rx_signal """ @@ -326,8 +325,12 @@ def wrheader(self, write_dir="", expanded=True, wfdb_archive=None): self.check_field_cohesion(rec_write_fields, list(sig_write_fields)) # Write the header file using the specified fields - self.wr_header_file(rec_write_fields, sig_write_fields, write_dir, - wfdb_archive=wfdb_archive) + self.wr_header_file( + rec_write_fields, + sig_write_fields, + write_dir, + wfdb_archive=wfdb_archive, + ) def get_write_fields(self): """ @@ -510,8 +513,9 @@ def check_field_cohesion(self, rec_write_fields, sig_write_fields): "Each file_name (dat file) specified must have the same byte offset" ) - def wr_header_file(self, rec_write_fields, sig_write_fields, write_dir, - wfdb_archive=None): + def wr_header_file( + self, rec_write_fields, sig_write_fields, write_dir, wfdb_archive=None + ): """ Write a header file using the specified fields. Converts Record attributes into appropriate WFDB format strings. diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index f6c14cb4..7f70502c 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1,5 +1,5 @@ -import math import io +import math import os import posixpath import sys @@ -7,7 +7,7 @@ import fsspec import numpy as np -from wfdb.io import download, _coreio, util +from wfdb.io import _coreio, download, util from wfdb.io._coreio import CLOUD_PROTOCOLS MAX_I32 = 2147483647 @@ -163,8 +163,9 @@ def wr_dats(self, expanded, write_dir, wfdb_archive=None): self.check_sig_cohesion([], expanded) # Write each of the specified dat files - self.wr_dat_files(expanded=expanded, write_dir=write_dir, - wfdb_archive=wfdb_archive) + self.wr_dat_files( + expanded=expanded, write_dir=write_dir, wfdb_archive=wfdb_archive + ) def check_sig_cohesion(self, write_fields, expanded): """ @@ -1401,8 +1402,13 @@ def _rd_dat_signals( ) else: data_to_read = _rd_dat_file( - file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, - wfdb_archive=wfdb_archive + file_name, + dir_name, + pn_dir, + fmt, + start_byte, + n_read_samples, + wfdb_archive=wfdb_archive, ) if extra_flat_samples: @@ -1642,8 +1648,9 @@ def _required_byte_num(mode, fmt, n_samp): return int(n_bytes) -def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, - wfdb_archive=None): +def _rd_dat_file( + file_name, dir_name, pn_dir, fmt, start_byte, n_samp, wfdb_archive=None +): """ Read data from a dat file, either local or remote, into a 1d numpy array. diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index 8fc22eb0..2d97fa9e 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -17,6 +17,7 @@ class WFDBArchive: - .wfdb is included in the record_name explicitly, or - .wfdb is passed directly to the file loading function. """ + def __init__(self, record_name, mode="r"): """ Initialize a WFDBArchive for a given record name (without extension). @@ -35,7 +36,9 @@ def __init__(self, record_name, mode="r"): if mode == "r": if not os.path.exists(self.archive_path): - raise FileNotFoundError(f"Archive not found: {self.archive_path}") + raise FileNotFoundError( + f"Archive not found: {self.archive_path}" + ) if not zipfile.is_zipfile(self.archive_path): raise ValueError(f"Invalid WFDB archive: {self.archive_path}") self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") @@ -60,16 +63,17 @@ def open(self, filename, mode="r"): Mode 'r' (text) or 'rb' (binary) supported. """ if self.zipfile and filename in self.zipfile.namelist(): - with self.zipfile.open(filename, 'r') as f: + with self.zipfile.open(filename, "r") as f: if "b" in mode: yield f else: import io + yield io.TextIOWrapper(f) else: raise FileNotFoundError( f"Could not find '{filename}' as loose file or inside '{self.archive_path}'." - ) + ) def close(self): """ @@ -111,7 +115,9 @@ def create_archive(self, file_list, output_path=None): if file.endswith((".hea", ".hea.json", ".hea.yml")) else zipfile.ZIP_DEFLATED ) - zf.write(file, arcname=os.path.basename(file), compress_type=compress) + zf.write( + file, arcname=os.path.basename(file), compress_type=compress + ) def get_archive(record_base_name, mode="r"): @@ -119,6 +125,7 @@ def get_archive(record_base_name, mode="r"): Get or create a WFDBArchive for the given record base name. """ if record_base_name not in _archive_cache: - _archive_cache[record_base_name] = WFDBArchive(record_base_name, - mode=mode) + _archive_cache[record_base_name] = WFDBArchive( + record_base_name, mode=mode + ) return _archive_cache[record_base_name] diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 1c286e68..6fe1ac56 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1,22 +1,16 @@ import datetime import multiprocessing.dummy -import posixpath import os +import posixpath import re import fsspec import numpy as np import pandas as pd -from wfdb.io import _header -from wfdb.io import _signal -from wfdb.io import _url -from wfdb.io.archive import get_archive -from wfdb.io import download -from wfdb.io import header -from wfdb.io import util +from wfdb.io import _header, _signal, _url, download, header, util from wfdb.io._coreio import CLOUD_PROTOCOLS - +from wfdb.io.archive import get_archive # -------------- WFDB Signal Calibration and Classification ---------- # @@ -935,13 +929,17 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): # Perform field validity and cohesion checks, and write the # header file. - self.wrheader(write_dir=write_dir, expanded=expanded, - wfdb_archive=wfdb_archive) + self.wrheader( + write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive + ) if self.n_sig > 0: # Perform signal validity and cohesion checks, and write the # associated dat files. - self.wr_dats(expanded=expanded, write_dir=write_dir, - wfdb_archive=wfdb_archive) + self.wr_dats( + expanded=expanded, + write_dir=write_dir, + wfdb_archive=wfdb_archive, + ) def _arrange_fields(self, channels, sampfrom, smooth_frames): """ @@ -2046,10 +2044,13 @@ def rdrecord( hea_file = os.path.basename(record_base) + ".hea" import tempfile + with wfdb_archive.open(hea_file, "r") as f: header_str = f.read() - with tempfile.NamedTemporaryFile("w+", suffix=".hea", delete=False) as tmpf: + with tempfile.NamedTemporaryFile( + "w+", suffix=".hea", delete=False + ) as tmpf: tmpf.write(header_str) tmpf.flush() record = rdheader(tmpf.name) @@ -2074,7 +2075,9 @@ def rdrecord( if "." not in pn_dir: dir_list = pn_dir.split("/") pn_dir = posixpath.join( - dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] + dir_list[0], + download.get_version(dir_list[0]), + *dir_list[1:], ) record = rdheader(record_name, pn_dir=pn_dir, rd_segments=False) @@ -2783,7 +2786,9 @@ def wfdbtime(record_name, input_times, pn_dir=None): ) if not times.startswith("s"): sample_num = int( - sum(x * 60**i for i, x in enumerate([seconds, minutes, hours])) + sum( + x * 60**i for i, x in enumerate([seconds, minutes, hours]) + ) * record.fs ) sample_num = "s" + str(sample_num) @@ -3080,11 +3085,14 @@ def wrsamp( expanded = False if wfdb_archive: - wfdb_archive = get_archive(os.path.join(write_dir, record_name), - mode="w") + wfdb_archive = get_archive( + os.path.join(write_dir, record_name), mode="w" + ) # Write the record files - header and associated dat - record.wrsamp(write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive) + record.wrsamp( + write_dir=write_dir, expanded=expanded, wfdb_archive=wfdb_archive + ) def dl_database( From 10270b6e06f83628a675a60af5c6aeccb58f46f0 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Sun, 4 May 2025 16:01:27 -0400 Subject: [PATCH 06/14] Add __enter__ and __exit__ methods. --- wfdb/io/archive.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index 2d97fa9e..cc47040e 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -50,6 +50,12 @@ def __init__(self, record_name, mode="r"): pass # Just create the file self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + def exists(self, filename): """ Check if a file exists in the archive. From 4362af8a0c3241eea80cf622dab7db10b88d4b49 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Sun, 4 May 2025 16:59:29 -0400 Subject: [PATCH 07/14] Fix file close issue. --- tests/test_archive.py | 10 +++++----- wfdb/io/archive.py | 36 ++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index a5880a58..a89ecae8 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -54,11 +54,11 @@ def temp_record(): dat_path = os.path.join(tmpdir, record_basename + ".dat") archive_path = os.path.join(tmpdir, record_basename + ".wfdb") - WFDBArchive.create_archive( - None, - file_list=[hea_path, dat_path], - output_path=archive_path, - ) + with WFDBArchive(record_name=record_basename, mode="w") as archive: + archive.create_archive( + file_list=[hea_path, dat_path], + output_path=archive_path, + ) yield { "record_name": os.path.join(tmpdir, record_basename), diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index cc47040e..c95c2fdd 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -1,3 +1,4 @@ +import io import os import shutil import zipfile @@ -44,10 +45,9 @@ def __init__(self, record_name, mode="r"): self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") elif mode == "w": - # Initialize an empty archive on disk + # Create archive file if needed if not os.path.exists(self.archive_path): - with zipfile.ZipFile(self.archive_path, mode="w"): - pass # Just create the file + WFDBArchive.make_archive_file([], self.archive_path) self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") def __enter__(self): @@ -62,6 +62,15 @@ def exists(self, filename): """ return self.zipfile and filename in self.zipfile.namelist() + @staticmethod + def make_archive_file(file_list, output_path): + with zipfile.ZipFile(output_path, mode="w") as zf: + for file in file_list: + compress = zipfile.ZIP_DEFLATED + zf.write( + file, arcname=os.path.basename(file), compress_type=compress + ) + @contextmanager def open(self, filename, mode="r"): """ @@ -73,8 +82,6 @@ def open(self, filename, mode="r"): if "b" in mode: yield f else: - import io - yield io.TextIOWrapper(f) else: raise FileNotFoundError( @@ -97,7 +104,7 @@ def write(self, filename, data): self.zipfile.writestr(filename, data) return - # If already opened in read or append mode, use the replace-then-move trick + # If already opened in read or append mode, use replace-then-move tmp_path = self.archive_path + ".tmp" with zipfile.ZipFile(self.archive_path, mode="r") as zin: with zipfile.ZipFile(tmp_path, mode="w") as zout: @@ -114,16 +121,13 @@ def create_archive(self, file_list, output_path=None): If output_path is not specified, uses self.archive_path. """ output_path = output_path or self.archive_path - with zipfile.ZipFile(output_path, mode="w") as zf: - for file in file_list: - compress = ( - zipfile.ZIP_STORED - if file.endswith((".hea", ".hea.json", ".hea.yml")) - else zipfile.ZIP_DEFLATED - ) - zf.write( - file, arcname=os.path.basename(file), compress_type=compress - ) + WFDBArchive.make_archive_file(file_list, output_path) + + # If this archive object points to the archive, reload the zipfile in append mode + if output_path == self.archive_path: + if self.zipfile: + self.zipfile.close() + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") def get_archive(record_base_name, mode="r"): From 676377bf78614e21d3d088f7c9721cf8861aa43a Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 13:13:15 -0400 Subject: [PATCH 08/14] Fix style --- tests/test_archive.py | 9 +++++---- wfdb/io/record.py | 4 +--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index a89ecae8..9650104f 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -182,13 +182,14 @@ def test_archive_missing_file_error(temp_record): zf_name = [name for name in zf.namelist() if name.endswith(".dat")][0] zf.fp = None # Prevent auto-close bug in some zipfile implementations os.rename(archive_path, archive_path + ".bak") - with zipfile.ZipFile(archive_path + ".bak", "r") as zin, zipfile.ZipFile( - archive_path, "w" - ) as zout: + with ( + zipfile.ZipFile(archive_path + ".bak", "r") as zin, + zipfile.ZipFile(archive_path, "w") as zout, + ): for item in zin.infolist(): if not item.filename.endswith(".dat"): zout.writestr(item, zin.read(item.filename)) os.remove(archive_path + ".bak") - with pytest.raises(FileNotFoundError, match=".*\.dat.*"): + with pytest.raises(FileNotFoundError, match=r".*\.dat.*"): rdrecord(archive_path) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 6fe1ac56..20315688 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -2786,9 +2786,7 @@ def wfdbtime(record_name, input_times, pn_dir=None): ) if not times.startswith("s"): sample_num = int( - sum( - x * 60**i for i, x in enumerate([seconds, minutes, hours]) - ) + sum(x * 60**i for i, x in enumerate([seconds, minutes, hours])) * record.fs ) sample_num = "s" + str(sample_num) From e4d023cd24191c8d4d704cfc4cd735d2a018f118 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 13:32:36 -0400 Subject: [PATCH 09/14] Use context manager for creating archive. --- tests/test_archive.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index 9650104f..387efc30 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -88,21 +88,20 @@ def test_wfdb_archive_inline_round_trip(): sig_len = 1000 sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32) - # Create archive inline - wfdb_archive = WFDBArchive(record_basename, mode="w") - wrsamp( - record_name=record_basename, - fs=fs, - units=["mV", "mV"], - sig_name=["I", "II"], - p_signal=sig, - fmt=["24", "24"], - adc_gain=[200.0, 200.0], - baseline=[0, 0], - write_dir=tmpdir, - wfdb_archive=wfdb_archive, - ) - wfdb_archive.close() + # Create archive inline using context manager + with WFDBArchive(record_basename, mode="w") as wfdb_archive: + wrsamp( + record_name=record_basename, + fs=fs, + units=["mV", "mV"], + sig_name=["I", "II"], + p_signal=sig, + fmt=["24", "24"], + adc_gain=[200.0, 200.0], + baseline=[0, 0], + write_dir=tmpdir, + wfdb_archive=wfdb_archive, + ) assert os.path.exists(archive_path), "Archive was not created" From dffc6f18231918b1353fd5e36b03dd043423ea79 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 13:56:27 -0400 Subject: [PATCH 10/14] Fix handling in Windows. --- tests/test_archive.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index 387efc30..2c19b801 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -60,12 +60,20 @@ def temp_record(): output_path=archive_path, ) - yield { - "record_name": os.path.join(tmpdir, record_basename), - "archive_path": archive_path, - "original_signal": sig, - "fs": fs, - } + try: + yield { + "record_name": os.path.join(tmpdir, record_basename), + "archive_path": archive_path, + "original_signal": sig, + "fs": fs, + } + finally: + # Clean up any open archive handles + from wfdb.io.archive import _archive_cache + for archive in _archive_cache.values(): + if archive is not None: + archive.close() + _archive_cache.clear() def test_wfdb_archive_inline_round_trip(): @@ -108,12 +116,17 @@ def test_wfdb_archive_inline_round_trip(): # Read back from archive record = rdrecord(archive_path) - assert record.fs == fs - assert record.n_sig == 2 - assert record.p_signal.shape == sig.shape - - # Add tolerance to account for loss of precision during archive round-trip - np.testing.assert_allclose(record.p_signal, sig, rtol=1e-2, atol=3e-3) + try: + assert record.fs == fs + assert record.n_sig == 2 + assert record.p_signal.shape == sig.shape + + # Add tolerance to account for loss of precision during archive round-trip + np.testing.assert_allclose(record.p_signal, sig, rtol=1e-2, atol=3e-3) + finally: + # Ensure we close the archive after reading + if hasattr(record, 'wfdb_archive') and record.wfdb_archive is not None: + record.wfdb_archive.close() def test_wfdb_archive_round_trip(temp_record): From 388dfb001a6aa962cb4e14437627f851ca2f1902 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 14:19:32 -0400 Subject: [PATCH 11/14] Fix style. --- tests/test_archive.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index 2c19b801..d0c252f0 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -70,6 +70,7 @@ def temp_record(): finally: # Clean up any open archive handles from wfdb.io.archive import _archive_cache + for archive in _archive_cache.values(): if archive is not None: archive.close() @@ -122,10 +123,15 @@ def test_wfdb_archive_inline_round_trip(): assert record.p_signal.shape == sig.shape # Add tolerance to account for loss of precision during archive round-trip - np.testing.assert_allclose(record.p_signal, sig, rtol=1e-2, atol=3e-3) + np.testing.assert_allclose( + record.p_signal, sig, rtol=1e-2, atol=3e-3 + ) finally: # Ensure we close the archive after reading - if hasattr(record, 'wfdb_archive') and record.wfdb_archive is not None: + if ( + hasattr(record, "wfdb_archive") + and record.wfdb_archive is not None + ): record.wfdb_archive.close() From 8971d7970a3129282cd0e0ceb5bfc7262c00383e Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 16:27:25 -0400 Subject: [PATCH 12/14] Support writing directly with wrsamp. --- tests/test_archive.py | 2 +- wfdb/io/_signal.py | 10 ++++++++++ wfdb/io/archive.py | 27 +++++++++++++++++++++------ wfdb/io/record.py | 42 +++++++++++++++++++++++++++++++++++------- 4 files changed, 67 insertions(+), 14 deletions(-) diff --git a/tests/test_archive.py b/tests/test_archive.py index d0c252f0..3031237a 100644 --- a/tests/test_archive.py +++ b/tests/test_archive.py @@ -98,7 +98,7 @@ def test_wfdb_archive_inline_round_trip(): sig = (np.random.randn(sig_len, 2) * 1000).astype(np.float32) # Create archive inline using context manager - with WFDBArchive(record_basename, mode="w") as wfdb_archive: + with WFDBArchive(record_path, mode="w") as wfdb_archive: wrsamp( record_name=record_basename, fs=fs, diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 7f70502c..6b3f6918 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1710,6 +1710,16 @@ def _rd_dat_file( # Local file or .wfdb archive if wfdb_archive is not None: + # If the exact file name isn't found, look for any .dat file + if not wfdb_archive.exists(file_name): + dat_files = [f for f in wfdb_archive.zipfile.namelist() + if f.endswith('.dat')] + if not dat_files: + raise FileNotFoundError( + f"No dat file found in archive for {file_name}" + ) + file_name = dat_files[0] # Use the first dat file found + with wfdb_archive.open(file_name, "rb") as fp: fp.seek(start_byte) sig_data = util.fromfile( diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index c95c2fdd..e60ceae3 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -31,7 +31,11 @@ def __init__(self, record_name, mode="r"): 'r' for read (default), 'w' for write. """ self.record_name = record_name - self.archive_path = f"{record_name}.wfdb" + # Only append .wfdb if it's not already there + if record_name.endswith('.wfdb'): + self.archive_path = record_name + else: + self.archive_path = f"{record_name}.wfdb" self.zipfile = None self.mode = mode @@ -41,14 +45,24 @@ def __init__(self, record_name, mode="r"): f"Archive not found: {self.archive_path}" ) if not zipfile.is_zipfile(self.archive_path): - raise ValueError(f"Invalid WFDB archive: {self.archive_path}") - self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") + raise ValueError( + f"Invalid WFDB archive: {self.archive_path}" + ) + self.zipfile = zipfile.ZipFile( + self.archive_path, mode="r" + ) elif mode == "w": # Create archive file if needed if not os.path.exists(self.archive_path): - WFDBArchive.make_archive_file([], self.archive_path) - self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") + # Create the directory if it doesn't exist + os.makedirs(os.path.dirname(os.path.abspath(self.archive_path)), exist_ok=True) + WFDBArchive.make_archive_file( + [], self.archive_path + ) + self.zipfile = zipfile.ZipFile( + self.archive_path, mode="a" + ) def __enter__(self): return self @@ -85,7 +99,8 @@ def open(self, filename, mode="r"): yield io.TextIOWrapper(f) else: raise FileNotFoundError( - f"Could not find '{filename}' as loose file or inside '{self.archive_path}'." + f"Could not find '{filename}' as loose file or inside " + f"'{self.archive_path}'." ) def close(self): diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 20315688..baff953a 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -10,7 +10,7 @@ from wfdb.io import _header, _signal, _url, download, header, util from wfdb.io._coreio import CLOUD_PROTOCOLS -from wfdb.io.archive import get_archive +from wfdb.io.archive import get_archive, WFDBArchive # -------------- WFDB Signal Calibration and Classification ---------- # @@ -911,6 +911,10 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): of the uniform signal (d_signal). write_dir : str, optional The directory in which to write the files. + wfdb_archive : str or WFDBArchive, optional + If provided, writes the record to a .wfdb archive. Can be either: + - A string path to the archive file (e.g., 'record.wfdb') + - A WFDBArchive object for more advanced usage Returns ------- @@ -927,6 +931,15 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): checksums[ch] = old_val self.checksum = checksums + # Handle wfdb_archive parameter + if wfdb_archive: + if isinstance(wfdb_archive, str): + # If a string path is provided, create a WFDBArchive object + from wfdb.io.archive import get_archive + wfdb_archive = get_archive(wfdb_archive, mode="w") + elif not isinstance(wfdb_archive, WFDBArchive): + raise TypeError("wfdb_archive must be either a string path or WFDBArchive object") + # Perform field validity and cohesion checks, and write the # header file. self.wrheader( @@ -1975,7 +1988,6 @@ def rdrecord( Option used to stream data from Physionet. The Physionet database directory from which to find the required record files. eg. For record '100' in 'http://physionet.org/content/mitdb' - pn_dir='mitdb'. m2s : bool, optional Used when reading multi-segment records. Specifies whether to directly return a WFDB MultiRecord object (False), or to convert @@ -2033,7 +2045,6 @@ def rdrecord( -------- >>> record = wfdb.rdrecord('sample-data/test01_00s', sampfrom=800, channels=[1, 3]) - """ wfdb_archive = None is_wfdb_archive = record_name.endswith(".wfdb") @@ -2041,7 +2052,12 @@ def rdrecord( if is_wfdb_archive: record_base = record_name[:-5] # remove ".wfdb" wfdb_archive = get_archive(record_base) - hea_file = os.path.basename(record_base) + ".hea" + + # Find any .hea file in the archive + hea_files = [f for f in wfdb_archive.zipfile.namelist() if f.endswith('.hea')] + if not hea_files: + raise FileNotFoundError(f"No header file found in archive {record_name}") + hea_file = hea_files[0] # Use the first header file found import tempfile @@ -2954,6 +2970,10 @@ def wrsamp( setting both `base_date` and `base_time`. write_dir : str, optional The directory in which to write the files. + wfdb_archive : str or WFDBArchive, optional + If provided, writes the record to a .wfdb archive. Can be either: + - A string path to the archive file (e.g., 'record.wfdb') + - A WFDBArchive object for more advanced usage Returns ------- @@ -2978,6 +2998,10 @@ def wrsamp( >>> # Write a local WFDB record (manually inserting fields) >>> wfdb.wrsamp('ecgrecord', fs = 250, units=['mV', 'mV'], sig_name=['I', 'II'], p_signal=signals, fmt=['16', '16']) + >>> # Write to a .wfdb archive using a string path + >>> wfdb.wrsamp('ecgrecord', fs = 250, units=['mV', 'mV'], + sig_name=['I', 'II'], p_signal=signals, fmt=['16', '16'], + wfdb_archive='ecgrecord.wfdb') """ # Check for valid record name @@ -3082,10 +3106,14 @@ def wrsamp( else: expanded = False + # Handle wfdb_archive parameter if wfdb_archive: - wfdb_archive = get_archive( - os.path.join(write_dir, record_name), mode="w" - ) + if isinstance(wfdb_archive, str): + # If a string path is provided, create a WFDBArchive object + from wfdb.io.archive import get_archive + wfdb_archive = get_archive(wfdb_archive, mode="w") + elif not isinstance(wfdb_archive, WFDBArchive): + raise TypeError("wfdb_archive must be either a string path or WFDBArchive object") # Write the record files - header and associated dat record.wrsamp( From ba7992a786062d5ce3f1e8e519dc62a6ac309626 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 16:36:08 -0400 Subject: [PATCH 13/14] Fix style. --- wfdb/io/_signal.py | 5 +++-- wfdb/io/archive.py | 21 ++++++++------------- wfdb/io/record.py | 8 ++++++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 6b3f6918..735ac81c 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1712,8 +1712,9 @@ def _rd_dat_file( if wfdb_archive is not None: # If the exact file name isn't found, look for any .dat file if not wfdb_archive.exists(file_name): - dat_files = [f for f in wfdb_archive.zipfile.namelist() - if f.endswith('.dat')] + dat_files = [ + f for f in wfdb_archive.zipfile.namelist() if f.endswith(".dat") + ] if not dat_files: raise FileNotFoundError( f"No dat file found in archive for {file_name}" diff --git a/wfdb/io/archive.py b/wfdb/io/archive.py index e60ceae3..7f03b8cc 100644 --- a/wfdb/io/archive.py +++ b/wfdb/io/archive.py @@ -32,7 +32,7 @@ def __init__(self, record_name, mode="r"): """ self.record_name = record_name # Only append .wfdb if it's not already there - if record_name.endswith('.wfdb'): + if record_name.endswith(".wfdb"): self.archive_path = record_name else: self.archive_path = f"{record_name}.wfdb" @@ -45,24 +45,19 @@ def __init__(self, record_name, mode="r"): f"Archive not found: {self.archive_path}" ) if not zipfile.is_zipfile(self.archive_path): - raise ValueError( - f"Invalid WFDB archive: {self.archive_path}" - ) - self.zipfile = zipfile.ZipFile( - self.archive_path, mode="r" - ) + raise ValueError(f"Invalid WFDB archive: {self.archive_path}") + self.zipfile = zipfile.ZipFile(self.archive_path, mode="r") elif mode == "w": # Create archive file if needed if not os.path.exists(self.archive_path): # Create the directory if it doesn't exist - os.makedirs(os.path.dirname(os.path.abspath(self.archive_path)), exist_ok=True) - WFDBArchive.make_archive_file( - [], self.archive_path + os.makedirs( + os.path.dirname(os.path.abspath(self.archive_path)), + exist_ok=True, ) - self.zipfile = zipfile.ZipFile( - self.archive_path, mode="a" - ) + WFDBArchive.make_archive_file([], self.archive_path) + self.zipfile = zipfile.ZipFile(self.archive_path, mode="a") def __enter__(self): return self diff --git a/wfdb/io/record.py b/wfdb/io/record.py index baff953a..3f480fae 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -2054,9 +2054,13 @@ def rdrecord( wfdb_archive = get_archive(record_base) # Find any .hea file in the archive - hea_files = [f for f in wfdb_archive.zipfile.namelist() if f.endswith('.hea')] + hea_files = [ + f for f in wfdb_archive.zipfile.namelist() if f.endswith(".hea") + ] if not hea_files: - raise FileNotFoundError(f"No header file found in archive {record_name}") + raise FileNotFoundError( + f"No header file found in archive {record_name}" + ) hea_file = hea_files[0] # Use the first header file found import tempfile From 31fcfdb8e22f51a5e4b364292358d49fbef29296 Mon Sep 17 00:00:00 2001 From: Tom Pollard Date: Thu, 29 May 2025 17:13:09 -0400 Subject: [PATCH 14/14] Fix style. --- wfdb/io/record.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 3f480fae..ea195134 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -936,9 +936,12 @@ def wrsamp(self, expanded=False, write_dir="", wfdb_archive=None): if isinstance(wfdb_archive, str): # If a string path is provided, create a WFDBArchive object from wfdb.io.archive import get_archive + wfdb_archive = get_archive(wfdb_archive, mode="w") elif not isinstance(wfdb_archive, WFDBArchive): - raise TypeError("wfdb_archive must be either a string path or WFDBArchive object") + raise TypeError( + "wfdb_archive must be either a string path or WFDBArchive object" + ) # Perform field validity and cohesion checks, and write the # header file. @@ -3115,9 +3118,12 @@ def wrsamp( if isinstance(wfdb_archive, str): # If a string path is provided, create a WFDBArchive object from wfdb.io.archive import get_archive + wfdb_archive = get_archive(wfdb_archive, mode="w") elif not isinstance(wfdb_archive, WFDBArchive): - raise TypeError("wfdb_archive must be either a string path or WFDBArchive object") + raise TypeError( + "wfdb_archive must be either a string path or WFDBArchive object" + ) # Write the record files - header and associated dat record.wrsamp(