Skip to content

Commit 664a594

Browse files
authored
Remove file handle caching from LilcomChunkyReader (#737)
- Remove file handle caching from LilcomChunkyReader
- Remove outdated comments
1 parent 4198446 commit 664a594

File tree

2 files changed

+6
-24
lines changed

2 files changed

+6
-24
lines changed

lhotse/cut.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1157,7 +1157,7 @@ def fill_supervision(
11571157
old_sup = self.supervisions[0]
11581158
if isclose(old_sup.start, 0) and isclose(old_sup.duration, self.duration):
11591159
return self
1160-
if old_sup.start < 0 or old_sup.end > self.end and not shrink_ok:
1160+
if (old_sup.start < 0 or old_sup.end > self.end) and not shrink_ok:
11611161
raise ValueError(
11621162
f"Cannot shrink supervision (start={old_sup.start}, end={old_sup.end}) to cut "
11631163
f"(start=0, duration={self.duration}) because the argument `shrink_ok` is `False`. "

lhotse/features/io.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -369,20 +369,6 @@ def lookup_cache_or_open(storage_path: str):
369369
return h5py.File(storage_path, "r")
370370

371371

372-
@lru_cache(maxsize=None)
373-
def lookup_cache_or_open_regular_file(storage_path: str):
374-
"""
375-
Helper internal function used in "fast" file readers.
376-
It opens regular files and keeps their handles open in a global program cache
377-
to avoid excessive amount of syscalls when the Reader class is instantiated
378-
and destroyed in a loop repeatedly (frequent use-case).
379-
380-
The file handles can be freed at any time by calling ``close_cached_file_handles()``.
381-
"""
382-
f = open(storage_path, "rb")
383-
return f
384-
385-
386372
@lru_cache(maxsize=None)
387373
def lookup_chunk_size(h5_file_handle) -> int:
388374
"""
@@ -394,7 +380,6 @@ def lookup_chunk_size(h5_file_handle) -> int:
394380

395381
def close_cached_file_handles() -> None:
396382
"""Closes the cached file handles in ``lookup_cache_or_open`` (see its docs for more details)."""
397-
lookup_cache_or_open_regular_file.cache_clear()
398383
lookup_cache_or_open.cache_clear()
399384
lookup_chunk_size.cache_clear()
400385

@@ -737,8 +722,7 @@ class LilcomChunkyReader(FeaturesReader):
737722

738723
def __init__(self, storage_path: Pathlike, *args, **kwargs):
739724
super().__init__()
740-
self.file = lookup_cache_or_open_regular_file(storage_path)
741-
self.lock = threading.Lock()
725+
self.storage_path = storage_path
742726

743727
@dynamic_lru_cache
744728
def read(
@@ -760,12 +744,10 @@ def read(
760744
chunk_offsets = chunk_offsets[left_chunk_idx:right_chunk_idx]
761745

762746
chunk_data = []
763-
for offset, end in pairwise(chunk_offsets):
764-
# We need to use locks to avoid race conditions between seek
765-
# and read in multi-threaded reads.
766-
with self.lock:
767-
self.file.seek(offset)
768-
chunk_data.append(self.file.read(end - offset))
747+
with open(self.storage_path, "rb") as file:
748+
for offset, end in pairwise(chunk_offsets):
749+
file.seek(offset)
750+
chunk_data.append(file.read(end - offset))
769751

770752
# Read, decode, concat
771753
decompressed_chunks = [lilcom.decompress(data) for data in chunk_data]

0 commit comments

Comments (0)