lhotse/cut.py (2 changes: 1 addition & 1 deletion)

@@ -1157,7 +1157,7 @@ def fill_supervision(
         old_sup = self.supervisions[0]
         if isclose(old_sup.start, 0) and isclose(old_sup.duration, self.duration):
             return self
-        if old_sup.start < 0 or old_sup.end > self.end and not shrink_ok:
+        if (old_sup.start < 0 or old_sup.end > self.end) and not shrink_ok:
             raise ValueError(
                 f"Cannot shrink supervision (start={old_sup.start}, end={old_sup.end}) to cut "
                 f"(start=0, duration={self.duration}) because the argument `shrink_ok` is `False`. "
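The one-line fix above is an operator-precedence bug: in Python, `and` binds more tightly than `or`, so the old condition parsed as `old_sup.start < 0 or (old_sup.end > self.end and not shrink_ok)` and could raise even when the caller passed `shrink_ok=True`. A minimal standalone sketch of the difference (illustrative values, not Lhotse code):

    # Python parses `a or b and c` as `a or (b and c)`.
    start, end, cut_end = -0.5, 1.0, 2.0  # supervision starts before the cut
    shrink_ok = True

    old = start < 0 or end > cut_end and not shrink_ok    # True: would raise
    new = (start < 0 or end > cut_end) and not shrink_ok  # False: shrink instead
    print(old, new)  # True False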
lhotse/features/io.py (28 changes: 5 additions & 23 deletions)

@@ -369,20 +369,6 @@ def lookup_cache_or_open(storage_path: str):
     return h5py.File(storage_path, "r")
 
 
-@lru_cache(maxsize=None)
-def lookup_cache_or_open_regular_file(storage_path: str):
-    """
-    Helper internal function used in "fast" file readers.
-    It opens regular files and keeps their handles open in a global program cache
-    to avoid excessive amount of syscalls when the Reader class is instantiated
-    and destroyed in a loop repeatedly (frequent use-case).
-
-    The file handles can be freed at any time by calling ``close_cached_file_handles()``.
-    """
-    f = open(storage_path, "rb")
-    return f
-
-
 @lru_cache(maxsize=None)
 def lookup_chunk_size(h5_file_handle) -> int:
     """

@@ -394,7 +380,6 @@ def lookup_chunk_size(h5_file_handle) -> int:
 
 def close_cached_file_handles() -> None:
     """Closes the cached file handles in ``lookup_cache_or_open`` (see its docs for more details)."""
-    lookup_cache_or_open_regular_file.cache_clear()
     lookup_cache_or_open.cache_clear()
     lookup_chunk_size.cache_clear()
 
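For context, the helper removed above cached open file handles in a process-global `lru_cache`. A standalone sketch of that pattern and why it is fragile (not the Lhotse source):

    from functools import lru_cache

    @lru_cache(maxsize=None)
    def open_cached(path: str):
        # One handle per distinct path stays open until cache_clear() is
        # called, so reading many feature files can exhaust the OS
        # open-file limit; the shared handle's file position is also
        # unsafe to seek()/read() from multiple threads without a lock.
        return open(path, "rb")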
@@ -737,8 +722,7 @@ class LilcomChunkyReader(FeaturesReader):
 
     def __init__(self, storage_path: Pathlike, *args, **kwargs):
         super().__init__()
-        self.file = lookup_cache_or_open_regular_file(storage_path)
-        self.lock = threading.Lock()
+        self.storage_path = storage_path
 
     @dynamic_lru_cache
     def read(
@@ -760,12 +744,10 @@ def read(
         chunk_offsets = chunk_offsets[left_chunk_idx:right_chunk_idx]
 
         chunk_data = []
-        for offset, end in pairwise(chunk_offsets):
-            # We need to use locks to avoid race conditions between seek
-            # and read in multi-threaded reads.
-            with self.lock:
-                self.file.seek(offset)
-                chunk_data.append(self.file.read(end - offset))
+        with open(self.storage_path, "rb") as file:
+            for offset, end in pairwise(chunk_offsets):
+                file.seek(offset)
+                chunk_data.append(file.read(end - offset))
 
         # Read, decode, concat
         decompressed_chunks = [lilcom.decompress(data) for data in chunk_data]
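The lock removed here guarded the shared handle's file position: without it, one thread's seek() could land between another thread's seek() and read(), returning bytes from the wrong offset. Opening the file per read() call sidesteps the race at the cost of an extra open/close pair, and the `@dynamic_lru_cache` on read() means repeated reads of the same chunk skip the reopen anyway. A third option, shown as a hypothetical sketch (not what this PR does), is `os.pread`, which reads at an explicit offset without touching any shared file position:

    import os
    from itertools import pairwise  # Python 3.10+; Lhotse ships its own pairwise

    def read_chunks(path: str, chunk_offsets: list[int]) -> list[bytes]:
        # os.pread(fd, length, offset) is positional (Unix-only): it never
        # moves the file offset, so concurrent readers need no lock and
        # no seek().
        fd = os.open(path, os.O_RDONLY)
        try:
            return [
                os.pread(fd, end - offset, offset)
                for offset, end in pairwise(chunk_offsets)
            ]
        finally:
            os.close(fd)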