Skip to content

Commit cf576e5

Browse files
committed
fixes #64: fix inconsistency between segments when some segments have empty text
1 parent 6197f08 commit cf576e5

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

whisper_timestamped/transcribe.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,9 @@ def filter_tokens(tokens):
905905
assert len(segment_logprobs) == len(segment_tokens), f"Inconsistent number of segments: logprobs ({len(segment_logprobs)}) != tokens ({len(segment_tokens)})"
906906

907907
whisper_segments = transcription["segments"]
908+
# See issue 64: some segments may have empty text
909+
if any(not s["text"] for s in whisper_segments):
910+
whisper_segments = [s for s in whisper_segments if s["text"]]
908911
l1 = len(whisper_segments)
909912
l2 = len(timestamped_word_segments)
910913
if l1 != l2 and l1 != 0:

0 commit comments

Comments
 (0)