Skip to content

Commit 297915b

Browse files
committed
Code cleanup
1 parent d30c74f commit 297915b

File tree

1 file changed

+2
-11
lines changed

1 file changed

+2
-11
lines changed

stable_whisper/whisper_word_level/hf_whisper.py

Lines changed: 2 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -86,15 +86,6 @@ def load_hf_pipe(model_name: str, device: str = None, flash: bool = False, **pip
8686

8787
processor = AutoProcessor.from_pretrained(model_id)
8888

89-
# if not flash:
90-
# try:
91-
# model = model.to_bettertransformer()
92-
# except (ValueError, ImportError) as e:
93-
# import warnings
94-
# warnings.warn(
95-
# f'Failed convert model to BetterTransformer due to: {e}'
96-
# )
97-
9889
final_pipe_kwargs = dict(
9990
task="automatic-speech-recognition",
10091
model=model,
@@ -168,9 +159,9 @@ def _inner_transcribe(
168159
if not language and hasattr(output, 'get') and 'detected_language' in output:
169160
language = output['detected_language']
170161
if not language:
171-
# Use the pipeline's language detection by accessing the generated tokens
162+
# HF Pipelines have broken language detection.
163+
# Manually detect language by generating tokens from the first 10 seconds of the audio.
172164
try:
173-
# Get the raw generated tokens from the model
174165
import torch
175166
sample_audio = audio[:int(self.sampling_rate * 10)] # Use first 10 seconds
176167
inputs = self._pipe.feature_extractor(sample_audio, sampling_rate=self.sampling_rate, return_tensors="pt")

0 commit comments

Comments
 (0)