Skip to content

Commit 776e5a1

Browse files
Merge pull request #21 from HenestrosaDev/refactor
Refactor
2 parents f4a8da5 + 214eae8 commit 776e5a1

File tree

10 files changed

+661
-379
lines changed

10 files changed

+661
-379
lines changed

src/controllers/main_controller.py

Lines changed: 163 additions & 283 deletions
Large diffs are not rendered by default.

src/handlers/google_api_handler.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import os
2+
import shutil
3+
import traceback
4+
5+
import speech_recognition as sr
6+
import utils.config_manager as cm
7+
from models.transcription import Transcription
8+
from moviepy.video.io.VideoFileClip import VideoFileClip
9+
from pydub import AudioSegment
10+
from pydub.silence import split_on_silence
11+
from utils import constants as c
12+
from utils.path_helper import ROOT_PATH
13+
14+
15+
class GoogleApiHandler:
    """Transcription backend that uses the Google Speech-to-Text web API."""

    @staticmethod
    async def transcribe_file(transcription: Transcription) -> str:
        """
        Transcribes audio from a file using the Google Speech-to-Text API.

        The audio (or the audio track of a video) is split on silence, each
        chunk is exported to a temporary `audio-chunks` directory under
        `ROOT_PATH` and sent to the recognizer. Chunks that cannot be
        recognized are skipped. The temporary directory is always removed
        before returning.

        :param transcription: An instance of Transcription containing information
                              about the audio file.
        :return: The transcribed text or an error message (a formatted
                 traceback) if transcription fails.
        """
        file_path = transcription.source_path

        # Create a directory to store the audio chunks
        chunks_directory = ROOT_PATH / "audio-chunks"
        chunks_directory.mkdir(exist_ok=True)

        # Recognized text of each chunk, joined at the end
        chunk_texts = []

        try:
            # Get file extension
            content_type = file_path.suffix

            # Open the audio file using pydub
            if content_type in c.AUDIO_FILE_EXTENSIONS:
                sound = AudioSegment.from_file(file_path)
            elif content_type in c.VIDEO_FILE_EXTENSIONS:
                sound = GoogleApiHandler._extract_audio_from_video(
                    file_path, chunks_directory
                )
            else:
                # Fail with a readable message instead of an AttributeError
                # on `None` further down.
                raise ValueError(f"Unsupported file extension: '{content_type}'")

            audio_chunks = split_on_silence(
                sound,
                # Minimum duration of silence required to consider a segment as a split point
                min_silence_len=500,
                # Audio with a level -X decibels below the original audio level will be considered as silence
                silence_thresh=sound.dBFS - 40,
                # Adds a buffer of silence before and after each split point
                keep_silence=100,
            )

            # Create a speech recognition object
            r = sr.Recognizer()

            # Get Google API key (if any)
            config_google_api = cm.ConfigManager.get_config_google_api()
            api_key = config_google_api.api_key or None

            # Process each chunk
            for idx, audio_chunk in enumerate(audio_chunks):
                # Export audio chunk and save it in the `chunks_directory` directory.
                chunk_filename = os.path.join(chunks_directory, f"chunk{idx}.wav")
                audio_chunk.export(chunk_filename, bitrate="192k", format="wav")

                # Read the whole chunk. No ambient-noise calibration is done
                # here: it would read (and therefore drop) the beginning of
                # the chunk, and it only tunes the threshold used by
                # `listen()`, which is not used for file transcription.
                with sr.AudioFile(chunk_filename) as source:
                    audio_listened = r.record(source)

                try:
                    # Try converting it to text
                    chunk_text = r.recognize_google(
                        audio_listened,
                        language=transcription.language_code,
                        key=api_key,
                    )
                    chunk_text = f"{chunk_text.capitalize()}. "
                except sr.UnknownValueError:
                    # No intelligible speech in this chunk
                    continue
                except Exception as error:
                    # Best effort: keep the text recognized so far, but do
                    # not hide the failure completely.
                    print(f"chunk {idx} could not be transcribed: {error!r}")
                    continue

                chunk_texts.append(chunk_text)
                print(f"chunk text: {chunk_text}")

            return "".join(chunk_texts)

        except Exception:
            return traceback.format_exc()

        finally:
            # Delete temporary directory and files. Errors are ignored so a
            # failed cleanup cannot replace the result with an exception.
            shutil.rmtree(chunks_directory, ignore_errors=True)

    @staticmethod
    def _extract_audio_from_video(video_path, output_directory):
        """
        Writes the audio track of a video to a WAV file and loads it with pydub.

        :param video_path: Path to the video file.
        :param output_directory: Directory in which the WAV file is written.
        :return: The audio track as an AudioSegment.
        :raises ValueError: If the video has no audio track.
        """
        video_audio_path = output_directory / f"{video_path.stem}.wav"

        clip = VideoFileClip(str(video_path))
        try:
            if clip.audio is None:
                raise ValueError(f"'{video_path.name}' has no audio track")

            clip.audio.write_audiofile(str(video_audio_path))
        finally:
            # Release the readers held by the clip
            clip.close()

        return AudioSegment.from_wav(video_audio_path)

src/handlers/whisperx_handler.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
import os
2+
import traceback
3+
from pathlib import Path
4+
5+
import utils.config_manager as cm
6+
import whisperx
7+
from models.transcription import Transcription
8+
9+
10+
class WhisperXHandler:
    """Transcription and subtitle generation backed by the WhisperX library."""

    def __init__(self):
        # Result of the last successful `transcribe_file` call (aligned when
        # subtitles were requested); `None` until then.
        self._whisperx_result = None

    async def transcribe_file(self, transcription: Transcription) -> str:
        """
        Transcribe audio from a file using the WhisperX library.

        On success the WhisperX result is stored on the instance so that
        `generate_subtitles` can use it afterwards. On failure no result is
        stored.

        :param transcription: An instance of Transcription containing information about
                              the audio file.
        :type transcription: Transcription
        :return: The transcribed text or an error message (a formatted
                 traceback) if transcription fails.
        :rtype: str
        """
        config_whisperx = cm.ConfigManager.get_config_whisperx()

        device = "cpu" if config_whisperx.use_cpu else "cuda"
        task = "translate" if transcription.should_translate else "transcribe"

        # Drop the result of any previous run so that a failure below cannot
        # leave stale data behind for `generate_subtitles`.
        self._whisperx_result = None

        try:
            model = whisperx.load_model(
                config_whisperx.model_size,
                device,
                compute_type=config_whisperx.compute_type,
                task=task,
                language=transcription.language_code,
            )

            audio_path = str(transcription.source_path)
            audio = whisperx.load_audio(audio_path)
            result = model.transcribe(audio, batch_size=config_whisperx.batch_size)

            text_combined = " ".join(
                segment["text"].strip() for segment in result["segments"]
            )

            # Align output if should subtitle
            if transcription.should_subtitle:
                model_aligned, metadata = whisperx.load_align_model(
                    language_code=transcription.language_code, device=device
                )
                result = whisperx.align(
                    result["segments"],
                    model_aligned,
                    metadata,
                    audio,
                    device,
                    return_char_alignments=False,
                )

            # Only publish the result once every step has succeeded
            self._whisperx_result = result

            return text_combined

        except Exception:
            return traceback.format_exc()

    def generate_subtitles(self, file_path: Path, should_overwrite: bool):
        """
        Generate subtitles for a video or audio file.

        Writes one subtitle file per supported format ("srt" and "vtt") next
        to `file_path`, using the result of the last successful
        `transcribe_file` call.

        :param file_path: The path to the video or audio file for which subtitles are
                          to be generated.
        :type file_path: Path
        :param should_overwrite: Indicates whether existing subtitle files should be
                                 overwritten if they exist. If False, subtitles will
                                 only be generated if no subtitle file exists for
                                 the given format.
        :type should_overwrite: bool
        :raises RuntimeError: If a subtitle file has to be written but no
                              transcription result is available.
        """
        config_subtitles = cm.ConfigManager.get_config_subtitles()

        output_formats = ["srt", "vtt"]
        output_dir = file_path.parent

        writer_args = {
            "highlight_words": config_subtitles.highlight_words,
            "max_line_count": config_subtitles.max_line_count,
            "max_line_width": config_subtitles.max_line_width,
        }

        for output_format in output_formats:
            path_to_check = output_dir / f"{file_path.stem}.{output_format}"

            if path_to_check.exists() and not should_overwrite:
                continue

            if self._whisperx_result is None:
                raise RuntimeError(
                    "No transcription result available; transcribe_file() must "
                    "succeed before generate_subtitles() is called."
                )

            writer = whisperx.transcribe.get_writer(output_format, str(output_dir))

            # Workaround, see:
            # https://github.com/m-bain/whisperX/issues/455#issuecomment-1707547704
            self._whisperx_result["language"] = "en"

            writer(self._whisperx_result, file_path, writer_args)

src/handlers/youtube_handler.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
from pathlib import Path
2+
from typing import Optional
3+
4+
from pytube import YouTube
5+
6+
7+
class YouTubeHandler:
    """Helpers for fetching media from YouTube."""

    @staticmethod
    def download_audio_from_video(
        url: str,
        output_path: str = ".",
        output_filename: str = "yt-audio.mp3",
    ) -> Optional[Path]:
        """
        Fetch the first audio-only stream of a YouTube video and save it to disk.

        :param url: Address of the YouTube video.
        :param output_path: (Optional) Directory in which the audio file is stored.
                            Defaults to the current directory.
        :param output_filename: (Optional) File name used for the stored audio.
                                Defaults to "yt-audio.mp3".
        :return: A Path pointing at the saved audio file, or None when anything
                 goes wrong while downloading.
        """
        try:
            audio_streams = YouTube(url).streams.filter(only_audio=True)
            saved_to = audio_streams.first().download(
                filename=output_filename,
                output_path=output_path,
            )

            if not saved_to:
                return None

            return Path(saved_to)

        except Exception:
            # Any failure (bad URL, no audio stream, network error, ...) is
            # reported to the caller as "no file".
            return None

src/models/transcription.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
class Transcription:
1010
text: Optional[str] = None
1111
language_code: Optional[str] = None
12-
source: Optional[AudioSource] = None
12+
source_type: Optional[AudioSource] = None
1313
source_path: Path = Path("/")
1414
method: Optional[int] = None
1515
should_translate: bool = False

src/utils/audio_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,15 @@
22
from pydub import AudioSegment
33

44

5-
def save_audio_data(audio_data, filename):
5+
def save_audio_data(audio_data: list[sr.AudioData], filename: str):
6+
"""
7+
Save recorded audio data to a WAV file.
8+
9+
:param audio_data: A list of recorded audio chunks.
10+
:type audio_data: list[sr.AudioData]
11+
:param filename: The name of the file to save the audio data to.
12+
:type filename: str
13+
"""
614
if audio_data:
715
raw_audio_data = b"".join(
816
[

src/utils/config_manager.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,11 @@
99

1010

1111
class ConfigManager:
12-
_FILE_PATH = ROOT_PATH / "config.ini"
12+
_CONFIG_FILE_PATH = ROOT_PATH / "config.ini"
1313
KeyType = Union[ConfigWhisperX.Key, ConfigGoogleApi.Key, ConfigSubtitles.Key]
1414

1515
@staticmethod
16-
def read_config(file_path: Path = _FILE_PATH) -> Optional[ConfigParser]:
16+
def read_config(file_path: Path = _CONFIG_FILE_PATH) -> Optional[ConfigParser]:
1717
config = ConfigParser()
1818
config.read(file_path)
1919
return config
@@ -62,7 +62,7 @@ def get_config_subtitles() -> ConfigSubtitles:
6262
def get_value(
6363
section: KeyType,
6464
key: KeyType,
65-
file_path: Path = _FILE_PATH,
65+
file_path: Path = _CONFIG_FILE_PATH,
6666
) -> Optional[Union[str, bool, int, float]]:
6767
config = ConfigManager.read_config(file_path)
6868

@@ -91,7 +91,7 @@ def modify_value(
9191
section: KeyType,
9292
key: KeyType,
9393
new_value: str,
94-
file_path: Path = _FILE_PATH,
94+
file_path: Path = _CONFIG_FILE_PATH,
9595
):
9696
config = ConfigManager.read_config(file_path)
9797

src/utils/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,3 +128,6 @@
128128
".ogv", ".ogx", # OGG
129129
".wmv", ".asf" # AIFF / ASF
130130
]
131+
# fmt: on
132+
133+
SUPPORTED_FILE_EXTENSIONS = AUDIO_FILE_EXTENSIONS + VIDEO_FILE_EXTENSIONS

src/utils/i18n.py

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)