Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions whisper/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# The value below looks like a placeholder (it repeats the variable name);
# replace it locally with your own key and avoid committing a real secret.
OPENAI_API_KEY=OPENAI_API_KEY
15 changes: 15 additions & 0 deletions whisper/1_basic_call_english_only.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import whisper
from pathlib import Path


# Loaded once at import time and shared by every call in this script.
MODEL = whisper.load_model("medium.en")
# Sample clips live in a folder next to this script.
AUDIO_DIR = Path(__file__).parent / "test_audio_files"


def get_transcription(audio_file: str):
    """Transcribe *audio_file* with the module-level ``MODEL``.

    The raw value returned by ``MODEL.transcribe`` is printed and then
    returned unchanged to the caller.
    """
    transcription = MODEL.transcribe(audio_file)
    print(transcription)
    return transcription


# Runs on import/execution of the script: transcribe the
# "terrible_quality.mp3" sample from AUDIO_DIR.
get_transcription(str(AUDIO_DIR / "terrible_quality.mp3"))
35 changes: 35 additions & 0 deletions whisper/1_multiple_languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import whisper
from pathlib import Path

# Sample clips live in a folder next to this script.
AUDIO_DIR = Path(__file__).parent / "test_audio_files"
# "medium" (no ".en" suffix) is loaded here, unlike the English-only script.
model = whisper.load_model("medium")


def detect_language_and_transcribe(audio_file: str):
    """Detect the spoken language of *audio_file* and transcribe it in that language.

    The audio is loaded, passed through ``whisper.pad_or_trim`` and turned
    into a log-mel spectrogram on the model's device. The most probable
    language reported by ``model.detect_language`` is printed and then used
    for a ``task="transcribe"`` decode. The decode result is printed and its
    ``text`` attribute is returned.

    NOTE(review): only the window kept by ``pad_or_trim`` is decoded, so
    long recordings are presumably truncated -- confirm against the Whisper
    documentation.
    """
    waveform = whisper.pad_or_trim(whisper.load_audio(audio_file))
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # The first element of the returned pair is not needed here.
    _, language_probs = model.detect_language(mel)
    # Pick the language code with the highest probability.
    language: str = max(language_probs, key=language_probs.get)  # type: ignore
    print(f"Detected language: {language}")

    decoded = whisper.decode(
        model,
        mel,
        whisper.DecodingOptions(language=language, task="transcribe"),
    )
    print(decoded)
    return decoded.text  # type: ignore


# dutch_test = detect_language_and_transcribe(
# str(AUDIO_DIR / "dutch_the_netherlands.mp3")
# )


# result = model.transcribe(str(AUDIO_DIR / "dutch_the_netherlands.mp3"), verbose=True)
# print(result["text"])

# Runs on import/execution of the script. The source language is pinned to
# Dutch ("nl") and the task is "translate" rather than "transcribe";
# verbose=True is forwarded to model.transcribe.
result = model.transcribe(
    str(AUDIO_DIR / "dutch_the_netherlands.mp3"),
    verbose=True,
    language="nl",
    task="translate",
)
# Only the "text" entry of the result is printed.
print(result["text"])
88 changes: 88 additions & 0 deletions whisper/2_whisper_pods.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import uuid
from pathlib import Path

import gradio as gr
import whisper
from whisper.utils import WriteSRT, WriteVTT

from settings import BASE_DIR, OUTPUT_TEMP_DIR, STYLES_DIR
from utils import podcast, subtitles


# Model and subtitle writers are created once at import time and reused for
# every request handled by transcribe_and_summarize.
WHISPER_MODEL = whisper.load_model("base")
# Both writers are configured with OUTPUT_TEMP_DIR as their output directory.
VTT_WRITER = WriteVTT(output_dir=str(OUTPUT_TEMP_DIR))
SRT_WRITER = WriteSRT(output_dir=str(OUTPUT_TEMP_DIR))


def transcribe_and_summarize(page_link: str) -> tuple[str, str, str, str]:
    """Download the podcast behind *page_link*, transcribe it and summarize it.

    Steps:
      1. Resolve the download URL with ``podcast.scrape_link_from_page`` and
         fetch the audio into ``OUTPUT_TEMP_DIR`` under a fresh UUID.
      2. Transcribe the audio with ``WHISPER_MODEL`` and dump the raw result
         to ``pods_log.txt`` in ``BASE_DIR`` (the file is overwritten on
         every call).
      3. Summarize the transcription with ``podcast.get_summary``.
      4. Write VTT and SRT subtitle files named after the same UUID.

    Returns:
        ``(summary, transcription, vtt, srt)`` where the last two items are
        the string form of whatever ``subtitles.write_to_file`` returned.
    """
    run_id = uuid.uuid4()

    download_url = podcast.scrape_link_from_page(page_link)
    audio_path: Path = podcast.download(download_url, run_id, OUTPUT_TEMP_DIR)

    whisper_output = WHISPER_MODEL.transcribe(str(audio_path))
    log_path = BASE_DIR / "pods_log.txt"
    with open(log_path, "w", encoding="utf-8") as log_file:
        log_file.write(str(whisper_output))

    transcription = str(whisper_output["text"])
    summary = podcast.get_summary(transcription)

    # Both subtitle files share the run's UUID as their base name.
    vtt_subs = subtitles.write_to_file(
        whisper_output, VTT_WRITER, OUTPUT_TEMP_DIR / f"{run_id}.vtt"
    )
    srt_subs = subtitles.write_to_file(
        whisper_output, SRT_WRITER, OUTPUT_TEMP_DIR / f"{run_id}.srt"
    )

    return summary, transcription, str(vtt_subs), str(srt_subs)


if __name__ == "__main__":
    # Build and launch the Gradio UI only when run as a script.
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a listing without indentation -- confirm against the original file.
    block = gr.Blocks(css=str(STYLES_DIR / "whisper_pods.css"))

    with block:
        with gr.Group():
            # Header banner; the "header" class is styled in whisper_pods.css.
            gr.HTML(
                f"""
<div class="header">
<img src="https://i.imgur.com/8Xu2rwG.png" referrerpolicy="no-referrer" />
</div>
"""
            )

            # Single input: the page link passed to transcribe_and_summarize.
            podcast_link_input = gr.Textbox(label="Google Podcasts Link:")

            with gr.Row():
                btn = gr.Button("🎙️ Transcribe and summarize my podcast! 🎙️")

            summary_output = gr.Textbox(
                label="Podcast Summary",
                placeholder="Podcast Summary",
                lines=4,
                autoscroll=False,
            )

            transcription_output = gr.Textbox(
                label="Podcast Transcription",
                placeholder="Podcast Transcription",
                lines=8,
                autoscroll=False,
            )

            # Download widgets for the two subtitle files; the element classes
            # match the selectors in whisper_pods.css.
            with gr.Row():
                vtt_sub_output = gr.File(
                    label="VTT Subtitle file download", elem_classes=["vtt-sub-file"]
                )
                srt_sub_output = gr.File(
                    label="SRT Subtitle file download", elem_classes=["srt-sub-file"]
                )

        # The outputs are listed in the same order as the tuple returned by
        # transcribe_and_summarize: summary, transcription, vtt, srt.
        btn.click(
            transcribe_and_summarize,
            inputs=[podcast_link_input],
            outputs=[
                summary_output,
                transcription_output,
                vtt_sub_output,
                srt_sub_output,
            ],
        )

    block.launch(debug=True)
76 changes: 76 additions & 0 deletions whisper/3_subtitle_master.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import uuid

import gradio as gr
import whisper
from whisper.utils import WriteVTT

from settings import BASE_DIR, OUTPUT_TEMP_DIR, OUTPUT_VIDEO_DIR, STYLES_DIR
from utils import command, subtitles, video


# Model and VTT writer are created once at import time and reused by main().
MODEL = whisper.load_model("base.en")
# The writer is configured with OUTPUT_TEMP_DIR as its output directory.
VTT_WRITER = WriteVTT(output_dir=str(OUTPUT_TEMP_DIR))


def get_unique_project_name(input_video: str) -> str:
    """Build a collision-free project name from a video path.

    The result is the file name of *input_video* without its directory and
    without its last extension, followed by an underscore and a random
    UUID4, e.g. ``my_clip_<uuid>``.
    """
    stem = os.path.splitext(os.path.basename(input_video))[0]
    return f"{stem}_{uuid.uuid4()}"


def main(input_video: str) -> str:
    """Burn Whisper-generated subtitles into *input_video*.

    The video's audio is extracted to an mp3, transcribed with ``MODEL``,
    written out as a VTT subtitle file and finally embedded into a new mp4
    through an ffmpeg ``subtitles`` filter.

    Args:
        input_video: Path of the source video, as a string.

    Returns:
        The path of the subtitled video inside ``OUTPUT_VIDEO_DIR``, as a
        string. The result of the ffmpeg command is not inspected here.
    """
    project = get_unique_project_name(input_video)

    audio_file = video.to_mp3(
        input_video,
        log_directory=BASE_DIR,
        output_path=OUTPUT_TEMP_DIR / f"{project}.mp3",
    )

    transcript = MODEL.transcribe(audio_file, beam_size=5)
    subtitle_file = subtitles.write_to_file(
        transcript,
        writer=VTT_WRITER,
        output_path=OUTPUT_TEMP_DIR / f"{project}.vtt",
    )

    # The subtitle path is escaped before it goes into the filter expression.
    escaped_subtitle_path = command.format_ffmpeg_filepath(subtitle_file)
    subtitle_filter = f"subtitles='{escaped_subtitle_path}'"
    output_video_path = OUTPUT_VIDEO_DIR / f"{project}_subs.mp4"
    ffmpeg_command = (
        f'ffmpeg -i "{input_video}" -vf "{subtitle_filter}" "{output_video_path}"'
    )

    command.run_and_log(ffmpeg_command, log_directory=BASE_DIR)

    return str(output_video_path)


if __name__ == "__main__":
    # Build and launch the Gradio UI only when run as a script.
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a listing without indentation -- confirm against the original file.
    block = gr.Blocks(
        css=str(STYLES_DIR / "subtitle_master.css"),
        theme=gr.themes.Soft(primary_hue=gr.themes.colors.emerald),
    )

    with block:
        with gr.Group():
            # Header banner; the "header" class is styled in subtitle_master.css.
            gr.HTML(
                f"""
<div class="header">
<img src="https://i.imgur.com/dxHMfCI.png" referrerpolicy="no-referrer" />
</div>
"""
            )
            # Upload-only input video and the output video component.
            with gr.Row():
                input_video = gr.Video(
                    label="Input Video", sources=["upload"], mirror_webcam=False
                )
                output_video = gr.Video()
            with gr.Row():
                button_text = "🎞️ Subtitle my video! 🎞️"
                btn = gr.Button(value=button_text, elem_classes=["button-row"])

            # main() receives the input video value and its return value
            # is routed to output_video.
            btn.click(main, inputs=[input_video], outputs=[output_video])

    block.launch(debug=True)
22 changes: 22 additions & 0 deletions whisper/4_faster_whisper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from faster_whisper import WhisperModel
from settings import TEST_AUDIO_DIR

# Model size name handed to WhisperModel below.
model_size = "small"

model = WhisperModel(model_size, device="cpu", compute_type="int8")
# Use only one of the two WhisperModel lines, depending on whether you run on
# CPU or GPU (cuda). The CPU/int8 line above is the active one; swap it for
# the line below to run on a GPU.
# model = WhisperModel(model_size, device="cuda", compute_type="float16")


# NOTE(review): without_timestamps=True is requested here although the loop
# at the bottom prints segment.start / segment.end -- confirm this is intended.
segments, info = model.transcribe(
    str(TEST_AUDIO_DIR / "dutch_long_repeat_file.mp3"),
    beam_size=5,
    without_timestamps=True,
)

print(
    f"Detected language '{info.language}' with probability {info.language_probability}"
)

# Print every segment with its start/end offsets formatted to two decimals.
for segment in segments:
    print(f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}")
69 changes: 69 additions & 0 deletions whisper/4_vid_to_quiz.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import uuid
from pathlib import Path

import gradio as gr

from settings import BASE_DIR, OUTPUT_TEMP_DIR, STYLES_DIR
from utils import openai_api, video


API_UPLOAD_LIMIT_BYTES = 26214400  # 25 * 1024 * 1024 bytes


def check_upload_size(input_file: str) -> None:
    """Ensure *input_file* does not exceed ``API_UPLOAD_LIMIT_BYTES``.

    Args:
        input_file: Path of the file whose on-disk size is checked.

    Raises:
        ValueError: If the file is larger than the limit. A file of exactly
            the limit size is accepted.
    """
    size_in_bytes = os.path.getsize(input_file)
    if size_in_bytes <= API_UPLOAD_LIMIT_BYTES:
        return

    # Sizes are reported both in bytes and in MB (bytes / 1024 / 1024).
    size_in_mb = size_in_bytes / 1024 / 1024
    limit_in_mb = API_UPLOAD_LIMIT_BYTES / 1024 / 1024
    raise ValueError(
        f"File size of {size_in_bytes} bytes ({size_in_mb:.2f} MB) exceeds "
        f"the API upload limit of {API_UPLOAD_LIMIT_BYTES} bytes "
        f"({limit_in_mb:.2f} MB). Please use a shorter video or lower the "
        "audio quality settings."
    )


def main(input_video: str) -> str:
    """Generate a quiz from the spoken content of *input_video*.

    The audio track is extracted to a mono mp3 named after a fresh UUID,
    checked against the upload limit, transcribed through
    ``openai_api.transcribe`` and passed to ``openai_api.text_to_quiz``.

    Args:
        input_video: Path of the source video, as a string.

    Returns:
        The value produced by ``openai_api.text_to_quiz``.

    Raises:
        ValueError: Propagated from ``check_upload_size`` when the extracted
            audio is too large.
    """
    audio_target = OUTPUT_TEMP_DIR / f"{uuid.uuid4()}.mp3"
    mp3_file = video.to_mp3(
        input_video,
        log_directory=BASE_DIR,
        output_path=audio_target,
        mono=True,
    )

    # Fail before uploading anything if the audio is over the limit.
    check_upload_size(mp3_file)

    transcript = openai_api.transcribe(
        Path(mp3_file), language="en", translate=False, response_format="text"
    )
    return openai_api.text_to_quiz(transcript)


if __name__ == "__main__":
    # Build and launch the Gradio UI only when run as a script.
    # NOTE(review): the nesting of the `with` blocks below was reconstructed
    # from a listing without indentation -- confirm against the original file.
    block = gr.Blocks(
        css=str(STYLES_DIR / "vid2quiz.css"),
        theme=gr.themes.Soft(primary_hue=gr.themes.colors.yellow),
    )

    with block:
        with gr.Group():
            # Header banner; "header" and "header-img" are styled in vid2quiz.css.
            gr.HTML(
                f"""
<div class="header">
<img src="https://i.imgur.com/oEtZKEh.png" referrerpolicy="no-referrer" class="header-img" />
</div>
"""
            )
            # Upload-only input video next to the textbox that shows the quiz.
            with gr.Row():
                input_video = gr.Video(
                    label="Input Video", sources=["upload"], mirror_webcam=False
                )
                output_quiz_text = gr.Textbox(label="Quiz")
            with gr.Row():
                button_text = "📝 Make a quiz about this video! 📝"
                btn = gr.Button(value=button_text, elem_classes=["button-row"])

            # main() receives the input video value and its return value
            # is routed to output_quiz_text.
            btn.click(main, inputs=[input_video], outputs=[output_quiz_text])

    block.launch(debug=True)
Binary file added whisper/images/subtitle_master.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added whisper/images/vid2quiz.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added whisper/images/whisper_pods.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
7 changes: 7 additions & 0 deletions whisper/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path

# Every directory below is resolved relative to the folder containing this
# settings module, so the scripts work regardless of the current directory.
BASE_DIR = Path(__file__).parent
# Intermediate artefacts (the scripts write mp3 and subtitle files here).
OUTPUT_TEMP_DIR = BASE_DIR / "output_temp_files"
# Destination of the subtitled videos.
OUTPUT_VIDEO_DIR = BASE_DIR / "output_video"
# CSS files passed to gr.Blocks(css=...).
STYLES_DIR = BASE_DIR / "styles"
# Sample audio clips used by the demo scripts.
TEST_AUDIO_DIR = BASE_DIR / "test_audio_files"
8 changes: 8 additions & 0 deletions whisper/styles/subtitle_master.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/* Banner wrapper around the logo image: 2em vertical, 8em horizontal padding. */
.header {
    padding: 2em 8em;
}

/* Shared background for the banner and the button row. The colour is an
   8-digit hex value, so the trailing "7e" is the alpha channel. */
.header,
.button-row {
    background-color: #1d366f7e;
}
15 changes: 15 additions & 0 deletions whisper/styles/vid2quiz.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* Banner wrapper: a flex container that centres its content on both axes. */
.header {
    display: flex;
    justify-content: center;
    align-items: center;
    padding: 2em 8em;
}

/* Keep the logo at no more than half the banner width. */
.header-img {
    max-width: 50%;
}

/* Shared background for the banner and the button row. */
.header,
.button-row {
    background-color: #0c1d36;
}
8 changes: 8 additions & 0 deletions whisper/styles/whisper_pods.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/* Banner wrapper around the logo image: 2em vertical, 8em horizontal padding. */
.header {
    padding: 2em 8em;
}

/* Fixed height for the two subtitle download widgets (classes assigned via
   elem_classes on the gr.File components). */
.vtt-sub-file,
.srt-sub-file {
    height: 80px;
}
Binary file not shown.
Binary file not shown.
Binary file added whisper/test_audio_files/high_quality.mp3
Binary file not shown.
Binary file added whisper/test_audio_files/low_quality.mp3
Binary file not shown.
Binary file added whisper/test_audio_files/terrible_quality.mp3
Binary file not shown.
Empty file added whisper/utils/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions whisper/utils/command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import datetime
import subprocess
from pathlib import Path


def print_blue(message: str) -> None:
    """Print *message* to stdout wrapped in the ANSI escape codes ``94`` / ``00``."""
    print(f"\033[94m{message}\033[00m")


def run_and_log(command: str, log_directory: Path) -> None:
    """Run *command* and append its output to ``commands_log.txt``.

    The command's stdout and stderr are both redirected into the log file in
    *log_directory*. Afterwards a footer with the command text, its exit code
    and the current date/time is appended.

    Args:
        command: The full command line as a single string.
        log_directory: Directory that holds (or will hold) ``commands_log.txt``.

    Raises:
        FileNotFoundError: If the program named in *command* cannot be found.

    A non-zero exit code does not raise; it is only recorded in the log.
    """
    # Imported locally so this function does not rely on changes to the
    # module-level import block.
    import os
    import shlex

    print_blue(f"Running command: \n{command}")

    # Without shell=True, POSIX treats a *string* as the program name itself,
    # so a command line with arguments could never start there. Windows
    # accepts the string form as-is, so it is left untouched on that platform.
    args = command if os.name == "nt" else shlex.split(command)

    with open(log_directory / "commands_log.txt", "a+", encoding="utf-8") as file:
        exit_code = subprocess.call(
            args,
            stdout=file,
            stderr=file,
        )
        file.write(
            f"\nRan command: {command}\nExit code: {exit_code}\n"
            f"Date/time: {datetime.datetime.now()}\n\n\n\n"
        )


def format_ffmpeg_filepath(path: Path) -> str:
    r"""Escape *path* for use inside an ffmpeg filter expression.

    Every backslash and every forward slash becomes a double backslash, and
    every colon is prefixed with a backslash. For example
    C:\Users\dirk\test/subtitle.vtt becomes C\:\\Users\\dirk\\test\\subtitle.vtt
    """
    escaped = str(path)
    # Applied in order: existing backslashes are doubled before the later
    # steps add new ones, so nothing is escaped twice.
    for old, new in (("\\", "\\\\"), ("/", "\\\\"), (":", "\\:")):
        escaped = escaped.replace(old, new)
    return escaped
Loading