Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .catgitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
# https://github.com/FlyingFathead/catgit
README.md
tests/diarize_with_whisper-test.py
utils/resemblyzer_safety_check.py
5 changes: 4 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:slim-bookworm
FROM python:3.12-slim

# Install dependencies & clean up after to reduce Docker file size
RUN apt-get update && apt-get install -y \
Expand All @@ -13,6 +13,9 @@ WORKDIR /app
# Copy the requirements file first to leverage Docker cache
COPY requirements.txt .

# Upgrade pip and setuptools
RUN pip install --upgrade pip setuptools wheel

# Install Python dependencies
RUN pip3 install --no-cache-dir -r requirements.txt

Expand Down
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ Replace `'YourTelegramBotToken'` with your actual Telegram bot token. This comma

## Usage

After launching the bot, you can interact with it via Telegram (message `@whatever_your_bot_name_is_Bot`):
Once your bot has launched successfully, you can interact with it on Telegram by sending a message to its username (e.g. `@your_bot_name_Bot`):

1. Send a video URL (for `yt-dlp` to download), a voice message or an audio file (e.g. in `.wav` or `.mp3` format) to the bot.
2. The bot will acknowledge the request and begin processing, notifying the user of the process.
Expand All @@ -195,6 +195,14 @@ After launching the bot, you can interact with it via Telegram (message `@whatev
- `/language` - set the model's transcription language (`auto` = autodetect); if you know the language spoken in the audio, setting the transcription language manually with this command may improve both transcription speed and accuracy.

## Changes
- v0.1707 - New `config.ini` option: add sites that require full video download
- some media sites don't work well with `yt-dlp`'s audio-only download method
- there are now two new options in `config.ini` under `[YTDLPSettings]`:
- `download_original_video_for_domains_active = true` (default)
- `download_original_video_domains = site1.com, site2.com, site3.com`
- at the moment it's used for media platforms that have had reported issues during testing
- when active, the domain of each submitted URL is checked against this comma-separated list; media from matching sites is downloaded as the original video instead of audio-only
- _(the tradeoff is download size and hence speed; the audio-only method is usually the fastest and should be preferred for most popular sites, so only add problematic sites to the original-video list)_
- v0.1706 - Disable asking for token if running inside Docker
- by default, the app will ask for the token if it's not found, unless Dockerized
- can be better for headless use case scenarios where you need the error message rather than a prompt for the bot token
Expand Down
24 changes: 23 additions & 1 deletion config/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,27 @@ cooldown_seconds = 10
max_requests_per_minute = 5

[YTDLPSettings]
# use your own `cookies.txt` (true/false)
# this is sometimes required for sites that require login
# or, in some cases, with sites like YouTube that don't like downloaders.
use_cookies = False
cookies_file = config/cookies.txt
cookies_file = config/cookies.txt
# some media sites don't always work well with yt-dlp's audio download feature
# for compatibility, it's recommended to enable the flag below (true)
download_original_video_for_domains_active = true
# list the sites whose media should be downloaded as the original video, comma-separated.
# example:
# download_original_video_domains = site1.com, site2.com, site3.com
# sites from which the original video is currently downloaded:
download_original_video_domains = rumble.com
# use worst video quality (true/false)
# this is usually recommended, because we will only need the _audio_ for transcription.
# downloading a high-quality video massively increases the file size.
# however, in some cases you might want to turn this off
use_worst_video_quality = true

[VideoDescriptionSettings]
# Set to True to use only a snippet of the video description
use_snippet_for_description = False
# Maximum number of lines to include in the description snippet
description_max_lines = 30
14 changes: 14 additions & 0 deletions src/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,20 @@ def get_notification_settings(cls):
'completion_message': completion_message
}

# NEW: Method to get yt-dlp domain settings
@classmethod
def get_ytdlp_domain_settings(cls):
    """Read the "download original video" domain settings for yt-dlp.

    Both values come from the ``[YTDLPSettings]`` section of the config
    returned by ``cls.get_config()``.

    Returns:
        dict: ``{'active': bool, 'domains': list[str]}`` where ``active`` is
        the ``download_original_video_for_domains_active`` flag (``False``
        when the option is missing) and ``domains`` is the comma-separated
        ``download_original_video_domains`` option split into trimmed,
        lower-cased, non-empty entries (empty list when the option is missing).
    """
    section = 'YTDLPSettings'
    parser = cls.get_config()

    is_active = parser.getboolean(
        section, 'download_original_video_for_domains_active', fallback=False
    )
    raw_domains = parser.get(
        section, 'download_original_video_domains', fallback=''
    )

    # Normalize each comma-separated entry and drop blanks (e.g. "a.com,,b.com").
    cleaned = []
    for entry in raw_domains.split(','):
        entry = entry.strip()
        if entry:
            cleaned.append(entry.lower())

    return {'active': is_active, 'domains': cleaned}

# Usage example:
# from config_loader import ConfigLoader
# notification_settings = ConfigLoader.get_notification_settings()
2 changes: 1 addition & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# openai-whisper transcriber-bot for Telegram

# version of this program
version_number = "0.1706"
version_number = "0.1707"

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
Expand Down
240 changes: 149 additions & 91 deletions src/transcription_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,10 @@
from config_loader import ConfigLoader
config = ConfigLoader.get_config()

# # Load config
# config = configparser.ConfigParser()
# config.read('config/config.ini')
# send_as_files = config.getboolean('TranscriptionSettings', 'sendasfiles', fallback=True)
# send_as_messages = config.getboolean('TranscriptionSettings', 'sendasmessages', fallback=False)

# Toggle this to use the full description or a snippet.
USE_SNIPPET_FOR_DESCRIPTION = False

USE_SNIPPET_FOR_DESCRIPTION = config.getboolean('VideoDescriptionSettings', 'use_snippet_for_description', fallback=False)
# If we're using a snippet of the description, maximum number of lines to include
DESCRIPTION_MAX_LINES = 30
DESCRIPTION_MAX_LINES = config.getint('VideoDescriptionSettings', 'description_max_lines', fallback=30)

# Output directory for transcriptions; create if doesn't exist
output_dir = "transcriptions"
Expand Down Expand Up @@ -178,82 +171,134 @@ def get_transcription_settings():
'send_as_messages': False,
}

# # (old) get transcription settings
# def get_transcription_settings():
# config = configparser.ConfigParser()
# config_path = os.path.join(base_dir, 'config', 'config.ini')

# if not os.path.exists(config_path):
# logger.error("Error: config.ini not found at the expected path.")
# sys.exit(1)

# config.read(config_path)

# if 'TranscriptionSettings' not in config:
# logger.error("TranscriptionSettings section missing in config.ini")
# sys.exit(1)

# include_header = config.getboolean('TranscriptionSettings', 'IncludeHeaderInTranscription', fallback=False)
# keep_audio_files = config.getboolean('TranscriptionSettings', 'KeepAudioFiles', fallback=False)

# logger.info(f"Transcription settings loaded: include_header={include_header}, keep_audio_files={keep_audio_files}")

# return {
# 'include_header': include_header,
# 'keep_audio_files': keep_audio_files
# }

# split long messages
def split_message(message, max_length=4096):
    """Split ``message`` into consecutive chunks of at most ``max_length`` characters.

    Args:
        message: The string to split.
        max_length: Maximum length of each chunk. Defaults to 4096
            (presumably Telegram's per-message character limit -- confirm).

    Returns:
        list[str]: The chunks in order; joining them reproduces ``message``.
        An empty ``message`` yields an empty list.

    Raises:
        ValueError: If ``max_length`` is not positive. Without this check a
            negative value would silently return ``[]`` and drop the whole
            message, and zero would fail with an opaque ``range()`` error.
    """
    if max_length <= 0:
        raise ValueError(f"max_length must be a positive integer, got {max_length!r}")

    return [
        message[start:start + max_length]
        for start in range(0, len(message), max_length)
    ]

# audio download
async def download_audio(url, output_path):
logger.info(f"Attempting to download audio from: {url}")

# Read settings from configuration
# // audio download (new method)
async def download_audio(url, audio_path):
config = ConfigLoader.get_config()
ytdlp_settings = ConfigLoader.get_ytdlp_domain_settings()
use_cookies = config.getboolean('YTDLPSettings', 'use_cookies', fallback=False)
cookies_file = config.get('YTDLPSettings', 'cookies_file', fallback='config/cookies.txt')
use_worst_video_quality = config.getboolean('YTDLPSettings', 'use_worst_video_quality', fallback=True)

parsed_url = urlparse(url)
domain = parsed_url.netloc.lower()
if domain.startswith('www.'):
domain = domain[4:] # Remove 'www.'

should_download_video = ytdlp_settings['active'] and domain in ytdlp_settings['domains']

if should_download_video:
logger.info("Identified domain requiring full video download.")
# Step 1: Get available formats in JSON
command = [
"yt-dlp",
"--no-warnings",
"--dump-json",
url
]
if use_cookies and os.path.exists(cookies_file):
command.extend(["--cookies", cookies_file])

# Specify a cache directory that yt-dlp can write to
cache_dir = ".cache"

# Check if the cache directory exists, create it if it doesn't
if not os.path.exists(cache_dir):
try:
os.makedirs(cache_dir)
logger.info(f"Created cache directory: {cache_dir}")
except Exception as e:
logger.error(f"Failed to create cache directory {cache_dir}: {e}")

command = [
"yt-dlp",
"--extract-audio",
"--audio-format", "mp3",
"--cache-dir", cache_dir, # Specify the custom cache directory
]
process = await asyncio.create_subprocess_exec(
*command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)

if use_cookies:
if os.path.exists(cookies_file):
command.extend(["--cookies", cookies_file])
logger.info(f"Using cookies file: {cookies_file}")
stdout_data, stderr_data = await process.communicate()
if process.returncode != 0:
stderr_output = stderr_data.decode()
logger.error(f"Failed to get video formats: {stderr_output}")
raise Exception(f"Failed to get video formats: {stderr_output}")

# Step 2: Parse JSON to find the appropriate format
video_info = json.loads(stdout_data.decode())
formats = video_info.get('formats', [])

if not formats:
raise Exception("No formats found for the video.")

# Filter out formats without audio
video_formats = [
fmt for fmt in formats
if fmt.get('vcodec') != 'none' and fmt.get('acodec') != 'none' and fmt.get('acodec') != 'video only'
]

if not video_formats:
raise Exception("No suitable video formats with audio available.")

if use_worst_video_quality:
# Sort video formats by resolution (width x height) or bitrate
def get_format_sort_key(fmt):
    """Build an ascending quality key for a yt-dlp format dict.

    The key is ``(width * height, tbr)``; a missing or ``None`` value is
    treated as 0. Smaller keys mean lower quality, so ``min()`` over this
    key yields the lowest-quality format.
    """
    pixel_count = (fmt.get('width') or 0) * (fmt.get('height') or 0)
    bitrate = fmt.get('tbr') or 0
    return pixel_count, bitrate

selected_format = min(video_formats, key=get_format_sort_key)
logger.info("Selected worst quality video format.")
else:
logger.error(f"Cookies file {cookies_file} does not exist.")
raise Exception(f"Cookies file {cookies_file} does not exist.")
# Select best quality video format
def get_format_sort_key(fmt):
    """Rank a yt-dlp format dict so that ``max()`` selects the best quality.

    The key is ``(width * height, tbr)``; a missing or ``None`` value is
    treated as 0.

    The components are deliberately NOT negated: the selection that uses
    this key is done with ``max()``, and negating the values would make
    ``max()`` return the lowest-quality format instead of the best one.
    """
    width = fmt.get('width') or 0
    height = fmt.get('height') or 0
    total_pixels = width * height
    tbr = fmt.get('tbr') or 0
    return (total_pixels, tbr)

selected_format = max(video_formats, key=get_format_sort_key)
logger.info("Selected best quality video format.")

selected_format_id = selected_format.get('format_id')

if not selected_format_id:
raise Exception("Could not determine selected format ID.")

logger.info(f"Selected format ID: {selected_format_id}")

# Step 3: Download video using the selected format
base_output_path = audio_path.replace('.mp3', '') # e.g., audio/12345_1618033988
video_output_template = f"{base_output_path}.%(ext)s" # e.g., audio/12345_1618033988.mp4

command = [
"yt-dlp",
# "--verbose", # uncomment to set verbose
"--format", selected_format_id,
"--output", video_output_template,
url
]
if use_cookies and os.path.exists(cookies_file):
command.extend(["--cookies", cookies_file])

command.extend([url, "-o", output_path])
logger.info("Downloading the selected quality video with audio...")
else:
# Download audio-only as mp3
command = [
"yt-dlp",
"--extract-audio",
"--audio-format", "mp3",
"--output", audio_path,
url
]
if use_cookies and os.path.exists(cookies_file):
command.extend(["--cookies", cookies_file])
logger.info("Downloading audio-only...")

# Start the subprocess
process = await asyncio.create_subprocess_exec(
*command,
stdout=asyncio.subprocess.PIPE,
*command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE
)

# Read and log output
stdout_lines = []
stderr_lines = []

# Define async functions to read from stdout and stderr
async def read_stream(stream, lines, log_func):
while True:
line = await stream.readline()
Expand All @@ -264,42 +309,55 @@ async def read_stream(stream, lines, log_func):
else:
break

# Read from stdout and stderr concurrently
await asyncio.gather(
read_stream(process.stdout, stdout_lines, logger.info),
read_stream(process.stderr, stderr_lines, logger.error)
)

# Wait for the process to finish
await process.wait()

# Check the return code
if process.returncode != 0:
stderr_output = '\n'.join(stderr_lines)
# Check for specific error messages
if any(keyword in stderr_output for keyword in [
"Sign in to confirm you're not a bot",
"unable to extract initial player response",
"This video is unavailable",
"ERROR:"
]):
custom_error_message = (
"Failed to download audio due to YouTube's anti-bot measures or video restrictions. "
"Possible reasons include age restrictions, region locks, or the video requiring sign-in. "
"Please try a different video, or if you're the administrator, consider using cookies with `yt-dlp`."
)
logger.error(f"Error: {custom_error_message}")
raise Exception(custom_error_message)
else:
# For other errors, raise a generic exception with stderr output
logger.error(f"yt-dlp failed with error:\n{stderr_output}")
raise Exception(f"Failed to download audio: {stderr_output}")
logger.error(f"yt-dlp failed with error:\n{stderr_output}")
raise Exception(f"Failed to download media: {stderr_output}")

if should_download_video:
# Step 4: Extract audio from the downloaded video
video_extensions = ['mp4', 'webm', 'mkv', 'avi', 'mov', 'flv', 'wmv', 'mpg', 'mpeg']
video_file = None
for ext in video_extensions:
potential_video = f"{base_output_path}.{ext}"
if os.path.exists(potential_video):
video_file = potential_video
break

if not video_file:
logger.error("Failed to locate the downloaded video file.")
raise Exception("Failed to locate the downloaded video file.")

# Verify the download success
if os.path.exists(output_path):
logger.info(f"Audio downloaded successfully: {output_path}")
logger.info(f"Video file downloaded: {video_file}")

try:
logger.info("Starting audio extraction from video file...")
# Use ffmpeg via pydub to extract audio
audio = AudioSegment.from_file(video_file)
logger.info("Audio file loaded, exporting to mp3...")
audio.export(audio_path, format="mp3")
logger.info(f"Audio extracted and saved to: {audio_path}")
except Exception as e:
logger.error(f"Failed to extract audio from video: {e}")
raise Exception(f"Failed to extract audio from video: {e}")

try:
logger.info(f"Removing temporary video file: {video_file}")
os.remove(video_file)
logger.info(f"Temporary video file {video_file} removed.")
except Exception as e:
logger.warning(f"Failed to remove temporary video file {video_file}: {e}")
else:
raise Exception(f"Failed to download audio: {output_path}")
if not os.path.exists(audio_path):
raise Exception(f"Failed to download audio: {audio_path}")
logger.info(f"Audio downloaded successfully: {audio_path}")

# Read from stream line by line until EOF, call callback on each line.
async def read_stream(stream, callback):
Expand Down