Skip to content

Commit 6c811f1

Browse files
committed
v0.1708 - direct video file upload support on TG
1 parent 173814f commit 6c811f1

File tree

3 files changed

+137
-17
lines changed

3 files changed

+137
-17
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ Can be safely installed and deployed with [Docker](https://www.docker.com/) by u
2121
- 🎥 Downloads and processes media URLs from any source supported by `yt-dlp`
2222
- _(can be configured to use `cookies.txt` in `config.ini` for better availability)_
2323
- 📲 Can receive Telegram audio messages as well as files, i.e. `.mp3` and `.wav` for transcription
24-
- _(all other `ffmpeg` supported formats also available, configurable via `config.ini`)_
24+
- Direct video file uploads in supported media formats are also available
25+
- _(all other `ffmpeg` supported formats also available to be added, configurable via `config.ini`)_
2526
- 🤖 Uses a local Whisper model from the `openai-whisper` package for transcription
2627
- _(no API required, use your own PC & available CUDA GPU!)_
2728
- 🖥️ Automatically uses `GPUtil` to map out the best available CUDA-enabled local GPU
@@ -195,6 +196,8 @@ After launching your bot successfully, you can interact with it via Telegram (se
195196
- `/language` - set the model's transcription language (`auto` = autodetect); if you know the language spoken in the audio, setting the transcription language manually with this command may improve both transcription speed and accuracy.
196197

197198
## Changes
199+
- v0.1708 - Direct video file uploads are now available
200+
- (for spam/abuse prevention, they're disabled by default, see `config.ini`)
198201
- v0.1707 - New `config.ini` option: add sites that require full video download
199202
- some media sites don't work well with `yt-dlp`'s audio-only download method
200203
- there are now two new options in `config.ini` under `[YTDLPSettings]`:

config/config.ini

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,15 @@ validmodels = tiny.en, tiny, base.en, base, small.en, small, medium.en, medium,
4343
updateintervalseconds = 10
4444

4545
[AudioSettings]
46+
# Allow voice messages? (true/false), default: true
4647
allowvoicemessages = True
48+
# Allow audio file uploads? (true/false), default: true
4749
allowaudiofiles = True
50+
# Allow video file uploads? (true/false), default: false
51+
allowvideofiles = False
52+
# Maximum file size (in megabytes) for uploaded audio and video files, default: 20
53+
# NOTE: Telegram Bot API currently has a 20MB size limit.
54+
max_file_size_mb = 20
4855

4956
[AllowedFileFormats]
5057
allowed_formats = mp3, wav, m4a, aac, flac, ogg, wma, aiff

src/main.py

Lines changed: 126 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# openai-whisper transcriber-bot for Telegram
44

55
# version of this program
6-
version_number = "0.1707"
6+
version_number = "0.1708"
77

88
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/
@@ -95,6 +95,10 @@ def __init__(self):
9595
self.allowed_formats = self.config.get('AllowedFileFormats', 'allowed_formats', fallback='mp3, wav, mp4').split(',')
9696
self.allowed_formats = [fmt.lower().strip() for fmt in self.allowed_formats]
9797

98+
# Allow video files (true/false)
99+
self.allow_video_files = self.config.getboolean('AudioSettings', 'allowvideofiles', fallback=False)
100+
logger.info(f"allow_video_files: {self.allow_video_files}")
101+
98102
self.model = self.config.get('WhisperSettings', 'Model', fallback='medium.en')
99103
self.valid_models = self.config.get('ModelSettings', 'ValidModels', fallback='tiny, base, small, medium, large, turbo').split(', ')
100104

@@ -112,17 +116,27 @@ def __init__(self):
112116
self.user_models = {} # Use a dictionary to manage models per user.
113117
self.user_models_lock = asyncio.Lock() # Lock for handling user_models dictionary
114118

119+
# Read the maximum file size setting
120+
self.max_file_size_mb = self.config.getint('AudioSettings', 'max_file_size_mb', fallback=20)
121+
self.max_file_size_bytes = self.max_file_size_mb * 1024 * 1024 # Convert MB to bytes
122+
logger.info(f"Maximum file size set to: {self.max_file_size_mb} MB")
123+
124+
# Define directories for storing video messages
125+
self.video_messages_dir = "video_messages"
126+
os.makedirs(self.video_messages_dir, exist_ok=True)
127+
115128
# Define output directory for transcriptions
116129
self.output_dir = "transcriptions"
117130
os.makedirs(self.output_dir, exist_ok=True)
118131

119132
async def start_command(self, update: Update, context: CallbackContext) -> None:
133+
max_file_size_mb = self.max_file_size_mb # Use the configured value
120134
welcome_message = (
121135
"👋 <b>Welcome to the Whisper Transcriber Bot!</b>\n\n"
122136
"I'm here to transcribe audio from various sources for you.\n\n"
123137
"📌 <b>How Does This Work?</b>\n"
124138
"- Send me a link to a supported media URL (e.g., YouTube).\n"
125-
"- Or, send an audio file (max 20MB in size), and I'll transcribe it.\n\n"
139+
f"- Or, send an audio file (max {max_file_size_mb} MB in size), and I'll transcribe it.\n\n"
126140
"💡 <b>Commands You Can Use:</b>\n"
127141
"- /start: Show this welcome message.\n"
128142
"- /help: Get detailed help on how to use this service.\n"
@@ -424,30 +438,53 @@ async def help_command(self, update: Update, context: CallbackContext) -> None:
424438
models_list = ', '.join(self.valid_models) # Dynamically generate the list of valid models
425439
allowed_formats_list = ', '.join(self.allowed_formats) # Get the list of allowed formats
426440

427-
# Access the 'allowaudiofiles' and 'allowvoicemessages' settings
441+
# Access the settings
428442
allow_audio_files = self.config.getboolean('AudioSettings', 'allowaudiofiles', fallback=True)
429443
allow_voice_messages = self.config.getboolean('AudioSettings', 'allowvoicemessages', fallback=True)
444+
allow_video_files = self.config.getboolean('AudioSettings', 'allowvideofiles', fallback=False) # Read the new setting
430445

431446
# Build the file upload info based on settings
432447
file_upload_info = ""
433-
if allow_audio_files and allow_voice_messages:
448+
max_file_size_mb = self.max_file_size_mb # Use the configured value
449+
450+
if allow_audio_files and allow_voice_messages and allow_video_files:
451+
file_upload_info = (
452+
f"- Or, send an audio message, an audio file, or a video file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n\n"
453+
f"<b>Currently supported audio file formats:</b> {allowed_formats_list}\n"
454+
)
455+
elif allow_audio_files and allow_voice_messages and not allow_video_files:
456+
file_upload_info = (
457+
f"- Or, send an audio message or an audio file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n\n"
458+
f"<b>Currently supported audio file formats:</b> {allowed_formats_list}\n"
459+
)
460+
elif allow_audio_files and not allow_voice_messages and allow_video_files:
461+
file_upload_info = (
462+
f"- Or, send an audio file or a video file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n\n"
463+
f"<b>Currently supported audio file formats:</b> {allowed_formats_list}\n"
464+
)
465+
elif not allow_audio_files and allow_voice_messages and allow_video_files:
434466
file_upload_info = (
435-
"- Or, send an audio message or an audio file to have its audio transcribed. (maximum file size: 20MB)\n\n"
436-
f"<b>Currently supported file formats:</b> {allowed_formats_list}\n"
467+
f"- Or, send an audio message or a video file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n"
468+
f"- Note: Direct audio file uploads are currently disabled.\n"
437469
)
438-
elif allow_audio_files and not allow_voice_messages:
470+
elif allow_audio_files and not allow_voice_messages and not allow_video_files:
439471
file_upload_info = (
440-
"- Or, send an audio file to have its audio transcribed. (maximum file size: 20MB)\n\n"
441-
f"<b>Currently supported file formats:</b> {allowed_formats_list}\n"
472+
f"- Or, send an audio file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n\n"
473+
f"<b>Currently supported audio file formats:</b> {allowed_formats_list}\n"
442474
)
443-
elif not allow_audio_files and allow_voice_messages:
475+
elif not allow_audio_files and allow_voice_messages and not allow_video_files:
444476
file_upload_info = (
445-
"- Or, send an audio message to have its audio transcribed. (maximum file size: 20MB)\n"
446-
"- Note: Direct file uploads are currently disabled.\n"
477+
f"- Or, send an audio message to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n"
478+
f"- Note: Direct audio file uploads are currently disabled.\n"
479+
)
480+
elif not allow_audio_files and not allow_voice_messages and allow_video_files:
481+
file_upload_info = (
482+
f"- Or, send a video file to have its audio transcribed. (maximum file size: {max_file_size_mb} MB)\n"
483+
f"- Note: Direct audio file uploads and audio messages are currently disabled.\n"
447484
)
448485
else:
449486
file_upload_info = (
450-
"- Note: Direct file uploads and audio messages are currently disabled.\n"
487+
f"- Note: Direct file uploads and audio messages are currently disabled.\n"
451488
)
452489

453490
help_text = f"""<b>Welcome to the Whisper Transcriber Bot!</b>
@@ -473,10 +510,10 @@ async def help_command(self, update: Update, context: CallbackContext) -> None:
473510
{models_list}
474511
475512
<b>Bot code by FlyingFathead.</b>
476-
Source code on <a href='https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/'>GitHub</a>.
513+
Source code: <a href='https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/'>GitHub</a>.
477514
478515
<b>Disclaimer:</b>
479-
The original author is NOT responsible for how this bot is utilized. All code and outputs are provided 'AS IS' without warranty of any kind. Users assume full responsibility for the operation and output of the bot. This applies to both legal and ethical responsibilities. Use at your own risk.
516+
The original author of this program is NOT responsible for how this service is utilized. All code and outputs are provided 'AS IS' without warranty of any kind. Users assume full responsibility for the operation and output of the bot. This applies to both legal and ethical responsibilities. Use at your own risk.
480517
"""
481518
await update.message.reply_text(help_text, parse_mode='HTML')
482519

@@ -545,6 +582,7 @@ async def handle_voice_message(self, update: Update, context: CallbackContext) -
545582
except subprocess.CalledProcessError as e:
546583
logger.error(f"Error converting voice message: {e}")
547584

585+
# // audio file handler
548586
async def handle_audio_file(self, update: Update, context: CallbackContext) -> None:
549587
logger.info("handle_audio_file called.")
550588

@@ -577,7 +615,8 @@ async def handle_audio_file(self, update: Update, context: CallbackContext) -> N
577615
try:
578616
# Check file size before downloading
579617
file_size = file_info.file_size
580-
if file_size > 20 * 1024 * 1024: # 20 MB in bytes
618+
# if file_size > 20 * 1024 * 1024: # 20 MB in bytes
619+
if file_size > self.max_file_size_bytes:
581620
await update.message.reply_text(
582621
"The file is too large to process. "
583622
"Telegram bots can only download files up to 20 MB in size. "
@@ -618,6 +657,74 @@ async def handle_audio_file(self, update: Update, context: CallbackContext) -> N
618657
logger.error(f"Exception in handle_audio_file: {e}")
619658
await update.message.reply_text("An error occurred while processing your file.")
620659

660+
# // video file handler
661+
async def handle_video_file(self, update: Update, context: CallbackContext) -> None:
    """Transcribe the audio track of a video uploaded directly to the bot.

    The upload is rejected when video uploads are disabled in the config or
    when its reported size exceeds the configured limit. Otherwise the video
    is downloaded into ``self.video_messages_dir``, its audio is extracted
    to an MP3 in ``self.audio_messages_dir`` with ffmpeg, and the MP3 is put
    on ``self.task_queue``. The downloaded video is deleted afterwards,
    whether or not the extraction succeeded.

    Args:
        update: Incoming Telegram update; ``update.message.video`` is read.
        context: Handler context; ``context.bot`` is used to fetch the file
            and is passed along with the queued task.
    """
    logger.info("handle_video_file called.")

    # Check if video file uploads are allowed
    if not self.allow_video_files:
        await update.message.reply_text(
            "Direct video uploads are currently disabled. "
            "Please send audio files only, or upload your video to a supported media platform and send the link."
        )
        logger.info("Video processing is not allowed according to config.")
        return

    # Proceed to handle the video file
    video = update.message.video

    try:
        # Check file size before downloading. Telegram does not always
        # report a size, so only enforce the limit when one is present
        # (comparing None with an int would raise TypeError).
        file_size = video.file_size
        if file_size is not None and file_size > self.max_file_size_bytes:
            await update.message.reply_text(
                f"The video file is too large to process. "
                f"Maximum allowed file size is {self.max_file_size_mb} MB. "
                "Please send a smaller file or provide a link to the video."
            )
            logger.warning(f"Video file is too big: {file_size} bytes.")
            return

        # Proceed with downloading the video file
        file = await context.bot.get_file(video.file_id)
        file_name = video.file_name or f"{video.file_unique_id}.mp4"

        # The file name comes from the sender: keep only a plain
        # alphanumeric extension so it can never inject path separators,
        # and fall back to "mp4" when the name has no usable extension.
        file_extension = os.path.splitext(file_name)[1].lstrip('.').lower()
        if not file_extension.isalnum():
            file_extension = "mp4"

        video_file_path = os.path.join(self.video_messages_dir, f'{file.file_unique_id}.{file_extension}')
        await file.download_to_drive(video_file_path)
        logger.info(f"Video file downloaded to {video_file_path}")

        # Extract audio from the video file using ffmpeg
        audio_file_path = os.path.join(self.audio_messages_dir, f'{file.file_unique_id}.mp3')
        # '-y' overwrites an MP3 left over from an earlier upload of the
        # same file instead of stopping at ffmpeg's overwrite prompt.
        ffmpeg_cmd = ['ffmpeg', '-y', '-i', video_file_path, '-vn', '-acodec', 'libmp3lame', audio_file_path]
        try:
            # Run ffmpeg in a worker thread so the event loop (and any
            # other tasks running on it) is not blocked during extraction.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, lambda: subprocess.run(ffmpeg_cmd, check=True))
            logger.info(f"Extracted audio from video file: {audio_file_path}")

            # Queue the audio file for transcription
            await self.task_queue.put((audio_file_path, context.bot, update))
            queue_length = self.task_queue.qsize()
            response_text = (
                "Your request is next and is currently being processed."
                if queue_length == 1
                else f"Your request has been added to the queue. There are {queue_length - 1} jobs ahead of yours."
            )
            await update.message.reply_text(response_text)
            logger.info(f"Audio file queued for transcription. Queue length: {queue_length}")

        except subprocess.CalledProcessError as e:
            logger.error(f"Error extracting audio from video file: {e}")
            await update.message.reply_text("An error occurred while extracting audio from the video.")
        finally:
            # The video is only needed for extraction; remove it either way
            if os.path.exists(video_file_path):
                os.remove(video_file_path)
                logger.info(f"Deleted video file: {video_file_path}")

    except Exception as e:
        # Last-resort boundary so a failed upload never crashes the handler
        logger.error(f"Exception in handle_video_file: {e}")
        await update.message.reply_text("An error occurred while processing your video file.")
727+
621728
async def info_command(self, update: Update, context: CallbackContext) -> None:
622729
user_id = update.effective_user.id
623730
current_model = get_whisper_model(user_id)
@@ -669,6 +776,9 @@ def run(self):
669776
self.application.add_handler(MessageHandler(filters.VOICE, self.handle_voice_message))
670777
self.application.add_handler(MessageHandler(filters.Document.ALL, self.handle_audio_file))
671778

779+
# Handle direct video file uploads
780+
self.application.add_handler(MessageHandler(filters.VIDEO, self.handle_video_file))
781+
672782
# Add generic message handler last
673783
self.application.add_handler(MessageHandler(filters.TEXT & ~filters.COMMAND, self.handle_message))
674784

0 commit comments

Comments
 (0)