Skip to content

Commit 90b26ab

Browse files
Merge pull request #14 from FlyingFathead/specialargs-domains
`v0.1716` - per-domain arg configs for yt-dlp
2 parents 24f3a55 + d0800eb commit 90b26ab

File tree

5 files changed

+102
-17
lines changed

5 files changed

+102
-17
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,14 @@ If you just need to see the options and help, type:
220220
```
221221

222222
## Changes
223+
- v0.1716 - **NEW: Configurable per-domain yt-dlp arguments**
224+
- Added new `[YTDLPSettings]` config options:
225+
- `use_special_commands_for_domains = true` (set to `true` to enable)
226+
- `special_domain_commands = ...` (multiline string)
227+
- Lets you specify domain-specific yt-dlp arguments (e.g., `--http-chunk-size 0 --downloader native`) for problematic sites like Rumble, BitChute, Odysee, etc.
228+
- (list can be expanded by the user in `config.ini` as needed)
229+
- This approach solves repeated “Separator not found” or TCP/SSL connection errors by applying fallback flags/headers strictly to domains known to need them—without affecting other sites or default performance.
230+
- The bot lowercases the host portion of the URL, removes a leading `www.`, and checks whether the result exactly matches a domain listed in your `special_domain_commands`; if it does, those extra yt-dlp flags are injected into the download command, eliminating chunking/SSL issues specific to that site.
223231
- v0.1715 - **Timestamped TXT Output & Startup Ping Logging**
224232
- Added new config option `send_timestamped_txt` under `[TranscriptionSettings]` in `config.ini`.
225233
- If `sendasfiles = true` and `send_timestamped_txt = true`, the bot now generates and sends an additional `*_timestamped.txt` file along with the standard `.txt`, `.srt`, and `.vtt` files.

config/config.ini

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,19 +139,28 @@ custom_cache_dir =
139139
# (((=== Video-only Sites ===)))
140140
# some media sites don't always work well with yt-dlp's audio download feature
141141
# for compatibility, it's recommended to enable the flag below (true)
142-
download_original_video_for_domains_active = true
142+
download_original_video_for_domains_active = false
143143
# list your sites below to download original videos from, comma separated.
144144
# Example:
145145
# download_original_video_domains = site1.com, site2.com, site3.com
146146
# In other words, these are the sites we use to download original videos from.
147147
# (e.g. rumble.com has been widely reported as having broken downloads;
148148
# hence the video-only download method for that site and others alike.)
149-
download_original_video_domains = rumble.com
149+
download_original_video_domains = example.com
150150
# Use worst video quality when having to download videos (true/false)
151151
# this is usually recommended, because we will only need the _audio_ for transcription.
152152
# adding a high-quality video will cause massive file size increases.
153153
# however, in some cases you might want to turn this off
154154
use_worst_video_quality = true
155+
# Domain specific special commands
156+
# (i.e. for sites that are not working well with `yt-dlp`)
157+
# Special commands list active (true/false)
158+
use_special_commands_for_domains = true
159+
# special domain commands, if activated
160+
special_domain_commands =
161+
rumble.com | --http-chunk-size 0 --format worstaudio/worst --downloader native --add-header "User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
162+
bitchute.com | --http-chunk-size 0 --format worstaudio/worst --downloader native --add-header "User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
163+
odysee.com | --http-chunk-size 0 --format worstaudio/worst --downloader native --add-header "User-Agent:Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
155164

156165
[VideoDescriptionSettings]
157166
# Set to True to use only a snippet of the video description

src/config_loader.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,37 @@ def get_ytdlp_domain_settings(cls):
133133
'domains': domain_list
134134
}
135135

136+
@classmethod
def get_special_domain_commands(cls):
    """
    Return a dict of domain -> custom yt-dlp argument string.

    The mapping is parsed from the multiline 'special_domain_commands'
    option in the [YTDLPSettings] section. Each entry is one line of the
    form:

        example.com | --some-flag value --another-flag

    Returns:
        dict: keys are lowercased domains with any leading 'www.' removed;
        values are the raw (unsplit) argument strings. The dict is empty
        when 'use_special_commands_for_domains' is false or missing, or
        when no usable entries are configured.
    """
    config = cls.get_config()

    # Only parse the list when the feature is switched on.
    enabled = config.getboolean(
        "YTDLPSettings", "use_special_commands_for_domains", fallback=False
    )
    if not enabled:
        return {}

    raw = config.get("YTDLPSettings", "special_domain_commands", fallback="")

    commands = {}
    for line in raw.splitlines():
        line = line.strip()
        # Skip blank lines and '#' comment lines.
        if not line or line.startswith("#"):
            continue

        # Split on the first '|' only, so the argument string itself may
        # still contain a '|' character.
        domain, separator, args = line.partition("|")
        if not separator:
            continue  # not a "domain | args" entry

        domain = domain.strip().lower()
        # Normalize the key the same way download_audio normalizes the URL
        # host before looking it up (it drops a leading 'www.'); otherwise
        # an entry written as 'www.site.com' could never match.
        if domain.startswith("www."):
            domain = domain[4:]
        args = args.strip()

        # An entry missing either side carries nothing to apply.
        if not domain or not args:
            continue

        commands[domain] = args
    return commands
166+
136167
# get the owner IDs and ping on startup if needed
137168
@classmethod
138169
def get_owner_ids(cls):

src/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# openai-whisper transcriber-bot for Telegram
44

55
# version of this program
6-
version_number = "0.1715"
6+
version_number = "0.1716"
77

88
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99
# https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/

src/transcription_handler.py

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import configparser
2121
from urllib.parse import urlparse, parse_qs
2222
from datetime import datetime, timedelta
23+
from shlex import split as shlex_split
2324
# import wave
2425
from pydub import AudioSegment
2526

@@ -239,13 +240,27 @@ async def download_audio(url, audio_path):
239240
else:
240241
logger.warning(f"Cookies file not found: {cookies_file}")
241242

243+
# <<< ADDED FOR SPECIAL DOMAIN CMDS >>>
244+
# load special domain commands from config
245+
special_commands = ConfigLoader.get_special_domain_commands()
246+
242247
parsed_url = urlparse(url)
243248
domain = parsed_url.netloc.lower()
244249
if domain.startswith('www.'):
245250
domain = domain[4:] # Remove 'www.'
246251

252+
# If domain is in special_commands, parse them into a list
253+
domain_args = []
254+
if domain in special_commands:
255+
logger.info(f"Applying special yt-dlp args for domain '{domain}': {special_commands[domain]}")
256+
domain_args = shlex.split(special_commands[domain])
257+
247258
should_download_video = ytdlp_settings['active'] and domain in ytdlp_settings['domains']
248259

260+
# ---------------------------------------------------
261+
# VIDEO DOWNLOAD PATH
262+
# ---------------------------------------------------
263+
249264
if should_download_video:
250265
logger.info("Identified domain requiring full video download.")
251266
# Step 1: Get available formats in JSON
@@ -256,14 +271,15 @@ async def download_audio(url, audio_path):
256271
url
257272
]
258273

259-
# If there are custom args, parse them into a list and extend the command
274+
# <<< ADDED FOR SPECIAL DOMAIN CMDS >>>
275+
# Insert domain-specific args right after "yt-dlp"
276+
if domain_args:
277+
command[1:1] = domain_args
278+
260279
if extra_args_str:
261280
extra_args_list = shlex.split(extra_args_str)
262281
logger.info(f"Using custom yt-dlp arguments from config: {extra_args_list}")
263-
# Insert them right after 'yt-dlp':
264282
command[1:1] = extra_args_list
265-
# Or place them at the end:
266-
# command.extend(extra_args_list)
267283

268284
# Apply cache settings based on config
269285
if no_cache_dir:
@@ -356,14 +372,25 @@ def get_format_sort_key(fmt):
356372
url
357373
]
358374

359-
# If there are custom args, parse them into a list and extend the command
375+
# <<< ADDED FOR SPECIAL DOMAIN CMDS >>>
376+
# Insert domain_args, then extra_args_str if present
377+
if domain_args:
378+
command[1:1] = domain_args
379+
360380
if extra_args_str:
361381
extra_args_list = shlex.split(extra_args_str)
362382
logger.info(f"Using custom yt-dlp arguments from config: {extra_args_list}")
363-
# Insert them right after 'yt-dlp':
364383
command[1:1] = extra_args_list
365-
# Or place them at the end:
366-
# command.extend(extra_args_list)
384+
385+
# # // old method
386+
# # If there are custom args, parse them into a list and extend the command
387+
# if extra_args_str:
388+
# extra_args_list = shlex.split(extra_args_str)
389+
# logger.info(f"Using custom yt-dlp arguments from config: {extra_args_list}")
390+
# # Insert them right after 'yt-dlp':
391+
# command[1:1] = extra_args_list
392+
# # Or place them at the end:
393+
# # command.extend(extra_args_list)
367394

368395
# if use_cookies_file and os.path.exists(cookies_file):
369396
# command.extend(["--cookies", cookies_file])
@@ -395,17 +422,27 @@ def get_format_sort_key(fmt):
395422
url
396423
]
397424

398-
# If there are custom args, parse them into a list and extend the command
425+
# <<< ADDED FOR SPECIAL DOMAIN CMDS >>>
426+
if domain_args:
427+
command[1:1] = domain_args
428+
399429
if extra_args_str:
400430
extra_args_list = shlex.split(extra_args_str)
401431
logger.info(f"Using custom yt-dlp arguments from config: {extra_args_list}")
402-
# Insert them right after 'yt-dlp':
403432
command[1:1] = extra_args_list
404-
# Or place them at the end:
405-
# command.extend(extra_args_list)
406433

407-
# if use_cookies_file and os.path.exists(cookies_file):
408-
# command.extend(["--cookies", cookies_file])
434+
# # /// old method
435+
# # If there are custom args, parse them into a list and extend the command
436+
# if extra_args_str:
437+
# extra_args_list = shlex.split(extra_args_str)
438+
# logger.info(f"Using custom yt-dlp arguments from config: {extra_args_list}")
439+
# # Insert them right after 'yt-dlp':
440+
# command[1:1] = extra_args_list
441+
# # Or place them at the end:
442+
# # command.extend(extra_args_list)
443+
444+
# # if use_cookies_file and os.path.exists(cookies_file):
445+
# # command.extend(["--cookies", cookies_file])
409446

410447
# apply the cache logic
411448
if no_cache_dir:

0 commit comments

Comments
 (0)