Skip to content

Commit 02c5e36

Browse files
committed
recode helper script included
1 parent 2a1a1bf commit 02c5e36

File tree

2 files changed

+202
-2
lines changed

2 files changed

+202
-2
lines changed

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,15 @@ After launching your bot successfully, you can interact with it via Telegram (se
196196
- `/language` - set the model's transcription language (`auto` = autodetect); if you know the language spoken in the audio, setting the transcription language manually with this command may improve both transcription speed and accuracy.
197197

198198
## Changes
199-
- v0.1710 - rewrite for chunking logic when sending as messages
199+
- v0.1710 - rewrite for chunking logic when sending as messages & re-encoding tool
200200
- better step-by-step logging, better error catching, better fitting into TG message limits with fallbacks
201201
- again, please refer to e.g. [Issue #7](https://github.com/FlyingFathead/whisper-transcriber-telegram-bot/issues/7) if the problem persists (and open a new issue if necessary)
202+
- included a helper script in `src/utils/reencode_to_target_size.py` for those who can't fit their media sources within Telegram's Bot API's 20 MB limit.
203+
- If needed, use it to re-encode your media before sending it to your transcriber bot instance.
204+
- Run it with, e.g.:
205+
```bash
206+
python src/utils/reencode_to_target_size.py /path/to/your_input_file
207+
```
202208
- v0.1709.2 - up & running greeting is now more prominent w/ both UTC+local times
203209
- v0.1709.1 - increased split message maximum character safe zone buffers to prevent chunk exceeding
204210
- added a further safeguard to fall back on character-level splitting if no whitespace is found
@@ -213,7 +219,7 @@ After launching your bot successfully, you can interact with it via Telegram (se
213219
- Changed the chunk sizes from `4096` to `4000` to avoid edge cases
214220
- v0.1708.1 - Small bug fixes in the output
215221
- Note that running the program within `firejail` using Nvidia driver v.560.xx or newer requires, e.g.:
216-
```
222+
```bash
217223
firejail --noblacklist=/sys/module --whitelist=/sys/module/nvidia* --read-only=/sys/module/nvidia* python src/main.py
218224
```
219225
This is due to recent changes in Nvidia's driver handling on Linux; see e.g. [here](https://github.com/netblue30/firejail/issues/6509) or [here](https://github.com/netblue30/firejail/issues/6372)
Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
#
4+
# reencode_to_target_size.py
5+
#
6+
# Re-encode audio/video to MP3, ensuring file size is under some target MB.
7+
# - Binary search on bitrate from 8 kbps → 320 kbps
8+
# - Up to max_iterations
9+
# - If we can’t improve or we’re within a tolerance, we stop early.
10+
#
11+
# (c) 2024. Use at your own risk.
12+
13+
import sys
14+
import os
15+
import subprocess
16+
import shutil
17+
18+
# =========== GLOBALS ============
# Upload size cap, in MB, that the re-encoded file has to fit under.
TELEGRAM_BOT_SIZE_LIMIT_MB = 20.0
# Headroom (MB) subtracted from the cap so the output lands below it.
SAFETY_MARGIN_MB = 0.1
# Target size used when no <targetMB> argument is given on the command line.
DEFAULT_TARGET_MB = TELEGRAM_BOT_SIZE_LIMIT_MB - SAFETY_MARGIN_MB

# Cap on encode attempts when no <max_iterations> argument is given.
DEFAULT_MAX_ITERATIONS = 12
STALE_TRIES_LIMIT = 2  # If we fail to improve for 2 consecutive tries, bail out

# TOLERANCE: If we're within X bytes of the target, we'll consider that "close enough"
SIZE_TOLERANCE_BYTES = 200_000  # ~200 KB; set to 0 to disable
# =================================
29+
30+
def hz_line(char='-'):
    """Print a horizontal rule as wide as the current terminal.

    Args:
        char: String repeated once per terminal column (default ``'-'``).

    The width is taken from ``shutil.get_terminal_size`` with an 80x24
    fallback; if the lookup itself raises, 80 columns are used.
    """
    try:
        cols = shutil.get_terminal_size(fallback=(80, 24)).columns
    except Exception:
        # Best effort only. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt / SystemExit propagate.
        cols = 80
    print(char * cols)
37+
38+
def get_duration_seconds(inputfile):
    """Ask ffprobe for the container duration of *inputfile*.

    Args:
        inputfile: Path of the media file to probe.

    Returns:
        The duration in seconds as a float, or ``None`` when ffprobe prints
        nothing, cannot be run, exits non-zero, or prints something that is
        not a number (the error is reported on stdout in those cases).
    """
    probe_args = ["ffprobe", "-v", "error"]
    probe_args += ["-select_streams", "a:0"]
    probe_args += ["-show_entries", "format=duration"]
    probe_args += ["-of", "default=noprint_wrappers=1:nokey=1"]
    probe_args.append(inputfile)

    try:
        completed = subprocess.run(
            probe_args, capture_output=True, text=True, check=True
        )
        raw_value = completed.stdout.strip()
        if not raw_value:
            return None
        return float(raw_value)
    except Exception as e:
        print(f"[ERROR] ffprobe failed: {e}")
        return None
53+
54+
def _remove_quietly(path):
    """Delete *path*; a file that is already gone is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def reencode_mp3_strict(inputfile, target_mb, max_iterations):
    """Re-encode *inputfile* to MP3, searching for the largest file under a size cap.

    A binary search over the audio bitrate (8-320 kbps) runs ffmpeg once per
    step. The largest result that is still <= the target is kept and finally
    written next to the input as ``<base>_<target_mb>MB_recode.mp3``.

    The search stops early when the best result is within
    ``SIZE_TOLERANCE_BYTES`` of the target, or when ``STALE_TRIES_LIMIT``
    attempts in a row bring no improvement.

    Args:
        inputfile: Path of the audio/video file to convert.
        target_mb: Size limit in MB (1 MB = 1024 * 1024 bytes).
        max_iterations: Maximum number of ffmpeg encodes to attempt.

    Returns:
        Path of the re-encoded file, or ``None`` if the duration could not be
        read, ffmpeg failed, or no attempt came in under the target.
    """
    duration = get_duration_seconds(inputfile)
    if not duration:
        print("[ERROR] Could not get duration. Aborting.")
        return None

    target_bytes = int(target_mb * 1024 * 1024)
    min_bitrate = 8
    max_bitrate = 320

    base, _ext = os.path.splitext(inputfile)
    outputfile = f"{base}_{target_mb}MB_recode.mp3"
    tmp_out = outputfile + ".temp"
    best_file = outputfile + ".best"

    # Scratch files left behind by an interrupted earlier run must not be
    # mistaken for a result of this run.
    _remove_quietly(tmp_out)
    _remove_quietly(best_file)

    hz_line()
    print(f"[INFO] Input: {inputfile}")
    print(f"[INFO] Duration: {duration:.1f} s")
    print(f"[INFO] Target: {target_mb} MB → {target_bytes} bytes")
    print(f"[INFO] Output file: {outputfile}")
    print(f"[INFO] Max iterations: {max_iterations}")
    hz_line()

    best_size = 0
    best_bitrate = None
    stale_tries = 0   # how many times in a row we fail to improve
    prev_size = None  # to see if two consecutive attempts produce same size

    try:
        iteration = 1
        while iteration <= max_iterations and min_bitrate <= max_bitrate:
            current_bitrate = (min_bitrate + max_bitrate) // 2
            print(f"[Iteration {iteration}] Trying bitrate: {current_bitrate} kbps")

            cmd = [
                "ffmpeg", "-v", "error", "-y",
                "-i", inputfile,
                "-vn",
                "-c:a", "libmp3lame",
                "-b:a", f"{current_bitrate}k",
                "-f", "mp3",  # explicit: the ".temp" suffix hides the format
                tmp_out,
            ]
            try:
                subprocess.run(cmd, check=True)
            except (subprocess.CalledProcessError, OSError) as e:
                # OSError covers a missing/unrunnable ffmpeg binary.
                print(f"[ERROR] ffmpeg encoding failed: {e}")
                _remove_quietly(best_file)
                return None

            actual_size = os.path.getsize(tmp_out)
            print(f" -> size: {actual_size} bytes")

            if actual_size <= target_bytes:
                # Under the limit: keep it if it is the biggest so far.
                if actual_size > best_size:
                    best_size = actual_size
                    best_bitrate = current_bitrate
                    shutil.copyfile(tmp_out, best_file)
                    print(f" -> New best: {best_size} bytes at {best_bitrate} kbps")
                    stale_tries = 0
                else:
                    stale_tries += 1
                    print(f" -> Not an improvement. stale_tries={stale_tries}")
                # Then try a higher bitrate to get closer to the target.
                min_bitrate = current_bitrate + 1
            else:
                # Over the limit => lower bitrate. Until a first candidate
                # exists this is ordinary search progress, not staleness;
                # counting it would abort long inputs after two attempts
                # without ever reaching a low enough bitrate.
                if best_bitrate is not None:
                    stale_tries += 1
                print(f" -> Over target. stale_tries={stale_tries}")
                max_bitrate = current_bitrate - 1

            # Identical sizes at different bitrates: the encoder is probably
            # pinned, so further steps are unlikely to help.
            if prev_size is not None and prev_size == actual_size:
                stale_tries += 1
                print(" -> Same file size as previous iteration. Possibly pinned by encoder constraints.")
            prev_size = actual_size

            if best_size > 0 and SIZE_TOLERANCE_BYTES > 0:
                diff = target_bytes - best_size
                if 0 <= diff <= SIZE_TOLERANCE_BYTES:
                    print(f"[INFO] Within {SIZE_TOLERANCE_BYTES} bytes of target; stopping early.")
                    break

            if stale_tries >= STALE_TRIES_LIMIT:
                print(f"[INFO] Reached stale tries limit ({STALE_TRIES_LIMIT}). Stopping.")
                break

            iteration += 1
            print()
    finally:
        # The per-attempt scratch file is never part of the result.
        _remove_quietly(tmp_out)

    if best_bitrate is None:
        print("[ERROR] Could not get under target (or no .best file).")
        return None

    shutil.move(best_file, outputfile)
    final_size = os.path.getsize(outputfile)
    print(f"[FINAL] Best bitrate: {best_bitrate} kbps, size: {final_size} bytes")
    if final_size > target_bytes:
        print("[WARN] Final is STILL OVER target, but we can't do better with given constraints.")
    else:
        print("[INFO] Final is under target!")
    return outputfile
166+
167+
def main():
    """Command-line entry point.

    Usage: ``<script> <inputfile> [<targetMB>] [<max_iterations>]``

    Exit status:
        0  a re-encoded file was produced
        1  usage error (missing input argument, or a non-numeric /
           non-positive ``<targetMB>`` or ``<max_iterations>``)
        2  the input file does not exist
        3  re-encoding ran but produced no acceptable file
    """
    usage = f"Usage: {sys.argv[0]} <inputfile> [<targetMB>] [<max_iterations>]"
    if len(sys.argv) < 2:
        print(usage)
        sys.exit(1)

    inputfile = sys.argv[1]
    if not os.path.isfile(inputfile):
        print(f"[ERROR] File not found: {inputfile}")
        sys.exit(2)

    if len(sys.argv) >= 3:
        try:
            target_mb = float(sys.argv[2])
        except ValueError:
            target_mb = None
        # The chained comparison also rejects NaN and infinity.
        if target_mb is None or not (0 < target_mb < float("inf")):
            print(f"[ERROR] Invalid target size in MB: {sys.argv[2]!r}")
            print(usage)
            sys.exit(1)
    else:
        target_mb = DEFAULT_TARGET_MB

    if len(sys.argv) >= 4:
        try:
            max_iterations = int(sys.argv[3])
        except ValueError:
            max_iterations = None
        if max_iterations is None or max_iterations < 1:
            print(f"[ERROR] Invalid max iterations: {sys.argv[3]!r}")
            print(usage)
            sys.exit(1)
    else:
        max_iterations = DEFAULT_MAX_ITERATIONS

    result = reencode_mp3_strict(inputfile, target_mb, max_iterations)
    if result:
        print(f"[DONE] Re-encoded: {result}")
    else:
        print("[FAIL] No acceptable file produced.")
        # Report the failure to calling shells/scripts as well.
        sys.exit(3)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)