Skip to content

Commit 142f8cd

Browse files
authored
Merge pull request #3 from Noah-Jaffe/patch-for-mac
improve cross platform support & bugfix for requirements
2 parents 32bcc10 + 66ee7a2 commit 142f8cd

File tree

6 files changed

+228
-270
lines changed

6 files changed

+228
-270
lines changed

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,8 @@ models
66
*.mp3
77
*.xlsx
88

9-
__pycache__
9+
__pycache__
10+
.DS_STORE
11+
12+
13+
.hftoken

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,15 @@
55
- As always, TRUST NOTHING GENERATED BY AI, and always verify
66

77
## Requirements:
8-
- if you can, use cuda. https://pytorch.org/get-started/locally/ _do this before running the pip install requirements_
9-
- Python3+
8+
- if you can, use cuda. (you need an NVIDIA GPU) https://pytorch.org/get-started/locally/ _do this before running the pip install requirements_
9+
- Python 3.11+ recommended
1010
- internet (for model download, on the first time you use it)
11+
- if on macOS/Linux, install ffmpeg so that it is available on the command line
12+
- MacOS: `brew install ffmpeg`
13+
- Python tkinter needs to be installed
14+
- Windows: use the default Python installer and select the tk option
15+
- MacOS: `brew install python-tk`
16+
1117

1218
---
1319
# How to use:

main.py

Lines changed: 112 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
import os
22
from time import sleep, time
33
import tkinter as tk
4-
from tkinter import BOTH, CENTER, E, LEFT, RIGHT, SOLID, TOP, W, X, Button, IntVar, Label, Spinbox, StringVar, Tk, Toplevel, filedialog, Frame, messagebox, font
4+
from tkinter import BOTH, CENTER, E, LEFT, RIGHT, SOLID, TOP, W, X, IntVar, Label, StringVar, Tk, Toplevel, filedialog, Frame, messagebox, font, Button
5+
from tkinter.ttk import Combobox, Spinbox
56
from tkinter.font import BOLD, ITALIC, NORMAL
67
# from tkinter.scrolledtext import ScrolledText
7-
from tkinter.ttk import Combobox
88
from types import FunctionType
99
from typing import List
1010
import traceback
1111
import ffmpeg
1212
import pycountry
1313
import requests
14-
# import batchalign as ba
1514
import sys
1615
import subprocess
1716
import pathlib
@@ -20,31 +19,53 @@
2019
from PIL import Image, ImageTk
2120
import psutil
2221
from torch.cuda import is_available as is_cuda_available, mem_get_info as get_cuda_mem_info
22+
from pathlib import Path
23+
import shutil
2324

24-
# import logging
25-
26-
# CONSTANTS
25+
# CONSTANTS/config
2726
class COLOR_THEME:
28-
IN_PROGRESS = "lightyellow"
29-
LOADED = "aqua"
30-
MAIN_WINDOW = "lightblue"
31-
FAILED = "lightred"
32-
COMPLETED = "green"
33-
BUTTON = "pink"
34-
35-
LABEL_FONT = ("Arial", 12, BOLD)
36-
BUTTON_FONT = ("Arial", 12, NORMAL)
37-
FILE_NAME_FONT = ("Consolas", 10, NORMAL)
38-
TOOLTIP_FONT = ("Consolas", 8, NORMAL)
39-
40-
HF_TOKEN_FILENAME = ".hftoken"
41-
MODELS_CFG_FILENAME = "cfg/models.json"
42-
CACHE_FILENAME = "cfg/cache.json"
43-
MASCOT_FILENAME = "cfg/mascot.png"
44-
TRANSCRIBE_SUBPROC_FILENAME = "transcribe_proc.py"
45-
FFMPEG_EXE_DIR = "tools"
46-
47-
# add ffmpeg tools to path so that downstream modules can use it
27+
IN_PROGRESS = "#FFFFE0" # lightyellow
28+
LOADED = "#00FFFF" # aqua
29+
MAIN_WINDOW = "#ADD8E6" # lightblue
30+
FAILED = "#E04545" # lightred
31+
COMPLETED = "#008000" # green
32+
BUTTON = "#FFC0CB" # pink
33+
34+
35+
DEFAULT_FONT = "Helvetica" if sys.platform == "darwin" else "Arial"
36+
MONO_FONT = "Menlo" if sys.platform == "darwin" else "Consolas"
37+
LABEL_FONT = (DEFAULT_FONT, 12, BOLD)
38+
BUTTON_FONT = (DEFAULT_FONT, 12, NORMAL)
39+
40+
FILE_NAME_FONT = (MONO_FONT, 10, NORMAL)
41+
TOOLTIP_FONT = (MONO_FONT, 8, NORMAL)
42+
43+
44+
THIS_DIR = Path(__file__).parent.expanduser().resolve()
45+
46+
# defaults
47+
TOOLS_DIR = "tools"
48+
CONFIG_FILES_DIRECTORY_REL = "cfg"
49+
MODELS_FN = "models.json"
50+
CACHE_FN = "cache.json"
51+
52+
MODELS_CFG_DEFAULT = Path(THIS_DIR, CONFIG_FILES_DIRECTORY_REL, MODELS_FN).expanduser().resolve()
53+
CACHE_DEFAULT = Path(THIS_DIR, CONFIG_FILES_DIRECTORY_REL, CACHE_FN).expanduser().resolve()
54+
55+
# per user config file location
56+
PER_USER_ROOT = Path.home()
57+
PER_USER_CONFIG_FILES_DIRECTORY_REL = f".{CONFIG_FILES_DIRECTORY_REL}"
58+
MODELS_CFG_FILENAME = Path(PER_USER_ROOT, PER_USER_CONFIG_FILES_DIRECTORY_REL, MODELS_FN).expanduser().resolve()
59+
CACHE_FILENAME = Path(PER_USER_ROOT, PER_USER_CONFIG_FILES_DIRECTORY_REL, CACHE_FN).expanduser().resolve()
60+
61+
62+
# functional config values
63+
HF_TOKEN_FILENAME = Path(THIS_DIR, ".hftoken").expanduser().resolve()
64+
MASCOT_FILENAME = Path(CONFIG_FILES_DIRECTORY_REL, "mascot.png").expanduser().resolve()
65+
TRANSCRIBE_SUBPROC_FILENAME = Path(THIS_DIR, "transcribe_proc.py").expanduser().resolve()
66+
FFMPEG_EXE_DIR = Path(TOOLS_DIR).expanduser().resolve()
67+
68+
# add ffmpeg tools to path so that downstream modules can use it (specifically for windows)
4869
sys.path.append(FFMPEG_EXE_DIR)
4970

5071

@@ -111,7 +132,7 @@ def __init__(self, root):
111132
model_list.append(model)
112133

113134
self.dropdown_selection_value = StringVar()
114-
self.dropdown_model_selector = Combobox(self.frame_model_selection_line, values=model_list, textvariable=self.dropdown_selection_value)
135+
self.dropdown_model_selector = Combobox(self.frame_model_selection_line, values=model_list, textvariable=self.dropdown_selection_value, width=35)
115136

116137
reccomended = [self.cache.get('selectedModel','openai/whisper-small.en'),'openai/whisper-small.en', 'openai/whisper-medium.en', 'openai/whisper-small', 'openai/whisper-medium.en', model_list[0] if len(model_list) else None]
117138
for r in reccomended:
@@ -169,7 +190,7 @@ def __init__(self, root):
169190

170191
# console monitor
171192
# Create a ScrolledText widget inside the frame
172-
# self.output_box = ScrolledText(self.root, wrap=tk.WORD, padx=5, pady=5, relief=SOLID, font=("consolas", 8, NORMAL), height=100)
193+
# self.output_box = ScrolledText(self.root, wrap=tk.WORD, padx=5, pady=5, relief=SOLID, font=(MONO_FONT, 8, NORMAL), height=100)
173194
# self.output_box.pack(fill=BOTH, expand=True)
174195
# self.output_box.configure(state="disabled")
175196
# self.output_handler = CustomStdOut(self.output_box)
@@ -204,12 +225,12 @@ def get_initial_geometry(self) -> str:
204225
Returns:
205226
str: window size geometry f"{PxX}x{PxY}"
206227
"""
207-
return f"{max(self.root.winfo_screenwidth()/3, 800)}x{max(self.root.winfo_screenheight()/3,430)}"
228+
return f"{max(self.root.winfo_screenwidth()//3, 800)}x{max(self.root.winfo_screenheight()//3,430)}"
208229

209230
def select_new_files(self):
210231
"""Selects new files to be added to the file managament list."""
211232
audio_video_types = get_audio_file_types() + get_video_file_types()
212-
file_paths = filedialog.askopenfilenames(filetypes=[("Audio/Video", ";".join([f"*.{x}" for x in audio_video_types])), ('All Files', "*.*")])
233+
file_paths = filedialog.askopenfilenames(filetypes=[("Audio/Video", " ".join([f"*.{x}" for x in audio_video_types])), ('All Files', " ".join(get_any_file_type()))])
213234
langs = list(get_available_langs())
214235
for file in file_paths:
215236
SelectedFileConfigElement(self.frame_file_management_list, filepath=os.path.normpath(file), min_speakers=1, max_speakers=99, languages=langs)
@@ -229,8 +250,6 @@ def start_transcribe(self):
229250
if len(SelectedFileConfigElement.MANAGER) == 0:
230251
raise Exception("Please select a file to transcribe first!")
231252

232-
#shell, exepath = shellingham.detect_shell()
233-
currloc = pathlib.Path(__file__).parent.resolve()
234253
mascot = self.show_mascot("IM TRANSCRIIIIBINNNG!!\nTRANSCRIPTION STARTED, DONT CLICK THE START TRANSCRIBE BUTTON AGAIN UNLESS YOU WANT MULTIPLE TRANSCRIPTIONS RUNNING FOR THE SELECTED THINGIES AT THE SAME TIME!")
235254
#spawn_popup_activity(title="TRANSCRIBING!", message="TRANSCRIPTION STARTED, DONT CLICK THE BUTTON UNLESS YOU WANT MULTIPLE TRANSCRIPTIONS RUNNING FOR THE SELECTED THINGIES")
236255
for item in SelectedFileConfigElement.MANAGER:
@@ -268,7 +287,7 @@ def start_transcribe(self):
268287
proc = subprocess.Popen(
269288
args=[
270289
sys.executable,
271-
f"{currloc}\\{TRANSCRIBE_SUBPROC_FILENAME}",
290+
TRANSCRIBE_SUBPROC_FILENAME,
272291
json.dumps({
273292
'input_file': item.get_file(),
274293
'num_speakers': item.get_speakers(),
@@ -286,6 +305,7 @@ def start_transcribe(self):
286305
while proc.poll() == None:
287306
try:
288307
self.root.update_idletasks()
308+
sleep(0.1)
289309
#proc.wait(timeout=1)
290310
except:
291311
pass
@@ -300,7 +320,7 @@ def show_error(self, *args):
300320
"""Display the error to the user as a popup window"""
301321
err = traceback.format_exception(*args)
302322
print("\n".join(err), flush=True)
303-
messagebox.showerror("Error!", f"{'\n'.join([str(a) for a in args[1].args])}\n\n\n\nPlease see the console for the full error message!")
323+
messagebox.showerror("Error!", '\n'.join([str(a) for a in args[1].args]) + "\n\n\n\nPlease see the console for the full error message!")
304324

305325
def get_model_list(self) -> List[str]:
306326
"""
@@ -326,6 +346,10 @@ def load_cache(self):
326346
if os.path.isfile(CACHE_FILENAME):
327347
with open(CACHE_FILENAME, 'r', encoding='utf-8') as f:
328348
self.cache = json.load(f)
349+
else:
350+
with open(CACHE_DEFAULT, 'r', encoding='utf-8') as f:
351+
self.cache = json.load(f)
352+
329353

330354
def update_cache(self):
331355
"""Saves an updated cache file"""
@@ -343,8 +367,8 @@ def update_cache(self):
343367
break
344368
if c == False:
345369
cache["fileCache"] = cache.get("fileCache",[]) + [{"filepath": entry.filepath, "min_speakers": entry.min_speakers, "max_speakers": entry.max_speakers, "languages": [entry.lang_combo.get(), *[x for x in entry.lang_combo['values'] if x != entry.lang_combo.get()]]}]
346-
if not os.path.exists(os.path.dirname(CACHE_FILENAME)):
347-
os.mkdir(os.path.pardir(CACHE_FILENAME))
370+
if os.path.dirname(CACHE_FILENAME) and not os.path.exists(os.path.dirname(CACHE_FILENAME)):
371+
os.mkdir(os.path.dirname(CACHE_FILENAME))
348372
with open(CACHE_FILENAME, 'w', encoding='utf-8') as f:
349373
json.dump(cache, indent=2, fp=f)
350374

@@ -354,14 +378,27 @@ def show_mascot(self, message):
354378
popup.title("AY, IM WORKIN ERE")
355379
popup.overrideredirect(True) # Remove window decorations
356380
# Set window transparency attributes (Windows only)
357-
popup.wm_attributes("-transparentcolor", "#f0f0f0")
381+
if sys.platform.startswith("win"):
382+
popup.wm_attributes("-transparentcolor", "#f0f0f0")
383+
elif sys.platform == "darwin":
384+
# On macOS Big Sur+ you can get a similar effect
385+
popup.attributes("-transparent", True)
386+
popup.configure(background='systemTransparent')
387+
388+
else:
389+
# other platforms – do nothing special
390+
pass
391+
358392

359393
# Get screen dimensions
360394
screen_width = self.root.winfo_screenwidth()
361395
screen_height = self.root.winfo_screenheight()
362-
396+
img = None
363397
# Load and scale the image
364-
img = Image.open(MASCOT_FILENAME)
398+
if os.path.isfile(MASCOT_FILENAME):
399+
img = Image.open(MASCOT_FILENAME)
400+
else:
401+
img = Image.new('RGBA', (100, 100), (255, 0, 0, 0))
365402
img_ratio = img.width / img.height
366403
max_width, max_height = screen_width - 100, screen_height - 100 # Add padding
367404
if img.width > max_width or img.height > max_height:
@@ -382,7 +419,7 @@ def show_mascot(self, message):
382419
popup.image = img_tk # Keep a reference
383420

384421
# Overlay text
385-
text_label = tk.Label(popup, text=message, font=("Arial", 16, "bold"),
422+
text_label = tk.Label(popup, text=message, font=(DEFAULT_FONT, 16, "bold"),
386423
fg="black", bg="white", wraplength=img.width - 20)
387424
text_label.place(anchor=CENTER, y=(img.height // 3) * 2, x = img.width//2, width=img.width - 20)
388425

@@ -409,8 +446,8 @@ def __init__(self, parent, filepath, min_speakers, max_speakers, languages):
409446
self.label_frame = Frame(self.row_frame, padx=0, pady=0)
410447
self.label_frame.pack(side=LEFT, expand=True, anchor="w", padx=0, pady=0)
411448
# insert file labels
412-
self.path_label = Label(self.label_frame, text=f"{parentDir}{os.path.sep}", font=("consolas", 8, ITALIC), anchor="w", justify=LEFT)
413-
self.file_label = Label(self.label_frame, text=filename, width=35, font=("consolas", 10, BOLD), anchor="w", justify=LEFT, )
449+
self.path_label = Label(self.label_frame, text=f"{parentDir}{os.path.sep}", font=(MONO_FONT, 8, ITALIC), anchor="w", justify=LEFT)
450+
self.file_label = Label(self.label_frame, text=filename, width=35, font=(MONO_FONT, 10, BOLD), anchor="w", justify=LEFT, )
414451
self.path_label.grid(row=0, column=0)
415452
self.file_label.grid(row=1, column=0)
416453
ToolTip(self.label_frame, f"File path to be transcribed:\n\t{self.filepath}")
@@ -448,8 +485,11 @@ def set_bg(self, color):
448485
self.path_label.configure(bg=color)
449486

450487
def set_clipboard_to_filepath(self, event):
451-
self.parent.clipboard_clear()
452-
self.parent.clipboard_append(self.filepath)
488+
try:
489+
self.parent.clipboard_clear()
490+
self.parent.clipboard_append(self.filepath)
491+
except:
492+
print(f"Failed to set clipboard to:\n{self.filepath}")
453493

454494
def get_pointer(self):
455495
return self.row_frame
@@ -461,7 +501,11 @@ def get_file(self):
461501
return self.filepath
462502

463503
def get_speakers(self):
464-
return int(self.spinbox_num_speakers.get())
504+
v = self.spinbox_num_speakers.get()
505+
if not v:
506+
print(f'no num speakers given for {self.filepath}, defaulting to 1')
507+
v = '1'
508+
return int(self.spinbox_num_speakers.get() or '1')
465509

466510
def delete_row(self):
467511
self.row_frame.destroy()
@@ -610,8 +654,16 @@ def search_for_hf_model(query):
610654
return None
611655

612656
def open_hf_search():
613-
os.startfile("https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers")
614-
spawn_popup_activity("Search", "Use the huggingface search to find the model ID or model name to use. Click yes or no to continue.")
657+
hf_search_url = "https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers"
658+
try:
659+
import webbrowser
660+
webbrowser.open(hf_search_url)
661+
except:
662+
try:
663+
os.startfile(hf_search_url)
664+
except:
665+
print(f"Visit the following URL to find additional models from huggingface:\n{hf_search_url}")
666+
spawn_popup_activity("Search", f"Use the huggingface search to find the model ID or model name to use. Click yes or no to continue.\n\nURL: {hf_search_url}")
615667

616668
def get_available_langs() -> List[str]:
617669
"""Returns:
@@ -630,6 +682,9 @@ def validate_language(inp):
630682
spawn_popup_activity("Language Error!", f"Unable to determine language: '{inp}'.\nValid language codes are:\nThe 2 letter code such as 'en', 'es', 'zh', etc.\nThe 3 letter code such as 'eng', 'spa', 'zho'\nThe full name such as 'english', 'spanish', 'chinese'.\nPress any button to continue.")
631683
return None
632684

685+
def get_any_file_type() -> List[str]:
686+
return ["*", ".*", "*.*"]
687+
633688
def get_audio_file_types() -> List[str]:
634689
return [
635690
"3gp", "aa", "aac", "aax", "act", "aiff", "alac", "amr",
@@ -677,6 +732,16 @@ def convert_file_to_type(inp_file: str, totype: str):
677732
return out_name
678733

679734
if __name__ == "__main__":
735+
# Make per user config files
736+
MODELS_CFG_FILENAME =Path(MODELS_CFG_FILENAME).expanduser()
737+
CACHE_FILENAME =Path(CACHE_FILENAME).expanduser()
738+
739+
if not MODELS_CFG_FILENAME.exists():
740+
MODELS_CFG_FILENAME.parent.mkdir(exist_ok=True, parents=True)
741+
shutil.copy(MODELS_CFG_DEFAULT, MODELS_CFG_FILENAME)
742+
if not CACHE_FILENAME.exists():
743+
CACHE_FILENAME.parent.mkdir(exist_ok=True, parents=True)
744+
shutil.copy(CACHE_DEFAULT, CACHE_FILENAME)
680745
root = tk.Tk()
681746
app = MainGUI(root=root)
682747
root.mainloop()

0 commit comments

Comments
 (0)