11import os
22from time import sleep , time
33import tkinter as tk
4- from tkinter import BOTH , CENTER , E , LEFT , RIGHT , SOLID , TOP , W , X , Button , IntVar , Label , Spinbox , StringVar , Tk , Toplevel , filedialog , Frame , messagebox , font
4+ from tkinter import BOTH , CENTER , E , LEFT , RIGHT , SOLID , TOP , W , X , IntVar , Label , StringVar , Tk , Toplevel , filedialog , Frame , messagebox , font , Button
5+ from tkinter .ttk import Combobox , Spinbox
56from tkinter .font import BOLD , ITALIC , NORMAL
67# from tkinter.scrolledtext import ScrolledText
7- from tkinter .ttk import Combobox
88from types import FunctionType
99from typing import List
1010import traceback
1111import ffmpeg
1212import pycountry
1313import requests
14- # import batchalign as ba
1514import sys
1615import subprocess
1716import pathlib
2019from PIL import Image , ImageTk
2120import psutil
2221from torch .cuda import is_available as is_cuda_available , mem_get_info as get_cuda_mem_info
22+ from pathlib import Path
23+ import shutil
2324
24- # import logging
25-
26- # CONSTANTS
25+ # CONSTANTS/config
2726class COLOR_THEME :
28- IN_PROGRESS = "lightyellow"
29- LOADED = "aqua"
30- MAIN_WINDOW = "lightblue"
31- FAILED = "lightred"
32- COMPLETED = "green"
33- BUTTON = "pink"
34-
35- LABEL_FONT = ("Arial" , 12 , BOLD )
36- BUTTON_FONT = ("Arial" , 12 , NORMAL )
37- FILE_NAME_FONT = ("Consolas" , 10 , NORMAL )
38- TOOLTIP_FONT = ("Consolas" , 8 , NORMAL )
39-
40- HF_TOKEN_FILENAME = ".hftoken"
41- MODELS_CFG_FILENAME = "cfg/models.json"
42- CACHE_FILENAME = "cfg/cache.json"
43- MASCOT_FILENAME = "cfg/mascot.png"
44- TRANSCRIBE_SUBPROC_FILENAME = "transcribe_proc.py"
45- FFMPEG_EXE_DIR = "tools"
46-
47- # add ffmpeg tools to path so that downstream modules can use it
27+ IN_PROGRESS = "#FFFFE0" # lightyellow
28+ LOADED = "#00FFFF" # aqua
29+ MAIN_WINDOW = "#ADD8E6" # lightblue
30+ FAILED = "#E04545" # lightred
31+ COMPLETED = "#008000" # green
32+ BUTTON = "#FFC0CB" # pink
33+
34+
35+ DEFAULT_FONT = "Helvetica" if sys .platform == "darwin" else "Arial"
36+ MONO_FONT = "Menlo" if sys .platform == "darwin" else "Consolas"
37+ LABEL_FONT = (DEFAULT_FONT , 12 , BOLD )
38+ BUTTON_FONT = (DEFAULT_FONT , 12 , NORMAL )
39+
40+ FILE_NAME_FONT = (MONO_FONT , 10 , NORMAL )
41+ TOOLTIP_FONT = (MONO_FONT , 8 , NORMAL )
42+
43+
44+ THIS_DIR = Path (__file__ ).parent .expanduser ().resolve ()
45+
46+ # defaults
47+ TOOLS_DIR = "tools"
48+ CONFIG_FILES_DIRECTORY_REL = "cfg"
49+ MODELS_FN = "models.json"
50+ CACHE_FN = "cache.json"
51+
52+ MODELS_CFG_DEFAULT = Path (THIS_DIR , CONFIG_FILES_DIRECTORY_REL , MODELS_FN ).expanduser ().resolve ()
53+ CACHE_DEFAULT = Path (THIS_DIR , CONFIG_FILES_DIRECTORY_REL , CACHE_FN ).expanduser ().resolve ()
54+
55+ # per user config file location
56+ PER_USER_ROOT = Path .home ()
57+ PER_USER_CONFIG_FILES_DIRECTORY_REL = f".{ CONFIG_FILES_DIRECTORY_REL } "
58+ MODELS_CFG_FILENAME = Path (PER_USER_ROOT , PER_USER_CONFIG_FILES_DIRECTORY_REL , MODELS_FN ).expanduser ().resolve ()
59+ CACHE_FILENAME = Path (PER_USER_ROOT , PER_USER_CONFIG_FILES_DIRECTORY_REL , CACHE_FN ).expanduser ().resolve ()
60+
61+
# functional config values
# Every bundled resource is anchored to THIS_DIR (like MODELS_CFG_DEFAULT and
# CACHE_DEFAULT) so lookups do not depend on the directory the application
# happens to be launched from.
HF_TOKEN_FILENAME = Path(THIS_DIR, ".hftoken").expanduser().resolve()
MASCOT_FILENAME = Path(THIS_DIR, CONFIG_FILES_DIRECTORY_REL, "mascot.png").expanduser().resolve()
TRANSCRIBE_SUBPROC_FILENAME = Path(THIS_DIR, "transcribe_proc.py").expanduser().resolve()
FFMPEG_EXE_DIR = Path(THIS_DIR, TOOLS_DIR).expanduser().resolve()

# add ffmpeg tools to path so that downstream modules can use it (specifically for windows)
# Executables are located through the PATH environment variable (inherited by
# child processes); sys.path only influences Python imports.
_ffmpeg_dir = str(FFMPEG_EXE_DIR)
_current_path = os.environ.get("PATH", "")
if _ffmpeg_dir not in _current_path.split(os.pathsep):
    os.environ["PATH"] = os.pathsep.join(p for p in (_ffmpeg_dir, _current_path) if p)
# Kept for backward compatibility with the previous behaviour; sys.path
# entries are expected to be strings, not Path objects.
sys.path.append(_ffmpeg_dir)
4970
5071
@@ -111,7 +132,7 @@ def __init__(self, root):
111132 model_list .append (model )
112133
113134 self .dropdown_selection_value = StringVar ()
114- self .dropdown_model_selector = Combobox (self .frame_model_selection_line , values = model_list , textvariable = self .dropdown_selection_value )
135+ self .dropdown_model_selector = Combobox (self .frame_model_selection_line , values = model_list , textvariable = self .dropdown_selection_value , width = 35 )
115136
116137 reccomended = [self .cache .get ('selectedModel' ,'openai/whisper-small.en' ),'openai/whisper-small.en' , 'openai/whisper-medium.en' , 'openai/whisper-small' , 'openai/whisper-medium.en' , model_list [0 ] if len (model_list ) else None ]
117138 for r in reccomended :
@@ -169,7 +190,7 @@ def __init__(self, root):
169190
170191 # console monitor
171192 # Create a ScrolledText widget inside the frame
172- # self.output_box = ScrolledText(self.root, wrap=tk.WORD, padx=5, pady=5, relief=SOLID, font=("consolas" , 8, NORMAL), height=100)
193+ # self.output_box = ScrolledText(self.root, wrap=tk.WORD, padx=5, pady=5, relief=SOLID, font=(MONO_FONT , 8, NORMAL), height=100)
173194 # self.output_box.pack(fill=BOTH, expand=True)
174195 # self.output_box.configure(state="disabled")
175196 # self.output_handler = CustomStdOut(self.output_box)
@@ -204,12 +225,12 @@ def get_initial_geometry(self) -> str:
204225 Returns:
205226 str: window size geometry f"{PxX}x{PxY}"
206227 """
207- return f"{ max (self .root .winfo_screenwidth ()/ 3 , 800 )} x{ max (self .root .winfo_screenheight ()/ 3 ,430 )} "
228+ return f"{ max (self .root .winfo_screenwidth ()// 3 , 800 )} x{ max (self .root .winfo_screenheight ()/ /3 ,430 )} "
208229
209230 def select_new_files (self ):
210231 """Selects new files to be added to the file managament list."""
211232 audio_video_types = get_audio_file_types () + get_video_file_types ()
212- file_paths = filedialog .askopenfilenames (filetypes = [("Audio/Video" , "; " .join ([f"*.{ x } " for x in audio_video_types ])), ('All Files' , "*.*" )])
233+ file_paths = filedialog .askopenfilenames (filetypes = [("Audio/Video" , " " .join ([f"*.{ x } " for x in audio_video_types ])), ('All Files' , " " . join ( get_any_file_type ()) )])
213234 langs = list (get_available_langs ())
214235 for file in file_paths :
215236 SelectedFileConfigElement (self .frame_file_management_list , filepath = os .path .normpath (file ), min_speakers = 1 , max_speakers = 99 , languages = langs )
@@ -229,8 +250,6 @@ def start_transcribe(self):
229250 if len (SelectedFileConfigElement .MANAGER ) == 0 :
230251 raise Exception ("Please select a file to transcribe first!" )
231252
232- #shell, exepath = shellingham.detect_shell()
233- currloc = pathlib .Path (__file__ ).parent .resolve ()
234253 mascot = self .show_mascot ("IM TRANSCRIIIIBINNNG!!\n TRANSCRIPTION STARTED, DONT CLICK THE START TRANSCRIBE BUTTON AGAIN UNLESS YOU WANT MULTIPLE TRANSCRIPTIONS RUNNING FOR THE SELECTED THINGIES AT THE SAME TIME!" )
235254 #spawn_popup_activity(title="TRANSCRIBING!", message="TRANSCRIPTION STARTED, DONT CLICK THE BUTTON UNLESS YOU WANT MULTIPLE TRANSCRIPTIONS RUNNING FOR THE SELECTED THINGIES")
236255 for item in SelectedFileConfigElement .MANAGER :
@@ -268,7 +287,7 @@ def start_transcribe(self):
268287 proc = subprocess .Popen (
269288 args = [
270289 sys .executable ,
271- f" { currloc } \\ { TRANSCRIBE_SUBPROC_FILENAME } " ,
290+ TRANSCRIBE_SUBPROC_FILENAME ,
272291 json .dumps ({
273292 'input_file' : item .get_file (),
274293 'num_speakers' : item .get_speakers (),
@@ -286,6 +305,7 @@ def start_transcribe(self):
286305 while proc .poll () == None :
287306 try :
288307 self .root .update_idletasks ()
308+ sleep (0.1 )
289309 #proc.wait(timeout=1)
290310 except :
291311 pass
@@ -300,7 +320,7 @@ def show_error(self, *args):
300320 """Display the error to the user as a popup window"""
301321 err = traceback .format_exception (* args )
302322 print ("\n " .join (err ), flush = True )
303- messagebox .showerror ("Error!" , f" { '\n ' .join ([str (a ) for a in args [1 ].args ])} \n \n \n \n Please see the console for the full error message!" )
323+ messagebox .showerror ("Error!" , '\n ' .join ([str (a ) for a in args [1 ].args ]) + " \n \n \n \n Please see the console for the full error message!" )
304324
305325 def get_model_list (self ) -> List [str ]:
306326 """
@@ -326,6 +346,10 @@ def load_cache(self):
326346 if os .path .isfile (CACHE_FILENAME ):
327347 with open (CACHE_FILENAME , 'r' , encoding = 'utf-8' ) as f :
328348 self .cache = json .load (f )
349+ else :
350+ with open (CACHE_DEFAULT , 'r' , encoding = 'utf-8' ) as f :
351+ self .cache = json .load (f )
352+
329353
330354 def update_cache (self ):
331355 """Saves an updated cache file"""
@@ -343,8 +367,8 @@ def update_cache(self):
343367 break
344368 if c == False :
345369 cache ["fileCache" ] = cache .get ("fileCache" ,[]) + [{"filepath" : entry .filepath , "min_speakers" : entry .min_speakers , "max_speakers" : entry .max_speakers , "languages" : [entry .lang_combo .get (), * [x for x in entry .lang_combo ['values' ] if x != entry .lang_combo .get ()]]}]
346- if not os .path .exists (os .path .dirname (CACHE_FILENAME )):
347- os .mkdir (os .path .pardir (CACHE_FILENAME ))
370+ if os . path . dirname ( CACHE_FILENAME ) and not os .path .exists (os .path .dirname (CACHE_FILENAME )):
371+ os .mkdir (os .path .dirname (CACHE_FILENAME ))
348372 with open (CACHE_FILENAME , 'w' , encoding = 'utf-8' ) as f :
349373 json .dump (cache , indent = 2 , fp = f )
350374
@@ -354,14 +378,27 @@ def show_mascot(self, message):
354378 popup .title ("AY, IM WORKIN ERE" )
355379 popup .overrideredirect (True ) # Remove window decorations
356380 # Set window transparency attributes (Windows only)
357- popup .wm_attributes ("-transparentcolor" , "#f0f0f0" )
381+ if sys .platform .startswith ("win" ):
382+ popup .wm_attributes ("-transparentcolor" , "#f0f0f0" )
383+ elif sys .platform == "darwin" :
384+ # On macOS Big Sur+ you can get a similar effect
385+ popup .attributes ("-transparent" , True )
386+ popup .configure (background = 'systemTransparent' )
387+
388+ else :
389+ # other platforms – do nothing special
390+ pass
391+
358392
359393 # Get screen dimensions
360394 screen_width = self .root .winfo_screenwidth ()
361395 screen_height = self .root .winfo_screenheight ()
362-
396+ img = None
363397 # Load and scale the image
364- img = Image .open (MASCOT_FILENAME )
398+ if os .path .isfile (MASCOT_FILENAME ):
399+ img = Image .open (MASCOT_FILENAME )
400+ else :
401+ img = Image .new ('RGBA' , (100 , 100 ), (255 , 0 , 0 , 0 ))
365402 img_ratio = img .width / img .height
366403 max_width , max_height = screen_width - 100 , screen_height - 100 # Add padding
367404 if img .width > max_width or img .height > max_height :
@@ -382,7 +419,7 @@ def show_mascot(self, message):
382419 popup .image = img_tk # Keep a reference
383420
384421 # Overlay text
385- text_label = tk .Label (popup , text = message , font = ("Arial" , 16 , "bold" ),
422+ text_label = tk .Label (popup , text = message , font = (DEFAULT_FONT , 16 , "bold" ),
386423 fg = "black" , bg = "white" , wraplength = img .width - 20 )
387424 text_label .place (anchor = CENTER , y = (img .height // 3 ) * 2 , x = img .width // 2 , width = img .width - 20 )
388425
@@ -409,8 +446,8 @@ def __init__(self, parent, filepath, min_speakers, max_speakers, languages):
409446 self .label_frame = Frame (self .row_frame , padx = 0 , pady = 0 )
410447 self .label_frame .pack (side = LEFT , expand = True , anchor = "w" , padx = 0 , pady = 0 )
411448 # insert file labels
412- self .path_label = Label (self .label_frame , text = f"{ parentDir } { os .path .sep } " , font = ("consolas" , 8 , ITALIC ), anchor = "w" , justify = LEFT )
413- self .file_label = Label (self .label_frame , text = filename , width = 35 , font = ("consolas" , 10 , BOLD ), anchor = "w" , justify = LEFT , )
449+ self .path_label = Label (self .label_frame , text = f"{ parentDir } { os .path .sep } " , font = (MONO_FONT , 8 , ITALIC ), anchor = "w" , justify = LEFT )
450+ self .file_label = Label (self .label_frame , text = filename , width = 35 , font = (MONO_FONT , 10 , BOLD ), anchor = "w" , justify = LEFT , )
414451 self .path_label .grid (row = 0 , column = 0 )
415452 self .file_label .grid (row = 1 , column = 0 )
416453 ToolTip (self .label_frame , f"File path to be transcribed:\n \t { self .filepath } " )
@@ -448,8 +485,11 @@ def set_bg(self, color):
448485 self .path_label .configure (bg = color )
449486
def set_clipboard_to_filepath(self, event):
    """Copy this row's file path to the clipboard (best effort).

    Args:
        event: the triggering event object; it is not inspected.

    A failure while talking to the clipboard is reported on the console
    instead of being raised, so a clipboard problem never interrupts the
    caller. Only ``Exception`` subclasses are absorbed: ``KeyboardInterrupt``
    and ``SystemExit`` still propagate.
    """
    try:
        self.parent.clipboard_clear()
        self.parent.clipboard_append(self.filepath)
    except Exception as err:
        # Include the reason so the failure can actually be diagnosed.
        print(f"Failed to set clipboard to:\n{self.filepath}\n({err!r})")
453493
def get_pointer(self):
    """Return the ``row_frame`` object stored on this element."""
    row = self.row_frame
    return row
@@ -461,7 +501,11 @@ def get_file(self):
461501 return self .filepath
462502
def get_speakers(self):
    """Return the number of speakers entered for this file as an int.

    The spinbox is read exactly once. An empty value falls back to 1 and a
    notice naming the affected file is printed.

    Raises:
        ValueError: if the spinbox holds non-empty text that is not an
            integer.
    """
    raw = self.spinbox_num_speakers.get()
    if not raw:
        print(f'no num speakers given for {self.filepath}, defaulting to 1')
        raw = '1'
    # Convert the value that was just validated instead of re-reading the widget.
    return int(raw)
465509
466510 def delete_row (self ):
467511 self .row_frame .destroy ()
@@ -610,8 +654,16 @@ def search_for_hf_model(query):
610654 return None
611655
def open_hf_search():
    """Open the Hugging Face speech-recognition model search, then prompt the user.

    Tries, in order:
      1. the ``webbrowser`` module,
      2. ``os.startfile`` when the platform provides it,
      3. printing the URL to the console so it can be opened manually.

    The popup is always shown afterwards and also contains the URL.
    """
    import webbrowser

    hf_search_url = "https://huggingface.co/models?pipeline_tag=automatic-speech-recognition&library=transformers"

    # webbrowser.open signals failure through its return value rather than an
    # exception, so the result has to be checked for the fallbacks to run.
    try:
        opened = bool(webbrowser.open(hf_search_url))
    except webbrowser.Error:
        opened = False

    # os.startfile is not available on every platform.
    if not opened and hasattr(os, "startfile"):
        try:
            os.startfile(hf_search_url)
            opened = True
        except OSError:
            opened = False

    if not opened:
        print(f"Visit the following URL to find additional models from huggingface:\n{hf_search_url}")

    spawn_popup_activity("Search", f"Use the huggingface search to find the model ID or model name to use. Click yes or no to continue.\n\nURL: {hf_search_url}")
615667
616668def get_available_langs () -> List [str ]:
617669 """Returns:
@@ -630,6 +682,9 @@ def validate_language(inp):
630682 spawn_popup_activity ("Language Error!" , f"Unable to determine language: '{ inp } '.\n Valid language codes are:\n The 2 letter code such as 'en', 'es', 'zh', etc.\n The 3 letter code such as 'eng', 'spa', 'zho'\n The full name such as 'english', 'spanish', 'chinese'.\n Press any button to continue." )
631683 return None
632684
def get_any_file_type() -> List[str]:
    """Return the wildcard patterns used for the "All Files" dialog filter."""
    wildcard_patterns = "* .* *.*"
    return wildcard_patterns.split()
687+
633688def get_audio_file_types () -> List [str ]:
634689 return [
635690 "3gp" , "aa" , "aac" , "aax" , "act" , "aiff" , "alac" , "amr" ,
@@ -677,6 +732,16 @@ def convert_file_to_type(inp_file: str, totype: str):
677732 return out_name
678733
if __name__ == "__main__":
    # Make per user config files
    MODELS_CFG_FILENAME = Path(MODELS_CFG_FILENAME).expanduser()
    CACHE_FILENAME = Path(CACHE_FILENAME).expanduser()

    # Seed each missing per-user file from its default counterpart,
    # creating the containing directory first.
    seed_pairs = (
        (MODELS_CFG_FILENAME, MODELS_CFG_DEFAULT),
        (CACHE_FILENAME, CACHE_DEFAULT),
    )
    for user_file, default_file in seed_pairs:
        if user_file.exists():
            continue
        user_file.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy(default_file, user_file)

    # Build the main window and hand control to the Tk event loop.
    root = tk.Tk()
    app = MainGUI(root=root)
    root.mainloop()
0 commit comments