Skip to content

Commit f8afc52

Browse files
committed
"save state" so that if we crash during the pipeline we might have some sort of output
1 parent 83433f9 commit f8afc52

File tree

2 files changed

+58
-12
lines changed

2 files changed

+58
-12
lines changed

main.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import os
2-
import re
32
from time import sleep, time
43
import tkinter as tk
54
from tkinter import BOTH, CENTER, E, LEFT, RIGHT, SOLID, TOP, W, X, Button, IntVar, Label, Spinbox, StringVar, Tk, Toplevel, filedialog, Frame, messagebox, font
@@ -19,6 +18,8 @@
1918
import json
2019
from huggingface_hub.hf_api import repo_exists as is_valid_model_id
2120
from PIL import Image, ImageTk
21+
import psutil
22+
from torch.cuda import is_available as is_cuda_available, mem_get_info as get_cuda_mem_info
2223

2324
# import logging
2425

@@ -246,6 +247,24 @@ def start_transcribe(self):
246247
print(f"Converting {item.get_file()} to mp3 type so that it can be transcribed!")
247248
item.filepath = convert_file_to_type(item.get_file(), ntype)
248249
print(f"Convertion completed! Audio file can be found {item.get_file()}")
250+
251+
# priority_levels = [
252+
# psutil.NORMAL_PRIORITY_CLASS, # normal,
253+
# psutil.ABOVE_NORMAL_PRIORITY_CLASS, # above normal
254+
# psutil.ABOVE_NORMAL_PRIORITY_CLASS, # above normal
255+
# psutil.HIGH_PRIORITY_CLASS, # high priority
256+
# ]
257+
# priority_points = 0
258+
# curr_state = psutil.virtual_memory()
259+
# if (curr_state.total/(2**30) > 16):
260+
# # 16gb+ ram
261+
# priority_points += 1
262+
# if (is_cuda_available()):
263+
# # has cuda
264+
# priority_points += 1
265+
# if (get_cuda_mem_info()[1]/(2**30) > 10):
266+
# # has big cuda
267+
# priority_points += 1
249268
proc = subprocess.Popen(
250269
args=[
251270
sys.executable,
@@ -262,6 +281,7 @@ def start_transcribe(self):
262281
cwd=os.getcwd(),
263282
start_new_session=True
264283
)
284+
# psutil.Process(proc.pid).nice(priority_levels[priority_points])
265285
self.root.title("Transcriber - PLEASE DONT KILL ME - I AM WORKING! I PROMISE!")
266286
while proc.poll() == None:
267287
try:
@@ -274,6 +294,7 @@ def start_transcribe(self):
274294
except:
275295
pass
276296
self.root.title("Transcriber")
297+
# spawn_popup_activity("Transcriber", "Completed transcribing the files!")
277298

278299
def show_error(self, *args):
279300
"""Display the error to the user as a popup window"""

transcribe_proc.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from huggingface_hub.hf_api import repo_exists as is_valid_model_id
88
import pycountry
99
from CustomAiEngine import CustomAiEngine
10+
import traceback
1011

1112

1213
def transcribe_file(input_file, model_name=None, num_speakers=2, lang="eng"):
@@ -45,27 +46,50 @@ def transcribe_file(input_file, model_name=None, num_speakers=2, lang="eng"):
4546
retrace, # uncertain how this benefits us
4647
# morphosyntax,
4748
utr,
48-
fa
49+
# fa
4950
] if action]
5051

51-
# create a pipeline
52-
nlp = ba.BatchalignPipeline(*pipeline_activity)
53-
doc = ba.Document.new(media_path=input_file, lang=lang)
54-
doc = nlp(doc)
55-
chat = ba.CHATFile(doc=doc)
5652
n = 0
5753
output_file = f"{input_file}{'_'+str(n) if n > 0 else ''}.cha"
5854
while 1:
5955
output_file = f"{input_file}{'_'+str(n) if n > 0 else ''}.cha"
6056
if not os.path.exists(output_file):
6157
break
6258
n += 1
63-
chat.write(output_file, write_wor=False)
59+
doc = ba.Document.new(media_path=input_file, lang=lang)
60+
for idx, activity in enumerate(pipeline_activity, start=1):
61+
nlp = ba.BatchalignPipeline(activity)
62+
try:
63+
doc = nlp(doc)
64+
chat = ba.CHATFile(doc=doc)
65+
chat.write(output_file, write_wor=False)
66+
with open(output_file,'a',encoding='utf-8') as f:
67+
f.write(f"@DEBUG Completed step {idx}/{len(pipeline_activity)} - {(type(activity).__name__).replace('Engine','')}\n")
68+
except Exception as e:
69+
print(traceback.format_exc(e))
70+
print(f"{output_file} made it to step: {idx-1}/{len(pipeline_activity)}")
71+
with open(output_file,'a',encoding='utf-8') as f:
72+
f.write(f"@DEBUG error during step {idx}/{len(pipeline_activity)} - {(type(activity).__name__).replace('Engine','')}\n")
73+
74+
75+
# create a pipeline
76+
# nlp = ba.BatchalignPipeline(*pipeline_activity)
77+
# doc = ba.Document.new(media_path=input_file, lang=lang)
78+
# doc = nlp(doc)
79+
# chat = ba.CHATFile(doc=doc)
80+
# n = 0
81+
# output_file = f"{input_file}{'_'+str(n) if n > 0 else ''}.cha"
82+
# while 1:
83+
# output_file = f"{input_file}{'_'+str(n) if n > 0 else ''}.cha"
84+
# if not os.path.exists(output_file):
85+
# break
86+
# n += 1
87+
# chat.write(output_file, write_wor=False)
6488
print(f"Wrote to {output_file}", flush=True)
65-
try:
66-
os.startfile(output_file)
67-
except:
68-
pass
89+
# try:
90+
# os.startfile(output_file)
91+
# except:
92+
# pass
6993
# this is process blocking so we dont do it for now
7094
# return spawn_popup_activity(title="COMPLETED!",message=f"Completed transcription of\n{input_file}\nOutput file can be found here:\n{output_file}\nOpen file now?", yes=lambda: os.startfile(output_file))
7195

@@ -77,6 +101,7 @@ def spawn_popup_activity(title, message, yes=None, no=None):
77101
return no()
78102

79103
if __name__ == "__main__":
104+
sys.argv.append('{"input_file":"D:\\\\Parent-Child Interactions\\\\Parent-Child Interactions\\\\P002_PCI.mp3","num_speakers":3,"lang":"","model_name":"openai/whisper-base.en"}')
80105
print("Attempting to transcribe for:", sys.argv[1:], flush=True)
81106
print(sys.argv, flush=True)
82107
for data in sys.argv[1:]:

0 commit comments

Comments
 (0)