Skip to content

Commit be9cf38

Browse files
authored
Merge pull request #176 from thiswillbeyourgithub/allow-working-without-all-backends
allow working without all backends
2 parents 9adfd25 + eafc617 commit be9cf38

File tree

3 files changed

+110
-38
lines changed

3 files changed

+110
-38
lines changed

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,13 @@ subsai media.txt --model openai/whisper --format srt
172172
173173
### From Python
174174
175+
To install:
176+
1. `git clone https://github.com/absadiki/subsai`
177+
2. `cd subsai`
178+
3. `uv pip install -e .`
179+
180+
*Note: For a minimal install, or if you have issues installing dependencies, you can comment out the dependencies of the backends you won't use in `requirements.txt`.*
181+
175182
```python
176183
from subsai import SubsAI
177184

requirements.txt

Lines changed: 37 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,45 @@
1-
openai-whisper==20240930
1+
# always needed
2+
ffsubsync~=0.4.24
3+
pysubs2~=1.6.0
4+
dl_translate==0.3.0
5+
pydub==0.25.1
6+
7+
# to avoid problems with dependency versions
8+
numpy<2
9+
torch==2.2.0
10+
torchaudio==2.2.0
11+
12+
13+
# For the Web-UI
14+
pandas==1.5.3
215
streamlit~=1.20.0
316
streamlit_player~=0.1.5
417
streamlit-aggrid~=0.3.4
5-
ffsubsync~=0.4.24
18+
19+
# Backend specific dependencies
20+
21+
# openai API:
22+
openai==1.60.1
23+
ffmpeg-python>=0.2.0
24+
25+
# whisper timestamped:
626
whisper-timestamped @ git+https://github.com/linto-ai/whisper-timestamped
7-
pandas==1.5.3
8-
pysubs2~=1.6.0
27+
28+
# pywhispercpp:
929
pywhispercpp==1.3.1
10-
dl_translate==0.3.0
30+
31+
# stable-ts and faster_whisper:
1132
faster_whisper
12-
whisperx @ git+https://github.com/m-bain/whisperx.git@8c58c54635cd6ee2d9d8665a3cf789863f6ed700
33+
34+
# stable-ts:
1335
stable-ts==2.18.2
14-
openai==1.60.1
36+
37+
# whisperx:
38+
whisperx @ git+https://github.com/m-bain/whisperx.git@8c58c54635cd6ee2d9d8665a3cf789863f6ed700
39+
40+
# huggingface:
1541
transformers==4.48.1
16-
numpy<2
17-
torch==2.2.0
18-
torchaudio==2.2.0
19-
pydub==0.25.1
42+
43+
# stable-ts, whisper, whisperx, faster_whisper, whisper-timestamped
44+
openai-whisper==20240930
45+

src/subsai/configs.py

Lines changed: 66 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,40 +8,74 @@
88
from ffsubsync.constants import DEFAULT_MAX_SUBTITLE_SECONDS, DEFAULT_START_SECONDS, DEFAULT_MAX_OFFSET_SECONDS, \
99
DEFAULT_APPLY_OFFSET_SECONDS, DEFAULT_FRAME_RATE, DEFAULT_VAD
1010

11-
from subsai.models.faster_whisper_model import FasterWhisperModel
12-
from subsai.models.hugging_face_model import HuggingFaceModel
13-
from subsai.models.whisperX_model import WhisperXModel
14-
from subsai.models.whisper_model import WhisperModel
15-
from subsai.models.whisper_timestamped_model import WhisperTimeStamped
16-
from subsai.models.whispercpp_model import WhisperCppModel
17-
from subsai.utils import get_available_devices, available_translation_models
18-
from subsai.models.stable_ts_model import StableTsModel
19-
from subsai.models.whisper_api_model import WhisperAPIModel
11+
try:
12+
from subsai.models.faster_whisper_model import FasterWhisperModel
13+
except ImportError as e:
14+
print(e)
15+
try:
16+
from subsai.models.hugging_face_model import HuggingFaceModel
17+
except ImportError as e:
18+
print(e)
19+
try:
20+
from subsai.models.whisperX_model import WhisperXModel
21+
except ImportError as e:
22+
print(e)
23+
try:
24+
from subsai.models.whisper_model import WhisperModel
25+
except ImportError as e:
26+
print(e)
27+
try:
28+
from subsai.models.whisper_timestamped_model import WhisperTimeStamped
29+
except ImportError as e:
30+
print(e)
31+
try:
32+
from subsai.models.whispercpp_model import WhisperCppModel
33+
except ImportError as e:
34+
print(e)
35+
try:
36+
from subsai.utils import get_available_devices, available_translation_models
37+
except ImportError as e:
38+
print(e)
39+
try:
40+
from subsai.models.stable_ts_model import StableTsModel
41+
except ImportError as e:
42+
print(e)
43+
try:
44+
from subsai.models.whisper_api_model import WhisperAPIModel
45+
except ImportError as e:
46+
print(e)
2047

21-
AVAILABLE_MODELS = {
22-
'openai/whisper': {
48+
AVAILABLE_MODELS = {}
49+
if "WhisperModel" in locals():
50+
AVAILABLE_MODELS['openai/whisper'] = {
2351
'class': WhisperModel,
2452
'description': 'Whisper is a general-purpose speech recognition model. It is trained on a large dataset of '
2553
'diverse audio and is also a multi-task model that can perform multilingual speech recognition '
2654
'as well as speech translation and language identification.',
2755
'url': 'https://github.com/openai/whisper',
2856
'config_schema': WhisperModel.config_schema,
29-
},
30-
'linto-ai/whisper-timestamped': {
57+
}
58+
59+
if "WhisperTimeStamped" in locals():
60+
AVAILABLE_MODELS['linto-ai/whisper-timestamped'] = {
3161
'class': WhisperTimeStamped,
3262
'description': 'Multilingual Automatic Speech Recognition with word-level timestamps and confidence.',
3363
'url': 'https://github.com/linto-ai/whisper-timestamped',
3464
'config_schema': WhisperTimeStamped.config_schema,
35-
},
36-
'ggerganov/whisper.cpp': {
65+
}
66+
67+
if "WhisperCppModel" in locals():
68+
AVAILABLE_MODELS['ggerganov/whisper.cpp'] = {
3769
'class': WhisperCppModel,
3870
'description': 'High-performance inference of OpenAI\'s Whisper automatic speech recognition (ASR) model\n'
3971
'* Plain C/C++ implementation without dependencies\n'
4072
'* Runs on the CPU\n',
4173
'url': 'https://github.com/ggerganov/whisper.cpp\nhttps://github.com/abdeladim-s/pywhispercpp',
4274
'config_schema': WhisperCppModel.config_schema,
43-
},
44-
'guillaumekln/faster-whisper': {
75+
}
76+
77+
if "FasterWhisperModel" in locals():
78+
AVAILABLE_MODELS['guillaumekln/faster-whisper'] = {
4579
'class': FasterWhisperModel,
4680
'description': '**faster-whisper** is a reimplementation of OpenAI\'s Whisper model using '
4781
'[CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for '
@@ -51,33 +85,38 @@
5185
'efficiency can be further improved with 8-bit quantization on both CPU and GPU.',
5286
'url': 'https://github.com/guillaumekln/faster-whisper',
5387
'config_schema': FasterWhisperModel.config_schema,
54-
},
55-
'm-bain/whisperX': {
88+
}
89+
if "WhisperXModel" in locals():
90+
AVAILABLE_MODELS['m-bain/whisperX'] = {
5691
'class': WhisperXModel,
5792
'description': """**whisperX** is a fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and speaker diarization.""",
5893
'url': 'https://github.com/m-bain/whisperX',
5994
'config_schema': WhisperXModel.config_schema,
60-
},
61-
'jianfch/stable-ts': {
95+
}
96+
if "StableTsModel" in locals():
97+
AVAILABLE_MODELS['jianfch/stable-ts'] = {
6298
'class': StableTsModel,
6399
'description': '**Stabilizing Timestamps for Whisper** This library modifies [Whisper](https://github.com/openai/whisper) to produce more reliable timestamps and extends its functionality.',
64100
'url': 'https://github.com/jianfch/stable-ts',
65101
'config_schema': StableTsModel.config_schema,
66-
},
67-
'API/openai/whisper': {
102+
}
103+
if "WhisperAPIModel" in locals():
104+
AVAILABLE_MODELS['API/openai/whisper'] = {
68105
'class': WhisperAPIModel,
69106
'description': 'API for the OpenAI large-v2 Whisper model, requires an API key.',
70107
'url': 'https://platform.openai.com/docs/guides/speech-to-text',
71108
'config_schema': WhisperAPIModel.config_schema,
72-
},
73-
'HuggingFace': {
109+
}
110+
if "HuggingFaceModel" in locals():
111+
AVAILABLE_MODELS['HuggingFaceModel'] = {
74112
'class': HuggingFaceModel,
75113
'description': 'Hugging Face implementation of Whisper. '
76114
'Any speech recognition pretrained model from the Hugging Face hub can be used as well',
77115
'url': 'https://huggingface.co/tasks/automatic-speech-recognition',
78116
'config_schema': HuggingFaceModel.config_schema,
79-
},
80-
}
117+
}
118+
if not AVAILABLE_MODELS:
119+
raise Exception("subsai couldn't find any available models")
81120

82121
BASIC_TOOLS_CONFIGS = {
83122
'set time': {

0 commit comments

Comments
 (0)