|
8 | 8 | from ffsubsync.constants import DEFAULT_MAX_SUBTITLE_SECONDS, DEFAULT_START_SECONDS, DEFAULT_MAX_OFFSET_SECONDS, \ |
9 | 9 | DEFAULT_APPLY_OFFSET_SECONDS, DEFAULT_FRAME_RATE, DEFAULT_VAD |
10 | 10 |
|
# Every model backend is an optional dependency: import each one
# best-effort so that one missing extra disables that backend instead of
# breaking `import subsai` entirely.
import importlib
import sys

# (module path, names to re-export into this module's namespace)
_OPTIONAL_IMPORTS = [
    ('subsai.models.faster_whisper_model', ('FasterWhisperModel',)),
    ('subsai.models.hugging_face_model', ('HuggingFaceModel',)),
    ('subsai.models.whisperX_model', ('WhisperXModel',)),
    ('subsai.models.whisper_model', ('WhisperModel',)),
    ('subsai.models.whisper_timestamped_model', ('WhisperTimeStamped',)),
    ('subsai.models.whispercpp_model', ('WhisperCppModel',)),
    ('subsai.utils', ('get_available_devices', 'available_translation_models')),
    ('subsai.models.stable_ts_model', ('StableTsModel',)),
    ('subsai.models.whisper_api_model', ('WhisperAPIModel',)),
]

for _module_path, _names in _OPTIONAL_IMPORTS:
    try:
        _module = importlib.import_module(_module_path)
        for _name in _names:
            globals()[_name] = getattr(_module, _name)
    # AttributeError mirrors the ImportError that `from x import y`
    # raises when the module loads but lacks the attribute.
    except (ImportError, AttributeError) as e:
        # Report to stderr: a missing optional backend is a diagnostic,
        # not program output.
        print(e, file=sys.stderr)
20 | 47 |
|
# Registry of every usable transcription backend, keyed by model id.
# Entries are added below only for backends whose optional import
# succeeded, so a missing dependency disables one backend instead of
# breaking the whole package.
AVAILABLE_MODELS = {}

# globals() is the namespace the optional imports populate (at module
# scope locals() is the same mapping, but globals() states the intent).
if "WhisperModel" in globals():
    AVAILABLE_MODELS['openai/whisper'] = {
        'class': WhisperModel,
        'description': 'Whisper is a general-purpose speech recognition model. It is trained on a large dataset of '
                       'diverse audio and is also a multi-task model that can perform multilingual speech recognition '
                       'as well as speech translation and language identification.',
        'url': 'https://github.com/openai/whisper',
        'config_schema': WhisperModel.config_schema,
    }

# Registered only when the optional whisper-timestamped backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
if "WhisperTimeStamped" in globals():
    AVAILABLE_MODELS['linto-ai/whisper-timestamped'] = {
        'class': WhisperTimeStamped,
        'description': 'Multilingual Automatic Speech Recognition with word-level timestamps and confidence.',
        'url': 'https://github.com/linto-ai/whisper-timestamped',
        'config_schema': WhisperTimeStamped.config_schema,
    }

# Registered only when the optional whisper.cpp backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
if "WhisperCppModel" in globals():
    AVAILABLE_MODELS['ggerganov/whisper.cpp'] = {
        'class': WhisperCppModel,
        'description': 'High-performance inference of OpenAI\'s Whisper automatic speech recognition (ASR) model\n'
                       '* Plain C/C++ implementation without dependencies\n'
                       '* Runs on the CPU\n',
        'url': 'https://github.com/ggerganov/whisper.cpp\nhttps://github.com/abdeladim-s/pywhispercpp',
        'config_schema': WhisperCppModel.config_schema,
    }
| 76 | + |
| 77 | +if "FasterWhisperModel" in locals(): |
| 78 | + AVAILABLE_MODELS['guillaumekln/faster-whisper'] = { |
45 | 79 | 'class': FasterWhisperModel, |
46 | 80 | 'description': '**faster-whisper** is a reimplementation of OpenAI\'s Whisper model using ' |
47 | 81 | '[CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for ' |
|
51 | 85 | 'efficiency can be further improved with 8-bit quantization on both CPU and GPU.', |
52 | 86 | 'url': 'https://github.com/guillaumekln/faster-whisper', |
53 | 87 | 'config_schema': FasterWhisperModel.config_schema, |
54 | | - }, |
55 | | - 'm-bain/whisperX': { |
| 88 | + } |
# Registered only when the optional whisperX backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
if "WhisperXModel" in globals():
    AVAILABLE_MODELS['m-bain/whisperX'] = {
        'class': WhisperXModel,
        'description': """**whisperX** is a fast automatic speech recognition (70x realtime with large-v2) with word-level timestamps and speaker diarization.""",
        'url': 'https://github.com/m-bain/whisperX',
        'config_schema': WhisperXModel.config_schema,
    }
# Registered only when the optional stable-ts backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
if "StableTsModel" in globals():
    AVAILABLE_MODELS['jianfch/stable-ts'] = {
        'class': StableTsModel,
        'description': '**Stabilizing Timestamps for Whisper** This library modifies [Whisper](https://github.com/openai/whisper) to produce more reliable timestamps and extends its functionality.',
        'url': 'https://github.com/jianfch/stable-ts',
        'config_schema': StableTsModel.config_schema,
    }
# Registered only when the optional OpenAI-API backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
if "WhisperAPIModel" in globals():
    AVAILABLE_MODELS['API/openai/whisper'] = {
        'class': WhisperAPIModel,
        'description': 'API for the OpenAI large-v2 Whisper model, requires an API key.',
        'url': 'https://platform.openai.com/docs/guides/speech-to-text',
        'config_schema': WhisperAPIModel.config_schema,
    }
# Registered only when the optional Hugging Face backend imported
# successfully; probe globals() explicitly rather than the equivalent
# module-level locals().
# NOTE(review): this commit renamed the registry key from 'HuggingFace'
# to 'HuggingFaceModel' -- confirm this was intentional, as any caller
# looking up the old key will now miss it.
if "HuggingFaceModel" in globals():
    AVAILABLE_MODELS['HuggingFaceModel'] = {
        'class': HuggingFaceModel,
        'description': 'Hugging Face implementation of Whisper. '
                       'Any speech recognition pretrained model from the Hugging Face hub can be used as well',
        'url': 'https://huggingface.co/tasks/automatic-speech-recognition',
        'config_schema': HuggingFaceModel.config_schema,
    }
# With every backend optional, an empty registry means none of the
# optional dependencies are installed -- fail fast at import time.
# ImportError is more precise than a bare Exception and is still caught
# by any existing `except Exception` handler.
if not AVAILABLE_MODELS:
    raise ImportError("subsai couldn't find any available models")
81 | 120 |
|
82 | 121 | BASIC_TOOLS_CONFIGS = { |
83 | 122 | 'set time': { |
|
0 commit comments