Merge pull request #177 from thiswillbeyourgithub/add-base_url-parameter

absadiki · web-flow · commit fdae1040b82d · 2025-09-02T20:56:16.000-04:00
add base url parameter
diff --git a/README.md b/README.md
@@ -46,7 +46,7 @@
   * [x] [Hugging Face Transformers](https://huggingface.co/tasks/automatic-speech-recognition)
     * > Hugging Face implementation of Whisper.  Any speech recognition pretrained model from the Hugging Face hub can be used as well.
   * [x] [API/openai/whisper](https://platform.openai.com/docs/guides/speech-to-text)
-    * > OpenAI Whisper via their API
+    * > OpenAI Whisper via their API, or any other OpenAI-compatible Whisper API (e.g. [speaches.ai](https://github.com/speaches-ai/speaches))
 
 * Web UI
   * Fully offline, no third party services 
diff --git a/src/subsai/main.py b/src/subsai/main.py
@@ -256,7 +256,7 @@ def merge_subs_with_video(subs: Dict[str, SSAFile],
             model = subs_ai.create_model('openai/whisper', {'model_type': 'tiny'})
             en_subs = subs_ai.transcribe(file, model)
             ar_subs = pysubs2.load('../../assets/video/test0-ar.srt')
-            Tools.merge_subs_with_video2({'English': subs, "Arabic": subs2}, file)
+            Tools.merge_subs_with_video({'English': en_subs, "Arabic": ar_subs}, file)
         ```
 
         :param subs: dict with (lang,`SSAFile` object) key,value pairs
@@ -311,5 +311,5 @@ def merge_subs_with_video(subs: Dict[str, SSAFile],
     subs = subs_ai.transcribe(file, model)
     subs.save('../../assets/video/test1.srt')
     subs2 = pysubs2.load('../../assets/video/test0-ar.srt')
-    Tools.merge_subs_with_video2({'English': subs, "Arabic": subs2}, file)
+    Tools.merge_subs_with_video({'English': subs, "Arabic": subs2}, file)
     # subs.save('test1.srt')
diff --git a/src/subsai/models/whisper_api_model.py b/src/subsai/models/whisper_api_model.py
@@ -37,7 +37,7 @@ def convert_video_to_audio_ffmpeg(video_file, output_ext="mp3"):
         .input(video_file)
         .output(output_file)
         .overwrite_output()
-        .run(quiet=True)
+        .run(quiet=False)
     )
     return output_file
 
@@ -75,6 +75,12 @@ class WhisperAPIModel(AbstractModel):
                 'description': "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use log probability to automatically increase the temperature until certain thresholds are hit.",
                 'options': None,
                 'default': 0
+            },
+            'base_url': {
+                'type': str,
+                'description': "The base URL for the API. Useful if you're already self hosting whisper for example.",
+                'options': None,
+                'default': "https://api.openai.com/v1/"
             }
         }
 
@@ -85,8 +91,11 @@ def __init__(self, model_config):
         self.language = _load_config('language', model_config, self.config_schema)
         self.prompt = _load_config('prompt', model_config, self.config_schema)
         self.temperature = _load_config('temperature', model_config, self.config_schema)
+        self.base_url = _load_config('base_url', model_config, self.config_schema)
+        if not self.base_url.endswith("/"):
+            self.base_url += "/"
 
-        self.client = OpenAI(api_key=self.api_key)
+        self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
 
     def chunk_audio(self,audio_file_path) -> list:
         # Load the audio file