@@ -39,8 +39,7 @@ def __init__(self) -> None:
3939 self .file : Path = store .get_plugin_config_dir () / "config.json"
4040 self .default_model : str = ds_config .get_enable_models ()[0 ]
4141 self .enable_md_to_pic : bool = ds_config .md_to_pic
42- self .tts_model_dict : dict [str , list [str ]] = {}
43- self .available_tts_models : list [str ] = []
42+ self .available_tts_models : dict [str , dict [str , list [str ]]] = {}
4443 self .default_tts_model : Optional [str ] = None
4544
4645 self .prompt_func : Optional [Callable [[dict [str , Any ]], str ]] = None
@@ -58,10 +57,7 @@ def load(self):
5857 self .enable_md_to_pic = data .get ("enable_md_to_pic" , self .enable_md_to_pic )
5958 self .default_tts_model = data .get ("default_tts_model" )
6059 if isinstance (data .get ("available_tts_models" ), dict ):
61- self .tts_model_dict = data .get ("available_tts_models" )
62- self .available_tts_models = [
63- f"{ model } -{ spk } " for model , speakers in self .tts_model_dict .items () for spk in speakers
64- ] + (tts_config .get_enable_tts () if tts_config .enable_models else [])
60+ self .available_tts_models = data .get ("available_tts_models" )
6561
6662 enable_models = ds_config .get_enable_models ()
6763 if self .default_model not in enable_models :
@@ -70,8 +66,10 @@ def load(self):
7066 if self .enable_md_to_pic != ds_config .md_to_pic :
7167 self .enable_md_to_pic = ds_config .md_to_pic
7268 self .save ()
73- if self .available_tts_models and self .default_tts_model not in self .available_tts_models :
74- self .default_tts_model = self .available_tts_models [0 ]
69+ if self .available_tts_models and self .default_tts_model not in (
70+ list (self .available_tts_models .keys ()) + tts_config .get_enable_tts ()
71+ ):
72+ self .default_tts_model = list (self .available_tts_models .keys ())[0 ]
7573 self .save ()
7674 if not self .available_tts_models and self .default_tts_model :
7775 self .save ()
@@ -81,10 +79,10 @@ def save(self):
8179 "default_model" : self .default_model ,
8280 "enable_md_to_pic" : self .enable_md_to_pic ,
8381 }
84- if self .default_tts_model in self .available_tts_models :
82+ if self .default_tts_model in ( list ( self .available_tts_models . keys ()) + tts_config . get_enable_tts ()) :
8583 config_data ["default_tts_model" ] = self .default_tts_model
8684 if self .available_tts_models :
87- config_data ["available_tts_models" ] = self .tts_model_dict
85+ config_data ["available_tts_models" ] = self .available_tts_models
8886 with open (self .file , "w" , encoding = "utf-8" ) as f :
8987 json .dump (config_data , f , ensure_ascii = False , indent = 2 )
9088 self .prompt_func = None
@@ -206,15 +204,15 @@ def to_dict(self):
206204class CustomTTS (BaseModel ):
207205 name : str
208206 """TTS Preset Parameters Name"""
207+ version : str = "v4"
208+ """GPT-Sovits API Version"""
209209 model_name : str
210210 """TTS Model Name"""
211- speaker_name : str
212- """TTS Speaker Name"""
213211 prompt_text_lang : str = "中文"
214212 """language of the prompt text for the reference audio"""
215- emotion : str = "随机 "
213+ emotion : str = "默认 "
216214 """Emotion"""
217- text_lang : str = "中文 "
215+ text_lang : str = "多语种混合 "
218216 """language of the text to be synthesized"""
219217 top_k : int = Field (default = 10 , ge = 1 , le = 100 )
220218 """top k sampling"""
@@ -242,6 +240,10 @@ class CustomTTS(BaseModel):
242240 """repetition penalty for T2S model."""
243241 seed : int = - 1
244242 """random seed for reproducibility."""
243+ sample_steps : int = 16
244+ """Number of steps sampled."""
245+ if_sr : bool = False
246+ """whether to use super-resolution model."""
245247
246248 if PYDANTIC_V2 :
247249 model_config = ConfigDict (extra = "allow" , arbitrary_types_allowed = True )
@@ -306,8 +308,11 @@ class ScopedTTSConfig(BaseModel):
306308 """Your GPT-Sovits API Url """
307309 access_token : str = ""
308310 """Your GPT-Sovits API Access Token"""
309- audio_dl_url : str = ""
311+ tts_version : str = "v4"
312+ """Your GPT-Sovits API Version"""
313+ dl_url : str = ""
310314 """audio download url"""
315+ timeout : int = Field (default = 60 )
311316
312317 @model_validator (mode = "before" )
313318 @classmethod
@@ -321,12 +326,12 @@ def get_enable_tts(self) -> list[str]:
321326 return []
322327 return [model .name for model in self .enable_models ]
323328
324- async def get_available_tts (self ) -> dict [str , list [str ]]:
329+ async def get_available_tts (self ) -> dict [str , dict [ str , list [str ] ]]:
325330 from .apis import API
326331
327332 try :
328333 tts_models = await API .get_tts_models ()
329- preset_dict = {model .model : list ( model .speakers ) for model in tts_models }
334+ preset_dict = {model .model_name : model .language_emotions for model in tts_models }
330335 except RequestException as e :
331336 preset_dict = {}
332337 tts_logger ("WARNING" , f"获取 TTS 模型列表失败: { e } " )
@@ -336,16 +341,10 @@ def get_tts_model(self, preset_name: str) -> CustomTTS:
336341 """Get TTS model config"""
337342 if not isinstance (self .enable_models , bool ):
338343 for model in self .enable_models :
339- if (
340- model .name == preset_name
341- and f"{ model .model_name } -{ model .speaker_name } " in json_config .available_tts_models
342- ):
344+ if model .name == preset_name and f"{ model .model_name } " in json_config .available_tts_models :
343345 return model
344- if "-" in preset_name :
345- model_name = preset_name .split ("-" )[0 ]
346- speaker_name = preset_name .split ("-" )[1 ]
347- if preset_name in json_config .available_tts_models :
348- return CustomTTS (name = preset_name , model_name = model_name , speaker_name = speaker_name )
346+ if preset_name in json_config .available_tts_models :
347+ return CustomTTS (name = preset_name , model_name = preset_name )
349348 raise ValueError (f"TTS Model { preset_name } not valid" )
350349
351350
0 commit comments