Skip to content

Commit eb38d08

Browse files
authored
✨ 适配 GPT-Sovits v4 版本 (#56)
* ✨ 适配 GPT-Sovits v4版本
* 🚚 修改css文件命名,支持配置超时时间
1 parent 75007e0 commit eb38d08

File tree

8 files changed

+163
-96
lines changed

8 files changed

+163
-96
lines changed

nonebot_plugin_deepseek/__init__.py

Lines changed: 65 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
import itertools
2+
from pathlib import Path
3+
from importlib import import_module
14
from importlib.util import find_spec
25

36
from nonebot import require
@@ -20,6 +23,7 @@
2023
MultiVar,
2124
Namespace,
2225
Subcommand,
26+
UniMessage,
2327
CommandMeta,
2428
on_alconna,
2529
)
@@ -29,6 +33,8 @@
2933
if find_spec("nonebot_plugin_htmlrender"):
3034
require("nonebot_plugin_htmlrender")
3135
htmlrender_enable = True
36+
text_to_pic = import_module("nonebot_plugin_htmlrender").text_to_pic
37+
3238
else:
3339
htmlrender_enable = False
3440

@@ -101,15 +107,15 @@
101107
),
102108
Subcommand(
103109
"tts",
104-
Option("-l|--list", help_text="支持的 TTS 模型列表"),
110+
Option("-l|--list", Args["page?#页码", int], help_text="支持的 TTS 模型列表"),
105111
Option(
106112
"--set-default",
107113
Args[
108114
"model#模型名称",
109115
str,
110116
Field(
111117
completion=lambda: f"请输入 TTS 模型预设名,预期为:"
112-
f"{json_config.available_tts_models[:10]}…… 其中之一\n"
118+
f"{list(json_config.available_tts_models.keys())[:10]}…… 其中之一\n"
113119
"输入 `/deepseek tts -l` 查看所有 TTS 模型及角色"
114120
),
115121
],
@@ -213,29 +219,69 @@ async def _(
213219

214220

215221
@deepseek.assign("tts.list")
216-
async def _():
222+
async def _(
223+
page: Query[int] = Query("tts.list.page"),
224+
):
217225
if not tts_config.enable_models:
218226
await deepseek.finish("当前未启用 TTS 功能")
219-
if json_config.tts_model_dict:
220-
model_list = "".join(
221-
f"{model}\n - "
222-
+ "|".join(f"{spk}(默认)" if default_model.name == f"{model}-{spk}" else spk for spk in speakers)
223-
+ "\n"
224-
for model, speakers in json_config.tts_model_dict.items()
227+
228+
def parse_model_dict(model_dict: dict[str, dict[str, list[str]]], start_index: int) -> str:
229+
return "\n".join(
230+
(f"{'✅️ ' if model_name == default_model.model_name else '⏹️'}{start_index + index + 1}.{model_name}")
231+
for index, model_name in enumerate(model_dict.keys())
225232
if json_config.default_tts_model
226233
and (default_model := tts_config.get_tts_model(json_config.default_tts_model))
227234
)
228-
custom_models = "\n".join(
229-
f"- {model}(默认)" if model == json_config.default_tts_model else f"- {model}"
230-
for model in tts_config.get_enable_tts()
235+
236+
if json_config.available_tts_models:
237+
page_size = 200
238+
page_num = page.result if page.available else 1
239+
start_index = (page_num - 1) * page_size
240+
page_model_dict = dict(
241+
itertools.islice(json_config.available_tts_models.items(), start_index, start_index + page_size)
242+
)
243+
if not page_model_dict:
244+
await deepseek.finish(f"页码 {page_num} 超出范围,没有找到任何模型。")
245+
246+
model_list_msg = parse_model_dict(page_model_dict, start_index)
247+
custom_models = (
248+
"\n".join(
249+
f"{'✅️ ' if model.name == json_config.default_tts_model else '⏹️'}{index + 1}.{model.name}"
250+
for index, model in enumerate(tts_config.enable_models)
251+
)
252+
if isinstance(tts_config.enable_models, list)
253+
else ""
231254
)
232-
custom_models_msg = f"\n自定义预设:\n{custom_models}"
233255
else:
234256
await deepseek.finish("当前未查找到可用模型")
235257

236-
message = f"支持的 TTS 模型列表: \n{model_list}"
237-
if isinstance(tts_config.enable_models, list):
238-
message += custom_models_msg
258+
total_models = len(json_config.available_tts_models)
259+
total_pages = (total_models + page_size - 1) // page_size
260+
261+
if page_num > total_pages or page_num < 1:
262+
await deepseek.finish("请输入正确的页码")
263+
264+
header_msg = (
265+
f"支持的 TTS 模型列表 \n(第 {page_num}/{total_pages} 页, 共 {total_models} 个):\n\n"
266+
f"当前TTS模型:\n✅️ {json_config.default_tts_model}\n\n"
267+
)
268+
message = (
269+
(f"自定义 TTS 模型预设:\n {custom_models}" if isinstance(tts_config.enable_models, list) else "")
270+
+ f"\n\n{header_msg}"
271+
+ model_list_msg
272+
)
273+
if htmlrender_enable:
274+
custom_models_html = "".join(f"<div>{line}</div>" for line in custom_models.split("\n") if line)
275+
header_html = (
276+
f"<header class='custom-header'>"
277+
f"<h2 class='header-title'>自定义 TTS 预设</h2>"
278+
f"<div class='models-container'>{custom_models_html}</div></header>"
279+
)
280+
model_lines = "".join(f"<div>{line}</div>" for line in model_list_msg.split("\n") if line)
281+
model_html = f"<h2 class='header-title'>{header_msg}</h2><div class='models-container'>{model_lines}</div>"
282+
final_html = header_html + model_html
283+
css_path = str(Path(__file__).parent / "resources/tts_models.css")
284+
await deepseek.finish(UniMessage.image(raw=await text_to_pic(text=final_html, css_path=css_path, width=1440)))
239285
await deepseek.finish(message)
240286

241287

@@ -248,10 +294,11 @@ async def _(
248294
await deepseek.finish("当前未启用 TTS 功能")
249295
if not is_superuser:
250296
await deepseek.finish("该指令仅超管可用")
251-
if model.result not in json_config.available_tts_models:
297+
available_tts_model_names = list(json_config.available_tts_models.keys()) + tts_config.get_enable_tts()
298+
if model.result not in available_tts_model_names:
252299
await deepseek.finish(
253300
f"请输入 TTS 模型预设名,预期为:"
254-
f"{json_config.available_tts_models[:10]}…… 其中之一\n"
301+
f"{list(json_config.available_tts_models.keys())[:10]}…… 其中之一\n"
255302
"输入 `/deepseek tts -l` 查看所有 TTS 模型及角色"
256303
)
257304
json_config.default_tts_model = model.result

nonebot_plugin_deepseek/apis/request.py

Lines changed: 18 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from ..log import ds_logger, tts_logger
1010
from ..exception import RequestException
1111
from ..config import ds_config, tts_config, json_config, uninfo_enable
12-
from ..schemas import Balance, TTSResponse, ChatCompletions, StreamChoiceList
12+
from ..schemas import Balance, TTSModelInfo, ChatCompletions, StreamChoiceList
1313

1414

1515
class API:
@@ -63,66 +63,56 @@ async def query_balance(cls, model_name: str) -> Balance:
6363
return Balance(**response.json())
6464

6565
@classmethod
66-
async def get_tts_models(cls) -> list[TTSResponse]:
66+
async def get_tts_models(cls) -> list[TTSModelInfo]:
6767
try:
6868
async with httpx.AsyncClient() as client:
69-
response = await client.get(
69+
response = await client.post(
7070
f"{tts_config.base_url}/models",
7171
headers={**cls._headers},
72-
timeout=30,
72+
json={"version": tts_config.tts_version},
73+
timeout=tts_config.timeout,
7374
)
7475
if response.status_code != 200:
7576
raise RequestException(f"获取 TTS 模型列表失败,状态码: {response.status_code}")
76-
return [await TTSResponse.create(model=model) for model in response.json()]
77+
return [
78+
TTSModelInfo(model_name=key, language_emotions=value)
79+
for key, value in response.json().get("models", {}).items()
80+
if isinstance(value, dict)
81+
]
7782
except httpx.ConnectError as e:
7883
raise RequestException(f"连接 TTS 模型服务器失败: {e}")
7984

80-
@classmethod
81-
async def get_tts_speakers(cls, model_name: str) -> list[str]:
82-
async with httpx.AsyncClient() as client:
83-
response = await client.post(
84-
f"{tts_config.base_url}/spks",
85-
headers={**cls._headers},
86-
json={"model": model_name},
87-
timeout=30,
88-
)
89-
if speakers := response.json().get("speakers"):
90-
return list(speakers.keys())
91-
else:
92-
raise RequestException("获取 TTS 模型讲话人列表失败")
93-
9485
@classmethod
9586
async def text_to_speach(cls, text: str, model: str) -> bytes:
9687
model_config = tts_config.get_tts_model(model)
9788
model_name = model_config.model_name
98-
speaker = model_config.speaker_name
9989
json = {
10090
"text": text,
10191
"model_name": model_name,
102-
"speaker_name": speaker,
10392
"app_key": tts_config.access_token,
10493
"access_token": tts_config.access_token,
105-
"audio_dl_url": tts_config.audio_dl_url,
94+
"version": tts_config.tts_version,
95+
"dl_url": tts_config.dl_url,
10696
**model_config.to_dict(),
10797
}
10898

109-
tts_logger("DEBUG", f"使用模型 {model}讲话人:{speaker}, 配置:{json}")
99+
tts_logger("DEBUG", f"使用模型 {model},配置:{json}")
110100
try:
111101
async with httpx.AsyncClient() as client:
112102
response = await client.post(
113103
f"{tts_config.base_url}/infer_single",
114-
headers={**cls._headers},
104+
headers={**cls._headers, "Authorization": f"Bearer {tts_config.access_token}"},
115105
json=json,
116-
timeout=50,
106+
timeout=tts_config.timeout,
117107
)
118-
tts_logger("DEBUG", f"Response: {response.text}")
108+
tts_logger("DEBUG", f"Response: {response.status_code} {response.text}")
119109
if audio_url := response.json().get("audio_url"):
120110
async with httpx.AsyncClient() as client:
121-
response = await client.get(audio_url)
111+
response = await client.get(audio_url, timeout=tts_config.timeout)
122112
return response.content
123113
else:
124114
raise RequestException("语音合成失败")
125-
except httpx.ConnectError as e:
115+
except (httpx.ConnectError, httpx.ReadTimeout) as e:
126116
raise RequestException(f"连接 TTS 服务器失败: {e}")
127117

128118

nonebot_plugin_deepseek/cli/plugins/tts.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,7 @@ def dispatch(self, result: Arparma) -> Union[bool, None]:
3131
if result.find("tts.update"):
3232
available_models = asyncio.run(tts_config.get_available_tts())
3333
if available_models:
34-
json_config.available_tts_models = [
35-
f"{model}-{spk}" for model, speakers in available_models.items() for spk in speakers
36-
]
37-
json_config.tts_model_dict = available_models
34+
json_config.available_tts_models = available_models
3835
json_config.save()
3936
tts_logger("SUCCESS", f"Update available TTS models: {available_models}")
4037
return

nonebot_plugin_deepseek/config.py

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ def __init__(self) -> None:
3939
self.file: Path = store.get_plugin_config_dir() / "config.json"
4040
self.default_model: str = ds_config.get_enable_models()[0]
4141
self.enable_md_to_pic: bool = ds_config.md_to_pic
42-
self.tts_model_dict: dict[str, list[str]] = {}
43-
self.available_tts_models: list[str] = []
42+
self.available_tts_models: dict[str, dict[str, list[str]]] = {}
4443
self.default_tts_model: Optional[str] = None
4544

4645
self.prompt_func: Optional[Callable[[dict[str, Any]], str]] = None
@@ -58,10 +57,7 @@ def load(self):
5857
self.enable_md_to_pic = data.get("enable_md_to_pic", self.enable_md_to_pic)
5958
self.default_tts_model = data.get("default_tts_model")
6059
if isinstance(data.get("available_tts_models"), dict):
61-
self.tts_model_dict = data.get("available_tts_models")
62-
self.available_tts_models = [
63-
f"{model}-{spk}" for model, speakers in self.tts_model_dict.items() for spk in speakers
64-
] + (tts_config.get_enable_tts() if tts_config.enable_models else [])
60+
self.available_tts_models = data.get("available_tts_models")
6561

6662
enable_models = ds_config.get_enable_models()
6763
if self.default_model not in enable_models:
@@ -70,8 +66,10 @@ def load(self):
7066
if self.enable_md_to_pic != ds_config.md_to_pic:
7167
self.enable_md_to_pic = ds_config.md_to_pic
7268
self.save()
73-
if self.available_tts_models and self.default_tts_model not in self.available_tts_models:
74-
self.default_tts_model = self.available_tts_models[0]
69+
if self.available_tts_models and self.default_tts_model not in (
70+
list(self.available_tts_models.keys()) + tts_config.get_enable_tts()
71+
):
72+
self.default_tts_model = list(self.available_tts_models.keys())[0]
7573
self.save()
7674
if not self.available_tts_models and self.default_tts_model:
7775
self.save()
@@ -81,10 +79,10 @@ def save(self):
8179
"default_model": self.default_model,
8280
"enable_md_to_pic": self.enable_md_to_pic,
8381
}
84-
if self.default_tts_model in self.available_tts_models:
82+
if self.default_tts_model in (list(self.available_tts_models.keys()) + tts_config.get_enable_tts()):
8583
config_data["default_tts_model"] = self.default_tts_model
8684
if self.available_tts_models:
87-
config_data["available_tts_models"] = self.tts_model_dict
85+
config_data["available_tts_models"] = self.available_tts_models
8886
with open(self.file, "w", encoding="utf-8") as f:
8987
json.dump(config_data, f, ensure_ascii=False, indent=2)
9088
self.prompt_func = None
@@ -206,15 +204,15 @@ def to_dict(self):
206204
class CustomTTS(BaseModel):
207205
name: str
208206
"""TTS Preset Parameters Name"""
207+
version: str = "v4"
208+
"""GPT-Sovits API Version"""
209209
model_name: str
210210
"""TTS Model Name"""
211-
speaker_name: str
212-
"""TTS Speaker Name"""
213211
prompt_text_lang: str = "中文"
214212
"""language of the prompt text for the reference audio"""
215-
emotion: str = "随机"
213+
emotion: str = "默认"
216214
"""Emotion"""
217-
text_lang: str = "中文"
215+
text_lang: str = "多语种混合"
218216
"""language of the text to be synthesized"""
219217
top_k: int = Field(default=10, ge=1, le=100)
220218
"""top k sampling"""
@@ -242,6 +240,10 @@ class CustomTTS(BaseModel):
242240
"""repetition penalty for T2S model."""
243241
seed: int = -1
244242
"""random seed for reproducibility."""
243+
sample_steps: int = 16
244+
"""Number of steps sampled."""
245+
if_sr: bool = False
246+
"""whether to use super-resolution model."""
245247

246248
if PYDANTIC_V2:
247249
model_config = ConfigDict(extra="allow", arbitrary_types_allowed=True)
@@ -306,8 +308,11 @@ class ScopedTTSConfig(BaseModel):
306308
"""Your GPT-Sovits API Url """
307309
access_token: str = ""
308310
"""Your GPT-Sovits API Access Token"""
309-
audio_dl_url: str = ""
311+
tts_version: str = "v4"
312+
"""Your GPT-Sovits API Version"""
313+
dl_url: str = ""
310314
"""audio download url"""
315+
timeout: int = Field(default=60)
311316

312317
@model_validator(mode="before")
313318
@classmethod
@@ -321,12 +326,12 @@ def get_enable_tts(self) -> list[str]:
321326
return []
322327
return [model.name for model in self.enable_models]
323328

324-
async def get_available_tts(self) -> dict[str, list[str]]:
329+
async def get_available_tts(self) -> dict[str, dict[str, list[str]]]:
325330
from .apis import API
326331

327332
try:
328333
tts_models = await API.get_tts_models()
329-
preset_dict = {model.model: list(model.speakers) for model in tts_models}
334+
preset_dict = {model.model_name: model.language_emotions for model in tts_models}
330335
except RequestException as e:
331336
preset_dict = {}
332337
tts_logger("WARNING", f"获取 TTS 模型列表失败: {e}")
@@ -336,16 +341,10 @@ def get_tts_model(self, preset_name: str) -> CustomTTS:
336341
"""Get TTS model config"""
337342
if not isinstance(self.enable_models, bool):
338343
for model in self.enable_models:
339-
if (
340-
model.name == preset_name
341-
and f"{model.model_name}-{model.speaker_name}" in json_config.available_tts_models
342-
):
344+
if model.name == preset_name and f"{model.model_name}" in json_config.available_tts_models:
343345
return model
344-
if "-" in preset_name:
345-
model_name = preset_name.split("-")[0]
346-
speaker_name = preset_name.split("-")[1]
347-
if preset_name in json_config.available_tts_models:
348-
return CustomTTS(name=preset_name, model_name=model_name, speaker_name=speaker_name)
346+
if preset_name in json_config.available_tts_models:
347+
return CustomTTS(name=preset_name, model_name=preset_name)
349348
raise ValueError(f"TTS Model {preset_name} not valid")
350349

351350

nonebot_plugin_deepseek/hook.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,11 @@ async def _() -> None:
1414
if tts_config.enable_models:
1515
if not json_config.available_tts_models:
1616
available_models = await tts_config.get_available_tts()
17-
json_config.available_tts_models = [
18-
f"{model}-{spk}" for model, speakers in available_models.items() for spk in speakers
19-
]
20-
json_config.tts_model_dict = available_models
17+
json_config.available_tts_models = available_models
2118
json_config.save()
22-
tts_logger("DEBUG", f"Loaded available TTS models: {available_models}")
19+
tts_logger("DEBUG", f"Loaded available TTS models: {len(available_models)}")
2320
else:
24-
tts_logger("DEBUG", f"Loaded available TTS models: {json_config.available_tts_models}")
21+
tts_logger("DEBUG", f"Loaded available TTS models: {len(json_config.available_tts_models)}")
2522
command_manager.load_cache(cach_dir)
2623
ds_logger("DEBUG", "DeepSeek shortcuts cache loaded")
2724

0 commit comments

Comments
 (0)