Skip to content

Commit 16f8df1

Browse files
authored
Merge pull request #300 from snakers4/tts_v5
V5
2 parents 9241c50 + 900aaaa commit 16f8df1

File tree

3 files changed

+170
-8
lines changed

3 files changed

+170
-8
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,11 @@ Basic dependencies for Colab examples:
130130
[![Open on Torch Hub](https://img.shields.io/badge/Torch-Hub-red?logo=pytorch&style=for-the-badge)](https://pytorch.org/hub/snakers4_silero-models_tts/)
131131

132132
```python
133-
# V4
133+
# V5
134134
import torch
135135

136136
language = 'ru'
137-
model_id = 'v4_ru'
137+
model_id = 'v5_ru'
138138
sample_rate = 48000
139139
speaker = 'xenia'
140140
device = torch.device('cpu')
@@ -156,7 +156,7 @@ audio = model.apply_tts(text=example_text,
156156
- Please see the detailed examples in Colab;
157157

158158
```python
159-
# V4
159+
# V5
160160
import os
161161
import torch
162162

@@ -165,13 +165,13 @@ torch.set_num_threads(4)
165165
local_file = 'model.pt'
166166

167167
if not os.path.isfile(local_file):
168-
torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v4_ru.pt',
168+
torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v5_ru.pt',
169169
local_file)
170170

171171
model = torch.package.PackageImporter(local_file).load_pickle("tts_models", "model")
172172
model.to(device)
173173

174-
example_text = 'В недрах тундры выдры в г+етрах т+ырят в вёдра ядра кедров.'
174+
example_text = 'Меня зовут Лева Королев. Я из готов. И я уже готов открыть все ваши замки любой сложности!'
175175
sample_rate = 48000
176176
speaker='baya'
177177

examples_tts.ipynb

Lines changed: 164 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,130 @@
8181
" print(f'Available models for {lang}: {_models}')"
8282
]
8383
},
84+
{
85+
"cell_type": "markdown",
86+
"id": "8b37b3d0",
87+
"metadata": {},
88+
"source": [
89+
"## V5"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"id": "71bebc98",
96+
"metadata": {},
97+
"outputs": [],
98+
"source": [
99+
"import torch\n",
100+
"\n",
101+
"language = 'ru'\n",
102+
"model_id = 'v5_ru'\n",
103+
"device = torch.device('cpu')\n",
104+
"\n",
105+
"model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models',\n",
106+
" model='silero_tts',\n",
107+
" language=language,\n",
108+
" speaker=model_id)\n",
109+
"model.to(device) # gpu or cpu"
110+
]
111+
},
112+
{
113+
"cell_type": "markdown",
114+
"id": "4782713d",
115+
"metadata": {},
116+
"source": [
117+
"### Speakers"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"execution_count": null,
123+
"id": "8e8afb06",
124+
"metadata": {},
125+
"outputs": [],
126+
"source": [
127+
"model.speakers"
128+
]
129+
},
130+
{
131+
"cell_type": "markdown",
132+
"id": "3b7cf618",
133+
"metadata": {},
134+
"source": [
135+
"### Text"
136+
]
137+
},
138+
{
139+
"cell_type": "code",
140+
"execution_count": null,
141+
"id": "6875417c",
142+
"metadata": {},
143+
"outputs": [],
144+
"source": [
145+
"sample_rate = 48000\n",
146+
"speaker = 'xenia'\n",
147+
"put_accent=True\n",
148+
"put_yo=True\n",
149+
"put_stress_homo=True\n",
150+
"put_yo_homo=True\n",
151+
"\n",
152+
"example_text = 'Меня зовут Лева Королев. Я из готов. И я уже готов открыть все ваши замки любой сложности!'\n",
153+
"\n",
154+
"audio = model.apply_tts(text=example_text,\n",
155+
" speaker=speaker,\n",
156+
" sample_rate=sample_rate,\n",
157+
" put_accent=put_accent,\n",
158+
" put_yo=put_yo,\n",
159+
" put_stress_homo=put_stress_homo,\n",
160+
" put_yo_homo=put_yo_homo)\n",
161+
"print(example_text)\n",
162+
"display(Audio(audio, rate=sample_rate))"
163+
]
164+
},
165+
{
166+
"cell_type": "markdown",
167+
"id": "e0ce7df5",
168+
"metadata": {},
169+
"source": [
170+
"### SSML"
171+
]
172+
},
173+
{
174+
"cell_type": "code",
175+
"execution_count": null,
176+
"id": "e9e159a2",
177+
"metadata": {},
178+
"outputs": [],
179+
"source": [
180+
"ssml_sample = \"\"\"\n",
181+
" <speak>\n",
182+
" <p>\n",
183+
" Когда я просыпаюсь, <prosody rate=\"x-slow\">я говорю довольно медленно</prosody>.\n",
184+
" Пот+ом я начинаю говорить своим обычным голосом,\n",
185+
" <prosody pitch=\"x-high\"> а могу говорить тоном выше </prosody>,\n",
186+
" или <prosody pitch=\"x-low\">наоборот, ниже</prosody>.\n",
187+
" Пот+ом, если повезет – <prosody rate=\"fast\">я могу говорить и довольно быстро.</prosody>\n",
188+
" А еще я умею делать паузы любой длины, например, две секунды <break time=\"2000ms\"/>.\n",
189+
" <p>\n",
190+
" Также я умею делать паузы между параграфами.\n",
191+
" </p>\n",
192+
" <p>\n",
193+
" <s>И также я умею делать паузы между предложениями</s>\n",
194+
" <s>Вот например как сейчас</s>\n",
195+
" </p>\n",
196+
" </p>\n",
197+
" </speak>\n",
198+
" \"\"\"\n",
199+
"\n",
200+
"sample_rate = 48000\n",
201+
"speaker = 'xenia' \n",
202+
"audio = model.apply_tts(ssml_text=ssml_sample,\n",
203+
" speaker=speaker,\n",
204+
" sample_rate=sample_rate)\n",
205+
"display(Audio(audio, rate=sample_rate))"
206+
]
207+
},
84208
{
85209
"cell_type": "markdown",
86210
"id": "aebc6429",
@@ -563,7 +687,45 @@
563687
"source": [
564688
"#@title Install dependencies\n",
565689
"\n",
566-
"!pip install -q torch==1.10"
690+
"!pip install -q torch==1.12"
691+
]
692+
},
693+
{
694+
"cell_type": "markdown",
695+
"id": "20cf87d9",
696+
"metadata": {},
697+
"source": [
698+
"## V5"
699+
]
700+
},
701+
{
702+
"cell_type": "code",
703+
"execution_count": null,
704+
"id": "832b0ceb",
705+
"metadata": {},
706+
"outputs": [],
707+
"source": [
708+
"import os\n",
709+
"import torch\n",
710+
"\n",
711+
"device = torch.device('cpu')\n",
712+
"torch.set_num_threads(4)\n",
713+
"local_file = 'model.pt'\n",
714+
"\n",
715+
"if not os.path.isfile(local_file):\n",
716+
" torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v5_ru.pt',\n",
717+
" local_file) \n",
718+
"\n",
719+
"model = torch.package.PackageImporter(local_file).load_pickle(\"tts_models\", \"model\")\n",
720+
"model.to(device)\n",
721+
"\n",
722+
"example_text = 'Меня зовут Лева Королев. Я из готов. И я уже готов открыть все ваши замки любой сложности!'\n",
723+
"sample_rate = 48000\n",
724+
"speaker='baya'\n",
725+
"\n",
726+
"audio_paths = model.save_wav(text=example_text,\n",
727+
" speaker=speaker,\n",
728+
" sample_rate=sample_rate)"
567729
]
568730
},
569731
{
@@ -861,4 +1023,4 @@
8611023
},
8621024
"nbformat": 4,
8631025
"nbformat_minor": 5
864-
}
1026+
}

src/silero/silero.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def silero_tts(language='en',
7474
assert language == speaker_language[speaker], f"Incorrect language '{language}' for this speaker, please specify '{speaker_language[speaker]}'"
7575

7676
model_conf = models.tts_models[language][speaker].latest
77-
if '_v2' in speaker or '_v3' in speaker or 'v3_' in speaker or 'v4_' in speaker:
77+
if '_v2' in speaker or '_v3' in speaker or 'v3_' in speaker or 'v4_' in speaker or 'v5_' in speaker:
7878
from torch import package
7979
model_url = model_conf.package
8080
model_dir = os.path.join(os.path.dirname(__file__), "model")

0 commit comments

Comments
 (0)