8181 " print(f'Available models for {lang}: {_models}')"
8282 ]
8383 },
84+ {
85+ "cell_type" : " markdown" ,
86+ "id" : " 8b37b3d0" ,
87+ "metadata" : {},
88+ "source" : [
89+ " ## V5"
90+ ]
91+ },
92+ {
93+ "cell_type" : " code" ,
94+ "execution_count" : null ,
95+ "id" : " 71bebc98" ,
96+ "metadata" : {},
97+ "outputs" : [],
98+ "source" : [
99+ " import torch\n " ,
100+ " \n " ,
101+ " language = 'ru'\n " ,
102+ " model_id = 'v5_ru'\n " ,
103+ " device = torch.device('cpu')\n " ,
104+ " \n " ,
105+ " model, example_text = torch.hub.load(repo_or_dir='snakers4/silero-models',\n " ,
106+ " model='silero_tts',\n " ,
107+ " language=language,\n " ,
108+ " speaker=model_id)\n " ,
109+ " model.to(device) # gpu or cpu"
110+ ]
111+ },
112+ {
113+ "cell_type" : " markdown" ,
114+ "id" : " 4782713d" ,
115+ "metadata" : {},
116+ "source" : [
117+ " ### Speakers"
118+ ]
119+ },
120+ {
121+ "cell_type" : " code" ,
122+ "execution_count" : null ,
123+ "id" : " 8e8afb06" ,
124+ "metadata" : {},
125+ "outputs" : [],
126+ "source" : [
127+ " model.speakers"
128+ ]
129+ },
130+ {
131+ "cell_type" : " markdown" ,
132+ "id" : " 3b7cf618" ,
133+ "metadata" : {},
134+ "source" : [
135+ " ### Text"
136+ ]
137+ },
138+ {
139+ "cell_type" : " code" ,
140+ "execution_count" : null ,
141+ "id" : " 6875417c" ,
142+ "metadata" : {},
143+ "outputs" : [],
144+ "source" : [
145+ " sample_rate = 48000\n " ,
146+ " speaker = 'xenia'\n " ,
147+ " put_accent=True\n " ,
148+ " put_yo=True\n " ,
149+ " put_stress_homo=True\n " ,
150+ " put_yo_homo=True\n " ,
151+ " \n " ,
152+ " example_text = 'Меня зовут Лева Королев. Я из готов. И я уже готов открыть все ваши замки любой сложности!'\n " ,
153+ " \n " ,
154+ " audio = model.apply_tts(text=example_text,\n " ,
155+ " speaker=speaker,\n " ,
156+ " sample_rate=sample_rate,\n " ,
157+ " put_accent=put_accent,\n " ,
158+ " put_yo=put_yo,\n " ,
159+ " put_stress_homo=put_stress_homo,\n " ,
160+ " put_yo_homo=put_yo_homo)\n " ,
161+ " print(example_text)\n " ,
162+ " display(Audio(audio, rate=sample_rate))"
163+ ]
164+ },
165+ {
166+ "cell_type" : " markdown" ,
167+ "id" : " e0ce7df5" ,
168+ "metadata" : {},
169+ "source" : [
170+ " ### SSML"
171+ ]
172+ },
173+ {
174+ "cell_type" : " code" ,
175+ "execution_count" : null ,
176+ "id" : " e9e159a2" ,
177+ "metadata" : {},
178+ "outputs" : [],
179+ "source" : [
180+ " ssml_sample = \"\"\"\n " ,
181+ " <speak>\n " ,
182+ " <p>\n " ,
183+ " Когда я просыпаюсь, <prosody rate=\" x-slow\" >я говорю довольно медленно</prosody>.\n " ,
184+ " Пот+ом я начинаю говорить своим обычным голосом,\n " ,
185+ " <prosody pitch=\" x-high\" > а могу говорить тоном выше </prosody>,\n " ,
186+ " или <prosody pitch=\" x-low\" >наоборот, ниже</prosody>.\n " ,
187+ " Пот+ом, если повезет – <prosody rate=\" fast\" >я могу говорить и довольно быстро.</prosody>\n " ,
188+ " А еще я умею делать паузы любой длины, например, две секунды <break time=\" 2000ms\" />.\n " ,
189+ " <p>\n " ,
190+ " Также я умею делать паузы между параграфами.\n " ,
191+ " </p>\n " ,
192+ " <p>\n " ,
193+ " <s>И также я умею делать паузы между предложениями</s>\n " ,
194+ " <s>Вот например как сейчас</s>\n " ,
195+ " </p>\n " ,
196+ " </p>\n " ,
197+ " </speak>\n " ,
198+ " \"\"\"\n " ,
199+ " \n " ,
200+ " sample_rate = 48000\n " ,
201+ " speaker = 'xenia' \n " ,
202+ " audio = model.apply_tts(ssml_text=ssml_sample,\n " ,
203+ " speaker=speaker,\n " ,
204+ " sample_rate=sample_rate)\n " ,
205+ " display(Audio(audio, rate=sample_rate))"
206+ ]
207+ },
84208 {
85209 "cell_type" : " markdown" ,
86210 "id" : " aebc6429" ,
563687 "source" : [
564688 " #@title Install dependencies\n " ,
565689 " \n " ,
566- " !pip install -q torch==1.10"
690+ " !pip install -q torch==1.12"
691+ ]
692+ },
693+ {
694+ "cell_type" : " markdown" ,
695+ "id" : " 20cf87d9" ,
696+ "metadata" : {},
697+ "source" : [
698+ " ## V5"
699+ ]
700+ },
701+ {
702+ "cell_type" : " code" ,
703+ "execution_count" : null ,
704+ "id" : " 832b0ceb" ,
705+ "metadata" : {},
706+ "outputs" : [],
707+ "source" : [
708+ " import os\n " ,
709+ " import torch\n " ,
710+ " \n " ,
711+ " device = torch.device('cpu')\n " ,
712+ " torch.set_num_threads(4)\n " ,
713+ " local_file = 'model.pt'\n " ,
714+ " \n " ,
715+ " if not os.path.isfile(local_file):\n " ,
716+ " torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v5_ru.pt',\n " ,
717+ " local_file) \n " ,
718+ " \n " ,
719+ " model = torch.package.PackageImporter(local_file).load_pickle(\" tts_models\" , \" model\" )\n " ,
720+ " model.to(device)\n " ,
721+ " \n " ,
722+ " example_text = 'Меня зовут Лева Королев. Я из готов. И я уже готов открыть все ваши замки любой сложности!'\n " ,
723+ " sample_rate = 48000\n " ,
724+ " speaker='baya'\n " ,
725+ " \n " ,
726+ " audio_paths = model.save_wav(text=example_text,\n " ,
727+ " speaker=speaker,\n " ,
728+ " sample_rate=sample_rate)"
567729 ]
568730 },
569731 {
8611023 },
8621024 "nbformat" : 4 ,
8631025 "nbformat_minor" : 5
864- }
1026+ }
0 commit comments