Skip to content

Commit 1a74996

Browse files
authored
Merge pull request #543 from snakers4/adamnsandle
Adamnsandle
2 parents 87451b0 + d23867d commit 1a74996

File tree

2 files changed

+43
-35
lines changed

2 files changed

+43
-35
lines changed

examples/colab_record_example.ipynb

Lines changed: 23 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
},
1818
"outputs": [],
1919
"source": [
20+
"#!apt install ffmpeg\n",
2021
"!pip -q install pydub\n",
2122
"from google.colab import output\n",
2223
"from base64 import b64decode, b64encode\n",
@@ -37,13 +38,12 @@
3738
" model='silero_vad',\n",
3839
" force_reload=True)\n",
3940
"\n",
40-
"def int2float(sound):\n",
41-
" abs_max = np.abs(sound).max()\n",
42-
" sound = sound.astype('float32')\n",
43-
" if abs_max > 0:\n",
44-
" sound *= 1/32768\n",
45-
" sound = sound.squeeze()\n",
46-
" return sound\n",
41+
"def int2float(audio):\n",
42+
" samples = audio.get_array_of_samples()\n",
43+
" new_sound = audio._spawn(samples)\n",
44+
" arr = np.array(samples).astype(np.float32)\n",
45+
" arr = arr / np.abs(arr).max()\n",
46+
" return arr\n",
4747
"\n",
4848
"AUDIO_HTML = \"\"\"\n",
4949
"<script>\n",
@@ -68,18 +68,18 @@
6868
" //bitsPerSecond: 8000, //chrome seems to ignore, always 48k\n",
6969
" mimeType : 'audio/webm;codecs=opus'\n",
7070
" //mimeType : 'audio/webm;codecs=pcm'\n",
71-
" }; \n",
71+
" };\n",
7272
" //recorder = new MediaRecorder(stream, options);\n",
7373
" recorder = new MediaRecorder(stream);\n",
74-
" recorder.ondataavailable = function(e) { \n",
74+
" recorder.ondataavailable = function(e) {\n",
7575
" var url = URL.createObjectURL(e.data);\n",
7676
" // var preview = document.createElement('audio');\n",
7777
" // preview.controls = true;\n",
7878
" // preview.src = url;\n",
7979
" // document.body.appendChild(preview);\n",
8080
"\n",
8181
" reader = new FileReader();\n",
82-
" reader.readAsDataURL(e.data); \n",
82+
" reader.readAsDataURL(e.data);\n",
8383
" reader.onloadend = function() {\n",
8484
" base64data = reader.result;\n",
8585
" //console.log(\"Inside FileReader:\" + base64data);\n",
@@ -121,7 +121,7 @@
121121
"\n",
122122
"}\n",
123123
"});\n",
124-
" \n",
124+
"\n",
125125
"</script>\n",
126126
"\"\"\"\n",
127127
"\n",
@@ -133,8 +133,8 @@
133133
" audio.export('test.mp3', format='mp3')\n",
134134
" audio = audio.set_channels(1)\n",
135135
" audio = audio.set_frame_rate(16000)\n",
136-
" audio_float = int2float(np.array(audio.get_array_of_samples()))\n",
137-
" audio_tens = torch.tensor(audio_float )\n",
136+
" audio_float = int2float(audio)\n",
137+
" audio_tens = torch.tensor(audio_float)\n",
138138
" return audio_tens\n",
139139
"\n",
140140
"def make_animation(probs, audio_duration, interval=40):\n",
@@ -154,35 +154,29 @@
154154
" def animate(i):\n",
155155
" x = i * interval / 1000 - 0.04\n",
156156
" y = np.linspace(0, 1.02, 2)\n",
157-
" \n",
157+
"\n",
158158
" line.set_data(x, y)\n",
159159
" line.set_color('#990000')\n",
160160
" return line,\n",
161+
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=int(audio_duration / (interval / 1000)))\n",
161162
"\n",
162-
" anim = FuncAnimation(fig, animate, init_func=init, interval=interval, save_count=audio_duration / (interval / 1000))\n",
163-
"\n",
164-
" f = r\"animation.mp4\" \n",
165-
" writervideo = FFMpegWriter(fps=1000/interval) \n",
163+
" f = r\"animation.mp4\"\n",
164+
" writervideo = FFMpegWriter(fps=1000/interval)\n",
166165
" anim.save(f, writer=writervideo)\n",
167166
" plt.close('all')\n",
168167
"\n",
169-
"def combine_audio(vidname, audname, outname, fps=25): \n",
168+
"def combine_audio(vidname, audname, outname, fps=25):\n",
170169
" my_clip = mpe.VideoFileClip(vidname, verbose=False)\n",
171170
" audio_background = mpe.AudioFileClip(audname)\n",
172171
" final_clip = my_clip.set_audio(audio_background)\n",
173172
" final_clip.write_videofile(outname,fps=fps,verbose=False)\n",
174173
"\n",
175174
"def record_make_animation():\n",
176175
" tensor = record()\n",
177-
"\n",
178176
" print('Calculating probabilities...')\n",
179177
" speech_probs = []\n",
180178
" window_size_samples = 512\n",
181-
" for i in range(0, len(tensor), window_size_samples):\n",
182-
" if len(tensor[i: i+ window_size_samples]) < window_size_samples:\n",
183-
" break\n",
184-
" speech_prob = model(tensor[i: i+ window_size_samples], 16000).item()\n",
185-
" speech_probs.append(speech_prob)\n",
179+
" speech_probs = model.audio_forward(tensor, sr=16000)[0].tolist()\n",
186180
" model.reset_states()\n",
187181
" print('Making animation...')\n",
188182
" make_animation(speech_probs, len(tensor) / 16000)\n",
@@ -196,7 +190,9 @@
196190
" <video width=800 controls>\n",
197191
" <source src=\"%s\" type=\"video/mp4\">\n",
198192
" </video>\n",
199-
" \"\"\" % data_url))"
193+
" \"\"\" % data_url))\n",
194+
"\n",
195+
" return speech_probs"
200196
]
201197
},
202198
{
@@ -216,7 +212,7 @@
216212
},
217213
"outputs": [],
218214
"source": [
219-
"record_make_animation()"
215+
"speech_probs = record_make_animation()"
220216
]
221217
}
222218
],

examples/parallel_example.ipynb

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
{
22
"cells": [
33
{
4-
"attachments": {},
54
"cell_type": "markdown",
65
"metadata": {},
76
"source": [
@@ -18,17 +17,19 @@
1817
"SAMPLING_RATE = 16000\n",
1918
"import torch\n",
2019
"from pprint import pprint\n",
20+
"import time\n",
21+
"import shutil\n",
2122
"\n",
2223
"torch.set_num_threads(1)\n",
2324
"NUM_PROCESS=4 # set to the number of CPU cores in the machine\n",
2425
"NUM_COPIES=8\n",
2526
"# download wav files, make multiple copies\n",
26-
"for idx in range(NUM_COPIES):\n",
27-
" torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', f\"en_example{idx}.wav\")\n"
27+
"torch.hub.download_url_to_file('https://models.silero.ai/vad_models/en.wav', f\"en_example0.wav\")\n",
28+
"for idx in range(NUM_COPIES-1):\n",
29+
" shutil.copy(f\"en_example0.wav\", f\"en_example{idx+1}.wav\")"
2830
]
2931
},
3032
{
31-
"attachments": {},
3233
"cell_type": "markdown",
3334
"metadata": {},
3435
"source": [
@@ -54,7 +55,6 @@
5455
]
5556
},
5657
{
57-
"attachments": {},
5858
"cell_type": "markdown",
5959
"metadata": {},
6060
"source": [
@@ -99,7 +99,6 @@
9999
]
100100
},
101101
{
102-
"attachments": {},
103102
"cell_type": "markdown",
104103
"metadata": {},
105104
"source": [
@@ -127,7 +126,7 @@
127126
],
128127
"metadata": {
129128
"kernelspec": {
130-
"display_name": "diarization",
129+
"display_name": "Python 3 (ipykernel)",
131130
"language": "python",
132131
"name": "python3"
133132
},
@@ -141,7 +140,20 @@
141140
"name": "python",
142141
"nbconvert_exporter": "python",
143142
"pygments_lexer": "ipython3",
144-
"version": "3.9.15"
143+
"version": "3.10.14"
144+
},
145+
"toc": {
146+
"base_numbering": 1,
147+
"nav_menu": {},
148+
"number_sections": true,
149+
"sideBar": true,
150+
"skip_h1_title": false,
151+
"title_cell": "Table of Contents",
152+
"title_sidebar": "Contents",
153+
"toc_cell": false,
154+
"toc_position": {},
155+
"toc_section_display": true,
156+
"toc_window_display": false
145157
}
146158
},
147159
"nbformat": 4,

0 commit comments

Comments
 (0)