Skip to content

Commit bdee5d3

Browse files
authored
Merge pull request #175 from linto-ai/features/explicit_vad
Support explicit start/end timestamps of speech activity detection (VAD) given by the user + add vad segments in the output
2 parents 79cc85e + f2f17bd commit bdee5d3

File tree

64 files changed

+9053
-8363
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+9053
-8363
lines changed

tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json

Lines changed: 212 additions & 212 deletions
Large diffs are not rendered by default.

tests/expected/corner_cases/issue24_empty.wav.words.json

Lines changed: 76 additions & 76 deletions
Large diffs are not rendered by default.

tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
50664
2323
],
2424
"temperature": 0.0,
25-
"avg_logprob": -0.3579153639546941,
25+
"avg_logprob": -0.35854845368460325,
2626
"compression_ratio": 1.425414364640884,
27-
"no_speech_prob": 0.24429768323898315,
27+
"no_speech_prob": 0.24429647624492645,
2828
"confidence": 0.452,
2929
"words": [
3030
{
@@ -37,7 +37,7 @@
3737
"text": "my",
3838
"start": 1.94,
3939
"end": 2.7,
40-
"confidence": 0.368
40+
"confidence": 0.367
4141
},
4242
{
4343
"text": "glorious",
@@ -92,9 +92,9 @@
9292
50814
9393
],
9494
"temperature": 0.0,
95-
"avg_logprob": -0.3579153639546941,
95+
"avg_logprob": -0.35854845368460325,
9696
"compression_ratio": 1.425414364640884,
97-
"no_speech_prob": 0.24429768323898315,
97+
"no_speech_prob": 0.24429647624492645,
9898
"confidence": 0.68,
9999
"words": [
100100
{
@@ -113,7 +113,7 @@
113113
"text": "okay.",
114114
"start": 7.02,
115115
"end": 7.64,
116-
"confidence": 0.605
116+
"confidence": 0.604
117117
},
118118
{
119119
"text": "I",
@@ -160,9 +160,9 @@
160160
50964
161161
],
162162
"temperature": 0.0,
163-
"avg_logprob": -0.3579153639546941,
163+
"avg_logprob": -0.35854845368460325,
164164
"compression_ratio": 1.425414364640884,
165-
"no_speech_prob": 0.24429768323898315,
165+
"no_speech_prob": 0.24429647624492645,
166166
"confidence": 0.571,
167167
"words": [
168168
{
@@ -261,16 +261,16 @@
261261
51014
262262
],
263263
"temperature": 0.0,
264-
"avg_logprob": -0.3579153639546941,
264+
"avg_logprob": -0.35854845368460325,
265265
"compression_ratio": 1.425414364640884,
266-
"no_speech_prob": 0.24429768323898315,
266+
"no_speech_prob": 0.24429647624492645,
267267
"confidence": 0.333,
268268
"words": [
269269
{
270270
"text": "Of",
271271
"start": 11.5,
272272
"end": 12.1,
273-
"confidence": 0.123
273+
"confidence": 0.124
274274
},
275275
{
276276
"text": "course",
@@ -312,10 +312,10 @@
312312
51114
313313
],
314314
"temperature": 0.0,
315-
"avg_logprob": -0.3579153639546941,
315+
"avg_logprob": -0.35854845368460325,
316316
"compression_ratio": 1.425414364640884,
317-
"no_speech_prob": 0.24429768323898315,
318-
"confidence": 0.649,
317+
"no_speech_prob": 0.24429647624492645,
318+
"confidence": 0.65,
319319
"words": [
320320
{
321321
"text": "No,",
@@ -381,9 +381,9 @@
381381
51214
382382
],
383383
"temperature": 0.0,
384-
"avg_logprob": -0.3579153639546941,
384+
"avg_logprob": -0.35854845368460325,
385385
"compression_ratio": 1.425414364640884,
386-
"no_speech_prob": 0.24429768323898315,
386+
"no_speech_prob": 0.24429647624492645,
387387
"confidence": 0.69,
388388
"words": [
389389
{
@@ -477,9 +477,9 @@
477477
51314
478478
],
479479
"temperature": 0.0,
480-
"avg_logprob": -0.3579153639546941,
480+
"avg_logprob": -0.35854845368460325,
481481
"compression_ratio": 1.425414364640884,
482-
"no_speech_prob": 0.24429768323898315,
482+
"no_speech_prob": 0.24429647624492645,
483483
"confidence": 0.529,
484484
"words": [
485485
{
@@ -522,9 +522,9 @@
522522
51364
523523
],
524524
"temperature": 0.0,
525-
"avg_logprob": -0.3579153639546941,
525+
"avg_logprob": -0.35854845368460325,
526526
"compression_ratio": 1.425414364640884,
527-
"no_speech_prob": 0.24429768323898315,
527+
"no_speech_prob": 0.24429647624492645,
528528
"confidence": 0.02,
529529
"words": [
530530
{

0 commit comments

Comments
 (0)