Skip to content

Commit 79cc85e

Browse files
authored
Merge pull request #166 from linto-ai/features/transformers
Enable transformers as a backend
2 parents 713626e + 58909dc commit 79cc85e

File tree

4 files changed

+554
-28
lines changed

4 files changed

+554
-28
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@
5050
},
5151
include_package_data=True,
5252
extras_require={
53-
'dev': ['matplotlib', 'transformers'],
53+
'dev': ['matplotlib==3.7.4', 'transformers'],
5454
'vad_silero': ['onnxruntime', 'torchaudio'],
5555
'vad_auditok': ['auditok'],
5656
'test': ['jsonschema'],

tests/json_schema.json

Lines changed: 102 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,108 @@
3939
},
4040
"language": {"type": "string"},
4141
"language_probs": {
42-
"type": "array",
43-
"items": {"type": "number", "minimum":0, "maximum":1}
42+
"type": "object",
43+
"properties": {
44+
"en": {"type": "number", "minimum":0, "maximum":1},
45+
"zh": {"type": "number", "minimum":0, "maximum":1},
46+
"de": {"type": "number", "minimum":0, "maximum":1},
47+
"es": {"type": "number", "minimum":0, "maximum":1},
48+
"ru": {"type": "number", "minimum":0, "maximum":1},
49+
"ko": {"type": "number", "minimum":0, "maximum":1},
50+
"fr": {"type": "number", "minimum":0, "maximum":1},
51+
"ja": {"type": "number", "minimum":0, "maximum":1},
52+
"pt": {"type": "number", "minimum":0, "maximum":1},
53+
"tr": {"type": "number", "minimum":0, "maximum":1},
54+
"pl": {"type": "number", "minimum":0, "maximum":1},
55+
"ca": {"type": "number", "minimum":0, "maximum":1},
56+
"nl": {"type": "number", "minimum":0, "maximum":1},
57+
"ar": {"type": "number", "minimum":0, "maximum":1},
58+
"sv": {"type": "number", "minimum":0, "maximum":1},
59+
"it": {"type": "number", "minimum":0, "maximum":1},
60+
"id": {"type": "number", "minimum":0, "maximum":1},
61+
"hi": {"type": "number", "minimum":0, "maximum":1},
62+
"fi": {"type": "number", "minimum":0, "maximum":1},
63+
"vi": {"type": "number", "minimum":0, "maximum":1},
64+
"he": {"type": "number", "minimum":0, "maximum":1},
65+
"uk": {"type": "number", "minimum":0, "maximum":1},
66+
"el": {"type": "number", "minimum":0, "maximum":1},
67+
"ms": {"type": "number", "minimum":0, "maximum":1},
68+
"cs": {"type": "number", "minimum":0, "maximum":1},
69+
"ro": {"type": "number", "minimum":0, "maximum":1},
70+
"da": {"type": "number", "minimum":0, "maximum":1},
71+
"hu": {"type": "number", "minimum":0, "maximum":1},
72+
"ta": {"type": "number", "minimum":0, "maximum":1},
73+
"no": {"type": "number", "minimum":0, "maximum":1},
74+
"th": {"type": "number", "minimum":0, "maximum":1},
75+
"ur": {"type": "number", "minimum":0, "maximum":1},
76+
"hr": {"type": "number", "minimum":0, "maximum":1},
77+
"bg": {"type": "number", "minimum":0, "maximum":1},
78+
"lt": {"type": "number", "minimum":0, "maximum":1},
79+
"la": {"type": "number", "minimum":0, "maximum":1},
80+
"mi": {"type": "number", "minimum":0, "maximum":1},
81+
"ml": {"type": "number", "minimum":0, "maximum":1},
82+
"cy": {"type": "number", "minimum":0, "maximum":1},
83+
"sk": {"type": "number", "minimum":0, "maximum":1},
84+
"te": {"type": "number", "minimum":0, "maximum":1},
85+
"fa": {"type": "number", "minimum":0, "maximum":1},
86+
"lv": {"type": "number", "minimum":0, "maximum":1},
87+
"bn": {"type": "number", "minimum":0, "maximum":1},
88+
"sr": {"type": "number", "minimum":0, "maximum":1},
89+
"az": {"type": "number", "minimum":0, "maximum":1},
90+
"sl": {"type": "number", "minimum":0, "maximum":1},
91+
"kn": {"type": "number", "minimum":0, "maximum":1},
92+
"et": {"type": "number", "minimum":0, "maximum":1},
93+
"mk": {"type": "number", "minimum":0, "maximum":1},
94+
"br": {"type": "number", "minimum":0, "maximum":1},
95+
"eu": {"type": "number", "minimum":0, "maximum":1},
96+
"is": {"type": "number", "minimum":0, "maximum":1},
97+
"hy": {"type": "number", "minimum":0, "maximum":1},
98+
"ne": {"type": "number", "minimum":0, "maximum":1},
99+
"mn": {"type": "number", "minimum":0, "maximum":1},
100+
"bs": {"type": "number", "minimum":0, "maximum":1},
101+
"kk": {"type": "number", "minimum":0, "maximum":1},
102+
"sq": {"type": "number", "minimum":0, "maximum":1},
103+
"sw": {"type": "number", "minimum":0, "maximum":1},
104+
"gl": {"type": "number", "minimum":0, "maximum":1},
105+
"mr": {"type": "number", "minimum":0, "maximum":1},
106+
"pa": {"type": "number", "minimum":0, "maximum":1},
107+
"si": {"type": "number", "minimum":0, "maximum":1},
108+
"km": {"type": "number", "minimum":0, "maximum":1},
109+
"sn": {"type": "number", "minimum":0, "maximum":1},
110+
"yo": {"type": "number", "minimum":0, "maximum":1},
111+
"so": {"type": "number", "minimum":0, "maximum":1},
112+
"af": {"type": "number", "minimum":0, "maximum":1},
113+
"oc": {"type": "number", "minimum":0, "maximum":1},
114+
"ka": {"type": "number", "minimum":0, "maximum":1},
115+
"be": {"type": "number", "minimum":0, "maximum":1},
116+
"tg": {"type": "number", "minimum":0, "maximum":1},
117+
"sd": {"type": "number", "minimum":0, "maximum":1},
118+
"gu": {"type": "number", "minimum":0, "maximum":1},
119+
"am": {"type": "number", "minimum":0, "maximum":1},
120+
"yi": {"type": "number", "minimum":0, "maximum":1},
121+
"lo": {"type": "number", "minimum":0, "maximum":1},
122+
"uz": {"type": "number", "minimum":0, "maximum":1},
123+
"fo": {"type": "number", "minimum":0, "maximum":1},
124+
"ht": {"type": "number", "minimum":0, "maximum":1},
125+
"ps": {"type": "number", "minimum":0, "maximum":1},
126+
"tk": {"type": "number", "minimum":0, "maximum":1},
127+
"nn": {"type": "number", "minimum":0, "maximum":1},
128+
"mt": {"type": "number", "minimum":0, "maximum":1},
129+
"sa": {"type": "number", "minimum":0, "maximum":1},
130+
"lb": {"type": "number", "minimum":0, "maximum":1},
131+
"my": {"type": "number", "minimum":0, "maximum":1},
132+
"bo": {"type": "number", "minimum":0, "maximum":1},
133+
"tl": {"type": "number", "minimum":0, "maximum":1},
134+
"mg": {"type": "number", "minimum":0, "maximum":1},
135+
"as": {"type": "number", "minimum":0, "maximum":1},
136+
"tt": {"type": "number", "minimum":0, "maximum":1},
137+
"haw": {"type": "number", "minimum":0, "maximum":1},
138+
"ln": {"type": "number", "minimum":0, "maximum":1},
139+
"ha": {"type": "number", "minimum":0, "maximum":1},
140+
"ba": {"type": "number", "minimum":0, "maximum":1},
141+
"jw": {"type": "number", "minimum":0, "maximum":1},
142+
"su": {"type": "number", "minimum":0, "maximum":1}
143+
}
44144
}
45145
}
46146
}

tests/test_transcribe.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -661,15 +661,17 @@ def test_hugging_face_model(self):
661661
)
662662

663663
import tempfile
664-
from transformers import WhisperForConditionalGeneration
664+
from transformers import WhisperForConditionalGeneration, WhisperProcessor, GenerationConfig
665665
tempfolder = os.path.join(tempfile.gettempdir(), "tmp_whisper-tiny-french-cased")
666666

667667
for safe_serialization in False, True,:
668668
for max_shard_size in "100MB", "10GB", :
669669
shutil.rmtree(tempfolder, ignore_errors=True)
670670
model = WhisperForConditionalGeneration.from_pretrained("qanastek/whisper-tiny-french-cased")
671+
processor = WhisperProcessor.from_pretrained("qanastek/whisper-tiny-french-cased")
671672
try:
672673
model.save_pretrained(tempfolder, safe_serialization=safe_serialization, max_shard_size=max_shard_size)
674+
processor.save_pretrained(tempfolder)
673675
self._test_cli_(
674676
["--model", tempfolder, "--verbose", "True"],
675677
"verbose", files=["bonjour.wav"], extensions=None,

0 commit comments

Comments
 (0)