Skip to content

Commit 88e89ee

Browse files
committed
Fixes to Igbo phonemiser and general updates
1 parent 4e62565 commit 88e89ee

File tree

5 files changed

+49
-10
lines changed

5 files changed

+49
-10
lines changed

MANIFEST.in

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
include cvutils/exporters/coqui.py
22
include cvutils/exporters/__init__.py
3+
include cvutils/exporters/nemo.py
34
include cvutils/data/
45
include cvutils/data/azz
56
include cvutils/data/azz/alphabet.txt
@@ -110,10 +111,16 @@ include cvutils/data/cs/alphabet.txt
110111
include cvutils/data/cs/validate.tsv
111112
include cvutils/data/cs/phon.tsv
112113
include cvutils/data/cs/vocab.tsv
114+
include cvutils/data/mhr
115+
include cvutils/data/mhr/alphabet.txt
116+
include cvutils/data/mhr/validate.tsv
113117
include cvutils/data/el
114118
include cvutils/data/el/alphabet.txt
115119
include cvutils/data/el/validate.tsv
116120
include cvutils/data/el/phon.tsv
121+
include cvutils/data/ti
122+
include cvutils/data/ti/alphabet.txt
123+
include cvutils/data/ti/validate.tsv
117124
include cvutils/data/pt
118125
include cvutils/data/pt/alphabet.txt
119126
include cvutils/data/pt/validate.tsv
@@ -174,6 +181,9 @@ include cvutils/data/az
174181
include cvutils/data/az/alphabet.txt
175182
include cvutils/data/az/validate.tsv
176183
include cvutils/data/az/phon.tsv
184+
include cvutils/data/ast
185+
include cvutils/data/ast/alphabet.txt
186+
include cvutils/data/ast/validate.tsv
177187
include cvutils/data/de
178188
include cvutils/data/de/alphabet.txt
179189
include cvutils/data/de/validate.tsv
@@ -190,6 +200,8 @@ include cvutils/data/ug/alphabet.txt
190200
include cvutils/data/ug/validate.tsv
191201
include cvutils/data/ug/phon.tsv
192202
include cvutils/data/tig
203+
include cvutils/data/tig/alphabet.txt
204+
include cvutils/data/tig/validate.tsv
193205
include cvutils/data/ta
194206
include cvutils/data/ta/alphabet.txt
195207
include cvutils/data/ta/validate.tsv
@@ -236,6 +248,10 @@ include cvutils/data/vi/validate.tsv
236248
include cvutils/data/vi/punct.tsv
237249
include cvutils/data/vi/abbr.tsv
238250
include cvutils/data/vi/phon.tsv
251+
include cvutils/data/skr
252+
include cvutils/data/skr/alphabet.txt
253+
include cvutils/data/skr/validate.tsv
254+
include cvutils/data/skr/phon.tsv
239255
include cvutils/data/pl
240256
include cvutils/data/pl/alphabet.txt
241257
include cvutils/data/pl/validate.tsv
@@ -304,6 +320,9 @@ include cvutils/data/da
304320
include cvutils/data/da/alphabet.txt
305321
include cvutils/data/da/validate.tsv
306322
include cvutils/data/da/vocab.tsv
323+
include cvutils/data/ne-NP
324+
include cvutils/data/ne-NP/alphabet.txt
325+
include cvutils/data/ne-NP/validate.tsv
307326
include cvutils/data/as
308327
include cvutils/data/as/alphabet.txt
309328
include cvutils/data/as/validate.tsv
@@ -328,6 +347,9 @@ include cvutils/data/yo/phon.tsv
328347
include cvutils/data/tos
329348
include cvutils/data/tos/alphabet.txt
330349
include cvutils/data/tos/validate.tsv
350+
include cvutils/data/sc
351+
include cvutils/data/sc/alphabet.txt
352+
include cvutils/data/sc/validate.tsv
331353
include cvutils/data/vot
332354
include cvutils/data/vot/alphabet.txt
333355
include cvutils/data/vot/validate.tsv
@@ -384,6 +406,9 @@ include cvutils/data/eu/punct.tsv
384406
include cvutils/data/eu/abbr.tsv
385407
include cvutils/data/eu/phon.tsv
386408
include cvutils/data/eu/vocab.tsv
409+
include cvutils/data/mrj
410+
include cvutils/data/mrj/alphabet.txt
411+
include cvutils/data/mrj/validate.tsv
387412
include cvutils/data/fr
388413
include cvutils/data/fr/alphabet.txt
389414
include cvutils/data/fr/validate.tsv

cvutils/data/ig/phon.tsv

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
Orth Phon
2-
a a
32
b b
43
ch
54
d d
6-
e e
75
f f
86
g ɡ
97
gb ɡ͡b
108
gb ~ɠ͡ɓ
119
gh ɣ
1210
gw ɡʷ
1311
h ɦ
14-
i i
15-
ɪ̙
1612
j
1713
k k
1814
kp k͡p
@@ -24,16 +20,33 @@ n n
2420
ŋ
2521
nw ŋʷ
2622
ny ɲ
27-
o o
28-
ɔ̙
2923
p p
3024
r ɹ
3125
s s
3226
sh ʃ
3327
t t
34-
u u
35-
ʊ̙
3628
v v
3729
w w
3830
y j
3931
z z
32+
oo
33+
ọọ ɔ̙ː
34+
aa
35+
ee
36+
ii
37+
ịị ɪ̙ː
38+
ịị ɪ̙ː
39+
uu
40+
ụụ ʊ̙ː
41+
ụụ ʊ̙ː
42+
a a
43+
e e
44+
i i
45+
ɪ̙
46+
ɪ̙
47+
o o
48+
ɔ̙
49+
ɔ̙
50+
u u
51+
ʊ̙
52+
ʊ̙

cvutils/data/ig/validate.tsv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
LOWER _ _ _ _
2-
NKFD _ _ _ _
2+
NKFC _ _ _ _
33
REPL " _ 0022 _
44
REPL - _ 002d _
55
REPL . _ 002e _

cvutils/phonemiser.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def lookup_tsv(self, token):
9696
ks = list(self.lkp.keys())
9797
ks.sort(key=lambda x : len(x), reverse=True)
9898
segs = self.maxmatch(token.lower())
99+
print(segs, file=sys.stderr)
99100
op = ''
100101
for seg in segs:
101102
if seg in self.lkp:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
# This call to setup() does all the work
1111
setup(
1212
name="commonvoice-utils",
13-
version="0.2.27",
13+
version="0.2.29",
1414
description="Linguistic processing for languages in Common Voice",
1515
long_description=README,
1616
long_description_content_type="text/markdown",

0 commit comments

Comments
 (0)