Skip to content

Commit 9e51888

Browse files
committed
decomposed palatalization
1 parent 7ae9d17 commit 9e51888

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

Preprocessing/TextFrontend.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -355,6 +355,7 @@ def postprocess_phoneme_string(self, phoneme_string, for_feature_extraction, inc
355355
("ɬ", "s"), # lateral
356356
("ɮ", "z"), # lateral
357357
('ɺ', 'ɾ'), # lateral
358+
('ʲ', 'j'), # decomposed palatalization
358359
('\u02CC', ""), # secondary stress
359360
('\u030B', "˥"),
360361
('\u0301', "˦"),
@@ -380,7 +381,7 @@ def postprocess_phoneme_string(self, phoneme_string, for_feature_extraction, inc
380381
]
381382
unsupported_ipa_characters = {'̹', '̙', '̞', '̯', '̤', '̪', '̩', '̠', '̟', 'ꜜ',
382383
'̬', '̽', 'ʰ', '|', '̝', '•', 'ˠ', '↘',
383-
'‖', '̰', '‿', 'ᷝ', '̈', 'ᷠ', '̜', 'ʷ', 'ʲ',
384+
'‖', '̰', '‿', 'ᷝ', '̈', 'ᷠ', '̜', 'ʷ',
384385
'̚', '↗', 'ꜛ', '̻', '̥', 'ˁ', '̘', '͡', '̺'}
385386
# TODO support more of these. Problem: bridge over to aligner ID lookups after modifying the feature vector
386387
# https://en.wikipedia.org/wiki/IPA_number

0 commit comments

Comments
 (0)