Decompose palatalization: map 'ʲ' to 'j' instead of listing it as an unsupported IPA character

Flux9665 · Flux9665 · commit 9e51888c92d1 · 2023-04-06T17:00:55.000+02:00
diff --git a/Preprocessing/TextFrontend.py b/Preprocessing/TextFrontend.py
@@ -355,6 +355,7 @@ def postprocess_phoneme_string(self, phoneme_string, for_feature_extraction, inc
             ("ɬ", "s"),  # lateral
             ("ɮ", "z"),  # lateral
             ('ɺ', 'ɾ'),  # lateral
+            ('ʲ', 'j'),  # decomposed palatalization
             ('\u02CC', ""),  # secondary stress
             ('\u030B', "˥"),
             ('\u0301', "˦"),
@@ -380,7 +381,7 @@ def postprocess_phoneme_string(self, phoneme_string, for_feature_extraction, inc
         ]
         unsupported_ipa_characters = {'̹', '̙', '̞', '̯', '̤', '̪', '̩', '̠', '̟', 'ꜜ',
                                       '̬', '̽', 'ʰ', '|', '̝', '•', 'ˠ', '↘',
-                                      '‖', '̰', '‿', 'ᷝ', '̈', 'ᷠ', '̜', 'ʷ', 'ʲ',
+                                      '‖', '̰', '‿', 'ᷝ', '̈', 'ᷠ', '̜', 'ʷ',
                                       '̚', '↗', 'ꜛ', '̻', '̥', 'ˁ', '̘', '͡', '̺'}
         # TODO support more of these. Problem: bridge over to aligner ID lookups after modifying the feature vector
         #  https://en.wikipedia.org/wiki/IPA_number