-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtest_classifier_system.py
More file actions
402 lines (299 loc) · 15.4 KB
/
test_classifier_system.py
File metadata and controls
402 lines (299 loc) · 15.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
"""
Tests for the Myanmar Classifier System.
Tests cover:
1. Classifier recognition and categories
2. Numeral pattern detection
3. Classifier typo correction
4. Classifier-noun agreement validation
5. Integration with grammar rules
"""
from myspellchecker.grammar.checkers.classifier import ClassifierChecker
class TestClassifierConstants:
"""Test classifier constants loaded in ClassifierChecker."""
def test_myanmar_numerals_complete(self):
"""All Myanmar digit numerals (၀-၉) should be defined."""
checker = ClassifierChecker()
assert len(checker.numerals) == 10
# Check each digit
expected = {"၀", "၁", "၂", "၃", "၄", "၅", "၆", "၇", "၈", "၉"}
assert checker.numerals == expected
def test_myanmar_numeral_words_complete(self):
"""Numeral words (တစ်, နှစ်, etc.) should be defined."""
checker = ClassifierChecker()
assert "တစ်" in checker.numeral_words # one
assert "နှစ်" in checker.numeral_words # two
assert "သုံး" in checker.numeral_words # three
assert "ဆယ်" in checker.numeral_words # ten
assert "ရာ" in checker.numeral_words # hundred
assert "ထောင်" in checker.numeral_words # thousand
def test_classifier_map_has_50_plus_entries(self):
"""Classifier map should have 50+ classifiers."""
checker = ClassifierChecker()
# We might not have all 50 in yaml yet, but check significant number
assert len(checker.classifiers) >= 20
def test_classifier_map_structure(self):
"""Each classifier should have (category, description, examples)."""
checker = ClassifierChecker()
for classifier, info in checker.classifier_map.items():
assert isinstance(info, tuple), f"Invalid structure for {classifier}"
assert len(info) == 3, f"Expected 3 elements for {classifier}"
category, description, examples = info
assert isinstance(category, str)
assert isinstance(description, str)
assert isinstance(examples, list)
def test_people_classifiers_defined(self):
"""People classifiers should be properly defined."""
checker = ClassifierChecker()
assert "ယောက်" in checker.classifiers
assert "ဦး" in checker.classifiers
assert "ပါး" in checker.classifiers
# Check category
assert checker.get_classifier_category("ယောက်") == "people"
assert checker.get_classifier_category("ဦး") == "people"
def test_animal_classifier_defined(self):
"""Animal classifier should be defined."""
checker = ClassifierChecker()
assert "ကောင်" in checker.classifiers
assert checker.get_classifier_category("ကောင်") == "animals"
def test_object_classifiers_defined(self):
"""Object classifiers should be defined."""
checker = ClassifierChecker()
assert "ခု" in checker.classifiers
assert "လုံး" in checker.classifiers
assert "စင်း" in checker.classifiers
def test_book_classifiers_defined(self):
"""Book/document classifiers should be defined."""
checker = ClassifierChecker()
assert "အုပ်" in checker.classifiers
assert "စောင်" in checker.classifiers
assert "ပုဒ်" in checker.classifiers
def test_time_classifiers_defined(self):
"""Time classifiers should be defined."""
checker = ClassifierChecker()
assert "ခါ" in checker.classifiers
assert "ကြိမ်" in checker.classifiers
assert "နေ့" in checker.classifiers
def test_classifier_typo_map_defined(self):
"""Common classifier typos should be mapped."""
checker = ClassifierChecker()
# Verify via functionality
result = checker.check_classifier_typo("ယေက်")
assert result is not None
assert result[0] == "ယောက်"
class TestClassifierChecker:
"""Test the ClassifierChecker class."""
def test_is_numeral_digit(self):
"""Digit numerals should be recognized."""
checker = ClassifierChecker()
assert checker.is_numeral("၃")
assert checker.is_numeral("၁၀")
assert checker.is_numeral("၁၂၃")
def test_is_numeral_word(self):
"""Word numerals should be recognized."""
checker = ClassifierChecker()
assert checker.is_numeral("တစ်")
assert checker.is_numeral("နှစ်")
assert checker.is_numeral("သုံး")
assert checker.is_numeral("ဆယ်")
def test_is_not_numeral(self):
"""Non-numerals should not be recognized as numerals."""
checker = ClassifierChecker()
assert not checker.is_numeral("လူ")
assert not checker.is_numeral("စာအုပ်")
assert not checker.is_numeral("ကား")
def test_is_classifier(self):
"""Valid classifiers should be recognized."""
checker = ClassifierChecker()
assert checker.is_classifier("ယောက်")
assert checker.is_classifier("ကောင်")
assert checker.is_classifier("အုပ်")
assert checker.is_classifier("ခု")
def test_is_not_classifier(self):
"""Non-classifiers should not be recognized."""
checker = ClassifierChecker()
assert not checker.is_classifier("လူ")
assert not checker.is_classifier("စားပြီ")
assert not checker.is_classifier("တယ်")
def test_get_classifier_category(self):
"""Classifier categories should be returned correctly."""
checker = ClassifierChecker()
assert checker.get_classifier_category("ယောက်") == "people"
assert checker.get_classifier_category("ကောင်") == "animals"
assert checker.get_classifier_category("အုပ်") == "books"
# Note: "လုံး" category might be "objects" or "round" depending on yaml structure
cat = checker.get_classifier_category("လုံး")
assert cat in ["objects", "round", "spherical_objects"]
def test_check_classifier_typo_known(self):
"""Known typos should be corrected."""
checker = ClassifierChecker()
result = checker.check_classifier_typo("ယေက်")
assert result is not None
correction, confidence = result
assert correction == "ယောက်"
assert confidence >= 0.80
def test_check_classifier_typo_contextual_override_koing(self):
"""Numeral-adjacent classifier typo ကိုင် should map to ကောင်."""
checker = ClassifierChecker()
result = checker.check_classifier_typo("ကိုင်")
assert result is not None
correction, confidence = result
assert correction == "ကောင်"
assert confidence >= 0.90
def test_check_classifier_typo_missing_visarga(self):
"""Missing visarga should be detected."""
checker = ClassifierChecker()
# "လုံ" missing visarga should suggest "လုံး"
# Note: Need to ensure "လုံး" is in classifiers list
if "လုံး" in checker.classifiers:
result = checker.check_classifier_typo("လုံ")
if result: # Only assert if it finds it (logic depends on heuristic)
correction, confidence = result
assert correction == "လုံး"
def test_validate_sequence_typo(self):
"""Classifier typos should be detected in sequence."""
checker = ClassifierChecker()
words = ["သုံး", "ယေက်"] # typo for ယောက်
errors = checker.validate_sequence(words)
assert len(errors) >= 1
error = errors[0]
assert error.word == "ယေက်"
assert error.suggestion == "ယောက်"
assert error.error_type == "typo"
def test_validate_sequence_no_errors(self):
"""Valid sequences should have no errors."""
checker = ClassifierChecker()
words = ["သုံး", "ယောက်"] # valid pattern
errors = checker.validate_sequence(words)
# Should have no classifier typo errors
typo_errors = [e for e in errors if e.error_type == "typo"]
assert len(typo_errors) == 0
def test_get_compatible_classifiers(self):
"""Compatible classifiers should be returned for nouns."""
checker = ClassifierChecker()
# ခွေး (dog) should use ကောင် (animal classifier)
compatible = checker.get_compatible_classifiers("ခွေး")
assert "ကောင်" in compatible
# လူ (person) should use ယောက်
compatible = checker.get_compatible_classifiers("လူ")
assert "ယောက်" in compatible
def test_get_compatible_classifiers_prefers_canonical_chaung(self):
"""Classifier suggestions should canonicalize ခြောင်း to ချောင်း."""
checker = ClassifierChecker()
compatible = checker.get_compatible_classifiers("ကလောင်")
assert "ချောင်း" in compatible
assert "ခြောင်း" not in compatible
def test_check_agreement_valid(self):
"""Valid classifier-noun agreement should pass."""
checker = ClassifierChecker()
# ခွေး (dog) with ကောင် (animal classifier) - valid
error = checker.check_agreement("ကောင်", "ခွေး")
assert error is None
def test_check_agreement_invalid(self):
"""Invalid classifier-noun agreement should be detected."""
checker = ClassifierChecker()
# ခွေး (dog) with ယောက် (people classifier) - invalid
error = checker.check_agreement("ယောက်", "ခွေး")
assert error is not None
assert error.error_type == "agreement"
assert error.suggestion == "ကောင်"
class TestConvenienceFunctions:
"""Test module-level convenience functions."""
def test_is_classifier_function(self):
"""is_classifier convenience function should work."""
from myspellchecker.grammar.checkers.classifier import is_classifier
assert is_classifier("ယောက်")
assert is_classifier("ကောင်")
assert not is_classifier("လူ")
def test_is_numeral_function(self):
"""is_numeral convenience function should work."""
from myspellchecker.grammar.checkers.classifier import is_numeral
assert is_numeral("သုံး")
assert is_numeral("၃")
assert not is_numeral("လူ")
class TestGrammarRulesIntegration:
"""Test integration with SyntacticRuleChecker."""
def test_classifier_checker_initialized(self):
"""SyntacticRuleChecker should have classifier_checker."""
from myspellchecker.grammar.engine import SyntacticRuleChecker
from myspellchecker.providers.memory import MemoryProvider
provider = MemoryProvider()
checker = SyntacticRuleChecker(provider)
assert hasattr(checker, "classifier_checker")
assert checker.classifier_checker is not None
def test_check_classifiers_method_exists(self):
"""_check_classifiers method should exist."""
from myspellchecker.grammar.engine import SyntacticRuleChecker
from myspellchecker.providers.memory import MemoryProvider
provider = MemoryProvider()
checker = SyntacticRuleChecker(provider)
assert hasattr(checker, "_check_classifiers")
def test_check_sequence_includes_classifiers(self):
"""check_sequence should detect classifier typos."""
from myspellchecker.grammar.engine import SyntacticRuleChecker
from myspellchecker.providers.memory import MemoryProvider
provider = MemoryProvider()
checker = SyntacticRuleChecker(provider)
# Test with classifier typo
words = ["သုံး", "ယေက်"]
corrections = checker.check_sequence(words)
# Should find the classifier typo
found_correction = any(c[2] == "ယောက်" for c in corrections)
assert found_correction, f"Expected ယောက် correction, got {corrections}"
class TestClassifierCategories:
"""Test classifier categories are properly organized."""
def test_classifier_categories_defined(self):
"""Categories should be present in loaded config."""
checker = ClassifierChecker()
# Build categories from map
categories = set()
for _, info in checker.classifier_map.items():
categories.add(info[0])
assert "people" in categories
assert "animals" in categories
# assert "objects" in categories # Might be named differently in yaml
def test_categories_contain_classifiers(self):
"""Each category should contain valid classifiers."""
checker = ClassifierChecker()
for classifier in checker.classifiers:
assert classifier in checker.classifier_map
def test_people_category(self):
"""People category should have correct classifiers."""
checker = ClassifierChecker()
assert checker.get_classifier_category("ယောက်") == "people"
assert checker.get_classifier_category("ဦး") == "people"
assert checker.get_classifier_category("ပါး") == "people"
def test_animals_category(self):
"""Animals category should have correct classifiers."""
checker = ClassifierChecker()
assert checker.get_classifier_category("ကောင်") == "animals"
def test_vehicles_category(self):
"""Vehicles category should have correct classifiers."""
checker = ClassifierChecker()
assert checker.get_classifier_category("စင်း") == "vehicles"
assert checker.get_classifier_category("စီး") == "vehicles"
class TestRealWorldExamples:
"""Test with real-world Myanmar classifier usage patterns."""
def test_vehicle_counting(self):
"""Test vehicle counting: ကားတစ်စီး (1 car)."""
from myspellchecker.grammar.checkers.classifier import ClassifierChecker
checker = ClassifierChecker()
# "တစ်" is a numeral, "စီး" is vehicle classifier
assert checker.is_numeral("တစ်")
assert checker.is_classifier("စီး")
def test_money_counting(self):
"""Test money counting: ရာကျပ် (100 kyat)."""
from myspellchecker.grammar.checkers.classifier import ClassifierChecker
checker = ClassifierChecker()
# "ရာ" is numeral (hundred), "ကျပ်" is money classifier
assert checker.is_numeral("ရာ")
assert checker.is_classifier("ကျပ်")
def test_mixed_sentence(self):
"""Test classifier validation in mixed sentence."""
from myspellchecker.grammar.checkers.classifier import ClassifierChecker
checker = ClassifierChecker()
# "လူသုံးယောက်ရှိတယ်" (There are 3 people)
words = ["လူ", "သုံး", "ယောက်", "ရှိ", "တယ်"]
errors = checker.validate_sequence(words)
# Should have no classifier errors
typo_errors = [e for e in errors if e.error_type == "typo"]
assert len(typo_errors) == 0