Name	Name	Last commit message	Last commit date
parent directory ..
benchmarks	benchmarks
e2e	e2e
fixtures	fixtures
integration	integration
stress	stress
README.md	README.md
__init__.py	__init__.py
conftest.py	conftest.py
test_accuracy.py	test_accuracy.py
test_algorithms_cache_factory.py	test_algorithms_cache_factory.py
test_arbiter.py	test_arbiter.py
test_aspect_system.py	test_aspect_system.py
test_base_provider_comprehensive.py	test_base_provider_comprehensive.py
test_beam_pruning_edge_cases.py	test_beam_pruning_edge_cases.py
test_benchmark_integrity.py	test_benchmark_integrity.py
test_benchmark_yaml_hygiene.py	test_benchmark_yaml_hygiene.py
test_calibration.py	test_calibration.py
test_candidate_collection.py	test_candidate_collection.py
test_check_options.py	test_check_options.py
test_classifier_system.py	test_classifier_system.py
test_cli_commands.py	test_cli_commands.py
test_cli_integration.py	test_cli_integration.py
test_colloquial_variants.py	test_colloquial_variants.py
test_compatible_ha.py	test_compatible_ha.py
test_compound_merge_probe_strategy.py	test_compound_merge_probe_strategy.py
test_compound_resolver.py	test_compound_resolver.py
test_compound_system.py	test_compound_system.py
test_confidence_fusion.py	test_confidence_fusion.py
test_config_loader.py	test_config_loader.py
test_config_profiles.py	test_config_profiles.py
test_config_pydantic.py	test_config_pydantic.py
test_confusable_semantic_strategy.py	test_confusable_semantic_strategy.py
test_confusable_strategy.py	test_confusable_strategy.py
test_connection_pool.py	test_connection_pool.py
test_context_validator.py	test_context_validator.py
test_context_validator_strategies.py	test_context_validator_strategies.py
test_core_builder.py	test_core_builder.py
test_core_di.py	test_core_di.py
test_corpus_preprocessor.py	test_corpus_preprocessor.py
test_correction_utils.py	test_correction_utils.py
test_cross_whitespace_probe_strategy.py	test_cross_whitespace_probe_strategy.py
test_csv_provider.py	test_csv_provider.py
test_custom_segmenter.py	test_custom_segmenter.py
test_data_pipeline.py	test_data_pipeline.py
test_data_pipeline_config.py	test_data_pipeline_config.py
test_data_pipeline_ingester.py	test_data_pipeline_ingester.py
test_data_pipeline_segmenter.py	test_data_pipeline_segmenter.py
test_database_packager.py	test_database_packager.py
test_database_packager_edge_cases.py	test_database_packager_edge_cases.py
test_db_build_and_usage.py	test_db_build_and_usage.py
test_default_segmenter.py	test_default_segmenter.py
test_detection_registry.py	test_detection_registry.py
test_detector_config.py	test_detector_config.py
test_detector_context.py	test_detector_context.py
test_disk_space.py	test_disk_space.py
test_edit_distance.py	test_edit_distance.py
test_edit_distance_python_impl.py	test_edit_distance_python_impl.py
test_empty_inputs.py	test_empty_inputs.py
test_enrichment.py	test_enrichment.py
test_error_suppression.py	test_error_suppression.py
test_extended_myanmar_characters.py	test_extended_myanmar_characters.py
test_extended_myanmar_validation.py	test_extended_myanmar_validation.py
test_factory.py	test_factory.py
test_frequency_builder.py	test_frequency_builder.py
test_grammar_engine.py	test_grammar_engine.py
test_grammar_rules_critical.py	test_grammar_rules_critical.py
test_grammar_rules_improved.py	test_grammar_rules_improved.py
test_grammar_rules_schema_validation.py	test_grammar_rules_schema_validation.py
test_hidden_compound_strategy.py	test_hidden_compound_strategy.py
test_homophone_strategy.py	test_homophone_strategy.py
test_homophones.py	test_homophones.py
test_incremental_build.py	test_incremental_build.py
test_inference_backends.py	test_inference_backends.py
test_ingester_c.py	test_ingester_c.py
test_initialization_refactor.py	test_initialization_refactor.py
test_is_curated_vocabulary.py	test_is_curated_vocabulary.py
test_joint_decoder.py	test_joint_decoder.py
test_joint_segment_tagger.py	test_joint_segment_tagger.py
test_json_provider.py	test_json_provider.py
test_keyboard_distance.py	test_keyboard_distance.py
test_lazy_imports.py	test_lazy_imports.py
test_loan_word_corrections.py	test_loan_word_corrections.py
test_medial_confusions.py	test_medial_confusions.py
test_medial_consonant_compatibility.py	test_medial_consonant_compatibility.py
test_medial_ordering.py	test_medial_ordering.py
test_medial_ya_ai_vowel.py	test_medial_ya_ai_vowel.py
test_memory_provider.py	test_memory_provider.py
test_merge_probe_cascade.py	test_merge_probe_cascade.py
test_merge_probe_symspell.py	test_merge_probe_symspell.py
test_merged_word_checker.py	test_merged_word_checker.py
test_meta_fusion.py	test_meta_fusion.py
test_mined_confusable_pair_strategy.py	test_mined_confusable_pair_strategy.py
test_mlm_span_mask_candgen_strategy.py	test_mlm_span_mask_candgen_strategy.py
test_mmap_reader.py	test_mmap_reader.py
test_morpheme_suggestion_strategy.py	test_morpheme_suggestion_strategy.py
test_morphology.py	test_morphology.py
test_morphology_performance.py	test_morphology_performance.py
test_myanmar_confusables.py	test_myanmar_confusables.py
test_myanmar_constants.py	test_myanmar_constants.py
test_negation_system.py	test_negation_system.py

Name

Last commit message

Last commit date

benchmarks

test_algorithms_cache_factory.py

test_arbiter.py

test_aspect_system.py

test_base_provider_comprehensive.py

test_beam_pruning_edge_cases.py

test_benchmark_integrity.py

test_benchmark_yaml_hygiene.py

test_calibration.py

test_candidate_collection.py

test_check_options.py

test_classifier_system.py

test_cli_commands.py

test_cli_integration.py

test_colloquial_variants.py

test_compatible_ha.py

test_compound_merge_probe_strategy.py

test_compound_resolver.py

test_compound_system.py

test_confidence_fusion.py

test_config_loader.py

test_config_profiles.py

test_config_pydantic.py

test_confusable_semantic_strategy.py

test_confusable_strategy.py

test_connection_pool.py

test_context_validator.py

test_context_validator_strategies.py

test_core_builder.py

test_core_di.py

test_corpus_preprocessor.py

test_correction_utils.py

test_cross_whitespace_probe_strategy.py

test_csv_provider.py

test_custom_segmenter.py

test_data_pipeline.py

test_data_pipeline_config.py

test_data_pipeline_ingester.py

test_data_pipeline_segmenter.py

test_database_packager.py

test_database_packager_edge_cases.py

test_db_build_and_usage.py

test_default_segmenter.py

test_detection_registry.py

test_detector_config.py

test_detector_context.py

test_disk_space.py

test_edit_distance.py

test_edit_distance_python_impl.py

test_empty_inputs.py

test_enrichment.py

test_error_suppression.py

test_extended_myanmar_characters.py

test_extended_myanmar_validation.py

test_factory.py

test_frequency_builder.py

test_grammar_engine.py

test_grammar_rules_critical.py

test_grammar_rules_improved.py

test_grammar_rules_schema_validation.py

test_hidden_compound_strategy.py

test_homophone_strategy.py

test_homophones.py

test_incremental_build.py

test_inference_backends.py

test_ingester_c.py

test_initialization_refactor.py

test_is_curated_vocabulary.py

test_joint_decoder.py

test_joint_segment_tagger.py

test_json_provider.py

test_keyboard_distance.py

test_lazy_imports.py

test_loan_word_corrections.py

test_medial_confusions.py

test_medial_consonant_compatibility.py

test_medial_ordering.py

test_medial_ya_ai_vowel.py

test_memory_provider.py

test_merge_probe_cascade.py

test_merge_probe_symspell.py

test_merged_word_checker.py

test_meta_fusion.py

test_mined_confusable_pair_strategy.py

test_mlm_span_mask_candgen_strategy.py

test_mmap_reader.py

test_morpheme_suggestion_strategy.py

test_morphology.py

test_morphology_performance.py

test_myanmar_confusables.py

test_myanmar_constants.py

test_negation_system.py

Tests

Directory Structure

tests/
├── conftest.py              # Root fixtures + skipped test registry
├── test_*.py                # Unit tests (~170 files)
├── integration/             # Cross-component integration tests (~25 files)
├── e2e/                     # End-to-end CLI and library usage tests
│   ├── conftest.py          # E2E fixtures (myspell_cmd, e2e_test_db, run_myspell)
│   └── test_*.py            # CLI E2E, library usage, custom providers, robustness
├── stress/                  # Concurrency, batch processing, compound stress tests
├── fixtures/                # Shared test data
│   ├── myanmar_test_samples.py  # Ground-truth segmentations for Myanmar text
│   ├── config_templates.py      # YAML/JSON config templates for config tests
│   └── benchmarks/              # POS gold standard data
└── benchmarks/              # Benchmark suite (separate from tests/; lives at repo root)

Running Tests

# All tests
pytest tests/

# Single file
pytest tests/test_syllable_rules.py

# Single test method
pytest tests/test_syllable_rules.py::TestSyllableValidator::test_valid_syllable

# By marker
pytest tests/ -m unit
pytest tests/ -m integration
pytest tests/ -m "not slow"

# With coverage
pytest tests/ --cov=src/myspellchecker --cov-report=term-missing

# Pattern matching
pytest tests/ -k "context"

# Verbose
pytest tests/ -v

Markers

Defined in pyproject.toml under [tool.pytest.ini_options]:

Marker	Purpose	When to use
`unit`	Fast, isolated single-function tests	Default for most tests
`integration`	Cross-component interaction tests	Tests that wire multiple real components
`slow`	Tests that take >5s	Large corpus processing, full pipeline runs
`e2e`	End-to-end tests via CLI or public API	Tests that invoke `myspellchecker` as a user would
`benchmark`	Performance measurement tests	Requires `pytest-benchmark` installed

Usage: apply @pytest.mark.unit (or the relevant marker) to a test class or an individual test method.

Key Fixtures (conftest.py)

Session-scoped (run once per test session)

Fixture	Auto	Purpose
`mock_resource_downloads_session`	Yes	Prevents Hugging Face downloads; stubs `WordTokenizer` with a `RegexSegmenter` fallback
`patch_default_db`	Yes	Patches `SQLiteProvider.__init__` to inject a test DB when no path is given
`test_database_path`	No	Creates a temporary SQLite DB with sample syllables, words, and bigrams

Per-test (run before each test)

Fixture	Auto	Purpose
`reset_grammar_config_singleton`	Yes	Clears `GrammarRuleConfig` singleton to prevent cross-test pollution
`reset_rich_console`	Yes	Redirects Rich console output to `StringIO`
`reset_singletons`	Yes	Clears all singleton instances (`clear_all_singletons`)
`clear_lru_caches`	Yes	Clears the caches of known `@lru_cache`-decorated functions (segmenter, logger)
`reset_logging`	Yes	Restores `myspellchecker` logger handlers/propagation after each test
`mock_console`	No	Returns a `MagicMock` PipelineConsole (use explicitly when needed)

E2E fixtures (tests/e2e/conftest.py)

Fixture	Scope	Purpose
`myspell_cmd`	Session	Resolves the `myspellchecker` CLI command path
`e2e_test_db`	Session	Builds a small test DB from a tiny corpus via the pipeline
`run_myspell`	Function	Helper to invoke CLI with args and optional stdin input

Skipped Test Registry

The top of conftest.py documents every skipped and xfailed test, organized by category:

Cython extension not compiled -- Requires running `python setup.py build_ext --inplace`
Optional dependency not installed -- transformers, torch, jsonschema, pytest-benchmark, pyyaml
Transformers mocked by another module -- Test ordering issue
SpellChecker not available -- Needs a built DB or an active `patch_default_db` fixture
Production DB not available -- Needs data/mySpellChecker_production.db
xfail: known limitations -- Pali dictionary entries not yet added
Data-dependent -- Conditional on runtime data (e.g., SKIPPED_CONTEXT_WORDS)
OS-dependent -- Environment-specific guards (e.g., symlink loops)

Writing New Tests

Naming

File: test_<module_or_feature>.py
Class: Test<ComponentName> (optional; use it to group related tests)
Method: test_<behavior_under_test>

Markers

Always mark tests appropriately:

import pytest

@pytest.mark.unit
class TestMyFeature:
    def test_basic_case(self):
        ...

    @pytest.mark.slow
    def test_large_corpus(self):
        ...

Using fixtures

The autouse fixtures handle singleton and cache cleanup, so tests do not need to do it themselves. For tests that need a database, request the test_database_path fixture:

def test_with_db(test_database_path):
    from myspellchecker.providers.sqlite import SQLiteProvider
    provider = SQLiteProvider(database_path=str(test_database_path))
    ...

Parametrize patterns

Use @pytest.mark.parametrize for Myanmar text validation:

@pytest.mark.parametrize("text,expected", [
    ("မြန်မာ", True),
    ("xyz", False),
])
def test_validation(text, expected):
    assert validate(text) == expected

Mocking heavy dependencies

For ONNX/semantic tests, mock the inference backend:

from unittest.mock import MagicMock, patch

def test_semantic_checker():
    mock_backend = MagicMock()
    mock_backend.predict.return_value = [0.95]
    with patch("myspellchecker.algorithms.semantic_checker.InferenceBackend", return_value=mock_backend):
        ...

Coverage requirement

Maintain >=65% code coverage. Check with:

pytest tests/ --cov=src/myspellchecker --cov-fail-under=65

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

README.md

Tests

Directory Structure

Running Tests

Markers

Key Fixtures (conftest.py)

Session-scoped (run once per test session)

Per-test (run before each test)

E2E fixtures (tests/e2e/conftest.py)

Skipped Test Registry

Writing New Tests

Naming

Markers

Using fixtures

Parametrize patterns

Mocking heavy dependencies

Coverage requirement

FilesExpand file tree

tests

Directory actions

More options

Directory actions

More options

Latest commit

History

tests

Folders and files

parent directory

README.md

Tests

Directory Structure

Running Tests

Markers

Key Fixtures (conftest.py)

Session-scoped (run once per test session)

Per-test (run before each test)

E2E fixtures (tests/e2e/conftest.py)

Skipped Test Registry

Writing New Tests

Naming

Markers

Using fixtures

Parametrize patterns

Mocking heavy dependencies

Coverage requirement