Skip to content

Add output validation script and tests for Markdown generation #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
__pycache__
# Ignore the extracted ISO 639-3 code tables directory and its zip archive.
# Both are downloaded data that `make get-table` can re-create, so they do not belong in version control.
iso-639-3_Code_Tables_20240415/
iso-639-3_Code_Tables_20240415.zip

6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ init:
get-table:
# https://iso639-3.sil.org/code_tables/download_tables
wget https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_20240415.zip
unzip -o iso-639-3_Code_Tables_20240415.zip

combine-wikipedia:
cat wikipedia_languages.csv wikipedia_languages_extra.csv > wikipedia_languages_all.csv

generate:
python generate.py
python3 generate.py

validate:
python3 validate_output.py
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ beautifulsoup4
jinja2
lxml
pyarrow
pytest
20 changes: 20 additions & 0 deletions test_validate_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from validate_output import validate_markdown_file
import os

def test_missing_title(tmp_path):
    """A file with a summary marker but no "# " heading reports a missing title."""
    md_path = tmp_path / "test1.md"
    md_path.write_text("Summary: This is a test.")
    assert "Missing title" in validate_markdown_file(str(md_path))

def test_missing_summary(tmp_path):
    """A file with a heading but no "Summary:" text reports a missing summary."""
    md_path = tmp_path / "test2.md"
    md_path.write_text("# Test Title")
    assert "Missing summary section" in validate_markdown_file(str(md_path))

def test_valid_markdown(tmp_path):
    """A file with both a heading and a summary marker yields no errors."""
    md_path = tmp_path / "test3.md"
    md_path.write_text("# Test Title\nSummary: All good.")
    reported = validate_markdown_file(str(md_path))
    assert not reported
44 changes: 44 additions & 0 deletions validate_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import sys
import os
import re

def validate_markdown_file(filepath):
    """Check a Markdown file for a title line and a summary marker.

    The file is read as UTF-8. The returned list holds one message per
    failed check, in this order:

    * "Missing title" when no line starts with "# ".
    * "Missing summary section" when the text "Summary:" appears nowhere.

    An empty list means both checks passed.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        text = handle.read()

    # MULTILINE lets "^" match at the start of any line, not just the file.
    has_title = re.search(r'^# ', text, re.MULTILINE) is not None
    has_summary = "Summary:" in text

    problems = []
    if not has_title:
        problems.append("Missing title")
    if not has_summary:
        problems.append("Missing summary section")
    return problems

def validate_directory(output_dir):
    """Validate every ``.md`` file directly inside *output_dir*.

    Each regular file whose name ends in ``.md`` is passed to
    ``validate_markdown_file``. For every file that has errors, the file
    path and its error messages are printed to standard output.

    Entries are visited in sorted name order so the report is reproducible.
    Entries ending in ``.md`` that are not regular files (for example a
    directory named ``notes.md``) are skipped rather than opened.

    Returns:
        True if at least one file reported errors, False otherwise.

    Raises:
        OSError: if *output_dir* cannot be listed.
    """
    failed = False
    # os.listdir returns names in arbitrary order; sort for stable output.
    for fname in sorted(os.listdir(output_dir)):
        if not fname.endswith('.md'):
            continue
        path = os.path.join(output_dir, fname)
        # Only regular files can be read; opening a directory would raise.
        if not os.path.isfile(path):
            continue
        errors = validate_markdown_file(path)
        if errors:
            failed = True
            print(f"{path}:")
            for err in errors:
                print(f" - {err}")
    return failed

def main():
    """Validate the Markdown output under a base directory and set the exit code.

    The base directory is the first command-line argument, or
    "../web-languages" when none is given. Every immediate subdirectory is
    handed to ``validate_directory``; files sitting directly in the base
    directory are not examined.

    Exits with status 1 when the base directory does not exist or when any
    subdirectory reports a failure. Otherwise returns normally.
    """
    base_dir = "../web-languages"
    if len(sys.argv) > 1:
        base_dir = sys.argv[1]

    if not os.path.isdir(base_dir):
        print(f"Error: Directory '{base_dir}' does not exist.")
        sys.exit(1)

    any_failed = False
    for entry in os.listdir(base_dir):
        candidate = os.path.join(base_dir, entry)
        # Check every subdirectory, even after one has already failed, so
        # the full report is printed.
        if os.path.isdir(candidate) and validate_directory(candidate):
            any_failed = True

    if any_failed:
        sys.exit(1)

# Run the validator only when this file is executed as a script, so that
# importing the module (as the tests do) has no side effects.
if __name__ == "__main__":
    main()