diff --git a/.gitignore b/.gitignore index bee8a64..056faa6 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ __pycache__ +# Ignore the extracted ISO 639-3 code tables directory and its zip archive. +# Both are downloaded and unpacked by `make get-table`, so they do not belong in version control. +iso-639-3_Code_Tables_20240415/ +iso-639-3_Code_Tables_20240415.zip \ No newline at end of file diff --git a/Makefile b/Makefile index d296424..3a7d86f 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,10 @@ init: get-table: # https://iso639-3.sil.org/code_tables/download_tables wget https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_20240415.zip + unzip -o iso-639-3_Code_Tables_20240415.zip combine-wikipedia: cat wikipedia_languages.csv wikipedia_languages_extra.csv > wikipedia_languages_all.csv generate: - python generate.py + python3 generate.py diff --git a/README.md b/README.md index 73c5265..90332dc 100644 --- a/README.md +++ b/README.md @@ -7,13 +7,42 @@ web-languages dataset. -## Installing, etc. +## Requirements +**Note:** This project requires Python 3. +## Setup + +Install dependencies: + +```bash +make init +``` + +Download and extract ISO-639-3 tables: +``` +make get-table ``` -make install +(Optional) Combine Wikipedia language files: +``` +make combine-wikipedia ``` +Generate language Markdown files: +``` +make generate +``` + +Downloaded data files (e.g., ISO-639-3 tables) are excluded from version control via `.gitignore`. + +## Makefile Targets + +| Target | Description | +|--------------------|--------------------------------------------------| +| init | Install Python dependencies | +| get-table | Download and unzip ISO-639-3 tables | +| combine-wikipedia | Combine Wikipedia language CSVs | +| generate | Generate Markdown files from data | ## License