diff --git a/.github/scripts/list_modified_files.py b/.github/scripts/list_modified_files.py
index 52b1a3a08..c7acd1410 100644
--- a/.github/scripts/list_modified_files.py
+++ b/.github/scripts/list_modified_files.py
@@ -31,11 +31,24 @@ def process_files(files):
     ]
 
 
+def get_modified_metas(files):
+    filenames = files.split(",")
+    return [
+        {
+            "file": file,
+            "uid": uid,
+        }
+        for file in filenames if os.path.basename(file) == 'meta.yaml'
+        for uid, num_tests in [get_file_info(file)]
+    ]
+
+
 if __name__ == "__main__":
     changed_files = sys.stdin.read().strip()
     processed_files = process_files(changed_files)
+    modified_metas = get_modified_metas(changed_files)
     json_processed_files = json.dumps(processed_files)
     print(json_processed_files)
     with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
         f.write(
-            f"processed_files={json.dumps({'file_info': processed_files})}\n")
+            f"processed_files={json.dumps({'file_info': processed_files, 'modified_metas': modified_metas})}\n")
diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 55e79c0ae..86b93ac06 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -5,7 +5,6 @@ on:
     types: [published]
   workflow_dispatch: {}
 
-
 jobs:
   build_wheels:
     if: github.repository_owner == 'mlcommons'
diff --git a/.github/workflows/document-scripts.yml b/.github/workflows/document-scripts.yml
new file mode 100644
index 000000000..0024f284a
--- /dev/null
+++ b/.github/workflows/document-scripts.yml
@@ -0,0 +1,74 @@
+# This workflow automatically updates the README for any MLC script whose meta.yaml is modified
+name: Document script on modified meta
+
+on:
+  push:
+    branches: [ "dev" ]
+    paths:
+      - 'script/**meta.yaml'
+
+jobs:
+  get_modified_files:
+    runs-on: ubuntu-latest
+    outputs:
+      processed_files: ${{ steps.modified-files.outputs.processed_files }}
+
+    steps:
+      - name: 'Checkout'
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          pip install pyyaml
+
+      - name: Get changed files
+        id: modified-files
+        env:
+          filter: ${{ github.event.before }}
+        run: |
+          changed_files=$(git diff --name-only $filter | grep -E '.*\.yaml$')
+          echo "$changed_files" | python3 .github/scripts/list_modified_files.py
+
+  document_modified_scripts:
+    runs-on: ubuntu-latest
+    needs: get_modified_files
+    if: ${{ needs.get_modified_files.outputs.processed_files != '[]' && needs.get_modified_files.outputs.processed_files != '' }}
+    permissions:
+      contents: write
+
+    strategy:
+      fail-fast: false
+      matrix:
+        modified_metas: ${{ fromJSON(needs.get_modified_files.outputs.processed_files).modified_metas }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          path: automation-scripts
+
+      - name: Set up Git for commit
+        run: |
+          git config --global user.name "github-actions[bot]"
+          git config --global user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Document meta.yaml file
+        run: |
+          echo "Documenting ${{ matrix.modified_metas.file }}"
+
+          pip install mlcflow
+          mlc add repo automation-scripts
+          mlc doc script ${{ matrix.modified_metas.uid }} --quiet
+          cd automation-scripts
+          find . -type f -name README.md -exec git add {} +
+          # Commit changes (Git identity is configured in the previous step)
+          git commit -m '[Automated Commit] Document ${{ matrix.modified_metas.file }} [skip ci]'
+          git push
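The two jobs above communicate through a single `processed_files` output: `list_modified_files.py` serializes both `file_info` (consumed by the test workflow below) and `modified_metas` (consumed by the `document_modified_scripts` matrix) into one JSON object. A minimal sketch of that payload shape, with a made-up path and uid for illustration:

import json

# Hypothetical payload written to $GITHUB_OUTPUT by list_modified_files.py;
# the path and uid below are invented for the example.
payload = {
    'file_info': [{'file': 'script/detect-cpu/meta.yaml', 'num_run': 1}],
    'modified_metas': [{'file': 'script/detect-cpu/meta.yaml',
                        'uid': '1234567890ab'}],
}
# Each workflow then selects its slice via fromJSON(...).file_info
# or fromJSON(...).modified_metas.
print(f"processed_files={json.dumps(payload)}")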
diff --git a/.github/workflows/run-tests-on-modified-meta.yml b/.github/workflows/run-tests-on-modified-meta.yml
index 7fbe79327..61f50f293 100644
--- a/.github/workflows/run-tests-on-modified-meta.yml
+++ b/.github/workflows/run-tests-on-modified-meta.yml
@@ -46,11 +46,6 @@ jobs:
         file_info: ${{ fromJSON(needs.get_modified_files.outputs.processed_files).file_info }}
 
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 2
-
       - name: Process meta.yaml file
         run: |
           echo "Processing ${{ matrix.file_info.file }} with run number ${{ matrix.file_info.num_run }}"
diff --git a/automation/script/doc.py b/automation/script/doc.py
new file mode 100644
index 000000000..ddbfe450f
--- /dev/null
+++ b/automation/script/doc.py
@@ -0,0 +1,149 @@
+import os
+from mlc import utils
+from utils import *
+import logging
+from pathlib import PureWindowsPath, PurePosixPath
+import copy
+
+
+def generate_doc(self_module, input_params):
+    """
+    Generates the documentation of MLC scripts.
+
+    Args:
+        self_module: Reference to the current module for internal calls.
+        input_params: Dictionary containing input parameters.
+
+    Returns:
+        Dictionary with the result of the operation. Keys:
+           - 'return': 0 on success, >0 on error.
+           - 'error': Error message (if any).
+    """
+
+    # Extract and handle basic inputs
+    quiet = input_params.get('quiet', False)
+    logger = self_module.logger
+    env = input_params.get('env', {})
+    generic_inputs = self_module.input_flags_converted_to_env
+
+    # Search for matching scripts
+    search_result = self_module.search(input_params.copy())
+    if search_result['return'] > 0:
+        return search_result
+
+    scripts_list = search_result['list']
+    if not scripts_list:
+        return {'return': 1, 'error': 'No scripts were found'}
+
+    state_data = input_params.get('state', {})
+    constant_vars = input_params.get('const', {})
+    constant_state = input_params.get('const_state', {})
+    tag_values = input_params.get('tags', '').split(",")
+    variation_tags = [tag[1:] for tag in tag_values if tag.startswith("_")]
+
+    # Iterate over scripts and generate their documentation
+    for script in sorted(scripts_list, key=lambda x: x.meta.get('alias', '')):
+        metadata = script.meta
+        script_directory = script.path
+        script_tags = metadata.get("tags", [])
+        script_alias = metadata.get('alias', '')
+        script_uid = metadata.get('uid', '')
+        script_input_mapping = metadata.get('input_mapping', {})
+        script_input_description = metadata.get('input_description', {})
+
+        r = generate_docs(metadata, script_directory, generic_inputs)
+        if r['return'] > 0:
+            continue
+
+    return {'return': 0}
+
+
+def generate_docs(metadata, script_path, generic_inputs):
+    script_name = metadata.get('alias', metadata['uid'])
+    readme_prefix = f"""This README is automatically generated. Please follow the [script execution document](https://docs.mlcommons.org/mlcflow/targets/script/execution-flow/) to learn more about MLC script execution.
+"""
+    doc_content = f"""# README for {script_name}
+{readme_prefix}
+"""
+
+    readme_dir = os.path.join(script_path, "docs")
+
+    if not os.path.exists(readme_dir):
+        os.makedirs(readme_dir)
+
+    script_tags = metadata.get("tags", [])
+    script_tags_help = metadata.get("tags_help", '')
+    if not script_tags_help:
+        tags_string = ",".join(script_tags)
+    else:
+        tags_string = script_tags_help
+
+    script_input_mapping = metadata.get('input_mapping', {})
+    script_input_description = metadata.get('input_description', {})
+
+    r = get_run_readme(
+        tags_string,
+        script_input_mapping,
+        script_input_description,
+        generic_inputs)
+    if r['return'] > 0:
+        return r
+
+    run_readme = r['run_readme']
+
+    doc_content += run_readme
+
+    readme_path = os.path.join(readme_dir, "README.md")
+    with open(readme_path, "w") as f:
+        f.write(doc_content)
+    print(f"Readme generated at {readme_path}")
+
+    return {'return': 0}
+
+
+def get_run_readme(tags, input_mapping, input_description, generic_inputs):
+    run_readme = f"""## Run Commands
+
+```bash
+mlcr {tags}
+```
+"""
+
+    if input_description:
+        input_description_string = generate_markdown(
+            "Script Inputs", input_description)
+    else:
+        input_description_string = "No script-specific inputs"
+
+    run_readme += input_description_string + "\n\n"
+
+    generic_input_string = generate_markdown(
+        "Generic Script Inputs", generic_inputs)
+
+    run_readme += generic_input_string + "\n"
+
+    return {'return': 0, 'run_readme': run_readme}
+
+
+def infer_type(field):
+    if "dtype" in field:
+        return field["dtype"]
+    elif "default" in field:
+        return type(field["default"]).__name__
+    else:
+        return "str"
+
+
+def generate_markdown(heading, input_dict):
+    lines = [
+        f"### {heading}\n",
+        "| Name | Description | Default | Type |",
+        "|------|-------------|---------|------|"]
+    for key in sorted(
+            input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
+        field = input_dict[key]
+        desc = field.get("desc", "")
+        default = field.get("default", "")
+        dtype = infer_type(field)
+        lines.append(f"| `{key}` | {desc} | `{default}` | {dtype} |")
+    return "\n".join(lines)
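To make the table-generation behavior above concrete, here is a small self-contained sketch that mirrors `infer_type` and `generate_markdown` from `automation/script/doc.py` and applies them to a hypothetical `input_description` (the entries are illustrative, not taken from a real meta.yaml):

def infer_type(field):
    # Prefer an explicit dtype; otherwise fall back to the default's type name.
    if "dtype" in field:
        return field["dtype"]
    elif "default" in field:
        return type(field["default"]).__name__
    else:
        return "str"

def generate_markdown(heading, input_dict):
    lines = [
        f"### {heading}\n",
        "| Name | Description | Default | Type |",
        "|------|-------------|---------|------|"]
    # Entries are ordered by their optional "sort" key; unsorted ones go last.
    for key in sorted(input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
        field = input_dict[key]
        lines.append(
            f"| `{key}` | {field.get('desc', '')} "
            f"| `{field.get('default', '')}` | {infer_type(field)} |")
    return "\n".join(lines)

# Hypothetical input_description, as a script's meta.yaml might declare it:
sample = {
    "outdirname": {"desc": "Directory for script output", "default": "", "sort": 1},
    "quiet": {"desc": "Suppress prompts", "default": True, "sort": 2},
}
print(generate_markdown("Script Inputs", sample))
# ### Script Inputs
#
# | Name | Description | Default | Type |
# |------|-------------|---------|------|
# | `outdirname` | Directory for script output | `` | str |
# | `quiet` | Suppress prompts | `True` | bool |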
+""" + doc_content = f"""# README for {script_name} +{readme_prefix} +""" + + readme_dir = os.path.join(script_path, "docs") + + if not os.path.exists(readme_dir): + os.makedirs(readme_dir) + + script_tags = metadata.get("tags", []) + script_tags_help = metadata.get("tags_help", '') + if not script_tags_help: + tags_string = ",".join(script_tags) + else: + tags_string = script_tags_help + + script_input_mapping = metadata.get('input_mapping', {}) + script_input_description = metadata.get('input_description', {}) + + r = get_run_readme( + tags_string, + script_input_mapping, + script_input_description, + generic_inputs) + if r['return'] > 0: + return r + + run_readme = r['run_readme'] + + doc_content += run_readme + + readme_path = os.path.join(readme_dir, "README.md") + with open(readme_path, "w") as f: + f.write(doc_content) + print(f"Readme generated at {readme_path}") + + return {'return': 0} + + +def get_run_readme(tags, input_mapping, input_description, generic_inputs): + run_readme = f"""## Run Commands + +```mlcr {tags} +``` +""" + + if input_description: + input_description_string = generate_markdown( + "Script Inputs", input_description) + else: + input_description_string = "No script specific inputs" + + run_readme += input_description_string + + generic_input_string = generate_markdown( + "Generic Script Inputs", generic_inputs) + + run_readme += generic_input_string + + return {'return': 0, 'run_readme': run_readme} + + +def infer_type(field): + if "dtype" in field: + return field["dtype"] + elif "default" in field: + return type(field["default"]).__name__ + else: + return "str" + + +def generate_markdown(heading, input_dict): + lines = [ + f"### {heading}\n", + "| Name | Description | Default | Type |", + "|------|-------------|---------|------|"] + for key in sorted( + input_dict, key=lambda k: input_dict[k].get("sort", 9999)): + field = input_dict[key] + desc = field.get("desc", "") + default = field.get("default", "") + dtype = infer_type(field) + lines.append(f"| `{key}` | {desc} | `{default}` | {dtype} |") + return "\n".join(lines) diff --git a/automation/script/module.py b/automation/script/module.py index 6cc851fc0..6ee906c01 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -68,22 +68,24 @@ def __init__(self, action_object, automation_file): 'MLC_GIT_*', 'MLC_RENEW_CACHE_ENTRY'] - self.input_flags_converted_to_tmp_env = ['path'] - - self.input_flags_converted_to_env = ['input', - 'output', - 'outdirname', - 'outbasename', - 'name', - 'extra_cache_tags', - 'skip_compile', - 'skip_run', - 'accept_license', - 'skip_system_deps', - 'git_ssh', - 'gh_token', - 'hf_token', - 'verify_ssl'] + self.input_flags_converted_to_tmp_env = { + 'path': {'desc': 'Filesystem path to search for executable', 'default': ''}} + + self.input_flags_converted_to_env = {'input': {'desc': 'Input to the script passed using the env key `MLC_INPUT`', 'default': ''}, + 'output': {'desc': 'Output from the script passed using the env key `MLC_OUTPUT`', 'default': ''}, + 'outdirname': {'desc': 'The directory to store the script output', 'default': 'cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory'}, + 'outbasename': {'desc': 'The output file/folder name', 'default': ''}, + 'name': {}, + 'extra_cache_tags': {'desc': 'Extra cache tags to be added to the cached entry when the script results are saved', 'default': ''}, + 'skip_compile': {'desc': 'Skip compilation', 'default': False}, + 'skip_run': {'desc': 'Skip run', 
diff --git a/script/detect-cpu/customize.py b/script/detect-cpu/customize.py
index b6d1d73fc..1e8584c03 100644
--- a/script/detect-cpu/customize.py
+++ b/script/detect-cpu/customize.py
@@ -172,7 +172,7 @@ def postprocess(i):
             else:
                 env[env_key] = v[1].strip()
 
-    if env.get('MLC_HOST_CPU_SOCKETS', '') == '-':  # assume as 1
+    if env.get('MLC_HOST_CPU_SOCKETS', '') in ['-', '']:  # assume 1 socket
         env['MLC_HOST_CPU_SOCKETS'] = '1'
 
     if env.get('MLC_HOST_CPU_TOTAL_CORES', '') != '' and env.get(
@@ -184,9 +184,17 @@ def postprocess(i):
         env['MLC_HOST_CPU_THREADS_PER_CORE'] = str(int(int(env['MLC_HOST_CPU_TOTAL_LOGICAL_CORES']) //
                                                        int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'])))
 
-    if env.get('MLC_HOST_CPU_SOCKETS', '') != '' and env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES',
-                                                             '') != '' and env.get('MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') == '':
+    if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') != '' and env.get(
+            'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') == '':
         env['MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET'] = str(
             int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES']) // int(env['MLC_HOST_CPU_SOCKETS']))
 
+    if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') == '' and env.get(
+            'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') != '':
+        env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'] = str(int(
+            env['MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET']) * int(env['MLC_HOST_CPU_SOCKETS']))
+
+    if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') != '':
+        env['MLC_HOST_CPU_PHYSICAL_CORES_LIST'] = f"0-{int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES']) - 1}"
+
     return {'return': 0}
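As a worked example of the derivations added above, using hypothetical values for a 2-socket machine (env values are strings, as the script stores them):

# Hypothetical values as detect-cpu would hold them (all strings).
env = {'MLC_HOST_CPU_SOCKETS': '2',
       'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET': '16'}

# Mirror of the new fallback: total cores from per-socket count x sockets.
if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') == '' and env.get(
        'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') != '':
    env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'] = str(int(
        env['MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET']) * int(env['MLC_HOST_CPU_SOCKETS']))

# Mirror of the new core-list derivation.
if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') != '':
    env['MLC_HOST_CPU_PHYSICAL_CORES_LIST'] = \
        f"0-{int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES']) - 1}"

assert env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'] == '32'
assert env['MLC_HOST_CPU_PHYSICAL_CORES_LIST'] == '0-31'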
filename.endswith(".rar"): + if windows: + env['MLC_EXTRACT_TOOL'] = 'unrar' + env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y ' + else: + # unrar or unar may be available on Unix-like systems + env['MLC_EXTRACT_TOOL'] = 'unrar' + env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y ' elif filename.endswith(".gz"): # Check target filename extracted_filename = env.get('MLC_EXTRACT_EXTRACTED_FILENAME', '') diff --git a/script/get-platform-details/parse.py b/script/get-platform-details/parse.py new file mode 100644 index 000000000..e3ad9efa3 --- /dev/null +++ b/script/get-platform-details/parse.py @@ -0,0 +1,58 @@ +import json +import re + +# Load the input text from the system-info.txt file +with open("system-info.txt", "r", encoding="utf-8") as f: + data = f.read() + +# Define patterns to extract key data points +extracts = { + "uname": r"uname -a\n(.+)", + "username": r"3\. Username\n(.+)", + "uptime": r"2\. w\n\s+.+\s+up\s+(.+?),", + "cpu_model": r"Model name:\s+(.+)", + "cpu_cores": r"Core\(s\) per socket:\s+(\d+)", + "threads_per_core": r"Thread\(s\) per core:\s+(\d+)", + "total_cpus": r"CPU\(s\):\s+(\d+)", + "mem_total_kb": r"MemTotal:\s+(\d+)\s+kB", + "mem_free_kb": r"MemFree:\s+(\d+)\s+kB", + "swap_total_kb": r"SwapTotal:\s+(\d+)\s+kB", + "swap_free_kb": r"SwapFree:\s+(\d+)\s+kB", + "kernel_version": r"kernel.version\s+=\s+(.+)", + "architecture": r"Architecture:\s+(\S+)", + "boot_args": r"13\. Linux kernel boot-time arguments, from /proc/cmdline\n(.+)", + "bios_vendor": r"Vendor:\s+(.+)", + "bios_version": r"Version:\s+([\d\.]+)", + "bios_release_date": r"Release Date:\s+(.+)", + "cpu_frequency_range": r"hardware limits:\s+(.+)", + "virtualization": r"Virtualization:\s+(.+)", + "l1d_cache": r"L1d cache:\s+(.+)", + "l2_cache": r"L2 cache:\s+(.+)", + "l3_cache": r"L3 cache:\s+(.+)", + "numa_nodes": r"NUMA node\(s\):\s+(\d+)", + "runlevel": r"who -r\n\s+run-level\s+(\d+)", + "systemd_version": r"Systemd service manager version\n(.+)", + "max_mhz": r"CPU max MHz:\s+([\d\.]+)", + "min_mhz": r"CPU min MHz:\s+([\d\.]+)", + "bogomips": r"BogoMIPS:\s+([\d\.]+)", + "cache_alignment": r"cache_alignment\s+:\s+(\d+)", + "address_sizes": r"Address sizes:\s+(.+)", + "numactl_total_mem_mb": r"node 0 size:\s+(\d+)\s+MB", + "dimm_actual_speed": r"Speed:\s+(\d+)\s+MT/s", + "dimm_configured_speed": r"Configured Memory Speed:\s+(\d+)\s+MT/s" +} + +# Extract matched values +results = [] +for key, pattern in extracts.items(): + match = re.search(pattern, data) + if match: + results.append({"key": key, "value": match.group(1)}) + +# Add derived field: number of services +services = re.findall(r"\.service", data) +results.append({"key": "total_services_detected", "value": len(services)}) + +# Output as JSON array +json_output = json.dumps(results, indent=2) +print(json_output) diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml index ad5ba5d82..c1ed0119a 100644 --- a/script/run-mlperf-inference-app/meta.yaml +++ b/script/run-mlperf-inference-app/meta.yaml @@ -28,6 +28,7 @@ tags: tags_help: "run-mlperf,inference" predeps: False + default_env: MLC_MLPERF_IMPLEMENTATION: reference MLC_MLPERF_MODEL: resnet50