From 8233a81b091a7d9f438943d11688d7103724283d Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 8 May 2026 12:39:57 -0400 Subject: [PATCH 1/7] feat: single entrypoint for release builds --- .github/scripts/dispatch_release.py | 223 ++++++++++++++++++ .github/scripts/pr_comment_kernel_bot.py | 42 ++-- .github/workflows/build-release-dispatch.yaml | 67 ++++++ .github/workflows/build-release-mac.yaml | 24 +- .github/workflows/build-release-windows.yaml | 27 +-- .github/workflows/build-release.yaml | 24 +- 6 files changed, 320 insertions(+), 87 deletions(-) create mode 100644 .github/scripts/dispatch_release.py create mode 100644 .github/workflows/build-release-dispatch.yaml diff --git a/.github/scripts/dispatch_release.py b/.github/scripts/dispatch_release.py new file mode 100644 index 00000000..278bd436 --- /dev/null +++ b/.github/scripts/dispatch_release.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +""" +Dispatch release workflows for a kernel. + +Three entrypoints call this script: + 1. The PR-merge dummy workflow (via CLI) + 2. The comment bot (via import) + 3. Local CLI invocation +""" + +import argparse +import json +import os +import re +import subprocess +import sys +import urllib.error +import urllib.request +import uuid +from dataclasses import dataclass, field + + +RELEASE_WORKFLOWS = [ + "build-release.yaml", + "build-release-mac.yaml", + "build-release-windows.yaml", +] + +KERNEL_NAME_RE = re.compile(r"^[A-Za-z0-9_-]+$") + + +@dataclass +class ReleaseDispatchResult: + kernel_name: str + dispatched: list[tuple[str, str]] = field(default_factory=list) # (workflow, dispatch_key) + failed: list[tuple[str, int]] = field(default_factory=list) # (workflow, http_code) + skipped: list[str] = field(default_factory=list) # workflow filenames + + +def github_api_request( + url: str, token: str, method: str = "GET", data: dict | None = None +): + body = None + if data is not None: + body = json.dumps(data).encode("utf-8") + + # req = urllib.request.Request( + # url=url, + # data=body, + # method=method, + # headers={ + # "Accept": "application/vnd.github+json", + # "Authorization": f"Bearer {token}", + # "X-GitHub-Api-Version": "2022-11-28", + # "Content-Type": "application/json", + # }, + # ) + # with urllib.request.urlopen(req) as resp: + # return resp.status, resp.read().decode("utf-8") + + +def get_token() -> str | None: + """Resolve GitHub token: env var first, then ``gh auth token`` fallback.""" + token = os.environ.get("GITHUB_TOKEN") + if token: + return token + try: + result = subprocess.run( + ["gh", "auth", "token"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip() or None + except (FileNotFoundError, subprocess.CalledProcessError): + return None + + +def get_repo() -> str | None: + """Resolve repository: GITHUB_REPOSITORY env var, or parse from git remote.""" + repo = os.environ.get("GITHUB_REPOSITORY") + if repo: + return repo + try: + result = subprocess.run( + ["git", "remote", "get-url", "origin"], + capture_output=True, + text=True, + check=True, + ) + url = result.stdout.strip() + match = re.search(r"github\.com[:/](.+?)(?:\.git)?$", url) + if match: + return match.group(1) + except (FileNotFoundError, subprocess.CalledProcessError): + pass + return None + + +def select_workflows(kernel_name: str) -> list[str]: + """ + Determine which release workflows to dispatch for this kernel. + + Currently returns all 3 workflows. This is the extension point for + future filtering logic (e.g., skip mac for CUDA-only kernels). + """ + return list(RELEASE_WORKFLOWS) + + +def dispatch_release( + kernel_name: str, + *, + token: str, + repo: str, + ref: str = "main", + dispatch_key_prefix: str = "", +) -> ReleaseDispatchResult: + """ + Dispatch the appropriate release workflows for a kernel. + + Args: + kernel_name: Name of the kernel directory. + token: GitHub API token. + repo: GitHub repository in "owner/repo" format. + ref: Git ref to dispatch against (default "main"). + dispatch_key_prefix: Optional prefix for dispatch keys (e.g. "pr42-"). + + Returns: + ReleaseDispatchResult with dispatched/failed/skipped lists. + """ + if not KERNEL_NAME_RE.match(kernel_name): + print(f"Invalid kernel name: {kernel_name!r}", file=sys.stderr) + result = ReleaseDispatchResult(kernel_name=kernel_name) + for wf in RELEASE_WORKFLOWS: + result.failed.append((wf, 0)) + return result + + result = ReleaseDispatchResult(kernel_name=kernel_name) + + workflows = select_workflows(kernel_name) + skipped_workflows = set(RELEASE_WORKFLOWS) - set(workflows) + result.skipped = sorted(skipped_workflows) + + api_base = f"https://api.github.com/repos/{repo}" + for workflow in workflows: + dispatch_key = ( + f"{dispatch_key_prefix}{kernel_name}-{workflow}-{uuid.uuid4().hex[:12]}" + ) + dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" + dispatch_body = { + "ref": ref, + "inputs": { + "kernel_name": kernel_name, + "dispatch_key": dispatch_key, + }, + } + try: + print(f"Dispatching {workflow} for kernel `{kernel_name}` on ref `{ref}`") + github_api_request(dispatch_url, token, method="POST", data=dispatch_body) + result.dispatched.append((workflow, dispatch_key)) + except urllib.error.HTTPError as e: + err_text = e.read().decode("utf-8", errors="replace") + print(f"Failed to dispatch {workflow} (HTTP {e.code}): {err_text}", file=sys.stderr) + result.failed.append((workflow, e.code)) + + return result + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Dispatch release workflows for a kernel" + ) + parser.add_argument("kernel_name", help="Kernel directory name") + parser.add_argument( + "--ref", default="main", help="Git ref to dispatch on (default: main)" + ) + parser.add_argument( + "--repo", default=None, help="GitHub repo in owner/repo format (default: auto-detect)" + ) + args = parser.parse_args() + + token = get_token() + if not token: + print( + "Error: No GitHub token found. Set GITHUB_TOKEN or run `gh auth login`.", + file=sys.stderr, + ) + return 1 + + repo = args.repo or get_repo() + if not repo: + print( + "Error: Cannot determine repository. Set GITHUB_REPOSITORY or use --repo.", + file=sys.stderr, + ) + return 1 + + result = dispatch_release( + args.kernel_name, + token=token, + repo=repo, + ref=args.ref, + ) + + if result.dispatched: + print(f"\nDispatched ({len(result.dispatched)}):") + for wf, dk in result.dispatched: + print(f" - {wf} (key: {dk})") + if result.skipped: + print(f"\nSkipped ({len(result.skipped)}):") + for wf in result.skipped: + print(f" - {wf}") + if result.failed: + print(f"\nFailed ({len(result.failed)}):") + for wf, code in result.failed: + print(f" - {wf} (HTTP {code})") + return 1 + + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/.github/scripts/pr_comment_kernel_bot.py b/.github/scripts/pr_comment_kernel_bot.py index 2f9772cb..6b2ea11d 100644 --- a/.github/scripts/pr_comment_kernel_bot.py +++ b/.github/scripts/pr_comment_kernel_bot.py @@ -10,6 +10,8 @@ import urllib.request import uuid +from dispatch_release import RELEASE_WORKFLOWS, dispatch_release as do_dispatch_release + KERNEL_RE = re.compile(r"^[A-Za-z0-9_-]+$") BRANCH_RE = re.compile(r"^[A-Za-z0-9._/-]+$") @@ -21,11 +23,6 @@ "release": {"admin"}, } FORK_BLOCKED_COMMANDS = {"build", "build-and-upload", "release"} -RELEASE_WORKFLOWS = [ - "build-release.yaml", - "build-release-mac.yaml", - "build-release-windows.yaml", -] MAX_COMMENT_LENGTH = 1024 DISPATCH_WORKFLOW = "manual-build-upload.yaml" RUN_LOOKUP_ATTEMPTS = 10 @@ -663,28 +660,19 @@ def main(): if command == "release": for kernel_name in kernels: - for workflow in RELEASE_WORKFLOWS: - release_dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" - dispatch_key = make_dispatch_key(issue_number, f"{kernel_name}-{workflow}") - dispatch_body = { - "ref": default_branch, - "inputs": { - "kernel_name": kernel_name, - "dispatch_key": dispatch_key, - }, - } - try: - print( - f"Dispatching {workflow} for kernel `{kernel_name}`" - ) - github_api_request(release_dispatch_url, token, method="POST", data=dispatch_body) - dispatches.append( - DispatchResult(kernel_name=f"{kernel_name} ({workflow})", dispatch_key=dispatch_key) - ) - except urllib.error.HTTPError as e: - err_text = e.read().decode("utf-8", errors="replace") - print(err_text, file=sys.stderr) - failed.append((f"{kernel_name} ({workflow})", e.code)) + release_result = do_dispatch_release( + kernel_name, + token=token, + repo=repository, + ref=default_branch, + dispatch_key_prefix=f"pr{issue_number}-", + ) + for wf, dk in release_result.dispatched: + dispatches.append( + DispatchResult(kernel_name=f"{kernel_name} ({wf})", dispatch_key=dk) + ) + for wf, code in release_result.failed: + failed.append((f"{kernel_name} ({wf})", code)) else: for kernel_name in kernels: dispatch_key = make_dispatch_key(issue_number, kernel_name) diff --git a/.github/workflows/build-release-dispatch.yaml b/.github/workflows/build-release-dispatch.yaml new file mode 100644 index 00000000..e248dcb4 --- /dev/null +++ b/.github/workflows/build-release-dispatch.yaml @@ -0,0 +1,67 @@ +name: Build Release (Dispatch) +run-name: >- + Build Release (Dispatch) / ${{ inputs.kernel_name || github.event.pull_request.title || '' }} / request=${{ inputs.dispatch_key || '' }} +on: + pull_request: + types: [closed] + workflow_dispatch: + inputs: + kernel_name: + description: "Kernel directory name to build" + required: true + type: string + dispatch_key: + description: "Unique key for matching this run back to a bot dispatch" + required: false + type: string + +permissions: + actions: write + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + setup: + if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true + runs-on: ubuntu-latest + outputs: + skip: ${{ steps.validate.outputs.skip }} + kernel: ${{ steps.validate.outputs.kernel }} + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Validate kernel directory + id: validate + env: + PR_TITLE: ${{ github.event.pull_request.title }} + run: | + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + KERNEL="${{ inputs.kernel_name }}" + if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + else + echo "skip=true" >> $GITHUB_OUTPUT + fi + else + if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "release"); then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + else + echo "skip=true" >> $GITHUB_OUTPUT + fi + fi + - name: Print result + run: | + echo "skip=${{ steps.validate.outputs.skip }}" + echo "kernel=${{ steps.validate.outputs.kernel }}" + - name: Dispatch release workflows + if: steps.validate.outputs.skip == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + KERNEL="${{ steps.validate.outputs.kernel }}" + REF="${{ github.event.repository.default_branch || 'main' }}" + python3 .github/scripts/dispatch_release.py "$KERNEL" --ref "$REF" diff --git a/.github/workflows/build-release-mac.yaml b/.github/workflows/build-release-mac.yaml index 20f9b416..4c5c375c 100644 --- a/.github/workflows/build-release-mac.yaml +++ b/.github/workflows/build-release-mac.yaml @@ -2,8 +2,6 @@ name: Build Release (macOS) run-name: >- Build Release (macOS) / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} on: - pull_request: - types: [closed] workflow_dispatch: inputs: kernel_name: @@ -20,7 +18,6 @@ concurrency: jobs: build-kernel: - if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true runs-on: macos-26 steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -39,24 +36,13 @@ jobs: USER: runner - name: Validate kernel directory id: validate - env: - PR_TITLE: ${{ github.event.pull_request.title }} run: | - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - KERNEL="${{ inputs.kernel_name }}" - if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi + KERNEL="${{ inputs.kernel_name }}" + if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT else - if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "release"); then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi + echo "skip=true" >> $GITHUB_OUTPUT fi - name: Install Metal toolchain if: steps.validate.outputs.skip == 'false' diff --git a/.github/workflows/build-release-windows.yaml b/.github/workflows/build-release-windows.yaml index 0b897eb8..c2708177 100644 --- a/.github/workflows/build-release-windows.yaml +++ b/.github/workflows/build-release-windows.yaml @@ -2,10 +2,6 @@ name: Build Release (Windows) run-name: >- Build Release (Windows) / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} on: - pull_request: - types: [closed] - paths-ignore: - - "**/README.md" workflow_dispatch: inputs: kernel_name: @@ -23,7 +19,6 @@ concurrency: jobs: build-kernel: - if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true strategy: matrix: os: [windows-2022] @@ -54,26 +49,14 @@ jobs: - name: Validate kernel directory id: validate shell: pwsh - env: - PR_TITLE: ${{ github.event.pull_request.title }} run: | $ErrorActionPreference = "Continue" - if ("${{ github.event_name }}" -eq "workflow_dispatch") { - $KERNEL = "${{ inputs.kernel_name }}" - if ((Test-Path "$KERNEL") -and (Test-Path "$KERNEL/flake.nix") -and (Test-Path "$KERNEL/build.toml")) { - echo "kernel=$KERNEL" >> $env:GITHUB_OUTPUT - echo "skip=false" >> $env:GITHUB_OUTPUT - } else { - echo "skip=true" >> $env:GITHUB_OUTPUT - } + $KERNEL = "${{ inputs.kernel_name }}" + if ((Test-Path "$KERNEL") -and (Test-Path "$KERNEL/flake.nix") -and (Test-Path "$KERNEL/build.toml")) { + echo "kernel=$KERNEL" >> $env:GITHUB_OUTPUT + echo "skip=false" >> $env:GITHUB_OUTPUT } else { - $KERNEL = python .github/workflows/validate-kernel-pr.py "release" 2>&1 - if ($LASTEXITCODE -eq 0) { - echo "kernel=$KERNEL" >> $env:GITHUB_OUTPUT - echo "skip=false" >> $env:GITHUB_OUTPUT - } else { - echo "skip=true" >> $env:GITHUB_OUTPUT - } + echo "skip=true" >> $env:GITHUB_OUTPUT } exit 0 diff --git a/.github/workflows/build-release.yaml b/.github/workflows/build-release.yaml index 3a2864d6..621ecbe4 100644 --- a/.github/workflows/build-release.yaml +++ b/.github/workflows/build-release.yaml @@ -2,8 +2,6 @@ name: Build Release run-name: >- Build Release / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} on: - pull_request: - types: [closed] workflow_dispatch: inputs: kernel_name: @@ -20,7 +18,6 @@ concurrency: jobs: setup: - if: github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true runs-on: ubuntu-latest outputs: skip: ${{ steps.validate.outputs.skip }} @@ -41,24 +38,13 @@ jobs: USER: runner - name: Validate kernel directory id: validate - env: - PR_TITLE: ${{ github.event.pull_request.title }} run: | - if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then - KERNEL="${{ inputs.kernel_name }}" - if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi + KERNEL="${{ inputs.kernel_name }}" + if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT else - if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "release"); then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi + echo "skip=true" >> $GITHUB_OUTPUT fi - name: Generate build matrix if: steps.validate.outputs.skip == 'false' From 1c4bac140a5c2c50201e633749853328a09014ba Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 8 May 2026 13:14:15 -0400 Subject: [PATCH 2/7] fix: support local build tests via act --- .github/scripts/dispatch_release.py | 193 ++++++++++++++----- .github/workflows/build-release-mac.yaml | 13 +- .github/workflows/build-release-windows.yaml | 31 +-- .github/workflows/build-release.yaml | 7 +- 4 files changed, 174 insertions(+), 70 deletions(-) diff --git a/.github/scripts/dispatch_release.py b/.github/scripts/dispatch_release.py index 278bd436..e7e64ac5 100644 --- a/.github/scripts/dispatch_release.py +++ b/.github/scripts/dispatch_release.py @@ -14,10 +14,12 @@ import re import subprocess import sys +import tomllib import urllib.error import urllib.request import uuid from dataclasses import dataclass, field +from pathlib import Path RELEASE_WORKFLOWS = [ @@ -44,19 +46,34 @@ def github_api_request( if data is not None: body = json.dumps(data).encode("utf-8") - # req = urllib.request.Request( - # url=url, - # data=body, - # method=method, - # headers={ - # "Accept": "application/vnd.github+json", - # "Authorization": f"Bearer {token}", - # "X-GitHub-Api-Version": "2022-11-28", - # "Content-Type": "application/json", - # }, - # ) - # with urllib.request.urlopen(req) as resp: - # return resp.status, resp.read().decode("utf-8") + req = urllib.request.Request( + url=url, + data=body, + method=method, + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + "Content-Type": "application/json", + }, + ) + with urllib.request.urlopen(req) as resp: + return resp.status, resp.read().decode("utf-8") + + +def run_local(workflow: str, kernel_name: str, *, skip_build: bool = False) -> bool: + """Run a release workflow locally via act.""" + cmd = [ + "act", "workflow_dispatch", + "--container-options", "--privileged", + "-W", f".github/workflows/{workflow}", + "--input", f"kernel_name={kernel_name}", + ] + if skip_build: + cmd.extend(["--input", "skip_build=true"]) + print(f"Running locally: {' '.join(cmd)}") + result = subprocess.run(cmd) + return result.returncode == 0 def get_token() -> str | None: @@ -97,14 +114,56 @@ def get_repo() -> str | None: return None +BACKEND_TO_WORKFLOWS = { + "cuda": {"build-release.yaml", "build-release-windows.yaml"}, + "cpu": {"build-release.yaml"}, + "rocm": {"build-release.yaml"}, + "metal": {"build-release-mac.yaml"}, + "xpu": {"build-release.yaml", "build-release-windows.yaml"}, +} + + +def read_backends(kernel_name: str) -> list[str] | None: + """Read the backends list from a kernel's build.toml. Returns None if not found.""" + build_toml = Path(kernel_name) / "build.toml" + if not build_toml.exists(): + return None + with open(build_toml, "rb") as f: + config = tomllib.load(f) + backends = config.get("general", {}).get("backends") + if backends is None: + backends = config.get("backends") + if isinstance(backends, list): + return backends + return None + + def select_workflows(kernel_name: str) -> list[str]: """ - Determine which release workflows to dispatch for this kernel. + Determine which release workflows to dispatch based on the kernel's + backends declared in build.toml. - Currently returns all 3 workflows. This is the extension point for - future filtering logic (e.g., skip mac for CUDA-only kernels). + Mapping: + cuda, cpu, rocm -> build-release.yaml (Linux) + metal -> build-release-mac.yaml (macOS) + xpu -> build-release-windows.yaml (Windows) + + Falls back to all workflows if build.toml can't be read. """ - return list(RELEASE_WORKFLOWS) + backends = read_backends(kernel_name) + if backends is None: + print(f"Could not read backends for {kernel_name}, dispatching all workflows") + return set(RELEASE_WORKFLOWS) + + workflows = set() + for b in backends: + workflows.update(BACKEND_TO_WORKFLOWS.get(b, set())) + + if not workflows: + print(f"No known backends found for {kernel_name}: {backends}, dispatching all workflows") + return set(RELEASE_WORKFLOWS) + + return workflows def dispatch_release( @@ -114,6 +173,8 @@ def dispatch_release( repo: str, ref: str = "main", dispatch_key_prefix: str = "", + local: bool = False, + skip_build: bool = False, ) -> ReleaseDispatchResult: """ Dispatch the appropriate release workflows for a kernel. @@ -138,7 +199,7 @@ def dispatch_release( result = ReleaseDispatchResult(kernel_name=kernel_name) workflows = select_workflows(kernel_name) - skipped_workflows = set(RELEASE_WORKFLOWS) - set(workflows) + skipped_workflows = set(RELEASE_WORKFLOWS) - workflows result.skipped = sorted(skipped_workflows) api_base = f"https://api.github.com/repos/{repo}" @@ -146,22 +207,31 @@ def dispatch_release( dispatch_key = ( f"{dispatch_key_prefix}{kernel_name}-{workflow}-{uuid.uuid4().hex[:12]}" ) - dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" - dispatch_body = { - "ref": ref, - "inputs": { + if local: + if run_local(workflow, kernel_name, skip_build=skip_build): + result.dispatched.append((workflow, dispatch_key)) + else: + result.failed.append((workflow, 0)) + else: + dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" + inputs = { "kernel_name": kernel_name, "dispatch_key": dispatch_key, - }, - } - try: - print(f"Dispatching {workflow} for kernel `{kernel_name}` on ref `{ref}`") - github_api_request(dispatch_url, token, method="POST", data=dispatch_body) - result.dispatched.append((workflow, dispatch_key)) - except urllib.error.HTTPError as e: - err_text = e.read().decode("utf-8", errors="replace") - print(f"Failed to dispatch {workflow} (HTTP {e.code}): {err_text}", file=sys.stderr) - result.failed.append((workflow, e.code)) + } + if skip_build: + inputs["skip_build"] = "true" + dispatch_body = { + "ref": ref, + "inputs": inputs, + } + try: + print(f"Dispatching {workflow} for kernel `{kernel_name}` on ref `{ref}`") + github_api_request(dispatch_url, token, method="POST", data=dispatch_body) + result.dispatched.append((workflow, dispatch_key)) + except urllib.error.HTTPError as e: + err_text = e.read().decode("utf-8", errors="replace") + print(f"Failed to dispatch {workflow} (HTTP {e.code}): {err_text}", file=sys.stderr) + result.failed.append((workflow, e.code)) return result @@ -177,30 +247,49 @@ def main() -> int: parser.add_argument( "--repo", default=None, help="GitHub repo in owner/repo format (default: auto-detect)" ) + parser.add_argument( + "--local", action="store_true", + help="Run release workflows locally via act instead of dispatching remotely", + ) + parser.add_argument( + "--skip-build", action="store_true", + help="Skip build and upload steps (for testing workflow plumbing)", + ) args = parser.parse_args() - token = get_token() - if not token: - print( - "Error: No GitHub token found. Set GITHUB_TOKEN or run `gh auth login`.", - file=sys.stderr, + if args.local: + result = dispatch_release( + args.kernel_name, + token="", + repo="", + ref=args.ref, + local=True, + skip_build=args.skip_build, ) - return 1 - - repo = args.repo or get_repo() - if not repo: - print( - "Error: Cannot determine repository. Set GITHUB_REPOSITORY or use --repo.", - file=sys.stderr, + else: + token = get_token() + if not token: + print( + "Error: No GitHub token found. Set GITHUB_TOKEN or run `gh auth login`.", + file=sys.stderr, + ) + return 1 + + repo = args.repo or get_repo() + if not repo: + print( + "Error: Cannot determine repository. Set GITHUB_REPOSITORY or use --repo.", + file=sys.stderr, + ) + return 1 + + result = dispatch_release( + args.kernel_name, + token=token, + repo=repo, + ref=args.ref, + skip_build=args.skip_build, ) - return 1 - - result = dispatch_release( - args.kernel_name, - token=token, - repo=repo, - ref=args.ref, - ) if result.dispatched: print(f"\nDispatched ({len(result.dispatched)}):") diff --git a/.github/workflows/build-release-mac.yaml b/.github/workflows/build-release-mac.yaml index 4c5c375c..5b68d69e 100644 --- a/.github/workflows/build-release-mac.yaml +++ b/.github/workflows/build-release-mac.yaml @@ -12,6 +12,11 @@ on: description: "Unique key for matching this run back to a bot dispatch" required: false type: string + skip_build: + description: "Skip build and upload steps (for testing workflow plumbing)" + required: false + type: boolean + default: false concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -45,15 +50,15 @@ jobs: echo "skip=true" >> $GITHUB_OUTPUT fi - name: Install Metal toolchain - if: steps.validate.outputs.skip == 'false' + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true run: xcodebuild -downloadComponent MetalToolchain - name: Build kernel - if: steps.validate.outputs.skip == 'false' + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true run: | KERNEL="${{ steps.validate.outputs.kernel }}" ( cd "$KERNEL" && nix build -L ) - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -62,7 +67,7 @@ jobs: nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | diff --git a/.github/workflows/build-release-windows.yaml b/.github/workflows/build-release-windows.yaml index c2708177..b794e6c6 100644 --- a/.github/workflows/build-release-windows.yaml +++ b/.github/workflows/build-release-windows.yaml @@ -12,6 +12,11 @@ on: description: "Unique key for matching this run back to a bot dispatch" required: false type: string + skip_build: + description: "Skip build and upload steps (for testing workflow plumbing)" + required: false + type: boolean + default: false concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -103,7 +108,7 @@ jobs: } - name: Kernel Info - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh run: | $KERNEL = "${{ steps.validate.outputs.kernel }}" @@ -111,7 +116,7 @@ jobs: - name: Kernel extract required builder version id: extract-builder-version - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh run: | $KERNEL = "${{ steps.validate.outputs.kernel }}" @@ -121,33 +126,33 @@ jobs: echo "revision=$revision" >> $env:GITHUB_OUTPUT - uses: Jimver/cuda-toolkit@b6fc3a9f3f15256d9d94ffe1254f9c5a2565cde6 # v0.2.30 - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'cuda' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'cuda' id: setup-cuda-toolkit with: cuda: ${{ matrix.platform.cuda }} - name: Setup Intel oneAPI - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'xpu' && steps.check-backend.outputs.needs_oneapi == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'xpu' && steps.check-backend.outputs.needs_oneapi == 'true' shell: pwsh run: | & "$env:GITHUB_WORKSPACE\.github\scripts\windows\install-oneapi.ps1" -OneApiVersion "${{ matrix.platform.oneapi }}" -OneApiUrl "${{ matrix.platform.oneapi_url }}" - name: Setup Python - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python }} - name: Install PyTorch (CUDA) - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'cuda' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'cuda' run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.platform.wheel }} - name: Install PyTorch (XPU) - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'xpu' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'xpu' run: pip3 install torch==${{ matrix.platform.torch_version }} --index-url https://download.pytorch.org/whl/xpu - name: Checkout kernels - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true id: checkout-kernels uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: @@ -156,7 +161,7 @@ jobs: path: kernels - name: Cache Rust build - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 with: path: | @@ -168,13 +173,13 @@ jobs: ${{ runner.os }}-rust-debug- - name: Build kernel-builder - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true working-directory: kernels\kernel-builder shell: pwsh run: cargo build - name: Build kernel - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh env: KERNEL_SOURCE: ${{ steps.validate.outputs.kernel }} @@ -227,7 +232,7 @@ jobs: Pop-Location - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -240,7 +245,7 @@ jobs: & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "kernels-community/$env:KERNEL_SOURCE" - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/.github/workflows/build-release.yaml b/.github/workflows/build-release.yaml index 621ecbe4..3eb35dd4 100644 --- a/.github/workflows/build-release.yaml +++ b/.github/workflows/build-release.yaml @@ -12,6 +12,11 @@ on: description: "Unique key for matching this run back to a bot dispatch" required: false type: string + skip_build: + description: "Skip build and upload steps (for testing workflow plumbing)" + required: false + type: boolean + default: false concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -60,7 +65,7 @@ jobs: build-kernel: needs: setup - if: needs.setup.outputs.skip == 'false' + if: needs.setup.outputs.skip == 'false' && inputs.skip_build != true strategy: fail-fast: false matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} From 5b03df48b8b960b0cc4b8e6036ed3e6bdf9fb6fb Mon Sep 17 00:00:00 2001 From: drbh Date: Fri, 8 May 2026 13:16:38 -0400 Subject: [PATCH 3/7] feat: reuse release workflows for all bot interactions --- .github/scripts/dispatch_release.py | 61 +++++++- .github/scripts/pr_comment_kernel_bot.py | 80 ++++------- .github/workflows/build-release-mac.yaml | 46 +++++- .github/workflows/build-release-windows.yaml | 47 ++++++- .github/workflows/build-release.yaml | 55 +++++++- .github/workflows/manual-build-upload.yaml | 140 ------------------- 6 files changed, 216 insertions(+), 213 deletions(-) delete mode 100644 .github/workflows/manual-build-upload.yaml diff --git a/.github/scripts/dispatch_release.py b/.github/scripts/dispatch_release.py index e7e64ac5..83278d2d 100644 --- a/.github/scripts/dispatch_release.py +++ b/.github/scripts/dispatch_release.py @@ -61,7 +61,15 @@ def github_api_request( return resp.status, resp.read().decode("utf-8") -def run_local(workflow: str, kernel_name: str, *, skip_build: bool = False) -> bool: +def run_local( + workflow: str, + kernel_name: str, + *, + skip_build: bool = False, + pr_number: str = "", + target_branch: str = "", + upload: bool = True, +) -> bool: """Run a release workflow locally via act.""" cmd = [ "act", "workflow_dispatch", @@ -71,6 +79,12 @@ def run_local(workflow: str, kernel_name: str, *, skip_build: bool = False) -> b ] if skip_build: cmd.extend(["--input", "skip_build=true"]) + if pr_number: + cmd.extend(["--input", f"pr_number={pr_number}"]) + if target_branch: + cmd.extend(["--input", f"target_branch={target_branch}"]) + if not upload: + cmd.extend(["--input", "upload=false"]) print(f"Running locally: {' '.join(cmd)}") result = subprocess.run(cmd) return result.returncode == 0 @@ -175,6 +189,9 @@ def dispatch_release( dispatch_key_prefix: str = "", local: bool = False, skip_build: bool = False, + pr_number: str = "", + target_branch: str = "", + upload: bool = True, ) -> ReleaseDispatchResult: """ Dispatch the appropriate release workflows for a kernel. @@ -185,6 +202,11 @@ def dispatch_release( repo: GitHub repository in "owner/repo" format. ref: Git ref to dispatch against (default "main"). dispatch_key_prefix: Optional prefix for dispatch keys (e.g. "pr42-"). + local: Run locally via act instead of remote dispatch. + skip_build: Skip build and upload steps. + pr_number: Optional PR number to checkout before building. + target_branch: Target branch for upload. + upload: Whether to upload after build. Returns: ReleaseDispatchResult with dispatched/failed/skipped lists. @@ -208,7 +230,13 @@ def dispatch_release( f"{dispatch_key_prefix}{kernel_name}-{workflow}-{uuid.uuid4().hex[:12]}" ) if local: - if run_local(workflow, kernel_name, skip_build=skip_build): + if run_local( + workflow, kernel_name, + skip_build=skip_build, + pr_number=pr_number, + target_branch=target_branch, + upload=upload, + ): result.dispatched.append((workflow, dispatch_key)) else: result.failed.append((workflow, 0)) @@ -220,6 +248,12 @@ def dispatch_release( } if skip_build: inputs["skip_build"] = "true" + if pr_number: + inputs["pr_number"] = pr_number + if target_branch: + inputs["target_branch"] = target_branch + if not upload: + inputs["upload"] = "false" dispatch_body = { "ref": ref, "inputs": inputs, @@ -255,8 +289,27 @@ def main() -> int: "--skip-build", action="store_true", help="Skip build and upload steps (for testing workflow plumbing)", ) + parser.add_argument( + "--pr-number", default="", + help="PR number to checkout before building", + ) + parser.add_argument( + "--target-branch", default="", + help="Target branch for upload", + ) + parser.add_argument( + "--no-upload", action="store_true", + help="Build only, do not upload", + ) args = parser.parse_args() + common = dict( + skip_build=args.skip_build, + pr_number=args.pr_number, + target_branch=args.target_branch, + upload=not args.no_upload, + ) + if args.local: result = dispatch_release( args.kernel_name, @@ -264,7 +317,7 @@ def main() -> int: repo="", ref=args.ref, local=True, - skip_build=args.skip_build, + **common, ) else: token = get_token() @@ -288,7 +341,7 @@ def main() -> int: token=token, repo=repo, ref=args.ref, - skip_build=args.skip_build, + **common, ) if result.dispatched: diff --git a/.github/scripts/pr_comment_kernel_bot.py b/.github/scripts/pr_comment_kernel_bot.py index 6b2ea11d..c9b75a11 100644 --- a/.github/scripts/pr_comment_kernel_bot.py +++ b/.github/scripts/pr_comment_kernel_bot.py @@ -24,7 +24,6 @@ } FORK_BLOCKED_COMMANDS = {"build", "build-and-upload", "release"} MAX_COMMENT_LENGTH = 1024 -DISPATCH_WORKFLOW = "manual-build-upload.yaml" RUN_LOOKUP_ATTEMPTS = 10 RUN_LOOKUP_SLEEP_SECONDS = 2 RUN_LOOKUP_PAGE_SIZE = 100 @@ -222,7 +221,7 @@ def resolve_dispatch_run_urls( return if workflows is None: - workflows = [DISPATCH_WORKFLOW] + workflows = RELEASE_WORKFLOWS for attempt in range(RUN_LOOKUP_ATTEMPTS): for workflow in workflows: @@ -295,7 +294,7 @@ def comment_base_lines( ] if pr_head_sha: lines.append(f"PR head SHA: `{pr_head_sha}`") - lines.append(f"Workflow: `{DISPATCH_WORKFLOW}`") + lines.append(f"Workflows: `{', '.join(RELEASE_WORKFLOWS)}`") return lines @@ -543,27 +542,22 @@ def main(): ) return 0 - dispatch_url = f"{api_base}/actions/workflows/{DISPATCH_WORKFLOW}/dispatches" if command == "build": target_branch = requested_branch or f"pr-{issue_number}" dispatch_pr_number = str(issue_number) - upload_flag = "false" - allow_main_dispatch = "false" + dispatch_upload = False elif command == "build-and-upload": target_branch = requested_branch or f"pr-{issue_number}" dispatch_pr_number = str(issue_number) - upload_flag = "true" - allow_main_dispatch = "false" + dispatch_upload = True elif command == "release": - target_branch = requested_branch or default_branch + target_branch = requested_branch or "" dispatch_pr_number = "" - upload_flag = "true" - allow_main_dispatch = "true" - else: + dispatch_upload = True + else: # merge-and-upload target_branch = requested_branch or "main" dispatch_pr_number = "" - upload_flag = "true" - allow_main_dispatch = "true" + dispatch_upload = True mode_text = { "build": "build only", @@ -658,47 +652,23 @@ def main(): dispatches = [] failed = [] - if command == "release": - for kernel_name in kernels: - release_result = do_dispatch_release( - kernel_name, - token=token, - repo=repository, - ref=default_branch, - dispatch_key_prefix=f"pr{issue_number}-", + for kernel_name in kernels: + release_result = do_dispatch_release( + kernel_name, + token=token, + repo=repository, + ref=default_branch, + dispatch_key_prefix=f"pr{issue_number}-", + pr_number=dispatch_pr_number, + target_branch=target_branch, + upload=dispatch_upload, + ) + for wf, dk in release_result.dispatched: + dispatches.append( + DispatchResult(kernel_name=f"{kernel_name} ({wf})", dispatch_key=dk) ) - for wf, dk in release_result.dispatched: - dispatches.append( - DispatchResult(kernel_name=f"{kernel_name} ({wf})", dispatch_key=dk) - ) - for wf, code in release_result.failed: - failed.append((f"{kernel_name} ({wf})", code)) - else: - for kernel_name in kernels: - dispatch_key = make_dispatch_key(issue_number, kernel_name) - dispatch_body = { - "ref": default_branch, - "inputs": { - "kernel_name": kernel_name, - "pr_number": dispatch_pr_number, - "target_branch": target_branch, - "upload": upload_flag, - "allow_main_dispatch": allow_main_dispatch, - "dispatch_key": dispatch_key, - }, - } - try: - print( - f"Dispatching workflow for command `{command}`, kernel `{kernel_name}`, branch `{target_branch}`" - ) - github_api_request(dispatch_url, token, method="POST", data=dispatch_body) - dispatches.append( - DispatchResult(kernel_name=kernel_name, dispatch_key=dispatch_key) - ) - except urllib.error.HTTPError as e: - err_text = e.read().decode("utf-8", errors="replace") - print(err_text, file=sys.stderr) - failed.append((kernel_name, e.code)) + for wf, code in release_result.failed: + failed.append((f"{kernel_name} ({wf})", code)) resolve_dispatch_run_urls( api_base, @@ -706,7 +676,7 @@ def main(): repository, default_branch, dispatches, - workflows=RELEASE_WORKFLOWS if command == "release" else None, + workflows=RELEASE_WORKFLOWS, ) comment_written = try_send_issue_comment( diff --git a/.github/workflows/build-release-mac.yaml b/.github/workflows/build-release-mac.yaml index 5b68d69e..1a909b28 100644 --- a/.github/workflows/build-release-mac.yaml +++ b/.github/workflows/build-release-mac.yaml @@ -17,6 +17,21 @@ on: required: false type: boolean default: false + pr_number: + description: "Optional PR number to checkout before building" + required: false + type: string + default: "" + target_branch: + description: "Target branch for upload (default: repo default)" + required: false + type: string + default: "" + upload: + description: "Whether to upload after build" + required: false + type: boolean + default: true concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -25,7 +40,24 @@ jobs: build-kernel: runs-on: macos-26 steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Validate PR number + if: inputs.pr_number != '' + run: | + case "${{ inputs.pr_number }}" in + ''|*[!0-9]*) + echo "Invalid pr_number input: must be numeric" + exit 1 + ;; + esac + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 with: extra-conf: | @@ -58,16 +90,20 @@ jobs: KERNEL="${{ steps.validate.outputs.kernel }}" ( cd "$KERNEL" && nix build -L ) - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.upload != false env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | KERNEL="${{ steps.validate.outputs.kernel }}" cd "$KERNEL" - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" + BRANCH_FLAG="" + if [ -n "${{ inputs.target_branch }}" ]; then + BRANCH_FLAG="--branch ${{ inputs.target_branch }}" + fi + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.upload != false && inputs.target_branch == '' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | diff --git a/.github/workflows/build-release-windows.yaml b/.github/workflows/build-release-windows.yaml index b794e6c6..2bfeee5a 100644 --- a/.github/workflows/build-release-windows.yaml +++ b/.github/workflows/build-release-windows.yaml @@ -17,6 +17,21 @@ on: required: false type: boolean default: false + pr_number: + description: "Optional PR number to checkout before building" + required: false + type: string + default: "" + target_branch: + description: "Target branch for upload (default: repo default)" + required: false + type: string + default: "" + upload: + description: "Whether to upload after build" + required: false + type: boolean + default: true concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} @@ -49,7 +64,23 @@ jobs: runs-on: windows-2022 steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Validate PR number + if: inputs.pr_number != '' + shell: pwsh + run: | + if ("${{ inputs.pr_number }}" -notmatch '^\d+$') { + Write-Error "Invalid pr_number input: must be numeric" + exit 1 + } + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Validate kernel directory id: validate @@ -232,20 +263,26 @@ jobs: Pop-Location - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.upload != false shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} KERNEL_SOURCE: ${{ steps.validate.outputs.kernel }} + TARGET_BRANCH: ${{ inputs.target_branch }} run: | $KB = "$env:GITHUB_WORKSPACE\kernels\kernel-builder\target\debug\kernel-builder.exe" + $branchArgs = @() + if ($env:TARGET_BRANCH -ne "") { + $branchArgs = @("--branch", $env:TARGET_BRANCH) + } + # Upload to both model and kernel repo types - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "kernels-community/$env:KERNEL_SOURCE" - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "kernels-community/$env:KERNEL_SOURCE" + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "kernels-community/$env:KERNEL_SOURCE" @branchArgs + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "kernels-community/$env:KERNEL_SOURCE" @branchArgs - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.upload != false && inputs.target_branch == '' shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} diff --git a/.github/workflows/build-release.yaml b/.github/workflows/build-release.yaml index 3eb35dd4..6a2a11c2 100644 --- a/.github/workflows/build-release.yaml +++ b/.github/workflows/build-release.yaml @@ -17,6 +17,21 @@ on: required: false type: boolean default: false + pr_number: + description: "Optional PR number to checkout before building" + required: false + type: string + default: "" + target_branch: + description: "Target branch for upload (default: repo default)" + required: false + type: string + default: "" + upload: + description: "Whether to upload after build" + required: false + type: boolean + default: true concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true @@ -29,7 +44,25 @@ jobs: kernel: ${{ steps.validate.outputs.kernel }} matrix: ${{ steps.matrix.outputs.matrix }} steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Validate PR number + if: inputs.pr_number != '' + id: validate-pr + run: | + case "${{ inputs.pr_number }}" in + ''|*[!0-9]*) + echo "Invalid pr_number input: must be numeric" + exit 1 + ;; + esac + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 with: extra-conf: | @@ -73,7 +106,15 @@ jobs: group: ${{ matrix.runner }} timeout-minutes: 1200 # 20h steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 with: extra-conf: | @@ -92,14 +133,20 @@ jobs: KERNEL="${{ needs.setup.outputs.kernel }}" ( cd "$KERNEL" && nix build -L .#backendBundle.${{ matrix.backend }} && ls -l result/ ) - name: Upload kernel to Hub + if: inputs.upload != false env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | KERNEL="${{ needs.setup.outputs.kernel }}" cd "$KERNEL" - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" + BRANCH_FLAG="" + if [ -n "${{ inputs.target_branch }}" ]; then + BRANCH_FLAG="--branch ${{ inputs.target_branch }}" + fi + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG - name: Upload v1 kernels to main + if: inputs.upload != false && inputs.target_branch == '' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | diff --git a/.github/workflows/manual-build-upload.yaml b/.github/workflows/manual-build-upload.yaml deleted file mode 100644 index 695183f4..00000000 --- a/.github/workflows/manual-build-upload.yaml +++ /dev/null @@ -1,140 +0,0 @@ -name: Manual Kernel Build -run-name: >- - Manual Kernel Build / ${{ inputs.kernel_name }} / target=${{ inputs.target_branch }} / request=${{ inputs.dispatch_key }} - -on: - workflow_dispatch: - inputs: - kernel_name: - description: "Kernel directory to build and upload (e.g. flash-attn3)" - required: true - pr_number: - description: "Optional PR number to checkout before building" - required: false - default: "" - target_branch: - description: "Target branch on kernels-community/ to publish to" - required: true - upload: - description: "Whether to upload after build (internal use by kernel-bot)" - required: false - default: "true" - allow_main_dispatch: - description: "Allow dispatch from default branch without pr_number (internal)" - required: false - default: "false" - dispatch_key: - description: "Unique request token for correlating workflow runs (internal)" - required: false - default: "manual" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build-and-upload: - runs-on: - group: aws-highmemory-32-plus-nix - steps: - - name: Ensure workflow is not run from main - if: ${{ github.ref == 'refs/heads/main' && inputs.pr_number == '' && inputs.allow_main_dispatch != 'true' }} - run: | - echo "❌ This workflow must be dispatched from a non-main branch." - exit 1 - - - name: Checkout selected branch - if: ${{ inputs.pr_number == '' }} - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - fetch-depth: 0 - - - name: Validate PR number - if: ${{ inputs.pr_number != '' }} - id: validate-pr-number - env: - PR_NUMBER: ${{ inputs.pr_number }} - run: | - set -eu - case "$PR_NUMBER" in - ''|*[!0-9]*) - echo "Invalid pr_number input: must be numeric" - exit 1 - ;; - esac - echo "pr_number=$PR_NUMBER" >> "$GITHUB_OUTPUT" - - - name: Checkout PR branch - if: ${{ inputs.pr_number != '' }} - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: refs/pull/${{ steps.validate-pr-number.outputs.pr_number }}/head - fetch-depth: 0 - - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 2 - cores = 12 - sandbox-fallback = false - - - name: Nix info - run: nix-shell -p nix-info --run "nix-info -m" - - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - - name: Validate kernel directory - id: validate - env: - KERNEL_INPUT: ${{ inputs.kernel_name }} - PR_TITLE: "${{ inputs.kernel_name }}: manual dispatch" - run: | - set -eu - case "$KERNEL_INPUT" in - ''|*[!A-Za-z0-9_-]*) - echo "Invalid kernel_name: must contain only alphanumeric characters, underscores, and hyphens" - exit 1 - ;; - esac - if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "release"); then - echo "kernel=$KERNEL" >> "$GITHUB_OUTPUT" - else - echo "Kernel validation failed." - exit 1 - fi - - - name: Build and copy kernel - run: | - set -eu - KERNEL="${{ steps.validate.outputs.kernel }}" - ( cd "$KERNEL" && nix run -L .#build-and-copy ) - - - name: Validate target branch - if: ${{ inputs.upload == 'true' }} - id: validate-target-branch - env: - TARGET_BRANCH: ${{ inputs.target_branch }} - run: | - set -eu - case "$TARGET_BRANCH" in - ''|*[!A-Za-z0-9/_-]*) - echo "Invalid target_branch: must contain only alphanumeric characters, underscores, hyphens, and slashes" - exit 1 - ;; - esac - echo "target_branch=$TARGET_BRANCH" >> "$GITHUB_OUTPUT" - - - name: Upload kernel - if: ${{ inputs.upload == 'true' }} - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - TARGET_BRANCH: ${{ steps.validate-target-branch.outputs.target_branch }} - run: | - set -eu - KERNEL="${{ steps.validate.outputs.kernel }}" - ( cd "$KERNEL" && nix run github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" --branch "${TARGET_BRANCH}" . ) - ( cd "$KERNEL" && nix run github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" --branch "${TARGET_BRANCH}" . ) From b40d9dbea947281f9fd0026ff6f8cff0cb66dba9 Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 14 May 2026 12:22:52 -0400 Subject: [PATCH 4/7] feat: consolidate build actions and add ability to stage --- .../{dispatch_release.py => dispatch.py} | 124 +++++-- .github/scripts/pr_comment_kernel_bot.py | 20 +- ...{build-release-mac.yaml => build-mac.yaml} | 58 +++- .github/workflows/build-pr-dispatch.yaml | 46 +++ .github/workflows/build-pr-mac.yaml | 54 --- .github/workflows/build-pr-windows.yaml | 228 ------------- .github/workflows/build-pr.yaml | 164 --------- .github/workflows/build-release-dispatch.yaml | 2 +- .github/workflows/build-release.yaml | 164 --------- ...elease-windows.yaml => build-windows.yaml} | 105 +++--- .github/workflows/build.yaml | 322 ++++++++++++++++++ 11 files changed, 598 insertions(+), 689 deletions(-) rename .github/scripts/{dispatch_release.py => dispatch.py} (71%) rename .github/workflows/{build-release-mac.yaml => build-mac.yaml} (63%) create mode 100644 .github/workflows/build-pr-dispatch.yaml delete mode 100644 .github/workflows/build-pr-mac.yaml delete mode 100644 .github/workflows/build-pr-windows.yaml delete mode 100644 .github/workflows/build-pr.yaml delete mode 100644 .github/workflows/build-release.yaml rename .github/workflows/{build-release-windows.yaml => build-windows.yaml} (76%) create mode 100644 .github/workflows/build.yaml diff --git a/.github/scripts/dispatch_release.py b/.github/scripts/dispatch.py similarity index 71% rename from .github/scripts/dispatch_release.py rename to .github/scripts/dispatch.py index 83278d2d..eaf840e0 100644 --- a/.github/scripts/dispatch_release.py +++ b/.github/scripts/dispatch.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 """ -Dispatch release workflows for a kernel. +Dispatch build workflows for a kernel. -Three entrypoints call this script: - 1. The PR-merge dummy workflow (via CLI) - 2. The comment bot (via import) - 3. Local CLI invocation +Four entrypoints call this script: + 1. The PR-merge dispatch workflow (via CLI) + 2. The PR-open dispatch workflow (via CLI) + 3. The comment bot (via import) + 4. Local CLI invocation """ import argparse @@ -23,9 +24,9 @@ RELEASE_WORKFLOWS = [ - "build-release.yaml", - "build-release-mac.yaml", - "build-release-windows.yaml", + "build.yaml", + "build-mac.yaml", + "build-windows.yaml", ] KERNEL_NAME_RE = re.compile(r"^[A-Za-z0-9_-]+$") @@ -65,6 +66,9 @@ def run_local( workflow: str, kernel_name: str, *, + mode: str = "release", + backends: str = "", + repo_prefix: str = "kernels-community", skip_build: bool = False, pr_number: str = "", target_branch: str = "", @@ -76,6 +80,9 @@ def run_local( "--container-options", "--privileged", "-W", f".github/workflows/{workflow}", "--input", f"kernel_name={kernel_name}", + "--input", f"mode={mode}", + "--input", f"backends={backends}", + "--input", f"repo_prefix={repo_prefix}", ] if skip_build: cmd.extend(["--input", "skip_build=true"]) @@ -129,11 +136,19 @@ def get_repo() -> str | None: BACKEND_TO_WORKFLOWS = { - "cuda": {"build-release.yaml", "build-release-windows.yaml"}, - "cpu": {"build-release.yaml"}, - "rocm": {"build-release.yaml"}, - "metal": {"build-release-mac.yaml"}, - "xpu": {"build-release.yaml", "build-release-windows.yaml"}, + "cuda": {"build.yaml", "build-windows.yaml"}, + "cpu": {"build.yaml"}, + "rocm": {"build.yaml"}, + "metal": {"build-mac.yaml"}, + "xpu": {"build.yaml", "build-windows.yaml"}, +} + +# Only these kernels are known to build successfully on Windows. +# Add new entries here as Windows support is validated for a kernel. +WINDOWS_KERNELS = { + "relu", + "activation", + "flash-attn2", } @@ -152,15 +167,15 @@ def read_backends(kernel_name: str) -> list[str] | None: return None -def select_workflows(kernel_name: str) -> list[str]: +def select_workflows(kernel_name: str) -> set[str]: """ - Determine which release workflows to dispatch based on the kernel's + Determine which build workflows to dispatch based on the kernel's backends declared in build.toml. Mapping: - cuda, cpu, rocm -> build-release.yaml (Linux) - metal -> build-release-mac.yaml (macOS) - xpu -> build-release-windows.yaml (Windows) + cuda, cpu, rocm -> build.yaml (Linux) + metal -> build-mac.yaml (macOS) + cuda, xpu -> build-windows.yaml (Windows, allowlisted kernels only) Falls back to all workflows if build.toml can't be read. """ @@ -177,6 +192,11 @@ def select_workflows(kernel_name: str) -> list[str]: print(f"No known backends found for {kernel_name}: {backends}, dispatching all workflows") return set(RELEASE_WORKFLOWS) + # Only dispatch Windows builds for kernels known to build there. + if "build-windows.yaml" in workflows and kernel_name not in WINDOWS_KERNELS: + workflows.discard("build-windows.yaml") + print(f"Skipping Windows build for {kernel_name} (not in WINDOWS_KERNELS allowlist)") + return workflows @@ -186,23 +206,29 @@ def dispatch_release( token: str, repo: str, ref: str = "main", + mode: str = "release", + repo_prefix: str = "kernels-community", dispatch_key_prefix: str = "", local: bool = False, + dry_run: bool = False, skip_build: bool = False, pr_number: str = "", target_branch: str = "", upload: bool = True, ) -> ReleaseDispatchResult: """ - Dispatch the appropriate release workflows for a kernel. + Dispatch the appropriate build workflows for a kernel. Args: kernel_name: Name of the kernel directory. token: GitHub API token. repo: GitHub repository in "owner/repo" format. ref: Git ref to dispatch against (default "main"). + mode: Build mode - "pr" for CI builds, "release" for full builds. + repo_prefix: Hub org prefix for uploads (default "kernels-community"). dispatch_key_prefix: Optional prefix for dispatch keys (e.g. "pr42-"). local: Run locally via act instead of remote dispatch. + dry_run: Print what would be dispatched without actually dispatching. skip_build: Skip build and upload steps. pr_number: Optional PR number to checkout before building. target_branch: Target branch for upload. @@ -220,18 +246,54 @@ def dispatch_release( result = ReleaseDispatchResult(kernel_name=kernel_name) + backends = read_backends(kernel_name) or [] workflows = select_workflows(kernel_name) + + # Invert BACKEND_TO_WORKFLOWS so we can scope backends per workflow. + workflow_to_backends: dict[str, set[str]] = {} + for backend, wfs in BACKEND_TO_WORKFLOWS.items(): + for wf in wfs: + workflow_to_backends.setdefault(wf, set()).add(backend) + skipped_workflows = set(RELEASE_WORKFLOWS) - workflows result.skipped = sorted(skipped_workflows) api_base = f"https://api.github.com/repos/{repo}" for workflow in workflows: + # Only pass backends that this workflow can actually build. + scoped = sorted(b for b in backends if b in workflow_to_backends.get(workflow, set())) + backends_csv = ",".join(scoped) + dispatch_key = ( f"{dispatch_key_prefix}{kernel_name}-{workflow}-{uuid.uuid4().hex[:12]}" ) + if dry_run: + inputs = { + "kernel_name": kernel_name, + "dispatch_key": dispatch_key, + "mode": mode, + "backends": backends_csv, + "repo_prefix": repo_prefix, + } + if skip_build: + inputs["skip_build"] = "true" + if pr_number: + inputs["pr_number"] = pr_number + if target_branch: + inputs["target_branch"] = target_branch + if not upload: + inputs["upload"] = "false" + dispatch_body = {"ref": ref, "inputs": inputs} + print(f"\n[dry-run] {workflow}:") + print(json.dumps(dispatch_body, indent=2)) + result.dispatched.append((workflow, dispatch_key)) + continue if local: if run_local( workflow, kernel_name, + mode=mode, + backends=backends_csv, + repo_prefix=repo_prefix, skip_build=skip_build, pr_number=pr_number, target_branch=target_branch, @@ -245,6 +307,9 @@ def dispatch_release( inputs = { "kernel_name": kernel_name, "dispatch_key": dispatch_key, + "mode": mode, + "backends": backends_csv, + "repo_prefix": repo_prefix, } if skip_build: inputs["skip_build"] = "true" @@ -278,6 +343,10 @@ def main() -> int: parser.add_argument( "--ref", default="main", help="Git ref to dispatch on (default: main)" ) + parser.add_argument( + "--mode", default="release", choices=["pr", "release"], + help="Build mode: pr (CI only) or release (build + upload) (default: release)", + ) parser.add_argument( "--repo", default=None, help="GitHub repo in owner/repo format (default: auto-detect)" ) @@ -301,22 +370,33 @@ def main() -> int: "--no-upload", action="store_true", help="Build only, do not upload", ) + parser.add_argument( + "--dry-run", action="store_true", + help="Print the dispatch payloads without actually dispatching", + ) + parser.add_argument( + "--repo-prefix", default="kernels-community", + help="Hub org prefix for uploads (default: kernels-community)", + ) args = parser.parse_args() common = dict( + mode=args.mode, + repo_prefix=args.repo_prefix, + dry_run=args.dry_run, skip_build=args.skip_build, pr_number=args.pr_number, target_branch=args.target_branch, upload=not args.no_upload, ) - if args.local: + if args.dry_run or args.local: result = dispatch_release( args.kernel_name, token="", - repo="", + repo=args.repo or "", ref=args.ref, - local=True, + local=args.local, **common, ) else: diff --git a/.github/scripts/pr_comment_kernel_bot.py b/.github/scripts/pr_comment_kernel_bot.py index c9b75a11..daf32280 100644 --- a/.github/scripts/pr_comment_kernel_bot.py +++ b/.github/scripts/pr_comment_kernel_bot.py @@ -10,7 +10,7 @@ import urllib.request import uuid -from dispatch_release import RELEASE_WORKFLOWS, dispatch_release as do_dispatch_release +from dispatch import RELEASE_WORKFLOWS, dispatch_release as do_dispatch_release KERNEL_RE = re.compile(r"^[A-Za-z0-9_-]+$") @@ -18,17 +18,17 @@ COMMENT_CHARS_RE = re.compile(r"^/kernel-bot[ A-Za-z0-9_./-]*$") COMMAND_PERMISSIONS = { "build": {"admin", "write"}, - "build-and-upload": {"admin"}, + "build-and-stage": {"admin", "write"}, "merge-and-upload": {"admin"}, "release": {"admin"}, } -FORK_BLOCKED_COMMANDS = {"build", "build-and-upload", "release"} +FORK_BLOCKED_COMMANDS = {"build", "build-and-stage", "release"} MAX_COMMENT_LENGTH = 1024 RUN_LOOKUP_ATTEMPTS = 10 RUN_LOOKUP_SLEEP_SECONDS = 2 RUN_LOOKUP_PAGE_SIZE = 100 COMMAND_USAGE = ( - "Invalid command. Use `/kernel-bot " + "Invalid command. Use `/kernel-bot " " [kernel2 ...] [--branch ]`." ) @@ -375,7 +375,7 @@ def parse_command(comment: str) -> ParsedCommand: if not args: return ParsedCommand( - error="No kernels provided. Use `/kernel-bot [kernel2 ...]`.", + error="No kernels provided. Use `/kernel-bot [kernel2 ...]`.", ) kernels = [] @@ -546,22 +546,26 @@ def main(): target_branch = requested_branch or f"pr-{issue_number}" dispatch_pr_number = str(issue_number) dispatch_upload = False - elif command == "build-and-upload": + dispatch_repo_prefix = "kernels-community" + elif command == "build-and-stage": target_branch = requested_branch or f"pr-{issue_number}" dispatch_pr_number = str(issue_number) dispatch_upload = True + dispatch_repo_prefix = "kernels-staging" elif command == "release": target_branch = requested_branch or "" dispatch_pr_number = "" dispatch_upload = True + dispatch_repo_prefix = "kernels-community" else: # merge-and-upload target_branch = requested_branch or "main" dispatch_pr_number = "" dispatch_upload = True + dispatch_repo_prefix = "kernels-community" mode_text = { "build": "build only", - "build-and-upload": "build and upload", + "build-and-stage": "build and stage", "merge-and-upload": "merge, build and upload", "release": "release (linux + mac + windows)", }[command] @@ -658,6 +662,8 @@ def main(): token=token, repo=repository, ref=default_branch, + mode="release", + repo_prefix=dispatch_repo_prefix, dispatch_key_prefix=f"pr{issue_number}-", pr_number=dispatch_pr_number, target_branch=target_branch, diff --git a/.github/workflows/build-release-mac.yaml b/.github/workflows/build-mac.yaml similarity index 63% rename from .github/workflows/build-release-mac.yaml rename to .github/workflows/build-mac.yaml index 1a909b28..02d63534 100644 --- a/.github/workflows/build-release-mac.yaml +++ b/.github/workflows/build-mac.yaml @@ -1,6 +1,6 @@ -name: Build Release (macOS) +name: Build (macOS) run-name: >- - Build Release (macOS) / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} + Build (macOS) / ${{ inputs.kernel_name || '' }} / mode=${{ inputs.mode || 'release' }} / request=${{ inputs.dispatch_key || '' }} on: workflow_dispatch: inputs: @@ -12,6 +12,11 @@ on: description: "Unique key for matching this run back to a bot dispatch" required: false type: string + mode: + description: "Build mode: pr (CI only) or release (build + upload)" + required: false + type: string + default: "release" skip_build: description: "Skip build and upload steps (for testing workflow plumbing)" required: false @@ -32,14 +37,21 @@ on: required: false type: boolean default: true + repo_prefix: + description: "Hub org prefix for uploads (e.g. kernels-community, kernels-staging)" + required: false + type: string + default: "kernels-community" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: + # Build the Metal kernel; PR mode builds .#ci, release builds the full bundle and uploads. build-kernel: runs-on: macos-26 steps: + # Guard against injection via pr_number input. - name: Validate PR number if: inputs.pr_number != '' run: | @@ -49,6 +61,8 @@ jobs: exit 1 ;; esac + + # When building for a PR, check out the PR head; otherwise use default branch. - name: Checkout PR branch if: inputs.pr_number != '' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -58,6 +72,8 @@ jobs: - name: Checkout default branch if: inputs.pr_number == '' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # Nix toolchain + binary cache setup. - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 with: extra-conf: | @@ -71,6 +87,8 @@ jobs: name: huggingface env: USER: runner + + # Ensure the kernel directory exists and has the required config files. - name: Validate kernel directory id: validate run: | @@ -81,16 +99,36 @@ jobs: else echo "skip=true" >> $GITHUB_OUTPUT fi + + # PR-only: verify the kernel has a Hub repo-id before burning CI time. + - name: Check that repo-id is present + if: steps.validate.outputs.skip == 'false' && inputs.mode == 'pr' + run: | + KERNEL="${{ steps.validate.outputs.kernel }}" + if ! cat $KERNEL/build.toml | nix run nixpkgs#dasel -- -i toml '$root.general.hub.get("repo-id")' &> /dev/null ; then + echo "Mandatory repo-id is missing in $KERNEL/build.toml" + exit 1 + fi + + # Metal toolchain is required for macOS GPU kernel compilation. - name: Install Metal toolchain if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true run: xcodebuild -downloadComponent MetalToolchain + + # PR mode builds the CI target (.#ci); release builds the full bundle. - name: Build kernel if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true run: | KERNEL="${{ steps.validate.outputs.kernel }}" - ( cd "$KERNEL" && nix build -L ) + if [ "${{ inputs.mode }}" = "pr" ]; then + ( cd "$KERNEL" && nix build -L .#ci && ls -l result/ ) + else + ( cd "$KERNEL" && nix build -L ) + fi + + # Upload built artifacts to both model and kernel Hub repos. - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.upload != false + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.mode != 'pr' && inputs.upload != false env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -100,10 +138,12 @@ jobs: if [ -n "${{ inputs.target_branch }}" ]; then BRANCH_FLAG="--branch ${{ inputs.target_branch }}" fi - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "${{ inputs.repo_prefix }}/$KERNEL" $BRANCH_FLAG + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$KERNEL" $BRANCH_FLAG + + # v1 kernels without an explicit branch override also get uploaded to main. - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.upload != false && inputs.target_branch == '' + if: steps.validate.outputs.skip == 'false' && inputs.skip_build != true && inputs.mode != 'pr' && inputs.upload != false && inputs.target_branch == '' env: HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | @@ -114,7 +154,7 @@ jobs: VERSION=$(grep -E '^\s*version\s*=\s*1\s*$' build.toml || true) BRANCH=$(grep -E '^\s*branch\s*=' build.toml || true) if [ -n "$VERSION" ] && [ -z "$BRANCH" ]; then - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" --branch main - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" --branch main + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "${{ inputs.repo_prefix }}/$KERNEL" --branch main + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$KERNEL" --branch main fi fi diff --git a/.github/workflows/build-pr-dispatch.yaml b/.github/workflows/build-pr-dispatch.yaml new file mode 100644 index 00000000..e0f9baf4 --- /dev/null +++ b/.github/workflows/build-pr-dispatch.yaml @@ -0,0 +1,46 @@ +name: Build PR (Dispatch) +on: + pull_request: + paths-ignore: + - "**/README.md" +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +permissions: + actions: write + +jobs: + # Resolve the kernel from the PR title and fan out to per-platform build workflows. + dispatch: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # Extract kernel name from PR title (format: "kernel-name: description") + # and verify the directory has the required flake.nix + build.toml files. + - name: Validate kernel directory + id: validate + env: + PR_TITLE: ${{ github.event.pull_request.title }} + run: | + if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "pr"); then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + else + echo "skip=true" >> $GITHUB_OUTPUT + fi + + # Fan out to per-platform build workflows (linux, mac, windows) based + # on the backends declared in the kernel's build.toml. + - name: Dispatch build workflows + if: steps.validate.outputs.skip == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_REPOSITORY: ${{ github.repository }} + run: | + KERNEL="${{ steps.validate.outputs.kernel }}" + python3 .github/scripts/dispatch.py "$KERNEL" \ + --mode pr \ + --pr-number "${{ github.event.pull_request.number }}" \ + --no-upload diff --git a/.github/workflows/build-pr-mac.yaml b/.github/workflows/build-pr-mac.yaml deleted file mode 100644 index efbbcb31..00000000 --- a/.github/workflows/build-pr-mac.yaml +++ /dev/null @@ -1,54 +0,0 @@ -name: Build PR (macOS) -on: - pull_request: - paths-ignore: - - "**/README.md" -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build-kernel: - runs-on: macos-26 - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 1 - cores = 3 - sandbox = relaxed - - name: Nix info - run: nix-shell -p nix-info --run "nix-info -m" - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Validate kernel directory - id: validate - env: - PR_TITLE: ${{ github.event.pull_request.title }} - run: | - if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "pr"); then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi - - name: "Check that repo-id is present" - if: steps.validate.outputs.skip == 'false' - run: | - KERNEL="${{ steps.validate.outputs.kernel }}" - if ! cat $KERNEL/build.toml | nix run nixpkgs#dasel -- -i toml '$root.general.hub.get("repo-id")' &> /dev/null ; then - echo "Mandatory repo-id is missing in $KERNEL/build.toml" - exit 1 - fi - - name: Install Metal toolchain - if: steps.validate.outputs.skip == 'false' - run: xcodebuild -downloadComponent MetalToolchain - - name: Build kernel - if: steps.validate.outputs.skip == 'false' - run: | - KERNEL="${{ steps.validate.outputs.kernel }}" - ( cd "$KERNEL" && nix build -L .#ci && ls -l result/ ) diff --git a/.github/workflows/build-pr-windows.yaml b/.github/workflows/build-pr-windows.yaml deleted file mode 100644 index 8a56d801..00000000 --- a/.github/workflows/build-pr-windows.yaml +++ /dev/null @@ -1,228 +0,0 @@ -name: Build PR (Windows) -on: - pull_request: - types: [opened, synchronize] - paths-ignore: - - "**/README.md" - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - build-kernel: - strategy: - matrix: - os: [windows-2022] - python: [3.12] - platform: [ - # CUDA platforms - # { backend: 'cuda', torch_version: '2.9.1', cuda: '12.6.3', wheel: '126' }, - { - backend: "cuda", - torch_version: "2.9.1", - cuda: "12.8.1", - wheel: "128", - }, - # { backend: 'cuda', torch_version: '2.9.1', cuda: '13.0.1', wheel: '130' }, - # Intel XPU platform - { - backend: "xpu", - torch_version: "2.10.0", - oneapi: "2025.3.1", - oneapi_url: "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/076e961b-2c29-48a8-9203-c96f00e7051b/intel-oneapi-base-toolkit-2025.3.1.35_offline.exe", - }, - ] - - runs-on: windows-2022 - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - name: Validate kernel directory - id: validate - shell: pwsh - env: - PR_TITLE: ${{ github.event.pull_request.title }} - run: | - $ErrorActionPreference = "Continue" - $KERNEL = python .github/workflows/validate-kernel-pr.py "pr" 2>&1 - if ($LASTEXITCODE -eq 0) { - echo "kernel=$KERNEL" >> $env:GITHUB_OUTPUT - echo "skip=false" >> $env:GITHUB_OUTPUT - } else { - echo "skip=true" >> $env:GITHUB_OUTPUT - } - exit 0 - - - name: Check backend support - id: check-backend - if: steps.validate.outputs.skip == 'false' - shell: pwsh - run: | - $KERNEL = "${{ steps.validate.outputs.kernel }}" - $BACKEND = "${{ matrix.platform.backend }}" - $buildToml = Get-Content "${KERNEL}/build.toml" -Raw - - # Check CUDA minimum version requirement from build.toml [general.cuda] minver - if ($BACKEND -eq "cuda") { - $CUDA_VERSION = "${{ matrix.platform.cuda }}" - if ($buildToml -match 'minver\s*=\s*"([^"]+)"') { - $minver = $matches[1] - # Compare versions: strip patch from CUDA version (e.g. 12.8.1 -> 12.8) - $cudaMajorMinor = ($CUDA_VERSION -split '\.')[0..1] -join '.' - if ([version]$cudaMajorMinor -lt [version]$minver) { - Write-Output "Kernel '$KERNEL' requires CUDA >= $minver but matrix provides $CUDA_VERSION - skipping" - echo "supported=false" >> $env:GITHUB_OUTPUT - exit 0 - } - } - } - - # XPU block list for Windows - these kernels are not compatible with Windows XPU builds - $xpuBlockList = @("megablocks", "flash-attn2") - - # Check if XPU backend and kernel is in block list - if ($BACKEND -eq "xpu" -and $KERNEL -in $xpuBlockList) { - Write-Output "Kernel '$KERNEL' is not compatible with Windows XPU builds - skipping" - Write-Output "Blocked XPU kernels: $($xpuBlockList -join ', ')" - echo "supported=false" >> $env:GITHUB_OUTPUT - exit 0 - } - - # Kernels that require oneAPI setup for XPU builds - $xpuNeedsOneApi = @("relu", "rotary", "rmsnorm") - if ($BACKEND -eq "xpu" -and $KERNEL -in $xpuNeedsOneApi) { - echo "needs_oneapi=true" >> $env:GITHUB_OUTPUT - } else { - echo "needs_oneapi=false" >> $env:GITHUB_OUTPUT - } - - # Check two formats: - # 1. [kernel.*] section with: backend = "xpu" - # 2. [general] section with: backends = ["cuda", "xpu", ...] (can be multi-line) - $kernelPattern = "backend\s*=\s*[`"']${BACKEND}[`"']" - $backendsPattern = "(?s)backends\s*=\s*\[.*?[`"']${BACKEND}[`"'].*?\]" - - if (($buildToml -match $kernelPattern) -or ($buildToml -match $backendsPattern)) { - Write-Output "Kernel '$KERNEL' supports backend '$BACKEND'" - echo "supported=true" >> $env:GITHUB_OUTPUT - } else { - Write-Output "Kernel '$KERNEL' does NOT support backend '$BACKEND' - skipping build" - echo "supported=false" >> $env:GITHUB_OUTPUT - } - - - name: Kernel Info - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - shell: pwsh - run: | - $KERNEL = "${{ steps.validate.outputs.kernel }}" - Write-Output "Building Kernel: $KERNEL" - - - name: Kernel extract required builder version - id: extract-builder-version - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - shell: pwsh - run: | - $KERNEL = "${{ steps.validate.outputs.kernel }}" - $lock = Get-Content "${KERNEL}/flake.lock" | ConvertFrom-Json - $revision = $lock.nodes."kernel-builder".locked.rev - Write-Output "Building Kernel with revision: $revision" - echo "revision=$revision" >> $env:GITHUB_OUTPUT - - - uses: Jimver/cuda-toolkit@b6fc3a9f3f15256d9d94ffe1254f9c5a2565cde6 # v0.2.30 - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'cuda' - id: setup-cuda-toolkit - with: - cuda: ${{ matrix.platform.cuda }} - - - name: Setup Intel oneAPI - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'xpu' && steps.check-backend.outputs.needs_oneapi == 'true' - shell: pwsh - run: | - & "$env:GITHUB_WORKSPACE\.github\scripts\windows\install-oneapi.ps1" -OneApiVersion "${{ matrix.platform.oneapi }}" -OneApiUrl "${{ matrix.platform.oneapi_url }}" - - - name: Setup Python - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: ${{ matrix.python }} - - - name: Install PyTorch (CUDA) - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'cuda' - run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.platform.wheel }} - - - name: Install PyTorch (XPU) - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && matrix.platform.backend == 'xpu' - run: pip3 install torch==${{ matrix.platform.torch_version }} --index-url https://download.pytorch.org/whl/xpu - - - name: Checkout kernels - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - id: checkout-kernels - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - repository: huggingface/kernels - ref: "${{ steps.extract-builder-version.outputs.revision }}" - path: kernels - - - name: Cache Rust build - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 - with: - path: | - kernels/kernel-builder/target - ~/.cargo/registry - ~/.cargo/git - key: ${{ runner.os }}-rust-debug-${{ hashFiles('kernels/kernel-builder/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-rust-debug- - - - name: Build kernel-builder - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - working-directory: kernels\kernel-builder - shell: pwsh - run: cargo build - - - name: Build kernel - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' - shell: pwsh - run: | - $KERNEL = "${{ steps.validate.outputs.kernel }}" - $NEEDS_ONEAPI = "${{ steps.check-backend.outputs.needs_oneapi }}" - - # Initialize oneAPI environment for XPU builds that require it - if ("${{ matrix.platform.backend }}" -eq "xpu" -and $NEEDS_ONEAPI -eq "true") { - $setvarsPath = "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - if (Test-Path $setvarsPath) { - Write-Host "Initializing Intel oneAPI environment for XPU build..." -ForegroundColor Cyan - - # Create a temporary file to capture environment variables - $tempFile = [System.IO.Path]::GetTempFileName() - - # Run setvars.bat and capture all environment variables - cmd.exe /c "`"$setvarsPath`" && set > `"$tempFile`"" - - # Parse and set each environment variable in PowerShell - Get-Content $tempFile | ForEach-Object { - if ($_ -match "^(.*?)=(.*)$") { - $varName = $matches[1] - $varValue = $matches[2] - [System.Environment]::SetEnvironmentVariable($varName, $varValue, [System.EnvironmentVariableTarget]::Process) - } - } - - Remove-Item $tempFile -ErrorAction SilentlyContinue - - # Verify Intel compiler is now in PATH - $icxPath = (Get-Command icx-cl -ErrorAction SilentlyContinue).Path - if ($icxPath) { - Write-Host "Intel oneAPI environment initialized successfully" -ForegroundColor Green - Write-Host "Intel C++ Compiler found at: $icxPath" -ForegroundColor Green - } else { - Write-Error "Intel compiler (icx-cl) still not found in PATH after initialization" - exit 1 - } - } else { - Write-Error "setvars.bat not found at $setvarsPath" - exit 1 - } - } - & "$env:GITHUB_WORKSPACE\kernels\nix-builder\scripts\windows\builder.ps1" -Backend ${{ matrix.platform.backend }} -SourceFolder "$KERNEL" -BuildConfig Release -Build diff --git a/.github/workflows/build-pr.yaml b/.github/workflows/build-pr.yaml deleted file mode 100644 index f85ccc41..00000000 --- a/.github/workflows/build-pr.yaml +++ /dev/null @@ -1,164 +0,0 @@ -name: Build PR -on: - pull_request: - paths-ignore: - - "**/README.md" -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - skip: ${{ steps.validate.outputs.skip }} - kernel: ${{ steps.validate.outputs.kernel }} - matrix: ${{ steps.matrix.outputs.matrix }} - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 2 - cores = 4 - sandbox-fallback = false - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Validate kernel directory - id: validate - env: - PR_TITLE: ${{ github.event.pull_request.title }} - run: | - if KERNEL=$(python3 .github/workflows/validate-kernel-pr.py "pr"); then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi - - name: Check that repo-id is present - if: steps.validate.outputs.skip == 'false' - run: | - KERNEL="${{ steps.validate.outputs.kernel }}" - if ! cat $KERNEL/build.toml | nix run nixpkgs#dasel -- -i toml '$root.general.hub.get("repo-id")' &> /dev/null ; then - echo "Mandatory repo-id is missing in $KERNEL/build.toml" - exit 1 - fi - - name: Generate build matrix - if: steps.validate.outputs.skip == 'false' - id: matrix - env: - KERNEL: ${{ steps.validate.outputs.kernel }} - run: | - KERNEL="${{ steps.validate.outputs.kernel }}" - X86_BACKENDS=$(cd "$KERNEL" && nix eval .#backendCi --apply builtins.attrNames --json --system x86_64-linux) - ARM_BACKENDS=$(cd "$KERNEL" && nix eval .#backendCi --apply builtins.attrNames --json --system aarch64-linux) - MATRIX=$(python3 .github/workflows/generate-build-matrix.py "$X86_BACKENDS" "$ARM_BACKENDS") - echo "matrix=$MATRIX" >> $GITHUB_OUTPUT - - build-kernel: - needs: setup - if: needs.setup.outputs.skip == 'false' - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} - runs-on: - group: ${{ matrix.runner }} - timeout-minutes: 600 # 10h - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = ${{ matrix.max_jobs }} - cores = ${{ matrix.cores }} - sandbox-fallback = false - - name: Nix info - run: nix-shell -p nix-info --run "nix-info -m" - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Build kernel - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - ( cd "$KERNEL" && nix build -L .#backendCi.${{ matrix.backend }} && ls -l result/ ) - - build-ci-test: - needs: setup - if: needs.setup.outputs.skip == 'false' - runs-on: - group: aws-highmemory-32-plus-nix - timeout-minutes: 600 # 10h - outputs: - ci-test-path: ${{ steps.export-closure.outputs.ci-test-path }} - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 2 - cores = 12 - sandbox-fallback = false - - name: Nix info - run: nix-shell -p nix-info --run "nix-info -m" - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Build ci-test - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - ( cd "$KERNEL" && nix build -L .#ci-test ) - - name: Export ci-test closure - id: export-closure - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - CI_TEST_PATH=$(readlink -f "$KERNEL/result") - echo "ci-test-path=$CI_TEST_PATH" >> $GITHUB_OUTPUT - nix-store --export $(nix-store -qR "$CI_TEST_PATH") | nix run nixpkgs#zstd -- -T0 > ci-test-closure.nar.zst - - name: Upload ci-test closure - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 - with: - name: ci-test-closure - path: ci-test-closure.nar.zst - retention-days: 1 - - test-kernel-gpu: - needs: [setup, build-kernel, build-ci-test] - if: needs.setup.outputs.skip == 'false' - runs-on: - group: aws-g6-12xlarge-plus - steps: - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 2 - cores = 12 - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Setup Nix driver location - run: | - sudo mkdir -p /run/opengl-driver/lib - sudo find /usr/lib64 \ - -name 'libcuda.so*' \ - -exec ln -s {} /run/opengl-driver/lib/ \; - find /run/opengl-driver - - name: Download ci-test closure - uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 - with: - name: ci-test-closure - - name: Import ci-test closure - run: | - nix run nixpkgs#zstd -- -d ci-test-closure.nar.zst -c | nix-store --import - - name: Run GPU tests - run: | - CI_TEST_PATH="${{ needs.build-ci-test.outputs.ci-test-path }}" - "$CI_TEST_PATH/bin/ci-test" diff --git a/.github/workflows/build-release-dispatch.yaml b/.github/workflows/build-release-dispatch.yaml index e248dcb4..2d16b624 100644 --- a/.github/workflows/build-release-dispatch.yaml +++ b/.github/workflows/build-release-dispatch.yaml @@ -64,4 +64,4 @@ jobs: run: | KERNEL="${{ steps.validate.outputs.kernel }}" REF="${{ github.event.repository.default_branch || 'main' }}" - python3 .github/scripts/dispatch_release.py "$KERNEL" --ref "$REF" + python3 .github/scripts/dispatch.py "$KERNEL" --ref "$REF" --mode release diff --git a/.github/workflows/build-release.yaml b/.github/workflows/build-release.yaml deleted file mode 100644 index 6a2a11c2..00000000 --- a/.github/workflows/build-release.yaml +++ /dev/null @@ -1,164 +0,0 @@ -name: Build Release -run-name: >- - Build Release / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} -on: - workflow_dispatch: - inputs: - kernel_name: - description: "Kernel directory name to build" - required: true - type: string - dispatch_key: - description: "Unique key for matching this run back to a bot dispatch" - required: false - type: string - skip_build: - description: "Skip build and upload steps (for testing workflow plumbing)" - required: false - type: boolean - default: false - pr_number: - description: "Optional PR number to checkout before building" - required: false - type: string - default: "" - target_branch: - description: "Target branch for upload (default: repo default)" - required: false - type: string - default: "" - upload: - description: "Whether to upload after build" - required: false - type: boolean - default: true -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - setup: - runs-on: ubuntu-latest - outputs: - skip: ${{ steps.validate.outputs.skip }} - kernel: ${{ steps.validate.outputs.kernel }} - matrix: ${{ steps.matrix.outputs.matrix }} - steps: - - name: Validate PR number - if: inputs.pr_number != '' - id: validate-pr - run: | - case "${{ inputs.pr_number }}" in - ''|*[!0-9]*) - echo "Invalid pr_number input: must be numeric" - exit 1 - ;; - esac - - name: Checkout PR branch - if: inputs.pr_number != '' - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: refs/pull/${{ inputs.pr_number }}/head - fetch-depth: 0 - - name: Checkout default branch - if: inputs.pr_number == '' - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = 2 - cores = 4 - sandbox-fallback = false - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Validate kernel directory - id: validate - run: | - KERNEL="${{ inputs.kernel_name }}" - if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then - echo "kernel=$KERNEL" >> $GITHUB_OUTPUT - echo "skip=false" >> $GITHUB_OUTPUT - else - echo "skip=true" >> $GITHUB_OUTPUT - fi - - name: Generate build matrix - if: steps.validate.outputs.skip == 'false' - id: matrix - env: - KERNEL: ${{ steps.validate.outputs.kernel }} - run: | - KERNEL="${{ steps.validate.outputs.kernel }}" - X86_BACKENDS=$(cd "$KERNEL" && nix eval .#backendBundle --apply builtins.attrNames --json --system x86_64-linux) - ARM_BACKENDS=$(cd "$KERNEL" && nix eval .#backendBundle --apply builtins.attrNames --json --system aarch64-linux) - MATRIX=$(python3 .github/workflows/generate-build-matrix.py "$X86_BACKENDS" "$ARM_BACKENDS") - echo "matrix=$MATRIX" >> $GITHUB_OUTPUT - - build-kernel: - needs: setup - if: needs.setup.outputs.skip == 'false' && inputs.skip_build != true - strategy: - fail-fast: false - matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} - runs-on: - group: ${{ matrix.runner }} - timeout-minutes: 1200 # 20h - steps: - - name: Checkout PR branch - if: inputs.pr_number != '' - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - with: - ref: refs/pull/${{ inputs.pr_number }}/head - fetch-depth: 0 - - name: Checkout default branch - if: inputs.pr_number == '' - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 - with: - extra-conf: | - max-jobs = ${{ matrix.max_jobs }} - cores = ${{ matrix.cores }} - sandbox-fallback = false - - name: Nix info - run: nix-shell -p nix-info --run "nix-info -m" - - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 - with: - name: huggingface - env: - USER: runner - - name: Build kernel - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - ( cd "$KERNEL" && nix build -L .#backendBundle.${{ matrix.backend }} && ls -l result/ ) - - name: Upload kernel to Hub - if: inputs.upload != false - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - cd "$KERNEL" - BRANCH_FLAG="" - if [ -n "${{ inputs.target_branch }}" ]; then - BRANCH_FLAG="--branch ${{ inputs.target_branch }}" - fi - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" $BRANCH_FLAG - - name: Upload v1 kernels to main - if: inputs.upload != false && inputs.target_branch == '' - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: | - KERNEL="${{ needs.setup.outputs.kernel }}" - cd "$KERNEL" - - # Check if build.toml exists, has version = 1, and does not specify a branch. - if [ -f "build.toml" ]; then - VERSION=$(grep -E '^\s*version\s*=\s*1\s*$' build.toml || true) - BRANCH=$(grep -E '^\s*branch\s*=' build.toml || true) - if [ -n "$VERSION" ] && [ -z "$BRANCH" ]; then - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "kernels-community/$KERNEL" --branch main - nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "kernels-community/$KERNEL" --branch main - fi - fi diff --git a/.github/workflows/build-release-windows.yaml b/.github/workflows/build-windows.yaml similarity index 76% rename from .github/workflows/build-release-windows.yaml rename to .github/workflows/build-windows.yaml index 2bfeee5a..31c7c459 100644 --- a/.github/workflows/build-release-windows.yaml +++ b/.github/workflows/build-windows.yaml @@ -1,6 +1,6 @@ -name: Build Release (Windows) +name: Build (Windows) run-name: >- - Build Release (Windows) / ${{ inputs.kernel_name || '' }} / request=${{ inputs.dispatch_key || '' }} + Build (Windows) / ${{ inputs.kernel_name || '' }} / mode=${{ inputs.mode || 'release' }} / request=${{ inputs.dispatch_key || '' }} on: workflow_dispatch: inputs: @@ -12,6 +12,11 @@ on: description: "Unique key for matching this run back to a bot dispatch" required: false type: string + mode: + description: "Build mode: pr (CI only) or release (build + upload)" + required: false + type: string + default: "release" skip_build: description: "Skip build and upload steps (for testing workflow plumbing)" required: false @@ -32,12 +37,23 @@ on: required: false type: boolean default: true + backends: + description: "Comma-separated list of backends from build.toml (set by dispatch script)" + required: false + type: string + default: "" + repo_prefix: + description: "Hub org prefix for uploads (e.g. kernels-community, kernels-staging)" + required: false + type: string + default: "kernels-community" concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: + # Build the kernel for each CUDA/XPU variant; release mode uploads to the Hub. build-kernel: strategy: matrix: @@ -64,6 +80,7 @@ jobs: runs-on: windows-2022 steps: + # Guard against injection via pr_number input. - name: Validate PR number if: inputs.pr_number != '' shell: pwsh @@ -72,6 +89,8 @@ jobs: Write-Error "Invalid pr_number input: must be numeric" exit 1 } + + # When building for a PR, check out the PR head; otherwise use default branch. - name: Checkout PR branch if: inputs.pr_number != '' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 @@ -82,6 +101,7 @@ jobs: if: inputs.pr_number == '' uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + # Ensure the kernel directory exists and has the required config files. - name: Validate kernel directory id: validate shell: pwsh @@ -96,6 +116,8 @@ jobs: } exit 0 + # Check if the kernel supports this matrix backend and meets CUDA + # minimum version. Backend list is passed by the dispatch script. - name: Check backend support id: check-backend if: steps.validate.outputs.skip == 'false' @@ -103,41 +125,33 @@ jobs: run: | $KERNEL = "${{ steps.validate.outputs.kernel }}" $BACKEND = "${{ matrix.platform.backend }}" - $buildToml = Get-Content "${KERNEL}/build.toml" -Raw - - # XPU block list for Windows - these kernels are not compatible with Windows XPU builds - $xpuBlockList = @("megablocks", "flash-attn2") + $backends = "${{ inputs.backends }}" -split "," - # Check if XPU backend and kernel is in block list - if ($BACKEND -eq "xpu" -and $KERNEL -in $xpuBlockList) { - Write-Output "Kernel '$KERNEL' is not compatible with Windows XPU builds - skipping" - Write-Output "Blocked XPU kernels: $($xpuBlockList -join ', ')" + if ($BACKEND -notin $backends) { + Write-Output "Kernel '$KERNEL' does not support backend '$BACKEND' - skipping" echo "supported=false" >> $env:GITHUB_OUTPUT exit 0 } - # Kernels that require oneAPI setup for XPU builds - $xpuNeedsOneApi = @("relu", "rotary", "rmsnorm") - if ($BACKEND -eq "xpu" -and $KERNEL -in $xpuNeedsOneApi) { - echo "needs_oneapi=true" >> $env:GITHUB_OUTPUT - } else { - echo "needs_oneapi=false" >> $env:GITHUB_OUTPUT + # Check CUDA minimum version requirement from build.toml [general.cuda] minver + if ($BACKEND -eq "cuda") { + $CUDA_VERSION = "${{ matrix.platform.cuda }}" + $buildToml = Get-Content "${KERNEL}/build.toml" -Raw + if ($buildToml -match 'minver\s*=\s*"([^"]+)"') { + $minver = $matches[1] + $cudaMajorMinor = ($CUDA_VERSION -split '\.')[0..1] -join '.' + if ([version]$cudaMajorMinor -lt [version]$minver) { + Write-Output "Kernel '$KERNEL' requires CUDA >= $minver but matrix provides $CUDA_VERSION - skipping" + echo "supported=false" >> $env:GITHUB_OUTPUT + exit 0 + } + } } - # Check two formats: - # 1. [kernel.*] section with: backend = "xpu" - # 2. [general] section with: backends = ["cuda", "xpu", ...] (can be multi-line) - $kernelPattern = "backend\s*=\s*[`"']${BACKEND}[`"']" - $backendsPattern = "(?s)backends\s*=\s*\[.*?[`"']${BACKEND}[`"'].*?\]" - - if (($buildToml -match $kernelPattern) -or ($buildToml -match $backendsPattern)) { - Write-Output "Kernel '$KERNEL' supports backend '$BACKEND'" - echo "supported=true" >> $env:GITHUB_OUTPUT - } else { - Write-Output "Kernel '$KERNEL' does NOT support backend '$BACKEND' - skipping build" - echo "supported=false" >> $env:GITHUB_OUTPUT - } + Write-Output "Kernel '$KERNEL' supports backend '$BACKEND'" + echo "supported=true" >> $env:GITHUB_OUTPUT + # Log the kernel being built for easier debugging in CI output. - name: Kernel Info if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh @@ -145,6 +159,8 @@ jobs: $KERNEL = "${{ steps.validate.outputs.kernel }}" Write-Output "Building Kernel: $KERNEL" + # Read the pinned kernel-builder revision from flake.lock so we build + # with the exact same tooling the kernel was developed against. - name: Kernel extract required builder version id: extract-builder-version if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true @@ -156,32 +172,36 @@ jobs: Write-Output "Building Kernel with revision: $revision" echo "revision=$revision" >> $env:GITHUB_OUTPUT + # Install the CUDA toolkit for CUDA backend builds. - uses: Jimver/cuda-toolkit@b6fc3a9f3f15256d9d94ffe1254f9c5a2565cde6 # v0.2.30 if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'cuda' id: setup-cuda-toolkit with: cuda: ${{ matrix.platform.cuda }} + # Install Intel oneAPI for XPU builds (provides the icx-cl compiler). - name: Setup Intel oneAPI - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'xpu' && steps.check-backend.outputs.needs_oneapi == 'true' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'xpu' shell: pwsh run: | & "$env:GITHUB_WORKSPACE\.github\scripts\windows\install-oneapi.ps1" -OneApiVersion "${{ matrix.platform.oneapi }}" -OneApiUrl "${{ matrix.platform.oneapi_url }}" + # Python is needed for PyTorch and the build toolchain. - name: Setup Python if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: ${{ matrix.python }} + # Install backend-specific PyTorch wheel. - name: Install PyTorch (CUDA) if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'cuda' run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.platform.wheel }} - - name: Install PyTorch (XPU) if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && matrix.platform.backend == 'xpu' run: pip3 install torch==${{ matrix.platform.torch_version }} --index-url https://download.pytorch.org/whl/xpu + # Check out the kernel-builder repo at the pinned revision for building. - name: Checkout kernels if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true id: checkout-kernels @@ -191,6 +211,7 @@ jobs: ref: "${{ steps.extract-builder-version.outputs.revision }}" path: kernels + # Cache Rust compilation artifacts to speed up kernel-builder builds. - name: Cache Rust build if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 @@ -203,22 +224,24 @@ jobs: restore-keys: | ${{ runner.os }}-rust-debug- + # Build the kernel-builder CLI tool from source. - name: Build kernel-builder if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true working-directory: kernels\kernel-builder shell: pwsh run: cargo build + # Compile the kernel using the Windows build script, then run + # cmake local_install to create the directory layout for upload. - name: Build kernel if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true shell: pwsh env: KERNEL_SOURCE: ${{ steps.validate.outputs.kernel }} - NEEDS_ONEAPI: ${{ steps.check-backend.outputs.needs_oneapi }} PLATFORM_BACKEND: ${{ matrix.platform.backend }} run: | - # Initialize oneAPI environment for XPU builds that require it - if ($env:PLATFORM_BACKEND -eq "xpu" -and $env:NEEDS_ONEAPI -eq "true") { + # Initialize oneAPI environment for XPU builds + if ($env:PLATFORM_BACKEND -eq "xpu") { $setvarsPath = "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" if (Test-Path $setvarsPath) { Write-Host "Initializing Intel oneAPI environment for XPU build..." -ForegroundColor Cyan @@ -262,8 +285,9 @@ jobs: cmake --build . --config Release --target local_install Pop-Location + # Upload built artifacts to both model and kernel Hub repos. - name: Upload kernel to Hub - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.upload != false + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.mode != 'pr' && inputs.upload != false shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -278,11 +302,12 @@ jobs: } # Upload to both model and kernel repo types - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "kernels-community/$env:KERNEL_SOURCE" @branchArgs - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "kernels-community/$env:KERNEL_SOURCE" @branchArgs + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "${{ inputs.repo_prefix }}/$env:KERNEL_SOURCE" @branchArgs + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$env:KERNEL_SOURCE" @branchArgs + # v1 kernels without an explicit branch override also get uploaded to main. - name: Upload v1 kernels to main - if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.upload != false && inputs.target_branch == '' + if: steps.validate.outputs.skip == 'false' && steps.check-backend.outputs.supported == 'true' && inputs.skip_build != true && inputs.mode != 'pr' && inputs.upload != false && inputs.target_branch == '' shell: pwsh env: HF_TOKEN: ${{ secrets.HF_TOKEN }} @@ -296,8 +321,8 @@ jobs: $content = Get-Content $buildTomlPath -Raw if ($content -match '(?m)^\s*version\s*=\s*1\s*(\r)?$' -and $content -notmatch '(?m)^\s*branch\s*=') { Write-Host "Kernel version is 1 and no branch override, uploading to main branch..." - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "kernels-community/$env:KERNEL_SOURCE" --branch main - & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "kernels-community/$env:KERNEL_SOURCE" --branch main + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type model --repo-id "${{ inputs.repo_prefix }}/$env:KERNEL_SOURCE" --branch main + & $KB upload "$env:KERNEL_SOURCE\build" --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$env:KERNEL_SOURCE" --branch main } else { Write-Host "Kernel version is not 1 or branch is overridden, skipping main branch upload" } diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml new file mode 100644 index 00000000..1de24749 --- /dev/null +++ b/.github/workflows/build.yaml @@ -0,0 +1,322 @@ +name: Build +run-name: >- + Build / ${{ inputs.kernel_name || '' }} / mode=${{ inputs.mode || 'release' }} / request=${{ inputs.dispatch_key || '' }} +on: + workflow_dispatch: + inputs: + kernel_name: + description: "Kernel directory name to build" + required: true + type: string + dispatch_key: + description: "Unique key for matching this run back to a bot dispatch" + required: false + type: string + mode: + description: "Build mode: pr (CI only) or release (build + upload)" + required: false + type: string + default: "release" + skip_build: + description: "Skip build and upload steps (for testing workflow plumbing)" + required: false + type: boolean + default: false + pr_number: + description: "Optional PR number to checkout before building" + required: false + type: string + default: "" + target_branch: + description: "Target branch for upload (default: repo default)" + required: false + type: string + default: "" + upload: + description: "Whether to upload after build" + required: false + type: boolean + default: true + repo_prefix: + description: "Hub org prefix for uploads (e.g. kernels-community, kernels-staging)" + required: false + type: string + default: "kernels-community" +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +jobs: + # Validate inputs, resolve the kernel directory, and generate the build matrix. + setup: + runs-on: ubuntu-latest + outputs: + skip: ${{ steps.validate.outputs.skip }} + kernel: ${{ steps.validate.outputs.kernel }} + matrix: ${{ steps.matrix.outputs.matrix }} + steps: + # Guard against injection via pr_number input. + - name: Validate PR number + if: inputs.pr_number != '' + id: validate-pr + run: | + case "${{ inputs.pr_number }}" in + ''|*[!0-9]*) + echo "Invalid pr_number input: must be numeric" + exit 1 + ;; + esac + + # When building for a PR, check out the PR head; otherwise use default branch. + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # Nix toolchain + binary cache setup. + - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 + with: + extra-conf: | + max-jobs = 2 + cores = 4 + sandbox-fallback = false + - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 + with: + name: huggingface + env: + USER: runner + + # Ensure the kernel directory exists and has the required config files. + - name: Validate kernel directory + id: validate + run: | + KERNEL="${{ inputs.kernel_name }}" + if [ -d "$KERNEL" ] && [ -f "$KERNEL/flake.nix" ] && [ -f "$KERNEL/build.toml" ]; then + echo "kernel=$KERNEL" >> $GITHUB_OUTPUT + echo "skip=false" >> $GITHUB_OUTPUT + else + echo "skip=true" >> $GITHUB_OUTPUT + fi + + # PR-only: verify the kernel has a Hub repo-id before burning CI time. + - name: Check that repo-id is present + if: steps.validate.outputs.skip == 'false' && inputs.mode == 'pr' + run: | + KERNEL="${{ steps.validate.outputs.kernel }}" + if ! cat $KERNEL/build.toml | nix run nixpkgs#dasel -- -i toml '$root.general.hub.get("repo-id")' &> /dev/null ; then + echo "Mandatory repo-id is missing in $KERNEL/build.toml" + exit 1 + fi + + # PR mode uses backendCi (lighter, faster); release uses backendBundle (all variants). + - name: Generate build matrix + if: steps.validate.outputs.skip == 'false' + id: matrix + env: + KERNEL: ${{ steps.validate.outputs.kernel }} + run: | + KERNEL="${{ steps.validate.outputs.kernel }}" + if [ "${{ inputs.mode }}" = "pr" ]; then + NIX_TARGET="backendCi" + else + NIX_TARGET="backendBundle" + fi + X86_BACKENDS=$(cd "$KERNEL" && nix eval .#${NIX_TARGET} --apply builtins.attrNames --json --system x86_64-linux) + ARM_BACKENDS=$(cd "$KERNEL" && nix eval .#${NIX_TARGET} --apply builtins.attrNames --json --system aarch64-linux) + MATRIX=$(python3 .github/workflows/generate-build-matrix.py "$X86_BACKENDS" "$ARM_BACKENDS") + echo "matrix=$MATRIX" >> $GITHUB_OUTPUT + + # Compile the kernel for each (backend, arch) pair; release mode also uploads to the Hub. + build-kernel: + needs: setup + if: needs.setup.outputs.skip == 'false' && inputs.skip_build != true + strategy: + fail-fast: false + matrix: ${{ fromJSON(needs.setup.outputs.matrix) }} + runs-on: + group: ${{ matrix.runner }} + # PR builds get 10h; release builds get 20h (more variants to compile). + timeout-minutes: ${{ inputs.mode == 'pr' && 600 || 1200 }} + steps: + # When building for a PR, check out the PR head; otherwise use default branch. + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # Nix toolchain + binary cache setup (uses matrix-specific concurrency limits). + - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 + with: + extra-conf: | + max-jobs = ${{ matrix.max_jobs }} + cores = ${{ matrix.cores }} + sandbox-fallback = false + - name: Nix info + run: nix-shell -p nix-info --run "nix-info -m" + - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 + with: + name: huggingface + env: + USER: runner + + # PR mode builds backendCi (single representative variant); + # release mode builds backendBundle (full set of variants). + - name: Build kernel + run: | + KERNEL="${{ needs.setup.outputs.kernel }}" + if [ "${{ inputs.mode }}" = "pr" ]; then + ( cd "$KERNEL" && nix build -L .#backendCi.${{ matrix.backend }} && ls -l result/ ) + else + ( cd "$KERNEL" && nix build -L .#backendBundle.${{ matrix.backend }} && ls -l result/ ) + fi + + # Upload built artifacts to both model and kernel Hub repos. + - name: Upload kernel to Hub + if: inputs.mode != 'pr' && inputs.upload != false + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + KERNEL="${{ needs.setup.outputs.kernel }}" + cd "$KERNEL" + BRANCH_FLAG="" + if [ -n "${{ inputs.target_branch }}" ]; then + BRANCH_FLAG="--branch ${{ inputs.target_branch }}" + fi + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "${{ inputs.repo_prefix }}/$KERNEL" $BRANCH_FLAG + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$KERNEL" $BRANCH_FLAG + + # v1 kernels without an explicit branch override also get uploaded to main. + - name: Upload v1 kernels to main + if: inputs.mode != 'pr' && inputs.upload != false && inputs.target_branch == '' + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + KERNEL="${{ needs.setup.outputs.kernel }}" + cd "$KERNEL" + + # Check if build.toml exists, has version = 1, and does not specify a branch. + if [ -f "build.toml" ]; then + VERSION=$(grep -E '^\s*version\s*=\s*1\s*$' build.toml || true) + BRANCH=$(grep -E '^\s*branch\s*=' build.toml || true) + if [ -n "$VERSION" ] && [ -z "$BRANCH" ]; then + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type model --repo-id "${{ inputs.repo_prefix }}/$KERNEL" --branch main + nix run -L github:huggingface/kernels#kernel-builder -- upload --repo-type kernel --repo-id "${{ inputs.repo_prefix }}/$KERNEL" --branch main + fi + fi + + # Build the ci-test derivation and export its Nix closure as an artifact + # so the GPU test job can import it on a GPU-enabled runner. + build-ci-test: + needs: setup + if: needs.setup.outputs.skip == 'false' && inputs.mode == 'pr' + runs-on: + group: aws-highmemory-32-plus-nix + timeout-minutes: 600 # 10h + outputs: + ci-test-path: ${{ steps.export-closure.outputs.ci-test-path }} + steps: + # When building for a PR, check out the PR head; otherwise use default branch. + - name: Checkout PR branch + if: inputs.pr_number != '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: refs/pull/${{ inputs.pr_number }}/head + fetch-depth: 0 + - name: Checkout default branch + if: inputs.pr_number == '' + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + # Nix toolchain + binary cache setup. + - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 + with: + extra-conf: | + max-jobs = 2 + cores = 12 + sandbox-fallback = false + - name: Nix info + run: nix-shell -p nix-info --run "nix-info -m" + - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 + with: + name: huggingface + env: + USER: runner + + # Build the test binary that will run on the GPU runner. + - name: Build ci-test + run: | + KERNEL="${{ needs.setup.outputs.kernel }}" + ( cd "$KERNEL" && nix build -L .#ci-test ) + + # Serialize the full Nix closure so the GPU runner can import it + # without needing to rebuild or have access to the Nix store. + - name: Export ci-test closure + id: export-closure + run: | + KERNEL="${{ needs.setup.outputs.kernel }}" + CI_TEST_PATH=$(readlink -f "$KERNEL/result") + echo "ci-test-path=$CI_TEST_PATH" >> $GITHUB_OUTPUT + nix-store --export $(nix-store -qR "$CI_TEST_PATH") | nix run nixpkgs#zstd -- -T0 > ci-test-closure.nar.zst + + # Upload the closure as a short-lived artifact for the GPU test job. + - name: Upload ci-test closure + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ci-test-closure + path: ci-test-closure.nar.zst + retention-days: 1 + + # Import the ci-test closure onto a GPU runner and execute the test suite. + # Runs after both build-kernel and build-ci-test complete. + test-kernel-gpu: + needs: [setup, build-kernel, build-ci-test] + if: needs.setup.outputs.skip == 'false' && inputs.mode == 'pr' + runs-on: + group: aws-g6-12xlarge-plus + steps: + # Nix toolchain + binary cache setup (needed to import the closure). + - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 + with: + extra-conf: | + max-jobs = 2 + cores = 12 + - uses: cachix/cachix-action@3ba601ff5bbb07c7220846facfa2cd81eeee15a1 # v16 + with: + name: huggingface + env: + USER: runner + + # Symlink host CUDA driver into the Nix-expected location. + - name: Setup Nix driver location + run: | + sudo mkdir -p /run/opengl-driver/lib + sudo find /usr/lib64 \ + -name 'libcuda.so*' \ + -exec ln -s {} /run/opengl-driver/lib/ \; + find /run/opengl-driver + + # Retrieve the ci-test closure built in the previous job. + - name: Download ci-test closure + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: ci-test-closure + + # Decompress and import the closure into the local Nix store. + - name: Import ci-test closure + run: | + nix run nixpkgs#zstd -- -d ci-test-closure.nar.zst -c | nix-store --import + + # Run the kernel's test suite on a real GPU. + - name: Run GPU tests + run: | + CI_TEST_PATH="${{ needs.build-ci-test.outputs.ci-test-path }}" + "$CI_TEST_PATH/bin/ci-test" From b82e03ba14309f90ced07a8d33c0e175223375b0 Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 14 May 2026 12:53:35 -0400 Subject: [PATCH 5/7] fix: add temp ref to allow testing --- .github/workflows/build-pr-dispatch.yaml | 2 ++ .github/workflows/build-release-dispatch.yaml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-pr-dispatch.yaml b/.github/workflows/build-pr-dispatch.yaml index e0f9baf4..84ea4b0c 100644 --- a/.github/workflows/build-pr-dispatch.yaml +++ b/.github/workflows/build-pr-dispatch.yaml @@ -40,7 +40,9 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} run: | KERNEL="${{ steps.validate.outputs.kernel }}" + # TODO: revert --ref to main before merging (needed for bootstrap testing) python3 .github/scripts/dispatch.py "$KERNEL" \ + --ref revamp-release-pipeline \ --mode pr \ --pr-number "${{ github.event.pull_request.number }}" \ --no-upload diff --git a/.github/workflows/build-release-dispatch.yaml b/.github/workflows/build-release-dispatch.yaml index 2d16b624..daba9dc2 100644 --- a/.github/workflows/build-release-dispatch.yaml +++ b/.github/workflows/build-release-dispatch.yaml @@ -63,5 +63,5 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} run: | KERNEL="${{ steps.validate.outputs.kernel }}" - REF="${{ github.event.repository.default_branch || 'main' }}" - python3 .github/scripts/dispatch.py "$KERNEL" --ref "$REF" --mode release + # TODO: revert --ref to default_branch before merging (needed for bootstrap testing) + python3 .github/scripts/dispatch.py "$KERNEL" --ref revamp-release-pipeline --mode release From 5bcf05273ea30a15b2a6adb02f46c5ff8776a57f Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 14 May 2026 12:56:49 -0400 Subject: [PATCH 6/7] fix: remove ref for testing --- .github/workflows/build-pr-dispatch.yaml | 2 -- .github/workflows/build-release-dispatch.yaml | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-pr-dispatch.yaml b/.github/workflows/build-pr-dispatch.yaml index 84ea4b0c..e0f9baf4 100644 --- a/.github/workflows/build-pr-dispatch.yaml +++ b/.github/workflows/build-pr-dispatch.yaml @@ -40,9 +40,7 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} run: | KERNEL="${{ steps.validate.outputs.kernel }}" - # TODO: revert --ref to main before merging (needed for bootstrap testing) python3 .github/scripts/dispatch.py "$KERNEL" \ - --ref revamp-release-pipeline \ --mode pr \ --pr-number "${{ github.event.pull_request.number }}" \ --no-upload diff --git a/.github/workflows/build-release-dispatch.yaml b/.github/workflows/build-release-dispatch.yaml index daba9dc2..2d16b624 100644 --- a/.github/workflows/build-release-dispatch.yaml +++ b/.github/workflows/build-release-dispatch.yaml @@ -63,5 +63,5 @@ jobs: GITHUB_REPOSITORY: ${{ github.repository }} run: | KERNEL="${{ steps.validate.outputs.kernel }}" - # TODO: revert --ref to default_branch before merging (needed for bootstrap testing) - python3 .github/scripts/dispatch.py "$KERNEL" --ref revamp-release-pipeline --mode release + REF="${{ github.event.repository.default_branch || 'main' }}" + python3 .github/scripts/dispatch.py "$KERNEL" --ref "$REF" --mode release From 3b312b8a9f0c828cba8861bd9790b5d0fe3a504d Mon Sep 17 00:00:00 2001 From: drbh Date: Thu, 14 May 2026 13:00:07 -0400 Subject: [PATCH 7/7] fix: remove unneeded local run logic --- .github/scripts/dispatch.py | 114 +++++++++--------------------------- 1 file changed, 29 insertions(+), 85 deletions(-) diff --git a/.github/scripts/dispatch.py b/.github/scripts/dispatch.py index eaf840e0..aba2308d 100644 --- a/.github/scripts/dispatch.py +++ b/.github/scripts/dispatch.py @@ -62,40 +62,6 @@ def github_api_request( return resp.status, resp.read().decode("utf-8") -def run_local( - workflow: str, - kernel_name: str, - *, - mode: str = "release", - backends: str = "", - repo_prefix: str = "kernels-community", - skip_build: bool = False, - pr_number: str = "", - target_branch: str = "", - upload: bool = True, -) -> bool: - """Run a release workflow locally via act.""" - cmd = [ - "act", "workflow_dispatch", - "--container-options", "--privileged", - "-W", f".github/workflows/{workflow}", - "--input", f"kernel_name={kernel_name}", - "--input", f"mode={mode}", - "--input", f"backends={backends}", - "--input", f"repo_prefix={repo_prefix}", - ] - if skip_build: - cmd.extend(["--input", "skip_build=true"]) - if pr_number: - cmd.extend(["--input", f"pr_number={pr_number}"]) - if target_branch: - cmd.extend(["--input", f"target_branch={target_branch}"]) - if not upload: - cmd.extend(["--input", "upload=false"]) - print(f"Running locally: {' '.join(cmd)}") - result = subprocess.run(cmd) - return result.returncode == 0 - def get_token() -> str | None: """Resolve GitHub token: env var first, then ``gh auth token`` fallback.""" @@ -209,7 +175,6 @@ def dispatch_release( mode: str = "release", repo_prefix: str = "kernels-community", dispatch_key_prefix: str = "", - local: bool = False, dry_run: bool = False, skip_build: bool = False, pr_number: str = "", @@ -227,7 +192,6 @@ def dispatch_release( mode: Build mode - "pr" for CI builds, "release" for full builds. repo_prefix: Hub org prefix for uploads (default "kernels-community"). dispatch_key_prefix: Optional prefix for dispatch keys (e.g. "pr42-"). - local: Run locally via act instead of remote dispatch. dry_run: Print what would be dispatched without actually dispatching. skip_build: Skip build and upload steps. pr_number: Optional PR number to checkout before building. @@ -288,49 +252,34 @@ def dispatch_release( print(json.dumps(dispatch_body, indent=2)) result.dispatched.append((workflow, dispatch_key)) continue - if local: - if run_local( - workflow, kernel_name, - mode=mode, - backends=backends_csv, - repo_prefix=repo_prefix, - skip_build=skip_build, - pr_number=pr_number, - target_branch=target_branch, - upload=upload, - ): - result.dispatched.append((workflow, dispatch_key)) - else: - result.failed.append((workflow, 0)) - else: - dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" - inputs = { - "kernel_name": kernel_name, - "dispatch_key": dispatch_key, - "mode": mode, - "backends": backends_csv, - "repo_prefix": repo_prefix, - } - if skip_build: - inputs["skip_build"] = "true" - if pr_number: - inputs["pr_number"] = pr_number - if target_branch: - inputs["target_branch"] = target_branch - if not upload: - inputs["upload"] = "false" - dispatch_body = { - "ref": ref, - "inputs": inputs, - } - try: - print(f"Dispatching {workflow} for kernel `{kernel_name}` on ref `{ref}`") - github_api_request(dispatch_url, token, method="POST", data=dispatch_body) - result.dispatched.append((workflow, dispatch_key)) - except urllib.error.HTTPError as e: - err_text = e.read().decode("utf-8", errors="replace") - print(f"Failed to dispatch {workflow} (HTTP {e.code}): {err_text}", file=sys.stderr) - result.failed.append((workflow, e.code)) + dispatch_url = f"{api_base}/actions/workflows/{workflow}/dispatches" + inputs = { + "kernel_name": kernel_name, + "dispatch_key": dispatch_key, + "mode": mode, + "backends": backends_csv, + "repo_prefix": repo_prefix, + } + if skip_build: + inputs["skip_build"] = "true" + if pr_number: + inputs["pr_number"] = pr_number + if target_branch: + inputs["target_branch"] = target_branch + if not upload: + inputs["upload"] = "false" + dispatch_body = { + "ref": ref, + "inputs": inputs, + } + try: + print(f"Dispatching {workflow} for kernel `{kernel_name}` on ref `{ref}`") + github_api_request(dispatch_url, token, method="POST", data=dispatch_body) + result.dispatched.append((workflow, dispatch_key)) + except urllib.error.HTTPError as e: + err_text = e.read().decode("utf-8", errors="replace") + print(f"Failed to dispatch {workflow} (HTTP {e.code}): {err_text}", file=sys.stderr) + result.failed.append((workflow, e.code)) return result @@ -350,10 +299,6 @@ def main() -> int: parser.add_argument( "--repo", default=None, help="GitHub repo in owner/repo format (default: auto-detect)" ) - parser.add_argument( - "--local", action="store_true", - help="Run release workflows locally via act instead of dispatching remotely", - ) parser.add_argument( "--skip-build", action="store_true", help="Skip build and upload steps (for testing workflow plumbing)", @@ -390,13 +335,12 @@ def main() -> int: upload=not args.no_upload, ) - if args.dry_run or args.local: + if args.dry_run: result = dispatch_release( args.kernel_name, token="", repo=args.repo or "", ref=args.ref, - local=args.local, **common, ) else: