---
name: "Example: Batched Matrix Workflow"

# This is an example workflow demonstrating how to use the batching feature
# to work around GitHub Actions' 256-job matrix limit.

on:
  workflow_dispatch:
    inputs:
      model-prefix:
        description: "Model prefix to benchmark"
        required: true
        type: string
      seq-lens:
        description: "Sequence length config (e.g., 1k1k)"
        required: true
        type: string
jobs:
  # Step 1: Determine how many batches are needed.
  get-batch-count:
    runs-on: ubuntu-latest
    outputs:
      batch-count: ${{ steps.count.outputs.batch-count }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - id: count
        # Pass user-controlled inputs through env vars instead of
        # interpolating `${{ }}` directly into the script body; direct
        # interpolation is a shell-injection vector (GitHub security
        # hardening guidance).
        env:
          SEQ_LENS: ${{ inputs.seq-lens }}
          MODEL_PREFIX: ${{ inputs.model-prefix }}
        run: |
          pip install pydantic
          BATCH_COUNT=$(python3 "${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py" \
            full-sweep \
            --config-files "${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml" \
            "${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml" \
            --seq-lens "$SEQ_LENS" \
            --model-prefix "$MODEL_PREFIX" \
            --get-batch-count)
          echo "batch-count=$BATCH_COUNT" >> "$GITHUB_OUTPUT"
          echo "Total batches needed: $BATCH_COUNT"
| 41 | + # Step 2: Generate config for each batch |
| 42 | + # This job runs once per batch (up to the batch-count) |
| 43 | + get-batch-configs: |
| 44 | + needs: get-batch-count |
| 45 | + runs-on: ubuntu-latest |
| 46 | + # Create a matrix with one entry per batch |
| 47 | + strategy: |
| 48 | + matrix: |
| 49 | + # Generate array [0, 1, 2, ..., batch-count-1] |
| 50 | + batch-index: ${{ fromJson(format('[{0}]', join(range(0, fromJson(needs.get-batch-count.outputs.batch-count)), ','))) }} |
| 51 | + outputs: |
| 52 | + # Each batch gets its own output |
| 53 | + configs-${{ matrix.batch-index }}: ${{ steps.get-configs.outputs.configs }} |
| 54 | + steps: |
| 55 | + - name: Checkout code |
| 56 | + uses: actions/checkout@v4 |
| 57 | + |
| 58 | + - id: get-configs |
| 59 | + run: | |
| 60 | + pip install pydantic |
| 61 | + CONFIG_JSON=$(python3 ${GITHUB_WORKSPACE}/utils/matrix-logic/generate_sweep_configs.py \ |
| 62 | + full-sweep \ |
| 63 | + --config-files ${GITHUB_WORKSPACE}/.github/configs/nvidia-master.yaml \ |
| 64 | + ${GITHUB_WORKSPACE}/.github/configs/amd-master.yaml \ |
| 65 | + --seq-lens ${{ inputs.seq-lens }} \ |
| 66 | + --model-prefix ${{ inputs.model-prefix }} \ |
| 67 | + --batch-index ${{ matrix.batch-index }}) |
| 68 | + echo "configs=$CONFIG_JSON" >> $GITHUB_OUTPUT |
| 69 | + echo "Generated batch ${{ matrix.batch-index }}" |
| 70 | +
|
| 71 | + # Step 3: Run benchmarks for batch 0 |
| 72 | + # You would create similar jobs for batch-1, batch-2, etc. if needed |
| 73 | + benchmark-batch-0: |
| 74 | + needs: get-batch-configs |
| 75 | + # Only run if batch 0 exists |
| 76 | + if: ${{ fromJson(needs.get-batch-count.outputs.batch-count) > 0 }} |
| 77 | + uses: ./.github/workflows/benchmark-tmpl.yml |
| 78 | + name: ${{ inputs.model-prefix }} ${{ inputs.seq-lens }} batch-0 / |
| 79 | + strategy: |
| 80 | + fail-fast: false |
| 81 | + matrix: |
| 82 | + config: ${{ fromJson(needs.get-batch-configs.outputs.configs-0) }} |
| 83 | + secrets: inherit |
| 84 | + with: |
| 85 | + exp-name: "${{ inputs.model-prefix }}_${{ inputs.seq-lens }}_batch0" |
| 86 | + isl: 1024 |
| 87 | + osl: 1024 |
| 88 | + max-model-len: 2048 |
| 89 | + runner: ${{ matrix.config.runner }} |
| 90 | + image: ${{ matrix.config.image }} |
| 91 | + model: ${{ matrix.config.model }} |
| 92 | + framework: ${{ matrix.config.framework }} |
| 93 | + precision: ${{ matrix.config.precision }} |
| 94 | + tp: ${{ matrix.config.tp }} |
| 95 | + ep: ${{ matrix.config.ep }} |
| 96 | + dp-attn: ${{ matrix.config.dp-attn }} |
| 97 | + conc: ${{ matrix.config.conc }} |
| 98 | + |
| 99 | + # Step 4 (optional): Collect results from all batches |
| 100 | + collect-results: |
| 101 | + needs: [get-batch-count, benchmark-batch-0] |
| 102 | + if: ${{ always() }} |
| 103 | + runs-on: ubuntu-latest |
| 104 | + steps: |
| 105 | + - name: Summary |
| 106 | + run: | |
| 107 | + echo "Processed ${{ needs.get-batch-count.outputs.batch-count }} batch(es)" |
| 108 | + echo "Benchmark complete" |
| 109 | +
|
# Note: For production use with multiple batches, you would either:
#   1. Create multiple benchmark-batch-N jobs (one per possible batch)
#   2. Use a dynamic workflow-generation approach
#   3. Use GitHub's reusable workflows with a loop construct (when available)
#
# The current InferenceMAX workflows split by model-prefix instead,
# which naturally keeps each job under the 256-job limit.