|
13 | 13 | using: "composite" |
14 | 14 | steps: |
15 | 15 | - name: Submit and Delete Kubernetes job |
16 | | - uses: ./.github/actions/with-post-step |
17 | | - with: |
| 16 | + uses: ./.github/actions/with-post-step |
| 17 | + with: |
18 | 18 | main: | |
19 | 19 | set -x |
20 | 20 | TIMEOUT_JOB_CREATION=60s |
|
35 | 35 | kubectl wait --for=condition=Ready \ |
36 | 36 | --selector=batch.kubernetes.io/job-name=${{ inputs.job-name }} \ |
37 | 37 | --timeout=$TIMEOUT_JOB_START pod |
38 | | - |
| 38 | +
|
39 | 39 | # Stream logs |
40 | 40 | kubectl logs --all-containers=true --all-pods=true --follow job/${{ inputs.job-name }} |
41 | | - |
42 | | - post: | |
| 41 | +
|
| 42 | + # Detect job parallelism by reading .spec.parallelism from the Job object |
| 43 | + parallelism=$(kubectl get job/"${{ inputs.job-name }}" -o jsonpath='{.spec.parallelism}') |
| 44 | + # Log the raw value; if it came back empty, fall back to a default of 1 |
| 45 | + echo "Parallelism ${parallelism}" |
| 46 | + if [ -z "${parallelism}" ]; then |
| 47 | + echo "No parallelism specified, defaulting to 1" |
| 48 | + parallelism=1 |
| 49 | + fi |
| 50 | +
|
| 51 | + # Poll until every pod has failed or succeeded. The status is re-queried on |
| 52 | + # each pass: a single here-doc fed to `read` hits EOF after one check. |
| 53 | + while true; do |
| 54 | + status=$(kubectl get job/"${{ inputs.job-name }}" -o 'jsonpath={.status.failed}:{.status.succeeded}') || exit 1 |
| 55 | + failures="${status%%:*}" |
| 56 | + successes="${status#*:}" |
| 57 | + failures="${failures:-0}" |
| 58 | + successes="${successes:-0}" |
| 59 | + total=$((failures + successes)) |
| 60 | + if [ "$total" -lt "$parallelism" ]; then |
| 61 | + # some pods are still neither "failed" nor "succeeded", so wait and re-check |
| 62 | + sleep 1 |
| 63 | + elif [ "$total" -eq "$parallelism" ]; then |
| 64 | + # every pod has finished (as a success or a failure), so the job is done |
| 65 | + break |
| 66 | + else |
| 67 | + echo "Unexpected number of completed pods ${total} with parallelism ${parallelism}" |
| 68 | + exit 255 |
| 69 | + fi |
| 70 | + done |
| 71 | +
|
| 72 | + # If the job reported failed pods, describe them for debugging and exit non-zero |
| 73 | + if [ "${failures:-0}" -gt 0 ]; then |
| 74 | + echo "Job ${{ inputs.job-name }} has $failures failures" |
| 75 | + # Find the job's pods through the batch.kubernetes.io/job-name label |
| 76 | + pods=$(kubectl get pods --selector=batch.kubernetes.io/job-name=${{ inputs.job-name }} -o name) |
| 77 | + if [ -n "${pods}" ]; then |
| 78 | + kubectl describe ${pods} |
| 79 | + fi |
| 80 | + exit 1 |
| 81 | + fi |
| 82 | +
|
| 83 | + post: | |
| 84 | + echo "Deleting K8s job: ${{ inputs.job-name }}" |
43 | 85 | kubectl delete -f "${{ inputs.job-config-file }}" |
0 commit comments