@@ -20,59 +20,58 @@ runs:
2020 TIMEOUT_JOB_CREATION=60s
2121 TIMEOUT_JOB_WAIT=14400s
2222 TIMEOUT_JOB_START=600s
23+ INPUT_JOB_NAME=${{ inputs.job-name }}
24+ INPUT_JOB_CONFIG_FILE=${{ inputs.job-config-file }}
2325
2426 echo "Submit K8s job"
25- kubectl apply -f "${{ inputs.job-config-file }}"
26- kubectl get event | grep ${{ inputs.job-name }}
27+ kubectl apply -f "${INPUT_JOB_CONFIG_FILE}"
28+ kubectl get event | grep ${INPUT_JOB_NAME}
2729 # Wait for job to be created
28- kubectl wait --for=create job/${{ inputs.job-name }} --timeout=$TIMEOUT_JOB_CREATION
29-
30+ kubectl wait --for=create job/${INPUT_JOB_NAME} --timeout=$TIMEOUT_JOB_CREATION
3031 # Wait for job to be unsuspended
31- kubectl wait --for=jsonpath='{.spec.suspend}=false' job/${{ inputs.job-name }} --timeout=$TIMEOUT_JOB_WAIT
32-
32+ kubectl wait --for=jsonpath='{.spec.suspend}=false' job/${INPUT_JOB_NAME} --timeout=$TIMEOUT_JOB_WAIT
3333 # Wait for pods to be running
3434 kubectl wait --for=condition=Ready \
35- --selector=batch.kubernetes.io/job-name=${{ inputs.job-name }} \
35+ --selector=batch.kubernetes.io/job-name=${INPUT_JOB_NAME} \
3636 --timeout=$TIMEOUT_JOB_START pod
3737
3838 # Stream logs
39- kubectl logs --all-containers=true --all-pods=true --follow job/${{ inputs.job-name }}
39+ kubectl logs --all-containers=true --all-pods=true --follow job/${INPUT_JOB_NAME}
4040
4141 # Detect job parallelism
42- parallelism=$(kubectl get job/"${{ inputs.job-name }}" -o jsonpath='{.spec.parallelism}')
42+ parallelism=$(kubectl get job/${INPUT_JOB_NAME} -o jsonpath='{.spec.parallelism}')
4343 # if parallelism is not set, use default value of 1
44- echo "Parallelism ${parallelism}"
4544 if [ -z "${parallelism}" ]; then
4645 echo "No parallelism specified, defaulting to 1"
4746 parallelism=1
4847 fi
4948
50- while IFS=: read -r failures successes; do
51- failures="${failures:-0}"
52- successes="${successes:-0}"
49+ while true; do
50+ job_status_counts=$(kubectl get job/${INPUT_JOB_NAME} -o 'jsonpath={.status.failed}:{.status.succeeded}')
51+
52+ IFS=:
53+ set -- $job_status_counts
54+ failures=${1:-0}
55+ successes=${2:-0}
56+
5357 total=$((failures + successes))
5458
59+ echo "status: failures=${failures}, successes=${successes}, total=${total}, parallelism=${parallelism}"
60+
5561 if [ $total -lt $parallelism ]; then
5662 # neither "failed" nor "succeeded", so wait
57- sleep 1
58- elif [ $total -eq $parallelism ]; then
59- # we have total=parallelism => either X successes or X failures
60- # In any case, the job is done
61- break
62- else
63- # Log here
64- echo "Unexpected number of completed pods ${total} with parallelism ${parallelism}"
65- exit 255
63+ sleep 2
64+ continue
6665 fi
67- done <<EOF
68- $(kubectl get job/"${{ inputs.job-name }}" -o 'jsonpath={.status.failed}:{.status.succeeded}')
69- EOF
66+ break
67+ done
68+
7069
7170 # If job indicates a failure try to print out the info
7271 if [ "${failures:-0}" -gt 0 ]; then
73- echo "Job ${{ inputs.job-name }} has $failures failures"
72+ echo "Job ${INPUT_JOB_NAME} has $failures failures"
7473 # this is for batch jobs only
75- pods=$(kubectl get pods --selector=batch.kubernetes.io/job-name=${{ inputs.job-name }} -o name)
74+ pods=$(kubectl get pods --selector=batch.kubernetes.io/job-name=${INPUT_JOB_NAME} -o name)
7675 if [ -n "${pods}" ]; then
7776 kubectl describe ${pods}
7877 fi
0 commit comments