Commit f0bb1cd

Add demo specs for working with compute domains
Signed-off-by: Kevin Klues <[email protected]>
1 parent c11dc37 commit f0bb1cd

5 files changed: +455 -0 lines changed

demo/specs/imex/README.sh

Lines changed: 139 additions & 0 deletions
@@ -0,0 +1,139 @@
###########################
#### Setup and Overview ###
###########################

# Look at the set of nodes on the cluster
kubectl get node

# Look at all pods running on the cluster
kubectl get pod -A

# Look at the set of nodes on the cluster and their <clusterUID.cliqueID> labels
(echo -e "NODE\tLABEL\tCLIQUE"; kubectl get nodes -o json | \
    jq -r '.items[] | [.metadata.name, "nvidia.com/gpu.clique", .metadata.labels["nvidia.com/gpu.clique"]] | @tsv') | \
    column -t

# Install the DRA driver for ComputeDomains
helm upgrade -i \
    --create-namespace \
    --namespace nvidia \
    nvidia-dra-driver-gpu \
    ../../../deployments/helm/nvidia-dra-driver-gpu \
    --set nvidiaDriverRoot="/" \
    --set resources.gpus.enabled=false \
    --wait

# Show the DRA driver components running
kubectl get pod -n nvidia

# Show two MPI jobs, one traditional, and one referencing a ComputeDomain
vim -O mpi-memcpy-test-job.yaml mpi-memcpy-imex-test-job.yaml

# Show the diff between the two MPI jobs
diff -ruN mpi-memcpy-test-job.yaml mpi-memcpy-imex-test-job.yaml


#######################################################
#### Run an MPI job together *with* a ComputeDomain ###
#######################################################

# Create the ComputeDomain and run the MPI job
kubectl apply -f mpi-memcpy-imex-test-job.yaml

# Look at the pods for the MPI job *within* a ComputeDomain
kubectl get pods

# Look at the pods for the IMEX daemons running on behalf of the MPI job *within* a ComputeDomain
kubectl get pods -n nvidia

# Look at the status of the newly created ComputeDomain
kubectl get -o yaml computedomains.resource.nvidia.com

# Look at the logs of the MPI job *within* a ComputeDomain
kubectl logs --tail=-1 -l job-name=mpi-memcpy-imex-test-launcher

# Verify that the workers and IMEX daemons are already gone
kubectl get pod -A

# Delete the MPI job and its ComputeDomain
kubectl delete -f mpi-memcpy-imex-test-job.yaml


#################################################
#### Run an MPI job *without* a ComputeDomain ###
#################################################

# Run the MPI job
kubectl apply -f mpi-memcpy-test-job.yaml

# Verify that no ComputeDomains exist
kubectl get -o yaml computedomains.resource.nvidia.com

# Look at the pods for the MPI job *without* a ComputeDomain
kubectl get pods

# Verify that no extra pods have been started in the nvidia namespace
kubectl get pods -n nvidia

# Look at the logs of the MPI job *without* a ComputeDomain
kubectl logs --tail=-1 -l job-name=mpi-memcpy-test-launcher

# Delete the MPI job *without* a ComputeDomain
kubectl delete -f mpi-memcpy-test-job.yaml

# Show everything cleaned up
kubectl get pod -A


####################################################
#### Run 2 MPI jobs with *unique* ComputeDomains ###
####################################################

# Show the diff between the original MPI job and one of the smaller MPI jobs
diff -ruN mpi-memcpy-imex-test-job.yaml mpi-memcpy-imex-test-job-1.yaml

# Run the 2 smaller MPI jobs
kubectl apply -f mpi-memcpy-imex-test-job-1.yaml
kubectl apply -f mpi-memcpy-imex-test-job-2.yaml

# Look at the pods for the 2 MPI jobs with *unique* ComputeDomains
kubectl get pods

# Look at the pods for the IMEX daemons running on behalf of the 2 MPI jobs with *unique* ComputeDomains
kubectl get pods -n nvidia

# Look at the status of the newly created ComputeDomains
kubectl get -o yaml computedomains.resource.nvidia.com

# Look at the logs of the first MPI job with a *unique* ComputeDomain
kubectl logs --tail=-1 -l job-name=mpi-memcpy-imex-test-1-launcher

# Look at the logs of the second MPI job with a *unique* ComputeDomain
kubectl logs --tail=-1 -l job-name=mpi-memcpy-imex-test-2-launcher

# Delete the 2 MPI jobs and their ComputeDomains
kubectl delete -f mpi-memcpy-imex-test-job-1.yaml
kubectl delete -f mpi-memcpy-imex-test-job-2.yaml

# Show everything cleaned up
kubectl get pod -A


#########################################
#### Queue up all 3 MPI jobs at once ####
#########################################

# Run all 3 MPI jobs
kubectl apply -f mpi-memcpy-imex-test-job.yaml
kubectl apply -f mpi-memcpy-imex-test-job-1.yaml
kubectl apply -f mpi-memcpy-imex-test-job-2.yaml

# Watch them all run to completion
kubectl get pod -A

# Delete all 3 MPI jobs
kubectl delete -f mpi-memcpy-imex-test-job.yaml
kubectl delete -f mpi-memcpy-imex-test-job-1.yaml
kubectl delete -f mpi-memcpy-imex-test-job-2.yaml

# Verify everything is gone
kubectl get pod -A
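Beyond the steps above, it can be useful to inspect the DRA objects that back a ComputeDomain. A minimal sketch, not part of the committed README.sh, assuming the cluster serves the resource.k8s.io DRA API group (which the driver requires):

# (Sketch) Inspect the DRA objects behind a ComputeDomain
kubectl get resourceclaimtemplates
kubectl get resourceclaims
kubectl describe computedomains.resource.nvidia.com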
demo/specs/imex/mpi-memcpy-imex-test-job-1.yaml

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
---
apiVersion: resource.nvidia.com/v1beta1
kind: ComputeDomain
metadata:
  name: mpi-memcpy-imex-test-compute-domain-1
spec:
  numNodes: 2
  channel:
    resourceClaimTemplate:
      name: mpi-memcpy-imex-test-compute-domain-1-resource-claim

---
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: mpi-memcpy-imex-test-1
spec:
  slotsPerWorker: 2
  launcherCreationPolicy: WaitForWorkersReady
  runPolicy:
    cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-launcher
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-launcher
            securityContext:
              runAsUser: 1000
            command:
            - mpirun
            args:
            - --bind-to
            - core
            - --map-by
            - ppr:2:node
            - -np
            - "4"
            - --report-bindings
            - -q
            - nvbandwidth
            - -t
            - multinode_device_to_device_memcpy_read_ce
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
    Worker:
      replicas: 2
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-worker
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-worker
            securityContext:
              runAsUser: 1000
            env:
            command:
            - /usr/sbin/sshd
            args:
            - -De
            - -f
            - /home/mpiuser/.sshd_config
            resources:
              limits:
                nvidia.com/gpu: 2
              claims:
              - name: compute-domain
          resourceClaims:
          - name: compute-domain
            resourceClaimTemplateName: mpi-memcpy-imex-test-compute-domain-1-resource-claim
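The Worker pods in this spec attach to the ComputeDomain through the pod-level resourceClaims entry, which each container references via resources.claims. A minimal sketch of how one might confirm that wiring at runtime, reusing the jq/column pattern from README.sh; it assumes a Kubernetes release recent enough to populate status.resourceClaimStatuses on the pods:

# (Sketch) Map each worker pod to the ResourceClaim generated from the ComputeDomain's claim template
(echo -e "POD\tRESOURCECLAIM"; kubectl get pods -l mpi-memcpy-dra-test-replica=mpi-worker -o json | \
    jq -r '.items[] | [.metadata.name, (.status.resourceClaimStatuses[]?.resourceClaimName // "pending")] | @tsv') | \
    column -t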
demo/specs/imex/mpi-memcpy-imex-test-job-2.yaml

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
---
apiVersion: resource.nvidia.com/v1beta1
kind: ComputeDomain
metadata:
  name: mpi-memcpy-imex-test-compute-domain-2
spec:
  numNodes: 2
  channel:
    resourceClaimTemplate:
      name: mpi-memcpy-imex-test-compute-domain-2-resource-claim

---
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: mpi-memcpy-imex-test-2
spec:
  slotsPerWorker: 2
  launcherCreationPolicy: WaitForWorkersReady
  runPolicy:
    cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-launcher
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-launcher
            securityContext:
              runAsUser: 1000
            command:
            - mpirun
            args:
            - --bind-to
            - core
            - --map-by
            - ppr:2:node
            - -np
            - "4"
            - --report-bindings
            - -q
            - nvbandwidth
            - -t
            - multinode_device_to_device_memcpy_read_ce
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
    Worker:
      replicas: 2
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-worker
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-worker
            securityContext:
              runAsUser: 1000
            env:
            command:
            - /usr/sbin/sshd
            args:
            - -De
            - -f
            - /home/mpiuser/.sshd_config
            resources:
              limits:
                nvidia.com/gpu: 2
              claims:
              - name: compute-domain
          resourceClaims:
          - name: compute-domain
            resourceClaimTemplateName: mpi-memcpy-imex-test-compute-domain-2-resource-claim
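This spec is identical to the previous one except for the names, so each job gets its own ComputeDomain and its own set of per-node IMEX daemons. A quick, hedged check that the two domains really are distinct, using only commands already shown in the demo plus the standard -o wide output:

# (Sketch) Confirm the two jobs are backed by separate ComputeDomains and IMEX daemon pods
kubectl get computedomains.resource.nvidia.com
kubectl get pods -n nvidia -o wide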
demo/specs/imex/mpi-memcpy-imex-test-job.yaml

Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,83 @@
---
apiVersion: resource.nvidia.com/v1beta1
kind: ComputeDomain
metadata:
  name: mpi-memcpy-imex-test-compute-domain
spec:
  numNodes: 4
  channel:
    resourceClaimTemplate:
      name: mpi-memcpy-imex-test-compute-domain-resource-claim

---
apiVersion: kubeflow.org/v2beta1
kind: MPIJob
metadata:
  name: mpi-memcpy-imex-test
spec:
  slotsPerWorker: 2
  launcherCreationPolicy: WaitForWorkersReady
  runPolicy:
    cleanPodPolicy: Running
  sshAuthMountPath: /home/mpiuser/.ssh
  mpiReplicaSpecs:
    Launcher:
      replicas: 1
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-launcher
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-launcher
            securityContext:
              runAsUser: 1000
            command:
            - mpirun
            args:
            - --bind-to
            - core
            - --map-by
            - ppr:2:node
            - -np
            - "8"
            - --report-bindings
            - -q
            - nvbandwidth
            - -t
            - multinode_device_to_device_memcpy_read_ce
          affinity:
            nodeAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
                nodeSelectorTerms:
                - matchExpressions:
                  - key: node-role.kubernetes.io/control-plane
                    operator: Exists
    Worker:
      replicas: 4
      template:
        metadata:
          labels:
            mpi-memcpy-dra-test-replica: mpi-worker
        spec:
          containers:
          - image: nvcr.io/nvidia/nvbandwidth:v0.7
            name: mpi-worker
            securityContext:
              runAsUser: 1000
            env:
            command:
            - /usr/sbin/sshd
            args:
            - -De
            - -f
            - /home/mpiuser/.sshd_config
            resources:
              limits:
                nvidia.com/gpu: 2
              claims:
              - name: compute-domain
          resourceClaims:
          - name: compute-domain
            resourceClaimTemplateName: mpi-memcpy-imex-test-compute-domain-resource-claim
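Compared with the two smaller specs above, this job only changes scale: spec.numNodes goes from 2 to 4, Worker replicas from 2 to 4, and mpirun -np from 4 to 8, plus the name suffixes. That is exactly what the diff step in README.sh should surface; a hedged reminder of how to view it:

# (Sketch) Compare the full-size job against one of the smaller ones (scale-only differences)
diff -ruN mpi-memcpy-imex-test-job.yaml mpi-memcpy-imex-test-job-1.yaml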
