Skip to content

Commit ef8930f

Browse files
authored
Merge pull request #6947 from owenowenisme/lfx/add-interpret-for-ray-job
Add custom interpreter for RayJob
2 parents ebd956d + fc86224 commit ef8930f

File tree

6 files changed

+684
-0
lines changed

6 files changed

+684
-0
lines changed
Lines changed: 388 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,388 @@
1+
apiVersion: config.karmada.io/v1alpha1
2+
kind: ResourceInterpreterCustomization
3+
metadata:
4+
name: declarative-configuration-rayjob
5+
spec:
6+
target:
7+
apiVersion: ray.io/v1
8+
kind: RayJob
9+
customizations:
10+
componentResource:
11+
luaScript: |
12+
local kube = require("kube")
13+
14+
-- Reports whether s is nil or the empty string.
local function isempty(s)
  if s == nil then
    return true
  end
  return s == ''
end
17+
18+
-- Lists the components described by a RayJob's embedded rayClusterSpec.
-- Each entry is a table with the fields name, replicas and
-- replicaRequirements (the value kube.accuratePodRequirements returns for
-- the group's pod template, or nil when a worker group has no template).
-- Returns an empty table when spec or spec.rayClusterSpec is missing.
function GetComponents(desiredObj)
  local result = {}

  local spec = desiredObj.spec
  if spec == nil or spec.rayClusterSpec == nil then
    return result
  end
  local rayCluster = spec.rayClusterSpec

  -- Head group: reported with exactly one replica, and only when it has a
  -- pod template.
  local head = rayCluster.headGroupSpec
  if head ~= nil and head.template ~= nil then
    result[#result + 1] = {
      name = "ray-head",
      replicas = 1,
      replicaRequirements = kube.accuratePodRequirements(head.template)
    }
  end

  if rayCluster.workerGroupSpecs == nil then
    return result
  end

  -- Worker groups: one entry each, in declaration order.
  for index, group in ipairs(rayCluster.workerGroupSpecs) do
    -- Groups without a groupName get a positional fallback name.
    local groupName = group.groupName
    if isempty(groupName) then
      groupName = "worker-" .. tostring(index)
    end

    local podRequirements = nil
    if group.template ~= nil then
      podRequirements = kube.accuratePodRequirements(group.template)
    end

    result[#result + 1] = {
      name = groupName,
      replicas = group.replicas or 0, -- an unset replica count is reported as 0
      replicaRequirements = podRequirements
    }
  end

  return result
end
62+
healthInterpretation:
63+
luaScript: >
64+
function InterpretHealth(observedObj)
  -- Reports whether an observed RayJob counts as healthy.
  -- Healthy means status.jobDeploymentStatus is 'Running', or it is
  -- 'Complete' while status.jobStatus is 'SUCCEEDED'. Anything else,
  -- including a missing status, failed/stopped jobs and the transitional
  -- deployment states, yields false.
  -- (Comments stay inside the body: this script is a YAML folded scalar, and
  -- a comment at the indentation of the 'function' line would be folded
  -- onto it.)
  local status = observedObj.status
  if status == nil then
    return false
  end

  local deploymentState = status.jobDeploymentStatus
  if deploymentState == 'Running' then
    -- A running deployment is healthy regardless of jobStatus.
    return true
  end

  return deploymentState == 'Complete' and status.jobStatus == 'SUCCEEDED'
end
93+
statusAggregation:
94+
luaScript: >
95+
function AggregateStatus(desiredObj, statusItems)
  -- Folds the per-cluster RayJob statuses in statusItems into
  -- desiredObj.status and returns desiredObj.
  --   * statusItems == nil: desiredObj is returned untouched.
  --   * exactly one item:   its status replaces desiredObj.status as-is.
  --   * otherwise:          jobStatus, jobDeploymentStatus, reason and
  --     message are overwritten with the highest-ranked values found
  --     (nil when no item supplied one).
  -- (Comments stay inside the body: this script is a YAML folded scalar, and
  -- a comment at the indentation of the 'function' line would be folded
  -- onto it.)
  if statusItems == nil then
    return desiredObj
  end
  if desiredObj.status == nil then
    desiredObj.status = {}
  end

  -- If only one item, use it directly
  if #statusItems == 1 then
    desiredObj.status = statusItems[1].status
    return desiredObj
  end

  -- Rank of each jobDeploymentStatus; the highest rank seen wins. Note this
  -- is a precedence order rather than strictly "worst first": 'Complete'
  -- outranks 'Running', and the failure states outrank everything.
  local deploymentStatusPriority = {
    [""] = 0,
    ["Initializing"] = 1,
    ["Waiting"] = 2,
    ["Suspending"] = 3,
    ["Suspended"] = 3,
    ["Retrying"] = 4,
    ["Running"] = 5,
    ["Complete"] = 6,
    ["ValidationFailed"] = 7,
    ["Failed"] = 8,
  }

  -- Rank of each jobStatus; the highest rank seen wins.
  local jobStatusPriority = {
    [""] = 0,
    ["PENDING"] = 1,
    ["RUNNING"] = 2,
    ["SUCCEEDED"] = 3,
    ["STOPPED"] = 4,
    ["FAILED"] = 5,
  }

  -- Aggregated values; they stay nil when no item contributes one.
  local jobStatus = nil
  local jobDeploymentStatus = nil
  local reason = nil
  local message = nil

  -- Start below every real rank so the first value seen is always taken.
  local bestDeploymentPriority = -1
  local bestJobStatusPriority = -1

  -- Aggregate status from all member clusters
  for i = 1, #statusItems do
    local currentStatus = statusItems[i].status
    if currentStatus ~= nil then
      -- jobDeploymentStatus: values missing from the table rank 0. The
      -- comparison is strict, so on a tie the earlier item is kept.
      -- reason and message always travel with the winning deployment status.
      if currentStatus.jobDeploymentStatus ~= nil then
        local priority = deploymentStatusPriority[currentStatus.jobDeploymentStatus] or 0
        if priority > bestDeploymentPriority then
          bestDeploymentPriority = priority
          jobDeploymentStatus = currentStatus.jobDeploymentStatus
          reason = currentStatus.reason
          message = currentStatus.message
        end
      end

      -- jobStatus: unlike jobDeploymentStatus, an empty string is skipped
      -- entirely instead of being ranked 0.
      if currentStatus.jobStatus ~= nil and currentStatus.jobStatus ~= "" then
        local priority = jobStatusPriority[currentStatus.jobStatus] or 0
        if priority > bestJobStatusPriority then
          bestJobStatusPriority = priority
          jobStatus = currentStatus.jobStatus
        end
      end
    end
  end

  -- Set aggregated status
  desiredObj.status.jobStatus = jobStatus
  desiredObj.status.jobDeploymentStatus = jobDeploymentStatus
  desiredObj.status.reason = reason
  desiredObj.status.message = message
  return desiredObj
end
177+
dependencyInterpretation:
178+
luaScript: >
179+
function GetDependencies(desiredObj)
  -- Collects the ConfigMaps, Secrets, ServiceAccounts and
  -- PersistentVolumeClaims referenced by the pod templates of a RayJob:
  -- spec.rayClusterSpec.headGroupSpec, every entry of
  -- spec.rayClusterSpec.workerGroupSpecs, and spec.submitterPodTemplate.
  -- Returns an array of { apiVersion, kind, name, namespace } tables, with
  -- namespace taken from desiredObj.metadata.namespace. Entries are grouped
  -- by kind in the order ConfigMap, Secret, ServiceAccount,
  -- PersistentVolumeClaim; each name appears once per kind, and the order
  -- inside a kind is unspecified (pairs iteration).
  -- (Comments and helpers stay inside the body: this script is a YAML folded
  -- scalar, and a comment at the indentation of the 'function' line would be
  -- folded onto it.)

  -- Name sets, one per kind. Declared local so nothing leaks into the
  -- global environment between interpreter invocations.
  local configMaps = {}
  local secrets = {}
  local serviceAccounts = {}
  local pvcs = {}

  -- Adds name to set unless it is nil or empty.
  local function record(set, name)
    if name ~= nil and name ~= '' then
      set[name] = true
    end
  end

  -- Returns ref.name, or nil when the reference itself is absent.
  local function nameOf(ref)
    if ref ~= nil then
      return ref.name
    end
    return nil
  end

  -- Volume sources whose Secret is referenced through <source>.secretRef.name.
  local secretRefVolumeTypes = {
    'cephfs', 'cinder', 'flexVolume', 'rbd', 'scaleIO', 'iscsi', 'storageos',
  }

  -- Records every ConfigMap, Secret and PVC referenced by one volume.
  local function scanVolume(volume)
    record(configMaps, nameOf(volume.configMap))
    if volume.secret ~= nil then
      record(secrets, volume.secret.secretName)
    end
    -- Projected volumes; serviceAccountToken sources need no dependency.
    if volume.projected ~= nil and volume.projected.sources ~= nil then
      for _, source in pairs(volume.projected.sources) do
        record(configMaps, nameOf(source.configMap))
        record(secrets, nameOf(source.secret))
      end
    end
    if volume.persistentVolumeClaim ~= nil then
      record(pvcs, volume.persistentVolumeClaim.claimName)
    end
    if volume.azureFile ~= nil then
      record(secrets, volume.azureFile.secretName)
    end
    for _, volumeType in ipairs(secretRefVolumeTypes) do
      local source = volume[volumeType]
      if source ~= nil then
        record(secrets, nameOf(source.secretRef))
      end
    end
    if volume.csi ~= nil then
      record(secrets, nameOf(volume.csi.nodePublishSecretRef))
    end
  end

  -- Records envFrom and env.valueFrom references of a container list;
  -- shared by containers and initContainers.
  local function scanContainers(containers)
    if containers == nil then
      return
    end
    for _, container in pairs(containers) do
      if container.envFrom ~= nil then
        for _, envFromSource in pairs(container.envFrom) do
          record(configMaps, nameOf(envFromSource.configMapRef))
          record(secrets, nameOf(envFromSource.secretRef))
        end
      end
      if container.env ~= nil then
        for _, envVar in pairs(container.env) do
          local valueFrom = envVar.valueFrom
          if valueFrom ~= nil then
            record(configMaps, nameOf(valueFrom.configMapKeyRef))
            record(secrets, nameOf(valueFrom.secretKeyRef))
          end
        end
      end
    end
  end

  -- Records every dependency referenced by one pod spec.
  local function scanPodSpec(podSpec)
    if podSpec == nil then
      return
    end

    -- The 'default' service account is deliberately not tracked.
    if podSpec.serviceAccountName ~= 'default' then
      record(serviceAccounts, podSpec.serviceAccountName)
    end

    if podSpec.imagePullSecrets ~= nil then
      for _, secretRef in pairs(podSpec.imagePullSecrets) do
        record(secrets, secretRef.name)
      end
    end

    if podSpec.volumes ~= nil then
      for _, volume in pairs(podSpec.volumes) do
        scanVolume(volume)
      end
    end

    scanContainers(podSpec.containers)
    scanContainers(podSpec.initContainers)
  end

  local spec = desiredObj.spec
  if spec ~= nil then
    local clusterSpec = spec.rayClusterSpec
    if clusterSpec ~= nil then
      -- Head group
      local head = clusterSpec.headGroupSpec
      if head ~= nil and head.template ~= nil then
        scanPodSpec(head.template.spec)
      end
      -- Worker groups
      if clusterSpec.workerGroupSpecs ~= nil then
        for _, workerGroup in pairs(clusterSpec.workerGroupSpecs) do
          if workerGroup.template ~= nil then
            scanPodSpec(workerGroup.template.spec)
          end
        end
      end
    end

    -- Submitter pod
    if spec.submitterPodTemplate ~= nil then
      scanPodSpec(spec.submitterPodTemplate.spec)
    end
  end

  -- Build dependency references array
  local refs = {}
  local function appendRefs(kind, names)
    for name, _ in pairs(names) do
      refs[#refs + 1] = {
        apiVersion = 'v1',
        kind = kind,
        name = name,
        -- Read lazily so objects without dependencies never touch metadata.
        namespace = desiredObj.metadata.namespace,
      }
    end
  end
  appendRefs('ConfigMap', configMaps)
  appendRefs('Secret', secrets)
  appendRefs('ServiceAccount', serviceAccounts)
  appendRefs('PersistentVolumeClaim', pvcs)

  return refs
end

0 commit comments

Comments
 (0)