Skip to content

Commit e848a62

Browse files
committed
update
Signed-off-by: You-Cheng Lin (Owen) <[email protected]>
1 parent 0230887 commit e848a62

File tree

3 files changed

+350
-64
lines changed

3 files changed

+350
-64
lines changed

pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/ray.io/v1/RayJob/customizations.yaml

Lines changed: 223 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -107,20 +107,10 @@ spec:
107107
end
108108
109109
-- Initialize aggregated values
110-
local jobId = nil
111-
local rayClusterName = nil
112-
local dashboardURL = nil
113110
local jobStatus = nil
114111
local jobDeploymentStatus = nil
115112
local reason = nil
116113
local message = nil
117-
local startTime = nil
118-
local endTime = nil
119-
local succeeded = 0
120-
local failed = 0
121-
local observedGeneration = nil
122-
local rayJobStatusInfo = nil
123-
local rayClusterStatus = nil
124114
125115
-- Priority for jobDeploymentStatus (worst state wins)
126116
local deploymentStatusPriority = {
@@ -136,7 +126,20 @@ spec:
136126
["Failed"] = 8,
137127
}
138128
129+
-- Priority for jobStatus (worst/most critical state wins)
130+
local jobStatusPriority = {
131+
[""] = 0,
132+
["PENDING"] = 1,
133+
["RUNNING"] = 2,
134+
["SUCCEEDED"] = 3,
135+
["STOPPED"] = 4,
136+
["FAILED"] = 5,
137+
}
138+
139+
139140
local worstDeploymentPriority = -1
141+
local worstJobStatusPriority = -1
142+
local worstClusterStatePriority = -1
140143
141144
-- Aggregate status from all member clusters
142145
for i = 1, #statusItems do
@@ -153,77 +156,233 @@ spec:
153156
end
154157
end
155158
156-
-- Take first non-nil jobStatus (should be same across clusters)
157-
if jobStatus == nil and currentStatus.jobStatus ~= nil and currentStatus.jobStatus ~= "" then
158-
jobStatus = currentStatus.jobStatus
159+
-- Take the worst jobStatus
160+
if currentStatus.jobStatus ~= nil and currentStatus.jobStatus ~= "" then
161+
local priority = jobStatusPriority[currentStatus.jobStatus] or 0
162+
if priority > worstJobStatusPriority then
163+
worstJobStatusPriority = priority
164+
jobStatus = currentStatus.jobStatus
165+
end
159166
end
167+
end
168+
end
160169
161-
-- Take first non-nil identifiers
162-
if jobId == nil and currentStatus.jobId ~= nil then
163-
jobId = currentStatus.jobId
164-
end
165-
if rayClusterName == nil and currentStatus.rayClusterName ~= nil then
166-
rayClusterName = currentStatus.rayClusterName
167-
end
168-
if dashboardURL == nil and currentStatus.dashboardURL ~= nil then
169-
dashboardURL = currentStatus.dashboardURL
170-
end
170+
-- Set aggregated status
171+
desiredObj.status.jobStatus = jobStatus
172+
desiredObj.status.jobDeploymentStatus = jobDeploymentStatus
173+
desiredObj.status.reason = reason
174+
desiredObj.status.message = message
175+
return desiredObj
176+
end
177+
dependencyInterpretation:
178+
luaScript: >
179+
-- GetDependencies collects the resources referenced by the pod templates of
-- the given object and returns them as dependency references.
--
-- Pod specs are read from:
--   * spec.rayClusterSpec.headGroupSpec.template.spec
--   * spec.rayClusterSpec.workerGroupSpecs[*].template.spec
--   * spec.submitterPodTemplate.spec
--
-- @param desiredObj table  the object to inspect
-- @return table  array of { apiVersion = 'v1', kind, name, namespace } tables,
--   grouped by kind in the order ConfigMap, Secret, ServiceAccount,
--   PersistentVolumeClaim. Names are de-duplicated per kind; the order of
--   names inside one kind follows pairs() and is therefore unspecified.
function GetDependencies(desiredObj)
  -- All working state is local so that nothing leaks into (or is picked up
  -- from) the global environment of the Lua VM running this script.
  local dependentConfigMaps = {}
  local dependentSecrets = {}
  local dependentSas = {}
  local dependentPVCs = {}
  local refs = {}
  local idx = 1

  if desiredObj == nil then
    return refs
  end

  -- Volume sources whose Secret is referenced through `<source>.secretRef.name`.
  local secretRefVolumeSources = {
    'cephfs', 'cinder', 'flexVolume', 'rbd', 'scaleIO', 'iscsi', 'storageos',
  }

  -- Records `name` in `set` unless it is nil or empty.
  local function addName(set, name)
    if name ~= nil and name ~= '' then
      set[name] = true
    end
  end

  -- Records `ref.name` in `set` when `ref` is present.
  local function addRefName(set, ref)
    if ref ~= nil then
      addName(set, ref.name)
    end
  end

  -- Collects ConfigMap/Secret names used by `envFrom` and `env[*].valueFrom`
  -- of every container in `containers` (regular or init containers).
  local function collectFromContainers(containers)
    if containers == nil then
      return
    end
    for _, container in pairs(containers) do
      if container.envFrom ~= nil then
        for _, envFromSource in pairs(container.envFrom) do
          addRefName(dependentConfigMaps, envFromSource.configMapRef)
          addRefName(dependentSecrets, envFromSource.secretRef)
        end
      end
      if container.env ~= nil then
        for _, envVar in pairs(container.env) do
          if envVar.valueFrom ~= nil then
            addRefName(dependentConfigMaps, envVar.valueFrom.configMapKeyRef)
            addRefName(dependentSecrets, envVar.valueFrom.secretKeyRef)
          end
        end
      end
    end
  end

  -- Collects ConfigMap/Secret/PVC names referenced by a single volume.
  local function collectFromVolume(volume)
    if volume.configMap ~= nil then
      addName(dependentConfigMaps, volume.configMap.name)
    end
    if volume.secret ~= nil then
      addName(dependentSecrets, volume.secret.secretName)
    end
    if volume.projected ~= nil and volume.projected.sources ~= nil then
      -- serviceAccountToken sources are intentionally ignored: they don't
      -- need explicit dependency tracking.
      for _, source in pairs(volume.projected.sources) do
        addRefName(dependentConfigMaps, source.configMap)
        addRefName(dependentSecrets, source.secret)
      end
    end
    if volume.persistentVolumeClaim ~= nil then
      addName(dependentPVCs, volume.persistentVolumeClaim.claimName)
    end
    if volume.azureFile ~= nil then
      addName(dependentSecrets, volume.azureFile.secretName)
    end
    for _, sourceName in ipairs(secretRefVolumeSources) do
      local source = volume[sourceName]
      if source ~= nil then
        addRefName(dependentSecrets, source.secretRef)
      end
    end
    if volume.csi ~= nil then
      addRefName(dependentSecrets, volume.csi.nodePublishSecretRef)
    end
  end

  -- Collects every dependency referenced by one pod spec.
  local function extractDependenciesFromPodSpec(podSpec)
    if podSpec == nil then
      return
    end

    -- The 'default' service account is skipped, as are nil/empty names.
    local saName = podSpec.serviceAccountName
    if saName ~= nil and saName ~= '' and saName ~= 'default' then
      dependentSas[saName] = true
    end

    if podSpec.imagePullSecrets ~= nil then
      for _, secretRef in pairs(podSpec.imagePullSecrets) do
        addRefName(dependentSecrets, secretRef)
      end
    end

    if podSpec.volumes ~= nil then
      for _, volume in pairs(podSpec.volumes) do
        collectFromVolume(volume)
      end
    end

    collectFromContainers(podSpec.containers)
    collectFromContainers(podSpec.initContainers)
  end

  local spec = desiredObj.spec
  if spec ~= nil then
    local clusterSpec = spec.rayClusterSpec
    if clusterSpec ~= nil then
      -- Head group
      local head = clusterSpec.headGroupSpec
      if head ~= nil and head.template ~= nil then
        extractDependenciesFromPodSpec(head.template.spec)
      end
      -- Worker groups
      if clusterSpec.workerGroupSpecs ~= nil then
        for _, workerGroup in pairs(clusterSpec.workerGroupSpecs) do
          if workerGroup.template ~= nil then
            extractDependenciesFromPodSpec(workerGroup.template.spec)
          end
        end
      end
    end

    -- Submitter pod template
    if spec.submitterPodTemplate ~= nil then
      extractDependenciesFromPodSpec(spec.submitterPodTemplate.spec)
    end
  end

  -- Every reference uses the namespace of the inspected object; a missing
  -- metadata table yields a nil namespace instead of a runtime error.
  local namespace = nil
  if desiredObj.metadata ~= nil then
    namespace = desiredObj.metadata.namespace
  end

  -- Appends one reference of the given kind per name in `names`.
  local function appendRefs(kind, names)
    for name, _ in pairs(names) do
      refs[idx] = {
        apiVersion = 'v1',
        kind = kind,
        name = name,
        namespace = namespace,
      }
      idx = idx + 1
    end
  end

  appendRefs('ConfigMap', dependentConfigMaps)
  appendRefs('Secret', dependentSecrets)
  appendRefs('ServiceAccount', dependentSas)
  appendRefs('PersistentVolumeClaim', dependentPVCs)

  return refs
end

pkg/resourceinterpreter/default/thirdparty/resourcecustomizations/ray.io/v1/RayJob/customizations_tests.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,6 @@ tests:
66
operation: InterpretHealth
77
- observedInputPath: testdata/observed-rayjob.yaml
88
operation: InterpretComponent
9+
- desiredInputPath: testdata/desired-rayjob-with-dependencies.yaml
10+
operation: InterpretDependency
911

0 commit comments

Comments
 (0)