@@ -107,20 +107,10 @@ spec:
107107 end
108108
109109 -- Initialize aggregated values
110- local jobId = nil
111- local rayClusterName = nil
112- local dashboardURL = nil
113110 local jobStatus = nil
114111 local jobDeploymentStatus = nil
115112 local reason = nil
116113 local message = nil
117- local startTime = nil
118- local endTime = nil
119- local succeeded = 0
120- local failed = 0
121- local observedGeneration = nil
122- local rayJobStatusInfo = nil
123- local rayClusterStatus = nil
124114
125115 -- Priority for jobDeploymentStatus (worst state wins)
126116 local deploymentStatusPriority = {
@@ -136,7 +126,20 @@ spec:
136126 ["Failed"] = 8,
137127 }
138128
129+ -- Priority for jobStatus (worst/most critical state wins)
130+ local jobStatusPriority = {
131+ [""] = 0,
132+ ["PENDING"] = 1,
133+ ["RUNNING"] = 2,
134+ ["SUCCEEDED"] = 3,
135+ ["STOPPED"] = 4,
136+ ["FAILED"] = 5,
137+ }
138+
139+
139140 local worstDeploymentPriority = -1
141+ local worstJobStatusPriority = -1
142+ local worstClusterStatePriority = -1
140143
141144 -- Aggregate status from all member clusters
142145 for i = 1, #statusItems do
@@ -153,77 +156,233 @@ spec:
153156 end
154157 end
155158
156- -- Take first non-nil jobStatus (should be same across clusters)
157- if jobStatus == nil and currentStatus.jobStatus ~= nil and currentStatus.jobStatus ~= "" then
158- jobStatus = currentStatus.jobStatus
159+ -- Take the worst jobStatus
160+ if currentStatus.jobStatus ~= nil and currentStatus.jobStatus ~= "" then
161+ local priority = jobStatusPriority[currentStatus.jobStatus] or 0
162+ if priority > worstJobStatusPriority then
163+ worstJobStatusPriority = priority
164+ jobStatus = currentStatus.jobStatus
165+ end
159166 end
167+ end
168+ end
160169
161- -- Take first non-nil identifiers
162- if jobId == nil and currentStatus.jobId ~= nil then
163- jobId = currentStatus.jobId
164- end
165- if rayClusterName == nil and currentStatus.rayClusterName ~= nil then
166- rayClusterName = currentStatus.rayClusterName
167- end
168- if dashboardURL == nil and currentStatus.dashboardURL ~= nil then
169- dashboardURL = currentStatus.dashboardURL
170- end
170+ -- Set aggregated status
171+ desiredObj.status.jobStatus = jobStatus
172+ desiredObj.status.jobDeploymentStatus = jobDeploymentStatus
173+ desiredObj.status.reason = reason
174+ desiredObj.status.message = message
175+ return desiredObj
176+ end
177+ dependencyInterpretation :
178+ luaScript : >
function GetDependencies(desiredObj)
  -- Collects the namespaced resources referenced by the pod templates of
  -- desiredObj (spec.rayClusterSpec head group, spec.rayClusterSpec worker
  -- groups and spec.submitterPodTemplate).
  --
  -- Parameter:
  --   desiredObj: table with optional `spec` and `metadata` fields.
  -- Returns:
  --   An array of tables { apiVersion, kind, name, namespace }, grouped as
  --   ConfigMaps, then Secrets, then ServiceAccounts, then
  --   PersistentVolumeClaims. Names are de-duplicated per kind; the order of
  --   names inside one kind follows `pairs` and is therefore unspecified.
  --
  -- All working state is local so nothing leaks into the global environment
  -- of the Lua VM that runs this script.
  local dependentConfigMaps = {}
  local dependentSecrets = {}
  local dependentSas = {}
  local dependentPVCs = {}
  local refs = {}

  -- Volume sources whose Secret reference is stored under `secretRef.name`.
  local secretRefVolumeTypes = {
    'cephfs', 'cinder', 'flexVolume', 'rbd', 'scaleIO', 'iscsi', 'storageos'
  }

  -- Records ref[field] in `set` when ref exists and the value is non-empty.
  local function addName(set, ref, field)
    if ref == nil then
      return
    end
    local name = ref[field]
    if name ~= nil and name ~= '' then
      set[name] = true
    end
  end

  -- Records ConfigMap/Secret references found in one volume definition.
  local function collectFromVolume(volume)
    addName(dependentConfigMaps, volume.configMap, 'name')
    addName(dependentSecrets, volume.secret, 'secretName')

    -- Projected volumes: serviceAccountToken sources are intentionally
    -- ignored, they do not reference a separate object.
    if volume.projected ~= nil and volume.projected.sources ~= nil then
      for _, source in pairs(volume.projected.sources) do
        addName(dependentConfigMaps, source.configMap, 'name')
        addName(dependentSecrets, source.secret, 'name')
      end
    end

    addName(dependentPVCs, volume.persistentVolumeClaim, 'claimName')

    -- Remaining volume sources that carry a Secret reference.
    addName(dependentSecrets, volume.azureFile, 'secretName')
    for _, volumeType in ipairs(secretRefVolumeTypes) do
      local volumeSource = volume[volumeType]
      if volumeSource ~= nil then
        addName(dependentSecrets, volumeSource.secretRef, 'name')
      end
    end
    if volume.csi ~= nil then
      addName(dependentSecrets, volume.csi.nodePublishSecretRef, 'name')
    end
  end

  -- Records envFrom / env.valueFrom references of every container in the
  -- list; shared by `containers` and `initContainers`.
  local function collectFromContainers(containers)
    if containers == nil then
      return
    end
    for _, container in pairs(containers) do
      if container.envFrom ~= nil then
        for _, envFromSource in pairs(container.envFrom) do
          addName(dependentConfigMaps, envFromSource.configMapRef, 'name')
          addName(dependentSecrets, envFromSource.secretRef, 'name')
        end
      end
      if container.env ~= nil then
        for _, envVar in pairs(container.env) do
          local valueFrom = envVar.valueFrom
          if valueFrom ~= nil then
            addName(dependentConfigMaps, valueFrom.configMapKeyRef, 'name')
            addName(dependentSecrets, valueFrom.secretKeyRef, 'name')
          end
        end
      end
    end
  end

  -- Extracts every dependency referenced by a single pod spec.
  local function extractDependenciesFromPodSpec(podSpec)
    if podSpec == nil then
      return
    end

    -- The 'default' service account is skipped, like an empty name.
    local saName = podSpec.serviceAccountName
    if saName ~= nil and saName ~= '' and saName ~= 'default' then
      dependentSas[saName] = true
    end

    if podSpec.imagePullSecrets ~= nil then
      for _, secretRef in pairs(podSpec.imagePullSecrets) do
        addName(dependentSecrets, secretRef, 'name')
      end
    end

    if podSpec.volumes ~= nil then
      for _, volume in pairs(podSpec.volumes) do
        collectFromVolume(volume)
      end
    end

    collectFromContainers(podSpec.containers)
    collectFromContainers(podSpec.initContainers)
  end

  -- Returns template.spec, or nil when any link of the chain is missing.
  local function podSpecOf(template)
    if template == nil then
      return nil
    end
    return template.spec
  end

  local spec = desiredObj.spec
  if spec ~= nil then
    local clusterSpec = spec.rayClusterSpec
    if clusterSpec ~= nil then
      -- Head group
      if clusterSpec.headGroupSpec ~= nil then
        extractDependenciesFromPodSpec(podSpecOf(clusterSpec.headGroupSpec.template))
      end
      -- Worker groups
      if clusterSpec.workerGroupSpecs ~= nil then
        for _, workerGroup in pairs(clusterSpec.workerGroupSpecs) do
          extractDependenciesFromPodSpec(podSpecOf(workerGroup.template))
        end
      end
    end

    -- Submitter pod
    extractDependenciesFromPodSpec(podSpecOf(spec.submitterPodTemplate))
  end

  -- Dependencies live in the same namespace as the object itself. Guarded so
  -- that an object without metadata yields refs with a nil namespace instead
  -- of raising an index error.
  local namespace = nil
  if desiredObj.metadata ~= nil then
    namespace = desiredObj.metadata.namespace
  end

  -- Appends one reference per collected name of the given kind.
  local function appendRefs(kind, names)
    for name, _ in pairs(names) do
      refs[#refs + 1] = {
        apiVersion = 'v1',
        kind = kind,
        name = name,
        namespace = namespace
      }
    end
  end

  appendRefs('ConfigMap', dependentConfigMaps)
  appendRefs('Secret', dependentSecrets)
  appendRefs('ServiceAccount', dependentSas)
  appendRefs('PersistentVolumeClaim', dependentPVCs)

  return refs
end
0 commit comments