Skip to content

Commit 23ba1a6

Browse files
authored
feat: compartment status (#107)
* feat: compartment status
* add tests
1 parent 1d1a86b commit 23ba1a6

File tree

9 files changed

+756
-262
lines changed

9 files changed

+756
-262
lines changed

chart/templates/skyhook-crd.yaml

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -498,38 +498,69 @@ spec:
498498
status:
499499
description: SkyhookStatus defines the observed state of Skyhook
500500
properties:
501-
compartmentBatchStates:
501+
compartmentStatuses:
502502
additionalProperties:
503-
description: BatchProcessingState tracks the current state of batch
504-
processing for a compartment
503+
description: CompartmentStatus tracks the detailed state of a compartment
505504
properties:
506-
completedNodes:
507-
description: Total number of nodes that have completed successfully
508-
(cumulative across all batches)
505+
batchState:
506+
description: BatchState tracks the batch processing state for
507+
this compartment
508+
properties:
509+
completedNodes:
510+
description: Total number of nodes that have completed successfully
511+
(cumulative across all batches)
512+
type: integer
513+
consecutiveFailures:
514+
description: Number of consecutive failures
515+
type: integer
516+
currentBatch:
517+
description: Current batch number (starts at 1)
518+
type: integer
519+
failedNodes:
520+
description: Total number of nodes that have failed (cumulative
521+
across all batches)
522+
type: integer
523+
lastBatchFailed:
524+
description: Whether the last batch failed (for slowdown
525+
logic)
526+
type: boolean
527+
lastBatchSize:
528+
description: Last batch size (for slowdown calculations)
529+
type: integer
530+
shouldStop:
531+
description: Whether the strategy should stop processing
532+
due to failures
533+
type: boolean
534+
type: object
535+
ceiling:
536+
description: Ceiling is the maximum number of nodes that can
537+
be in progress at once
509538
type: integer
510-
consecutiveFailures:
511-
description: Number of consecutive failures
539+
completed:
540+
description: Completed is the number of nodes that have completed
541+
successfully
512542
type: integer
513-
currentBatch:
514-
description: Current batch number (starts at 1)
543+
inProgress:
544+
description: InProgress is the number of nodes currently in
545+
progress
515546
type: integer
516-
failedNodes:
517-
description: Total number of nodes that have failed (cumulative
518-
across all batches)
547+
matched:
548+
description: Matched is the number of nodes that match this
549+
compartment's selector
519550
type: integer
520-
lastBatchFailed:
521-
description: Whether the last batch failed (for slowdown logic)
522-
type: boolean
523-
lastBatchSize:
524-
description: Last batch size (for slowdown calculations)
551+
progressPercent:
552+
description: ProgressPercent is the percentage of nodes completed
553+
(0-100)
525554
type: integer
526-
shouldStop:
527-
description: Whether the strategy should stop processing due
528-
to failures
529-
type: boolean
555+
required:
556+
- ceiling
557+
- completed
558+
- inProgress
559+
- matched
560+
- progressPercent
530561
type: object
531-
description: CompartmentBatchStates tracks batch processing state
532-
per compartment
562+
description: CompartmentStatuses tracks the detailed status of each
563+
compartment
533564
type: object
534565
completeNodes:
535566
default: 0/0

operator/api/v1alpha1/skyhook_types.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,28 @@ const (
280280
RESTART_ALL_SERVICES InterruptType = "restartAllServices"
281281
)
282282

283+
// CompartmentStatus tracks the detailed state of a compartment
284+
type CompartmentStatus struct {
285+
// Matched is the number of nodes that match this compartment's selector
286+
Matched int `json:"matched"`
287+
288+
// Ceiling is the maximum number of nodes that can be in progress at once
289+
Ceiling int `json:"ceiling"`
290+
291+
// InProgress is the number of nodes currently in progress
292+
InProgress int `json:"inProgress"`
293+
294+
// Completed is the number of nodes that have completed successfully
295+
Completed int `json:"completed"`
296+
297+
// ProgressPercent is the percentage of nodes completed (0-100)
298+
ProgressPercent int `json:"progressPercent"`
299+
300+
// BatchState tracks the batch processing state for this compartment
301+
// +optional
302+
BatchState *BatchProcessingState `json:"batchState,omitempty"`
303+
}
304+
283305
// SkyhookStatus defines the observed state of Skyhook
284306
type SkyhookStatus struct {
285307

@@ -316,8 +338,9 @@ type SkyhookStatus struct {
316338
// ConfigUpdates tracks config updates
317339
ConfigUpdates map[string][]string `json:"configUpdates,omitempty"`
318340

319-
// CompartmentBatchStates tracks batch processing state per compartment
320-
CompartmentBatchStates map[string]BatchProcessingState `json:"compartmentBatchStates,omitempty"`
341+
// CompartmentStatuses tracks the detailed status of each compartment
342+
// +optional
343+
CompartmentStatuses map[string]CompartmentStatus `json:"compartmentStatuses,omitempty"`
321344

322345
// +kubebuilder:example=3
323346
// +kubebuilder:default=0

operator/api/v1alpha1/zz_generated.deepcopy.go

Lines changed: 24 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -499,38 +499,69 @@ spec:
499499
status:
500500
description: SkyhookStatus defines the observed state of Skyhook
501501
properties:
502-
compartmentBatchStates:
502+
compartmentStatuses:
503503
additionalProperties:
504-
description: BatchProcessingState tracks the current state of batch
505-
processing for a compartment
504+
description: CompartmentStatus tracks the detailed state of a compartment
506505
properties:
507-
completedNodes:
508-
description: Total number of nodes that have completed successfully
509-
(cumulative across all batches)
506+
batchState:
507+
description: BatchState tracks the batch processing state for
508+
this compartment
509+
properties:
510+
completedNodes:
511+
description: Total number of nodes that have completed successfully
512+
(cumulative across all batches)
513+
type: integer
514+
consecutiveFailures:
515+
description: Number of consecutive failures
516+
type: integer
517+
currentBatch:
518+
description: Current batch number (starts at 1)
519+
type: integer
520+
failedNodes:
521+
description: Total number of nodes that have failed (cumulative
522+
across all batches)
523+
type: integer
524+
lastBatchFailed:
525+
description: Whether the last batch failed (for slowdown
526+
logic)
527+
type: boolean
528+
lastBatchSize:
529+
description: Last batch size (for slowdown calculations)
530+
type: integer
531+
shouldStop:
532+
description: Whether the strategy should stop processing
533+
due to failures
534+
type: boolean
535+
type: object
536+
ceiling:
537+
description: Ceiling is the maximum number of nodes that can
538+
be in progress at once
510539
type: integer
511-
consecutiveFailures:
512-
description: Number of consecutive failures
540+
completed:
541+
description: Completed is the number of nodes that have completed
542+
successfully
513543
type: integer
514-
currentBatch:
515-
description: Current batch number (starts at 1)
544+
inProgress:
545+
description: InProgress is the number of nodes currently in
546+
progress
516547
type: integer
517-
failedNodes:
518-
description: Total number of nodes that have failed (cumulative
519-
across all batches)
548+
matched:
549+
description: Matched is the number of nodes that match this
550+
compartment's selector
520551
type: integer
521-
lastBatchFailed:
522-
description: Whether the last batch failed (for slowdown logic)
523-
type: boolean
524-
lastBatchSize:
525-
description: Last batch size (for slowdown calculations)
552+
progressPercent:
553+
description: ProgressPercent is the percentage of nodes completed
554+
(0-100)
526555
type: integer
527-
shouldStop:
528-
description: Whether the strategy should stop processing due
529-
to failures
530-
type: boolean
556+
required:
557+
- ceiling
558+
- completed
559+
- inProgress
560+
- matched
561+
- progressPercent
531562
type: object
532-
description: CompartmentBatchStates tracks batch processing state
533-
per compartment
563+
description: CompartmentStatuses tracks the detailed status of each
564+
compartment
534565
type: object
535566
completeNodes:
536567
default: 0/0

0 commit comments

Comments
 (0)