Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 55 additions & 24 deletions chart/templates/skyhook-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -498,38 +498,69 @@ spec:
status:
description: SkyhookStatus defines the observed state of Skyhook
properties:
compartmentBatchStates:
compartmentStatuses:
additionalProperties:
description: BatchProcessingState tracks the current state of batch
processing for a compartment
description: CompartmentStatus tracks the detailed state of a compartment
properties:
completedNodes:
description: Total number of nodes that have completed successfully
(cumulative across all batches)
batchState:
description: BatchState tracks the batch processing state for
this compartment
properties:
completedNodes:
description: Total number of nodes that have completed successfully
(cumulative across all batches)
type: integer
consecutiveFailures:
description: Number of consecutive failures
type: integer
currentBatch:
description: Current batch number (starts at 1)
type: integer
failedNodes:
description: Total number of nodes that have failed (cumulative
across all batches)
type: integer
lastBatchFailed:
description: Whether the last batch failed (for slowdown
logic)
type: boolean
lastBatchSize:
description: Last batch size (for slowdown calculations)
type: integer
shouldStop:
description: Whether the strategy should stop processing
due to failures
type: boolean
type: object
ceiling:
description: Ceiling is the maximum number of nodes that can
be in progress at once
type: integer
consecutiveFailures:
description: Number of consecutive failures
completed:
description: Completed is the number of nodes that have completed
successfully
type: integer
currentBatch:
description: Current batch number (starts at 1)
inProgress:
description: InProgress is the number of nodes currently in
progress
type: integer
failedNodes:
description: Total number of nodes that have failed (cumulative
across all batches)
matched:
description: Matched is the number of nodes that match this
compartment's selector
type: integer
lastBatchFailed:
description: Whether the last batch failed (for slowdown logic)
type: boolean
lastBatchSize:
description: Last batch size (for slowdown calculations)
progressPercent:
description: ProgressPercent is the percentage of nodes completed
(0-100)
type: integer
shouldStop:
description: Whether the strategy should stop processing due
to failures
type: boolean
required:
- ceiling
- completed
- inProgress
- matched
- progressPercent
type: object
description: CompartmentBatchStates tracks batch processing state
per compartment
description: CompartmentStatuses tracks the detailed status of each
compartment
type: object
completeNodes:
default: 0/0
Expand Down
27 changes: 25 additions & 2 deletions operator/api/v1alpha1/skyhook_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,28 @@ const (
RESTART_ALL_SERVICES InterruptType = "restartAllServices"
)

// NOTE(review): the description strings in the CRD schemas
// (chart/templates/skyhook-crd.yaml and
// operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml) match the doc
// comments on this type and its fields word for word, so they are presumably
// generated from them. Regenerate both CRDs after rewording any of these
// comments - TODO confirm the generation workflow.
//
// Values of this type are stored in SkyhookStatus.CompartmentStatuses, a map
// with string keys (presumably the compartment name - verify against the
// code that populates it).

// CompartmentStatus tracks the detailed state of a compartment
type CompartmentStatus struct {
// Matched is the number of nodes that match this compartment's selector
Matched int `json:"matched"` // no omitempty: written even when 0; listed under "required" in the CRD schema

// Ceiling is the maximum number of nodes that can be in progress at once
Ceiling int `json:"ceiling"` // no omitempty: written even when 0; listed under "required" in the CRD schema

// InProgress is the number of nodes currently in progress
InProgress int `json:"inProgress"` // no omitempty: written even when 0; listed under "required" in the CRD schema

// Completed is the number of nodes that have completed successfully
Completed int `json:"completed"` // no omitempty: written even when 0; listed under "required" in the CRD schema

// ProgressPercent is the percentage of nodes completed (0-100)
ProgressPercent int `json:"progressPercent"` // the 0-100 range is documentation only: the CRD schema has no minimum/maximum for this field

// BatchState tracks the batch processing state for this compartment
// +optional
BatchState *BatchProcessingState `json:"batchState,omitempty"` // pointer + omitempty: the key is left out of the JSON when this is nil
}

// SkyhookStatus defines the observed state of Skyhook
type SkyhookStatus struct {

Expand Down Expand Up @@ -316,8 +338,9 @@ type SkyhookStatus struct {
// ConfigUpdates tracks config updates
ConfigUpdates map[string][]string `json:"configUpdates,omitempty"`

// CompartmentBatchStates tracks batch processing state per compartment
CompartmentBatchStates map[string]BatchProcessingState `json:"compartmentBatchStates,omitempty"`
// CompartmentStatuses tracks the detailed status of each compartment
// +optional
CompartmentStatuses map[string]CompartmentStatus `json:"compartmentStatuses,omitempty"`

// +kubebuilder:example=3
// +kubebuilder:default=0
Expand Down
28 changes: 24 additions & 4 deletions operator/api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

79 changes: 55 additions & 24 deletions operator/config/crd/bases/skyhook.nvidia.com_skyhooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -499,38 +499,69 @@ spec:
status:
description: SkyhookStatus defines the observed state of Skyhook
properties:
compartmentBatchStates:
compartmentStatuses:
additionalProperties:
description: BatchProcessingState tracks the current state of batch
processing for a compartment
description: CompartmentStatus tracks the detailed state of a compartment
properties:
completedNodes:
description: Total number of nodes that have completed successfully
(cumulative across all batches)
batchState:
description: BatchState tracks the batch processing state for
this compartment
properties:
completedNodes:
description: Total number of nodes that have completed successfully
(cumulative across all batches)
type: integer
consecutiveFailures:
description: Number of consecutive failures
type: integer
currentBatch:
description: Current batch number (starts at 1)
type: integer
failedNodes:
description: Total number of nodes that have failed (cumulative
across all batches)
type: integer
lastBatchFailed:
description: Whether the last batch failed (for slowdown
logic)
type: boolean
lastBatchSize:
description: Last batch size (for slowdown calculations)
type: integer
shouldStop:
description: Whether the strategy should stop processing
due to failures
type: boolean
type: object
ceiling:
description: Ceiling is the maximum number of nodes that can
be in progress at once
type: integer
consecutiveFailures:
description: Number of consecutive failures
completed:
description: Completed is the number of nodes that have completed
successfully
type: integer
currentBatch:
description: Current batch number (starts at 1)
inProgress:
description: InProgress is the number of nodes currently in
progress
type: integer
failedNodes:
description: Total number of nodes that have failed (cumulative
across all batches)
matched:
description: Matched is the number of nodes that match this
compartment's selector
type: integer
lastBatchFailed:
description: Whether the last batch failed (for slowdown logic)
type: boolean
lastBatchSize:
description: Last batch size (for slowdown calculations)
progressPercent:
description: ProgressPercent is the percentage of nodes completed
(0-100)
type: integer
shouldStop:
description: Whether the strategy should stop processing due
to failures
type: boolean
required:
- ceiling
- completed
- inProgress
- matched
- progressPercent
type: object
description: CompartmentBatchStates tracks batch processing state
per compartment
description: CompartmentStatuses tracks the detailed status of each
compartment
type: object
completeNodes:
default: 0/0
Expand Down
Loading