Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions api/core/v1beta1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ func (src *Cluster) ConvertTo(dstRaw conversion.Hub) error {
return err
}

dst.Spec.Topology.ControlPlane.HealthCheck.Checks.UnhealthyMachineConditions = restored.Spec.Topology.ControlPlane.HealthCheck.Checks.UnhealthyMachineConditions
for i, md := range restored.Spec.Topology.Workers.MachineDeployments {
dst.Spec.Topology.Workers.MachineDeployments[i].HealthCheck.Checks.UnhealthyMachineConditions = md.HealthCheck.Checks.UnhealthyMachineConditions
}

// Recover intent for bool values converted to *bool.
clusterv1.Convert_bool_To_Pointer_bool(src.Spec.Paused, ok, restored.Spec.Paused, &dst.Spec.Paused)

Expand Down Expand Up @@ -145,6 +150,11 @@ func (src *ClusterClass) ConvertTo(dstRaw conversion.Hub) error {
return err
}

dst.Spec.ControlPlane.HealthCheck.Checks.UnhealthyMachineConditions = restored.Spec.ControlPlane.HealthCheck.Checks.UnhealthyMachineConditions
for i, md := range restored.Spec.Workers.MachineDeployments {
dst.Spec.Workers.MachineDeployments[i].HealthCheck.Checks.UnhealthyMachineConditions = md.HealthCheck.Checks.UnhealthyMachineConditions
}

// Recover intent for bool values converted to *bool.
for i, patch := range dst.Spec.Patches {
for j, definition := range patch.Definitions {
Expand Down Expand Up @@ -513,6 +523,8 @@ func (src *MachineHealthCheck) ConvertTo(dstRaw conversion.Hub) error {
return err
}

dst.Spec.Checks.UnhealthyMachineConditions = restored.Spec.Checks.UnhealthyMachineConditions

clusterv1.Convert_int32_To_Pointer_int32(src.Status.ExpectedMachines, ok, restored.Status.ExpectedMachines, &dst.Status.ExpectedMachines)
clusterv1.Convert_int32_To_Pointer_int32(src.Status.CurrentHealthy, ok, restored.Status.CurrentHealthy, &dst.Status.CurrentHealthy)
clusterv1.Convert_int32_To_Pointer_int32(src.Status.RemediationsAllowed, ok, restored.Status.RemediationsAllowed, &dst.Status.RemediationsAllowed)
Expand Down
20 changes: 20 additions & 0 deletions api/core/v1beta2/cluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,16 @@ type ControlPlaneTopologyHealthCheckChecks struct {
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// unhealthyMachineConditions contains a list of the machine conditions that determine
// whether a machine is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the machine is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"`
}

// ControlPlaneTopologyHealthCheckRemediation configures if and how remediations are triggered if a control plane Machine is unhealthy.
Expand Down Expand Up @@ -975,6 +985,16 @@ type MachineDeploymentTopologyHealthCheckChecks struct {
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// unhealthyMachineConditions contains a list of the machine conditions that determine
// whether a machine is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the machine is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"`
}

// MachineDeploymentTopologyHealthCheckRemediation configures if and how remediations are triggered if a MachineDeployment Machine is unhealthy.
Expand Down
20 changes: 20 additions & 0 deletions api/core/v1beta2/clusterclass_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,16 @@ type ControlPlaneClassHealthCheckChecks struct {
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// unhealthyMachineConditions contains a list of the machine conditions that determine
// whether a machine is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the machine is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"`
}

// ControlPlaneClassHealthCheckRemediation configures if and how remediations are triggered if a control plane Machine is unhealthy.
Expand Down Expand Up @@ -542,6 +552,16 @@ type MachineDeploymentClassHealthCheckChecks struct {
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// unhealthyMachineConditions contains a list of the machine conditions that determine
// whether a machine is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the machine is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"`
}

// MachineDeploymentClassHealthCheckRemediation configures if and how remediations are triggered if a MachineDeployment Machine is unhealthy.
Expand Down
4 changes: 4 additions & 0 deletions api/core/v1beta2/machine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,10 @@ const (
// defined by a MachineHealthCheck object.
MachineHealthCheckUnhealthyNodeReason = "UnhealthyNode"

// MachineHealthCheckUnhealthyMachineReason surfaces when the machine does not pass the health checks
// defined by a MachineHealthCheck object.
MachineHealthCheckUnhealthyMachineReason = "UnhealthyMachine"

// MachineHealthCheckNodeStartupTimeoutReason surfaces when the node hosted on the machine does not appear within
// the timeout defined by a MachineHealthCheck object.
MachineHealthCheckNodeStartupTimeoutReason = "NodeStartupTimeout"
Expand Down
38 changes: 37 additions & 1 deletion api/core/v1beta2/machinehealthcheck_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,16 @@ type MachineHealthCheckChecks struct {
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyNodeConditions []UnhealthyNodeCondition `json:"unhealthyNodeConditions,omitempty"`

// unhealthyMachineConditions contains a list of the machine conditions that determine
// whether a machine is considered unhealthy. The conditions are combined in a
// logical OR, i.e. if any of the conditions is met, the machine is unhealthy.
//
// +optional
// +listType=atomic
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=100
UnhealthyMachineConditions []UnhealthyMachineCondition `json:"unhealthyMachineConditions,omitempty"`
}

// MachineHealthCheckRemediation configures if and how remediations are triggered if a Machine is unhealthy.
Expand Down Expand Up @@ -227,7 +237,33 @@ type UnhealthyNodeCondition struct {

// timeoutSeconds is the duration that a node must be in a given status for,
// after which the node is considered unhealthy.
// For example, with a value of "1h", the node must match the status
// For example, with a value of "3600", the node must match the status
// for at least 1 hour before being considered unhealthy.
// +required
// +kubebuilder:validation:Minimum=0
TimeoutSeconds *int32 `json:"timeoutSeconds,omitempty"`
}

// UnhealthyMachineCondition represents a Machine condition type and value with a timeout
// specified in seconds. When the named condition has been in the given
// status for at least the timeout value, a machine is considered unhealthy.
type UnhealthyMachineCondition struct {
// type of Machine condition
// +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$`
// +kubebuilder:validation:MinLength=1
// +kubebuilder:validation:MaxLength=316
// +kubebuilder:validation:XValidation:rule="self != 'Ready' && self != 'Available' && self != 'HealthCheckSucceeded' && self != 'OwnerRemediated' && self != 'ExternallyRemediated'",message="type must not be one of: Ready, Available, HealthCheckSucceeded, OwnerRemediated, ExternallyRemediated"
// +required
Type string `json:"type,omitempty"`

// status of the condition, one of True, False, Unknown.
// +required
// +kubebuilder:validation:Enum=True;False;Unknown
Status metav1.ConditionStatus `json:"status,omitempty"`

// timeoutSeconds is the duration that a machine must be in a given status for,
// after which the machine is considered unhealthy.
// For example, with a value of "3600", the machine must match the status
// for at least 1 hour before being considered unhealthy.
// +required
// +kubebuilder:validation:Minimum=0
Expand Down
5 changes: 5 additions & 0 deletions api/core/v1beta2/v1beta1_condition_consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ const (

// UnhealthyNodeConditionV1Beta1Reason is the reason used when a machine's node has one of the MachineHealthCheck's unhealthy conditions.
UnhealthyNodeConditionV1Beta1Reason = "UnhealthyNode"

// UnhealthyMachineConditionV1Beta1Reason is the reason used when a machine has one of the MachineHealthCheck's unhealthy conditions.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit (we can also follow up in another PR)
let's clarify that this reason also overrides reasons from node issues (list...) when both apply

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, it is updated now in c45f0fe to clarify it.

// When both machine and node issues are detected, this reason takes precedence over node-related reasons
// (NodeNotFoundV1Beta1Reason, NodeStartupTimeoutV1Beta1Reason, UnhealthyNodeConditionV1Beta1Reason).
UnhealthyMachineConditionV1Beta1Reason = "UnhealthyMachine"
)

const (
Expand Down
55 changes: 55 additions & 0 deletions api/core/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading