
Commit f66110c

Address code review feedback
- Consolidate hardware operation timestamps into a single HardwareOperationStartTime field
- Simplify the ObservedConfigTransactionId type (*int64 -> int64)
- Improve error handling with errors.Join and add elapsed duration to logs
- Add test coverage for edge cases and race conditions

Signed-off-by: Tao Liu <[email protected]>
1 parent 364837a commit f66110c

22 files changed: +778 −364 lines
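The errors.Join and elapsed-duration logging changes called out in the commit message land in controller code whose hunks are not reproduced on this page. A minimal sketch of that pattern, with hypothetical function names, logger, and node list that are not taken from this commit:

```go
package main

import (
	"errors"
	"fmt"
	"log/slog"
	"time"
)

// applyHardwareConfig stands in for a per-node plugin call; in this sketch it
// always fails so the aggregation path is exercised.
func applyHardwareConfig(node string) error {
	return fmt.Errorf("configure %s: transient BMC error", node)
}

// configureNodes collects every per-node error with errors.Join instead of
// returning on the first failure, and logs how long the pass took.
func configureNodes(nodes []string, start time.Time) error {
	var errs []error
	for _, n := range nodes {
		if err := applyHardwareConfig(n); err != nil {
			errs = append(errs, err)
		}
	}
	slog.Info("hardware configuration pass finished",
		"elapsed", time.Since(start).String(),
		"failures", len(errs))
	// errors.Join returns nil when errs is empty, so callers need no special case.
	return errors.Join(errs...)
}

func main() {
	start := time.Now()
	if err := configureNodes([]string{"node-0", "node-1"}, start); err != nil {
		slog.Error("configuration errors", "err", err)
	}
}
```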

api/hardwaremanagement/plugins/v1alpha1/node_allocation_requests.go

Lines changed: 3 additions & 8 deletions
@@ -134,15 +134,10 @@ type NodeAllocationRequestStatus struct {
 	//+operator-sdk:csv:customresourcedefinitions:type=status
 	SelectedGroups map[string]string `json:"selectedGroups,omitempty"`

-	// ProvisioningStartTime tracks when hardware provisioning actually started.
-	// This timestamp is used for timeout calculations for Day 0 provisioning.
+	// HardwareOperationStartTime tracks when the current hardware operation (provisioning or configuration) actually started.
+	// This timestamp is used for timeout calculations. The active operation is determined from the conditions.
 	//+operator-sdk:csv:customresourcedefinitions:type=status
-	ProvisioningStartTime *metav1.Time `json:"provisioningStartTime,omitempty"`
-
-	// ConfiguringStartTime tracks when hardware configuration actually started.
-	// This timestamp is used for timeout calculations for Day 2 configuration changes.
-	//+operator-sdk:csv:customresourcedefinitions:type=status
-	ConfiguringStartTime *metav1.Time `json:"configuringStartTime,omitempty"`
+	HardwareOperationStartTime *metav1.Time `json:"hardwareOperationStartTime,omitempty"`
 }

 // NodeAllocationRequest is the schema for an allocation request of nodes
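With the two start-time fields consolidated, a consumer has to pick the applicable timeout from the conditions rather than from the field name. A minimal sketch of that selection, assuming a hypothetical "Provisioned" condition type and illustrative timeout values that are not part of this commit:

```go
package hwtimeout

import (
	"time"

	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

const (
	provisioningTimeout = 90 * time.Minute // assumed Day 0 timeout
	configuringTimeout  = 30 * time.Minute // assumed Day 2 timeout
)

// hardwareOperationTimedOut checks the single HardwareOperationStartTime against
// the timeout for whichever operation the conditions say is active.
func hardwareOperationTimedOut(conditions []metav1.Condition, start *metav1.Time) bool {
	if start == nil {
		return false // no operation in progress
	}
	timeout := provisioningTimeout
	// If Day 0 provisioning already completed, the running operation is Day 2 configuration.
	// The "Provisioned" condition type is an assumption for illustration.
	if apimeta.IsStatusConditionTrue(conditions, "Provisioned") {
		timeout = configuringTimeout
	}
	return time.Since(start.Time) > timeout
}
```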

api/hardwaremanagement/plugins/v1alpha1/zz_generated.deepcopy.go

Lines changed: 2 additions & 6 deletions
Some generated files are not rendered by default.

api/provisioning/v1alpha1/provisioningrequest_types.go

Lines changed: 6 additions & 0 deletions
@@ -56,6 +56,12 @@ type NodeAllocationRequestRef struct {
 	HardwareProvisioningCheckStart *metav1.Time `json:"hardwareProvisioningCheckStart,omitempty"`
 	// Represents the timestamp of the first status check for hardware configuring
 	HardwareConfiguringCheckStart *metav1.Time `json:"hardwareConfiguringCheckStart,omitempty"`
+	// ObservedConfigTransactionId tracks the ConfigTransactionId that has been observed
+	// by the hardware plugin. This helps with debugging by showing which configuration
+	// transaction the plugin has processed.
+	// A value of 0 indicates the transaction has not been observed yet. Since Kubernetes
+	// Generation starts at 1, 0 serves as a natural sentinel value for "not observed".
+	ObservedConfigTransactionId int64 `json:"observedConfigTransactionId,omitempty"`
 }

 type ClusterDetails struct {
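Because omitempty drops a zero value and Kubernetes object Generation is always at least 1, code can treat 0 as "plugin has not reported yet" without needing a pointer. A small sketch of that check, assuming it sits in the same package as the type above and that the current transaction ID is derived from the object's Generation; the helper name is hypothetical:

```go
// configTransactionObserved reports whether the hardware plugin has caught up
// with the given config transaction.
func configTransactionObserved(ref *NodeAllocationRequestRef, currentTransactionID int64) bool {
	// 0 can never match a real transaction because Generation starts at 1,
	// so a zero value unambiguously means "not observed yet".
	if ref == nil || ref.ObservedConfigTransactionId == 0 {
		return false
	}
	return ref.ObservedConfigTransactionId >= currentTransactionID
}
```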

api/provisioning/v1alpha1/provisioningrequest_webhook_test.go

Lines changed: 138 additions & 5 deletions
@@ -60,16 +60,21 @@ var _ = Describe("ProvisioningRequestValidator", func() {
 	})

 	Describe("ValidateUpdate", func() {
+		const (
+			testClusterTemplateB = "clustertemplate-b"
+			testVersionB         = "v1.0.2"
+		)
+
 		BeforeEach(func() {
 			// Create a new ClusterTemplate
 			newCt := &ClusterTemplate{
 				ObjectMeta: metav1.ObjectMeta{
-					Name:      "clustertemplate-b.v1.0.2",
+					Name:      testClusterTemplateB + "." + testVersionB,
 					Namespace: "default",
 				},
 				Spec: ClusterTemplateSpec{
-					Name:    "clustertemplate-b",
-					Version: "v1.0.2",
+					Name:       testClusterTemplateB,
+					Version:    testVersionB,
 					TemplateID: "57b39bda-ac56-4143-9b10-d1a71517d04f",
 					Templates: Templates{
 						ClusterInstanceDefaults: "clusterinstance-defaults-v1",
@@ -92,8 +97,8 @@ var _ = Describe("ProvisioningRequestValidator", func() {

 		Context("when spec.templateName or spec.templateVersion is changed", func() {
 			BeforeEach(func() {
-				newPr.Spec.TemplateName = "clustertemplate-b"
-				newPr.Spec.TemplateVersion = "v1.0.2"
+				newPr.Spec.TemplateName = testClusterTemplateB
+				newPr.Spec.TemplateVersion = testVersionB
 			})

 			It("should allow the change when the ProvisioningRequest is fulfilled", func() {
@@ -680,5 +685,133 @@ var _ = Describe("ProvisioningRequestValidator", func() {
 				})
 			})
 		})
+
+		Context("When HardwareProvisioned condition is TimedOut or Failed", func() {
+			BeforeEach(func() {
+				// Create ClusterTemplate for validation
+				ct := &ClusterTemplate{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      "clustertemplate-a.v1.0.1",
+						Namespace: "default",
+					},
+					Spec: ClusterTemplateSpec{
+						Name:       "clustertemplate-a",
+						Version:    "v1.0.1",
+						TemplateID: "test-template-id",
+						Templates: Templates{
+							ClusterInstanceDefaults: "defaults-v1",
+							PolicyTemplateDefaults:  "policy-defaults-v1",
+						},
+						TemplateParameterSchema: runtime.RawExtension{Raw: []byte(testTemplate)},
+					},
+					Status: ClusterTemplateStatus{
+						Conditions: []metav1.Condition{
+							{
+								Type:   string(CTconditionTypes.Validated),
+								Status: metav1.ConditionTrue,
+							},
+						},
+					},
+				}
+				Expect(fakeClient.Create(ctx, ct)).To(Succeed())
+
+				// Create ClusterTemplate-b that will be used by tests
+				newCt := &ClusterTemplate{
+					ObjectMeta: metav1.ObjectMeta{
+						Name:      testClusterTemplateB + "." + testVersionB,
+						Namespace: "default",
+					},
+					Spec: ClusterTemplateSpec{
+						Name:       testClusterTemplateB,
+						Version:    testVersionB,
+						TemplateID: "test-template-id-b",
+						Templates: Templates{
+							ClusterInstanceDefaults: "defaults-v1",
+							PolicyTemplateDefaults:  "policy-defaults-v1",
+						},
+						TemplateParameterSchema: runtime.RawExtension{Raw: []byte(testTemplate)},
+					},
+					Status: ClusterTemplateStatus{
+						Conditions: []metav1.Condition{
+							{
+								Type:   string(CTconditionTypes.Validated),
+								Status: metav1.ConditionTrue,
+							},
+						},
+					},
+				}
+				// Create only if it doesn't exist (to avoid conflicts between tests)
+				existing := &ClusterTemplate{}
+				if err := fakeClient.Get(ctx, client.ObjectKeyFromObject(newCt), existing); err != nil {
+					Expect(fakeClient.Create(ctx, newCt)).To(Succeed())
+				}
+
+				// Set HardwareProvisioned condition to TimedOut
+				newPr.Status.Conditions = []metav1.Condition{
+					{
+						Type:   string(PRconditionTypes.HardwareProvisioned),
+						Status: metav1.ConditionFalse,
+						Reason: string(CRconditionReasons.TimedOut),
+					},
+				}
+			})

+			It("should reject spec changes when hardware provisioning has timed out", func() {
+				// clustertemplate-b is already created in BeforeEach
+				// Try to change TemplateName (a hardware provisioning-related field)
+				newPr.Spec.TemplateName = testClusterTemplateB
+				newPr.Spec.TemplateVersion = testVersionB
+
+				_, err := validator.ValidateUpdate(ctx, oldPr, newPr)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("hardware provisioning has timed out or failed"))
+				Expect(err.Error()).To(ContainSubstring("Spec changes are not allowed"))
+				Expect(err.Error()).To(ContainSubstring("delete and recreate"))
+			})
+
+			It("should reject TemplateParameters changes when hardware provisioning has timed out", func() {
+				// Try to change TemplateParameters
+				newPr.Spec.TemplateParameters.Raw = []byte(`{
+					"oCloudSiteId": "local-123",
+					"nodeClusterName": "exampleCluster",
+					"clusterInstanceParameters": {"additionalNTPSources": ["2.2.2.2"]},
+					"policyTemplateParameters": {"sriov-network-vlan-1": "200"}
+				}`)
+
+				_, err := validator.ValidateUpdate(ctx, oldPr, newPr)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("hardware provisioning has timed out or failed"))
+				Expect(err.Error()).To(ContainSubstring("Spec changes are not allowed"))
+			})
+
+			It("should allow non-spec changes when hardware provisioning has timed out", func() {
+				// Only change metadata (like labels/annotations) - should be allowed
+				newPr.Labels = map[string]string{"new-label": "new-value"}
+				newPr.Annotations = map[string]string{"new-annotation": "new-value"}
+
+				_, err := validator.ValidateUpdate(ctx, oldPr, newPr)
+				Expect(err).NotTo(HaveOccurred())
+			})
+
+			It("should reject spec changes when hardware provisioning has failed", func() {
+				// Set HardwareProvisioned condition to Failed
+				newPr.Status.Conditions = []metav1.Condition{
+					{
+						Type:   string(PRconditionTypes.HardwareProvisioned),
+						Status: metav1.ConditionFalse,
+						Reason: string(CRconditionReasons.Failed),
+					},
+				}
+
+				// clustertemplate-b is already created in BeforeEach
+				// Try to change TemplateName and TemplateVersion
+				newPr.Spec.TemplateName = testClusterTemplateB
+				newPr.Spec.TemplateVersion = testVersionB
+
+				_, err := validator.ValidateUpdate(ctx, oldPr, newPr)
+				Expect(err).To(HaveOccurred())
+				Expect(err.Error()).To(ContainSubstring("hardware provisioning has timed out or failed"))
+			})
+		})
 	})
 })
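The webhook change these cases exercise is not itself part of the rendered hunks. A minimal sketch of the rule they describe (block spec changes once HardwareProvisioned is False with reason TimedOut or Failed, while still allowing metadata-only updates), assuming it lives in the same package as the ProvisioningRequest types and that the error wording simply matches the substrings asserted above:

```go
package v1alpha1

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/equality"
	apimeta "k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// checkHardwareProvisioningState rejects spec changes after hardware provisioning
// has terminally timed out or failed; metadata-only edits remain allowed.
// The function name and error text are assumptions, not taken from this diff.
func checkHardwareProvisioningState(oldPr, newPr *ProvisioningRequest) error {
	cond := apimeta.FindStatusCondition(newPr.Status.Conditions, string(PRconditionTypes.HardwareProvisioned))
	if cond == nil || cond.Status != metav1.ConditionFalse {
		return nil
	}
	if cond.Reason != string(CRconditionReasons.TimedOut) && cond.Reason != string(CRconditionReasons.Failed) {
		return nil
	}
	if equality.Semantic.DeepEqual(oldPr.Spec, newPr.Spec) {
		return nil // label/annotation updates and the like are still fine
	}
	return fmt.Errorf("hardware provisioning has timed out or failed: " +
		"Spec changes are not allowed; delete and recreate the ProvisioningRequest to retry")
}
```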

bundle/manifests/clcm.openshift.io_provisioningrequests.yaml

Lines changed: 9 additions & 0 deletions
Some generated files are not rendered by default.

bundle/manifests/oran-o2ims.clusterserviceversion.yaml

Lines changed: 4 additions & 9 deletions
Some generated files are not rendered by default.

bundle/manifests/plugins.clcm.openshift.io_nodeallocationrequests.yaml

Lines changed: 3 additions & 9 deletions
Some generated files are not rendered by default.

config/crd/bases/clcm.openshift.io_provisioningrequests.yaml

Lines changed: 9 additions & 0 deletions
@@ -197,6 +197,15 @@ spec:
               nodeAllocationRequestID:
                 description: Contains the identifier of the created NodeAllocationRequest.
                 type: string
+              observedConfigTransactionId:
+                description: |-
+                  ObservedConfigTransactionId tracks the ConfigTransactionId that has been observed
+                  by the hardware plugin. This helps with debugging by showing which configuration
+                  transaction the plugin has processed.
+                  A value of 0 indicates the transaction has not been observed yet. Since Kubernetes
+                  Generation starts at 1, 0 serves as a natural sentinel value for "not observed".
+                format: int64
+                type: integer
             type: object
           policies:
             description: Holds policies that are matched with the ManagedCluster

config/crd/bases/plugins.clcm.openshift.io_nodeallocationrequests.yaml

Lines changed: 3 additions & 9 deletions
@@ -285,10 +285,10 @@ spec:
                   - type
                   type: object
                 type: array
-              configuringStartTime:
+              hardwareOperationStartTime:
                 description: |-
-                  ConfiguringStartTime tracks when hardware configuration actually started.
-                  This timestamp is used for timeout calculations for Day 2 configuration changes.
+                  HardwareOperationStartTime tracks when the current hardware operation (provisioning or configuration) actually started.
+                  This timestamp is used for timeout calculations. The active operation is determined from the conditions.
                 format: date-time
                 type: string
               hwMgrPlugin:
@@ -310,12 +310,6 @@ spec:
                     type: string
                   type: array
                 type: object
-              provisioningStartTime:
-                description: |-
-                  ProvisioningStartTime tracks when hardware provisioning actually started.
-                  This timestamp is used for timeout calculations for Day 0 provisioning.
-                format: date-time
-                type: string
               selectedGroups:
                 additionalProperties:
                   type: string

config/manifests/bases/oran-o2ims.clusterserviceversion.yaml

Lines changed: 4 additions & 9 deletions
@@ -585,22 +585,17 @@ spec:
         displayName: Conditions
         path: conditions
       - description: |-
-          ConfiguringStartTime tracks when hardware configuration actually started.
-          This timestamp is used for timeout calculations for Day 2 configuration changes.
-        displayName: Configuring Start Time
-        path: configuringStartTime
+          HardwareOperationStartTime tracks when the current hardware operation (provisioning or configuration) actually started.
+          This timestamp is used for timeout calculations. The active operation is determined from the conditions.
+        displayName: Hardware Operation Start Time
+        path: hardwareOperationStartTime
       - displayName: Hw Mgr Plugin
         path: hwMgrPlugin
       - displayName: Observed Config Transaction Id
         path: observedConfigTransactionId
      - description: Properties represent the node properties in the pool
        displayName: Properties
        path: properties
-      - description: |-
-          ProvisioningStartTime tracks when hardware provisioning actually started.
-          This timestamp is used for timeout calculations for Day 0 provisioning.
-        displayName: Provisioning Start Time
-        path: provisioningStartTime
       - displayName: Selected Groups
         path: selectedGroups
     version: v1alpha1
