Skip to content

Commit a5199f0

Browse files
use const for labels, include timeout and fix race condition
1 parent a546d5e commit a5199f0

File tree

6 files changed

+104
-180
lines changed

6 files changed

+104
-180
lines changed

internal/controllers/inventory_controller.go

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ import (
7676
//+kubebuilder:rbac:urls="/o2ims-infrastructureInventory/v1/resourceTypes/*",verbs=get
7777
//+kubebuilder:rbac:urls="/hardware-manager/inventory/*",verbs=get;list
7878
//+kubebuilder:rbac:groups="batch",resources=cronjobs,verbs=get;list;watch;create;update;patch;delete
79-
//+kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch
8079
//+kubebuilder:rbac:groups="",resources=events,verbs=create;patch;update
8180
//+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete
8281
//+kubebuilder:rbac:groups=clcm.openshift.io,resources=hardwareplugins,verbs=get;list;watch;create;update;patch;delete
@@ -1346,22 +1345,6 @@ func (t *reconcilerTask) createAlarmServerClusterRole(ctx context.Context) error
13461345
"patch",
13471346
},
13481347
},
1349-
{
1350-
APIGroups: []string{
1351-
"route.openshift.io",
1352-
},
1353-
Resources: []string{
1354-
"routes",
1355-
},
1356-
Verbs: []string{
1357-
"get",
1358-
"list",
1359-
"watch",
1360-
},
1361-
ResourceNames: []string{
1362-
"alertmanager",
1363-
},
1364-
},
13651348
{
13661349
NonResourceURLs: []string{
13671350
"/o2ims-infrastructureCluster/v1/nodeClusterTypes",

internal/service/alarms/internal/alertmanager/api.go

Lines changed: 39 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -13,21 +13,21 @@ import (
1313
"log/slog"
1414
"net/http"
1515
"net/url"
16+
"os"
1617
"time"
1718

19+
"github.com/openshift-kni/oran-o2ims/internal/constants"
20+
ctlrutils "github.com/openshift-kni/oran-o2ims/internal/controllers/utils"
1821
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/db/repo"
1922
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/infrastructure"
20-
corev1 "k8s.io/api/core/v1"
21-
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
22-
"k8s.io/apimachinery/pkg/runtime/schema"
2323
"sigs.k8s.io/controller-runtime/pkg/client"
2424
)
2525

2626
const (
27-
// ACMObsAMRouteName route to collect ACM's AM API host
28-
ACMObsAMRouteName = "alertmanager"
29-
// ACMObsAMAuthSecretName secret to collect ACM's AM API BEARER and ca.Crt
30-
ACMObsAMAuthSecretName = "observability-alertmanager-accessor-token" // nolint: gosec
27+
// ACMObsAMServiceName is the alertmanager service name in ACM observability
28+
ACMObsAMServiceName = "alertmanager"
29+
// ACMObsAMServicePort is the HTTPS port for alertmanager service
30+
ACMObsAMServicePort = 9095
3131
)
3232

3333
// APIAlert represents the alert structure returned by the Alertmanager API.
@@ -110,16 +110,21 @@ func (c *AMClient) SyncAlerts(ctx context.Context) error {
110110

111111
// getAlerts retrieves all alerts from Alertmanager API
112112
func (c *AMClient) getAlerts(ctx context.Context) ([]APIAlert, error) {
113-
// Get the alertmanager route
114-
alertmanagerHost, err := c.GetAlertmanagerRoute(ctx)
113+
// Initialize a new client each time to pick up the latest token
114+
httpClient, token, err := c.createAlertmanagerClient()
115115
if err != nil {
116-
return nil, fmt.Errorf("failed to get alertmanager route: %w", err)
116+
return nil, fmt.Errorf("failed to create alertmanager client: %w", err)
117117
}
118118

119-
// Initialize a new client each time to pick up the latest secrets
120-
httpClient, token, err := c.createAlertmanagerClient(ctx)
121-
if err != nil {
122-
return nil, fmt.Errorf("failed to create alertmanager client: %w", err)
119+
// Build service URL for alertmanager
120+
// Format: alertmanager.open-cluster-management-observability.svc:9095
121+
// Allow override for testing
122+
alertmanagerHost := os.Getenv("ALARMS_SERVER_AM_HOST")
123+
if alertmanagerHost == "" {
124+
alertmanagerHost = fmt.Sprintf("%s.%s.svc:%d",
125+
ACMObsAMServiceName,
126+
ctlrutils.OpenClusterManagementObservabilityNamespace,
127+
ACMObsAMServicePort)
123128
}
124129

125130
// Create request
@@ -138,7 +143,7 @@ func (c *AMClient) getAlerts(ctx context.Context) ([]APIAlert, error) {
138143
q.Set("silenced", "true")
139144

140145
u.RawQuery = q.Encode()
141-
req, err := http.NewRequest(http.MethodGet, u.String(), nil)
146+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
142147
if err != nil {
143148
return nil, fmt.Errorf("error creating request: %w", err)
144149
}
@@ -179,78 +184,32 @@ func (c *AMClient) getAlerts(ctx context.Context) ([]APIAlert, error) {
179184
return alerts, nil
180185
}
181186

182-
// GetAlertmanagerRoute gets the hostname for the alertmanager route using unstructured type
183-
func (c *AMClient) GetAlertmanagerRoute(ctx context.Context) (string, error) {
184-
// Define the route GVK (Group, Version, Kind)
185-
routeGVK := schema.GroupVersionKind{
186-
Group: "route.openshift.io",
187-
Version: "v1",
188-
Kind: "Route",
189-
}
190-
191-
// Create an unstructured object
192-
route := &unstructured.Unstructured{}
193-
route.SetGroupVersionKind(routeGVK)
194-
195-
// Get the route
196-
if err := c.k8sClient.Get(ctx, client.ObjectKey{
197-
Namespace: ACMObsAMNamespace,
198-
Name: ACMObsAMRouteName,
199-
}, route); err != nil {
200-
return "", fmt.Errorf("error getting '%s' route: %w", ACMObsAMRouteName, err)
201-
}
202-
203-
// Try to get host from spec
204-
specHost, found, err := unstructured.NestedString(route.Object, "spec", "host")
205-
if err != nil {
206-
return "", fmt.Errorf("error accessing spec.host: %w", err)
207-
}
208-
if found && specHost != "" {
209-
return specHost, nil
187+
// createAlertmanagerClient creates a new HTTP client with the service account token and service CA certificate.
188+
// The token comes from the pod's service account and CA cert from the service CA file.
189+
func (c *AMClient) createAlertmanagerClient() (*http.Client, string, error) {
190+
// Determine token file path (allow override for testing/local development)
191+
tokenPath := constants.DefaultBackendTokenFile
192+
if envPath := os.Getenv("ALARMS_SERVER_TOKEN_FILE"); envPath != "" {
193+
tokenPath = envPath
210194
}
211195

212-
// Fallback to status if spec host is empty
213-
statusIngress, found, err := unstructured.NestedSlice(route.Object, "status", "ingress")
196+
// Read token from service account token file
197+
token, err := os.ReadFile(tokenPath)
214198
if err != nil {
215-
return "", fmt.Errorf("error accessing status.ingress: %w", err)
199+
return nil, "", fmt.Errorf("error reading service account token: %w", err)
216200
}
217201

218-
if found && len(statusIngress) > 0 {
219-
ingressMap, ok := statusIngress[0].(map[string]interface{})
220-
if !ok {
221-
return "", fmt.Errorf("invalid ingress format")
222-
}
223-
224-
host, found := ingressMap["host"]
225-
if found && host != "" {
226-
return host.(string), nil
227-
}
228-
}
229-
230-
return "", fmt.Errorf("no host found in alertmanager route")
231-
}
232-
233-
// createAlertmanagerClient creates a new HTTP client with the latest token
234-
// TODO: we can be more robust by reading our `/var/run/secrets/kubernetes.io/serviceaccount/token` for token instead of relying on ACMObsAMAuthSecretName data.
235-
func (c *AMClient) createAlertmanagerClient(ctx context.Context) (*http.Client, string, error) {
236-
var secret corev1.Secret
237-
if err := c.k8sClient.Get(ctx, client.ObjectKey{
238-
Namespace: ACMObsAMNamespace,
239-
Name: ACMObsAMAuthSecretName,
240-
}, &secret); err != nil {
241-
return nil, "", fmt.Errorf("error getting token secret from '%s': %w", ACMObsAMAuthSecretName, err)
202+
// Determine service CA file path (allow override for testing/local development)
203+
caPath := constants.DefaultServiceCAFile
204+
if envPath := os.Getenv("ALARMS_SERVER_CA_FILE"); envPath != "" {
205+
caPath = envPath
242206
}
243207

244-
// Extract token
245-
token, ok := secret.Data["token"]
246-
if !ok {
247-
return nil, "", fmt.Errorf("token not found in secret")
248-
}
249-
250-
// Extract CA certificate
251-
caCrt, ok := secret.Data["ca.crt"]
252-
if !ok {
253-
return nil, "", fmt.Errorf("ca.crt not found in secret")
208+
// Read service CA certificate
209+
// This is automatically mounted by Kubernetes for service-to-service TLS
210+
caCrt, err := os.ReadFile(caPath)
211+
if err != nil {
212+
return nil, "", fmt.Errorf("error reading service CA certificate: %w", err)
254213
}
255214

256215
// Create certificate pool with the CA cert

internal/service/alarms/internal/alertmanager/api_test.go

Lines changed: 43 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -10,21 +10,18 @@ import (
1010
"fmt"
1111
"net/http"
1212
"net/http/httptest"
13+
"os"
1314
"time"
1415

1516
"github.com/jackc/pgx/v5"
16-
"k8s.io/apimachinery/pkg/runtime/schema"
1717

1818
. "github.com/onsi/ginkgo/v2"
1919
. "github.com/onsi/gomega"
20-
"github.com/openshift-kni/oran-o2ims/internal/constants"
2120
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/alertmanager"
2221
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/db/repo/generated"
2322
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/infrastructure"
2423
"go.uber.org/mock/gomock"
2524
corev1 "k8s.io/api/core/v1"
26-
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
27-
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
2825
"k8s.io/apimachinery/pkg/runtime"
2926
"sigs.k8s.io/controller-runtime/pkg/client"
3027
"sigs.k8s.io/controller-runtime/pkg/client/fake"
@@ -46,10 +43,11 @@ var _ = Describe("Alertmanager API Client", func() {
4643
mockRepo *generated.MockAlarmRepositoryInterface
4744
infra *infrastructure.Infrastructure
4845
amClient *alertmanager.AMClient
49-
fakeRoute *unstructured.Unstructured
50-
fakeSecret *corev1.Secret
5146
mockAMServer *httptest.Server
5247
testAPIAlerts []alertmanager.APIAlert
48+
tempTokenFile string
49+
tempCAFile string
50+
originalHost string
5351
)
5452

5553
BeforeEach(func() {
@@ -70,46 +68,41 @@ var _ = Describe("Alertmanager API Client", func() {
7068
Expect(err).To(BeNil())
7169
}))
7270

73-
// Set up fake k8s client with the route and secret
74-
scheme := runtime.NewScheme()
75-
Expect(corev1.AddToScheme(scheme)).To(Succeed())
71+
// Create temporary token file
72+
tempToken, err := os.CreateTemp("", "token-*")
73+
Expect(err).To(BeNil())
74+
tempTokenFile = tempToken.Name()
75+
err = os.WriteFile(tempTokenFile, []byte("fake-token"), 0o600)
76+
Expect(err).To(BeNil())
77+
tempToken.Close()
7678

77-
// Create a fake Route resource
78-
fakeRoute = &unstructured.Unstructured{}
79-
fakeRoute.SetGroupVersionKind(schema.GroupVersionKind(metav1.GroupVersionKind{
80-
Group: "route.openshift.io",
81-
Version: "v1",
82-
Kind: "Route",
83-
}))
84-
fakeRoute.SetNamespace(alertmanager.ACMObsAMNamespace)
85-
fakeRoute.SetName(alertmanager.ACMObsAMRouteName)
86-
// Extract server hostname from test server URL without the scheme
87-
serverHost := mockAMServer.URL[8:] // Skip "https://"
88-
Expect(unstructured.SetNestedField(fakeRoute.Object, serverHost, "spec", "host")).To(Succeed())
89-
90-
// Extract server certificate for use in our fake secret
91-
// This gets the actual certificate from our test server
79+
// Create temporary CA file with the test server's certificate
9280
certPEM := pem.EncodeToMemory(&pem.Block{
9381
Type: "CERTIFICATE",
9482
Bytes: mockAMServer.Certificate().Raw,
9583
})
84+
tempCA, err := os.CreateTemp("", "ca-*")
85+
Expect(err).To(BeNil())
86+
tempCAFile = tempCA.Name()
87+
err = os.WriteFile(tempCAFile, certPEM, 0o600)
88+
Expect(err).To(BeNil())
89+
tempCA.Close()
90+
91+
// Set environment variables to use temp files
92+
os.Setenv("ALARMS_SERVER_TOKEN_FILE", tempTokenFile)
93+
os.Setenv("ALARMS_SERVER_CA_FILE", tempCAFile)
94+
95+
// Override the alertmanager host to point to our mock server
96+
// Extract server hostname from test server URL without the scheme
97+
originalHost = mockAMServer.URL[8:] // Skip "https://"
98+
os.Setenv("ALARMS_SERVER_AM_HOST", originalHost)
9699

97-
// Create a fake Secret
98-
fakeSecret = &corev1.Secret{
99-
ObjectMeta: metav1.ObjectMeta{
100-
Namespace: alertmanager.ACMObsAMNamespace,
101-
Name: alertmanager.ACMObsAMAuthSecretName,
102-
},
103-
Data: map[string][]byte{
104-
"token": []byte("fake-token"),
105-
"ca.crt": certPEM,
106-
},
107-
}
100+
// Set up minimal fake k8s client
101+
scheme := runtime.NewScheme()
102+
Expect(corev1.AddToScheme(scheme)).To(Succeed())
108103

109104
fakeClient = fake.NewClientBuilder().
110105
WithScheme(scheme).
111-
WithObjects(fakeSecret).
112-
WithRuntimeObjects(fakeRoute).
113106
Build()
114107

115108
infra = &infrastructure.Infrastructure{
@@ -145,6 +138,19 @@ var _ = Describe("Alertmanager API Client", func() {
145138
AfterEach(func() {
146139
mockAMServer.Close()
147140
ctrl.Finish()
141+
142+
// Clean up temporary files
143+
if tempTokenFile != "" {
144+
os.Remove(tempTokenFile)
145+
}
146+
if tempCAFile != "" {
147+
os.Remove(tempCAFile)
148+
}
149+
150+
// Unset environment variables
151+
os.Unsetenv("ALARMS_SERVER_TOKEN_FILE")
152+
os.Unsetenv("ALARMS_SERVER_CA_FILE")
153+
os.Unsetenv("ALARMS_SERVER_AM_HOST")
148154
})
149155

150156
Describe("SyncAlerts", func() {
@@ -203,32 +209,6 @@ var _ = Describe("Alertmanager API Client", func() {
203209
})
204210
})
205211

206-
Describe("GetAlertmanagerRoute", func() {
207-
It("should retrieve the route from Kubernetes API", func() {
208-
// This is using the fake client with our route already set up
209-
host, err := amClient.GetAlertmanagerRoute(ctx)
210-
211-
// Assert
212-
Expect(err).NotTo(HaveOccurred())
213-
Expect(host).NotTo(BeEmpty())
214-
Expect(host).To(ContainSubstring(constants.Localhost))
215-
})
216-
217-
It("should return error when route not found", func() {
218-
// Create a client without the route
219-
emptyClient := fake.NewClientBuilder().
220-
WithScheme(runtime.NewScheme()).
221-
Build()
222-
223-
clientWithoutRoute := alertmanager.NewAlertmanagerClient(emptyClient, mockRepo, infra)
224-
225-
_, err := clientWithoutRoute.GetAlertmanagerRoute(ctx)
226-
227-
// Assert
228-
Expect(err).To(HaveOccurred())
229-
})
230-
})
231-
232212
Describe("RunAlertSyncScheduler", func() {
233213
It("should schedule alert syncs at regular intervals", func() {
234214
// Given a short interval for testing

internal/service/alarms/internal/alertmanager/converter.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
api "github.com/openshift-kni/oran-o2ims/internal/service/alarms/api/generated"
1717
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/db/models"
1818
"github.com/openshift-kni/oran-o2ims/internal/service/alarms/internal/infrastructure"
19+
"github.com/openshift-kni/oran-o2ims/internal/service/common"
1920
)
2021

2122
// ConvertAmToAlarmEventRecordModels get alarmEventRecords based on the alertmanager notification and AlarmDefinition
@@ -133,9 +134,9 @@ func getClusterID(labels map[string]string) *uuid.UUID {
133134

134135
// isHardwareAlert checks if an alert is a hardware alert by checking for type=hardware AND component=ironic labels
135136
func isHardwareAlert(labels map[string]string) bool {
136-
alertType, hasType := labels["type"]
137-
component, hasComponent := labels["component"]
138-
return hasType && alertType == "hardware" && hasComponent && component == "ironic"
137+
alertType, hasType := labels[common.HardwareAlertTypeLabel]
138+
component, hasComponent := labels[common.HardwareAlertComponentLabel]
139+
return hasType && alertType == common.HardwareAlertTypeValue && hasComponent && component == common.HardwareAlertComponentValue
139140
}
140141

141142
// getResourceID extracts resource ID from instance_uuid label for hardware alerts

0 commit comments

Comments
 (0)