Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pkg/monitor/cluster/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ import (
// memory usage. Don't add caches here: work to remove them.

func (mon *Monitor) getClusterVersion(ctx context.Context) (*configv1.ClusterVersion, error) {
mon.cache.mu.cv.Lock()
defer mon.cache.mu.cv.Unlock()

if mon.cache.cv != nil {
return mon.cache.cv, nil
}
Expand All @@ -30,6 +33,9 @@ func (mon *Monitor) getClusterVersion(ctx context.Context) (*configv1.ClusterVer

// TODO: remove this function and paginate
func (mon *Monitor) listClusterOperators(ctx context.Context) (*configv1.ClusterOperatorList, error) {
mon.cache.mu.cos.Lock()
defer mon.cache.mu.cos.Unlock()

if mon.cache.cos != nil {
return mon.cache.cos, nil
}
Expand All @@ -41,6 +47,9 @@ func (mon *Monitor) listClusterOperators(ctx context.Context) (*configv1.Cluster

// TODO: remove this function and paginate
func (mon *Monitor) listNodes(ctx context.Context) (*corev1.NodeList, error) {
mon.cache.mu.ns.Lock()
defer mon.cache.mu.ns.Unlock()

if mon.cache.ns != nil {
return mon.cache.ns, nil
}
Expand All @@ -52,6 +61,9 @@ func (mon *Monitor) listNodes(ctx context.Context) (*corev1.NodeList, error) {

// TODO: remove this function and paginate
func (mon *Monitor) listARODeployments(ctx context.Context) (*appsv1.DeploymentList, error) {
mon.cache.mu.arodl.Lock()
defer mon.cache.mu.arodl.Unlock()

if mon.cache.arodl != nil {
return mon.cache.arodl, nil
}
Expand Down
47 changes: 34 additions & 13 deletions pkg/monitor/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
configclient "github.com/openshift/client-go/config/clientset/versioned"
machineclient "github.com/openshift/client-go/machine/clientset/versioned"
operatorclient "github.com/openshift/client-go/operator/clientset/versioned"
machinev1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"

Check failure on line 27 in pkg/monitor/cluster/cluster.go

View workflow job for this annotation

GitHub Actions / golangci-lint

import "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1" imported as "machinev1" but must be "mcv1" according to config (importas)
mcoclient "github.com/openshift/machine-config-operator/pkg/generated/clientset/versioned"

"github.com/Azure/ARO-RP/pkg/api"
Expand Down Expand Up @@ -68,6 +69,15 @@
cv *configv1.ClusterVersion
ns *corev1.NodeList
arodl *appsv1.DeploymentList
mcps *machinev1.MachineConfigPoolList
mu struct {
cos sync.Mutex
cs sync.Mutex
cv sync.Mutex
ns sync.Mutex
arodl sync.Mutex
mcps sync.Mutex
}
}

wg *sync.WaitGroup
Expand Down Expand Up @@ -208,24 +218,21 @@
}
return
}
for _, f := range []func(context.Context) error{

checks := []func(context.Context) error{
mon.emitAroOperatorHeartbeat,
mon.emitAroOperatorConditions,
mon.emitNSGReconciliation,
mon.emitClusterOperatorConditions,
mon.emitClusterOperatorVersions,
mon.emitClusterVersionConditions,
mon.emitClusterVersions,
mon.emitDaemonsetStatuses,
mon.emitDeploymentStatuses,
mon.emitWorkloadStatuses,
mon.emitMachineConfigPoolConditions,
mon.emitMachineConfigPoolUnmanagedNodeCounts,
mon.emitNodeConditions,
mon.emitPodConditions,
mon.emitDebugPodsCount,
mon.detectQuotaFailure,
mon.emitReplicasetStatuses,
mon.emitStatefulsetStatuses,
mon.emitJobConditions,
mon.emitSummary,
mon.emitHiveRegistrationStatus,
Expand All @@ -238,13 +245,27 @@
mon.emitPrometheusAlerts, // at the end for now because it's the slowest/least reliable
mon.emitCWPStatus,
mon.emitClusterAuthenticationType,
} {
err = f(ctx)
if err != nil {
errs = append(errs, err)
mon.emitFailureToGatherMetric(steps.FriendlyName(f), err)
// keep going
}
}

// Run every entry of checks concurrently, one goroutine per check, and
// collect the errors they return.
// NOTE(review): the checks all run against the same mon at the same time;
// anything they share must be safe for concurrent use — confirm.
var wg sync.WaitGroup
// Buffered to len(checks): each goroutine sends at most one error, so a send
// never blocks even though nothing receives until after wg.Wait().
errChan := make(chan error, len(checks))
wg.Add(len(checks))

for _, f := range checks {
// f is passed as an argument so each goroutine has its own copy of it.
go func(f func(context.Context) error) {
defer wg.Done()
if err := f(ctx); err != nil {
// Report the failure through emitFailureToGatherMetric, then hand the
// error to the collector loop below.
mon.emitFailureToGatherMetric(steps.FriendlyName(f), err)
errChan <- err
}
}(f)
}

// Close only after every sender has finished; closing the channel lets the
// range below terminate once the buffered errors are drained.
wg.Wait()
close(errChan)

// errs is filled in the order the goroutines sent their errors, which is not
// necessarily the order of checks.
for err := range errChan {
errs = append(errs, err)
}

return
Expand Down
52 changes: 0 additions & 52 deletions pkg/monitor/cluster/daemonsetstatuses.go

This file was deleted.

77 changes: 0 additions & 77 deletions pkg/monitor/cluster/daemonsetstatuses_test.go

This file was deleted.

50 changes: 0 additions & 50 deletions pkg/monitor/cluster/deploymentstatuses.go

This file was deleted.

77 changes: 0 additions & 77 deletions pkg/monitor/cluster/deploymentstatuses_test.go

This file was deleted.

Loading
Loading