Skip to content

OCPBUGS-59251: Default cloud.conf if no configmap is found #404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions pkg/cloud/aws/aws_config_transformer.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@ import (
awsconfig "k8s.io/cloud-provider-aws/pkg/providers/v1/config"
)

const defaultConfig = `[Global]
`

// CloudConfigTransformer is used to inject OpenShift configuration defaults into the Cloud Provider config
// for the AWS Cloud Provider.
// for the AWS Cloud Provider. If an empty source string is provided, a minimal default configuration will be created.
func CloudConfigTransformer(source string, infra *configv1.Infrastructure, network *configv1.Network) (string, error) {
cfg, err := readAWSConfig(source)
if err != nil {
Expand All @@ -26,13 +29,19 @@ func CloudConfigTransformer(source string, infra *configv1.Infrastructure, netwo
return marshalAWSConfig(cfg)
}

// readAWSConfig will parse a source string into a proper *awsconfig.CloudConfig.
// If an empty source string is provided, a default configuration will be used.
func readAWSConfig(source string) (*awsconfig.CloudConfig, error) {
cfg := &awsconfig.CloudConfig{}

// There are cases in which a cloud config was not installed with a cluster, and this is valid.
// We should, however, populate the configuration so that it doesn't fail on later versions that require
// a cloud.conf.
if len(source) == 0 {
return nil, fmt.Errorf("empty INI file")
source = defaultConfig
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm torn on this - should we expand the CloudConfigTransformer interface to provide the defaults? Or is it enough for each platform to set their minimum requirements during parsing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect each platform can set their minimum requirements? Can they each handle an empty file? Is that something we need to add testing for?

Copy link
Contributor

@theobarberbany theobarberbany Aug 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or is it enough for each platform to set their minimum requirements during parsing?

I think for now, this is likely fine

}

// Use the same method the AWS CCM uses to load configuration.
cfg := &awsconfig.CloudConfig{}
if err := gcfg.FatalOnly(gcfg.ReadStringInto(cfg, source)); err != nil {
return nil, fmt.Errorf("failed to parse INI file: %w", err)
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/cloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ import (
// consumer of the transformer.
type cloudConfigTransformer func(source string, infra *configv1.Infrastructure, network *configv1.Network) (string, error)

// defaultCloudConfig returns a default cloud configuration in the event that a cluster does not have a source config map.
// Clusters created prior to 4.14 do not always have a source config map.
type defaultCloudConfig func(platform configv1.PlatformType) (string, error)

// GetCloudConfigTransformer returns the function that should be used to transform
// the cloud configuration config map, and a boolean to indicate if the config should
// be synced from the CCO namespace before applying the transformation.
Expand Down
37 changes: 25 additions & 12 deletions pkg/controllers/cloud_config_sync_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,19 @@ func (r *CloudConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request)
}
}

if !managedConfigFound {
// Only look for an unmanaged config if the managed one isn't found and a name was specified.
if !managedConfigFound && infra.Spec.CloudConfig.Name != "" {
openshiftUnmanagedCMKey := client.ObjectKey{
Name: infra.Spec.CloudConfig.Name,
Namespace: OpenshiftConfigNamespace,
}
if err := r.Get(ctx, openshiftUnmanagedCMKey, sourceCM); err != nil {
if err := r.Get(ctx, openshiftUnmanagedCMKey, sourceCM); errors.IsNotFound(err) {
klog.Warningf("unmanaged cloud-config is not found, falling back to default cloud config.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unmanaged cloud-config

It looks like above this is referred to as the infrastructure config IIUC
Here we refer to it as unmanaged cloud-config

What is the least surprising name we can use in the logs to refer to this?

Also judging from the code above the default cloud config is an empty one right?
Should we bring that up in the log?

} else if err != nil {
klog.Errorf("unable to get cloud-config for sync")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we swallow the error here (I know we were already doing that), does it make sense to propagate it?

if err := r.setDegradedCondition(ctx); err != nil {
return ctrl.Result{}, fmt.Errorf("failed to set conditions for cloud config controller: %v", err)
}
return ctrl.Result{}, err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also with this change it now means that if we enter this else if err != nil branch there was an error in getting the "unmanaged" cloud-config and that error is not the IsNotFound one.

So we should keep retrying to get it (likely it is a transient error e.g. network hiccup or similar) rather than just continuing with the "default" and empty cloud-config, as my understanding is that we should only be using the empty one when one doesn't exist.

}
}

Expand Down Expand Up @@ -188,40 +190,51 @@ func (r *CloudConfigReconciler) isCloudConfigSyncNeeded(platformStatus *configv1
return false, fmt.Errorf("platformStatus is required")
}
switch platformStatus.Type {
case configv1.AzurePlatformType,
case configv1.AWSPlatformType,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's with the reordering here? Not sure I'm following this..

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like it is not just a reordering, the AWSPlatform now coincides in behaviour with all the other ones

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dam's correct - AWS is now unconditionally trying to sync. Any platform not specifically called out in the list, like bare metal, will not sync.

configv1.AzurePlatformType,
configv1.GCPPlatformType,
configv1.VSpherePlatformType,
configv1.IBMCloudPlatformType,
configv1.PowerVSPlatformType,
configv1.OpenStackPlatformType,
configv1.NutanixPlatformType:
return true, nil
case configv1.AWSPlatformType:
// Some of AWS regions might require to sync a cloud-config, in such case reference in infra resource will be presented
return infraCloudConfigRef.Name != "", nil
default:
return false, nil
}
}

// prepareSourceConfigMap creates a usable ConfigMap for further processing into a cloud.conf file.
func (r *CloudConfigReconciler) prepareSourceConfigMap(source *corev1.ConfigMap, infra *configv1.Infrastructure) (*corev1.ConfigMap, error) {
cloudConfCm := source.DeepCopy()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can source ever be nil? Should we check it here again for good measure in case the code changes as this is implied?

// We might have an empty ConfigMap in clusters created before 4.14.
if cloudConfCm.Data == nil {
cloudConfCm.Data = make(map[string]string)
}

// Keys might be different between openshift-config/cloud-config and openshift-config-managed/kube-cloud-config
// Always use "cloud.conf" which is default one across openshift
cloudConfCm := source.DeepCopy()
if _, ok := cloudConfCm.Data[defaultConfigKey]; ok {
return cloudConfCm, nil
}

// If a user provides their own cloud config, copy that over into the default key.
infraConfigKey := infra.Spec.CloudConfig.Key
if val, ok := cloudConfCm.Data[infraConfigKey]; ok {
cloudConfCm.Data[defaultConfigKey] = val
delete(cloudConfCm.Data, infraConfigKey)
return cloudConfCm, nil
} else if !ok && len(cloudConfCm.Data) > 0 {
// Return an error if they provided a non-existent one and there was a cloud.conf specified.
return nil, fmt.Errorf("key %s specified in infra resource does not exist in source configmap %s",
infraConfigKey, client.ObjectKeyFromObject(source),
)
}
return nil, fmt.Errorf(
"key %s specified in infra resource does not found in source configmap %s",
infraConfigKey, client.ObjectKeyFromObject(source),
)

// If there was no data in the configmap and the user didn't specify their own make a default entry for it.
cloudConfCm.Data[defaultConfigKey] = ""

return cloudConfCm, nil
Comment on lines +234 to +237
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like it is possible to end up here even if the user specified an infraConfigKey and the len(cloudConfCm.Data) == 0, so either we need to fix up the comment at line 234, or we need to account for this extra case in the logic.

}

func (r *CloudConfigReconciler) isCloudConfigEqual(source *corev1.ConfigMap, target *corev1.ConfigMap) bool {
Expand Down
40 changes: 26 additions & 14 deletions pkg/controllers/cloud_config_sync_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ const (
infraCloudConfKey = "foo"

defaultAzureConfig = `{"cloud":"AzurePublicCloud","tenantId":"0000000-0000-0000-0000-000000000000","Entries":null,"subscriptionId":"0000000-0000-0000-0000-000000000000","vmType":"standard","putVMSSVMBatchSize":0,"enableMigrateToIPBasedBackendPoolAPI":false,"clusterServiceLoadBalancerHealthProbeMode":"shared"}`
defaultAWSConfig = `[Global]
`
)

func makeInfrastructureResource(platform configv1.PlatformType) *configv1.Infrastructure {
Expand Down Expand Up @@ -84,8 +86,7 @@ func makeInfraStatus(platform configv1.PlatformType) configv1.InfrastructureStat
}

func makeInfraCloudConfig(platform configv1.PlatformType) *corev1.ConfigMap {
defaultConfig := `[Global]
`
defaultConfig := defaultAWSConfig

if platform == configv1.AzurePlatformType {
defaultConfig = defaultAzureConfig
Expand All @@ -98,8 +99,7 @@ func makeInfraCloudConfig(platform configv1.PlatformType) *corev1.ConfigMap {
}

func makeManagedCloudConfig(platform configv1.PlatformType) *corev1.ConfigMap {
defaultConfig := `[Global]
`
defaultConfig := defaultAWSConfig

if platform == configv1.AzurePlatformType {
defaultConfig = defaultAzureConfig
Expand Down Expand Up @@ -151,14 +151,6 @@ var _ = Describe("prepareSourceConfigMap reconciler method", func() {
Expect(reconciler.isCloudConfigEqual(preparedConfig, managedCloudConfig)).Should(BeTrue())
})

It("config preparation should fail if key from infra resource does not found", func() {
brokenInfraConfig := infraCloudConfig.DeepCopy()
brokenInfraConfig.Data = map[string]string{"hehehehehe": "bar"}
_, err := reconciler.prepareSourceConfigMap(brokenInfraConfig, infra)
Expect(err).Should(Not(Succeed()))
Expect(err.Error()).Should(BeEquivalentTo("key foo specified in infra resource does not found in source configmap openshift-config/test-config"))
})

It("config preparation should not touch extra fields in infra ConfigMap", func() {
extendedInfraConfig := infraCloudConfig.DeepCopy()
extendedInfraConfig.Data = map[string]string{infraCloudConfKey: "{}", "{}": "{}"}
Expand Down Expand Up @@ -467,21 +459,28 @@ var _ = Describe("Cloud config sync reconciler", func() {
Expect(cl.Create(ctx, makeInfraCloudConfig(configv1.AWSPlatformType))).To(Succeed())
})

It("should skip config sync for AWS platform if there is no reference in infra resource", func() {
It("should sync a default config AWS platform if there is no reference in infra resource", func() {
infraResource := makeInfrastructureResource(configv1.AWSPlatformType)
infraResource.Spec.CloudConfig.Name = ""
Expect(cl.Create(ctx, infraResource)).To(Succeed())

infraResource.Status = makeInfraStatus(infraResource.Spec.PlatformSpec.Type)
Expect(cl.Status().Update(ctx, infraResource.DeepCopy())).To(Succeed())

fetchedResource := &configv1.Infrastructure{}
Expect(cl.Get(ctx, client.ObjectKeyFromObject(infraResource), fetchedResource)).To(Succeed())
Expect(fetchedResource.Spec.CloudConfig.Name).To(Equal(""))

_, err := reconciler.Reconcile(context.TODO(), ctrl.Request{})
Expect(err).To(BeNil())

allCMs := &corev1.ConfigMapList{}
Expect(cl.List(ctx, allCMs, &client.ListOptions{Namespace: targetNamespaceName})).To(Succeed())

Expect(len(allCMs.Items)).To(BeZero())
Expect(len(allCMs.Items)).To(BeEquivalentTo(1))
// Our code ensures that there is at a minimum a global section.
// The CCM itself may end up defaulting values for us, so don't use exact string matching.
Expect(allCMs.Items[0].Data[defaultConfigKey]).To(HavePrefix(defaultAWSConfig))
})

It("should perform config sync for AWS platform if there is a reference in infra resource", func() {
Expand All @@ -500,6 +499,19 @@ var _ = Describe("Cloud config sync reconciler", func() {
Expect(len(allCMs.Items)).NotTo(BeZero())
Expect(len(allCMs.Items)).To(BeEquivalentTo(1))
})

It("should error if a user-specified configmap key isn't present", func() {
infraResource := makeInfrastructureResource(configv1.AWSPlatformType)
infraResource.Spec.CloudConfig.Key = "notfound"
Expect(cl.Create(ctx, infraResource)).To(Succeed())

infraResource.Status = makeInfraStatus(infraResource.Spec.PlatformSpec.Type)
Expect(cl.Status().Update(ctx, infraResource.DeepCopy())).To(Succeed())

_, err := reconciler.Reconcile(context.TODO(), ctrl.Request{})
Expect(err).To(Not(BeNil()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we wanna expect a specific error - namely "key %s specified in infra resource does not exist in source configmap %s", ?


})
})

Context("On Azure platform", func() {
Expand Down