@@ -29,6 +29,7 @@ import (
2929 "time"
3030
3131 "github.com/aws/aws-sdk-go-v2/aws"
32+ "github.com/aws/aws-sdk-go-v2/service/ec2"
3233 ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
3334
3435 elb "github.com/aws/aws-sdk-go-v2/service/elasticloadbalancing"
@@ -1251,24 +1252,24 @@ func (c *Cloud) ensureLoadBalancer(ctx context.Context, namespacedName types.Nam
12511252
12521253 {
12531254 // Sync security groups
1254- expected := sets .New [ string ] (securityGroupIDs ... )
1255- actual := stringSetFromList (loadBalancer .SecurityGroups )
1255+ expected := sets .New (securityGroupIDs ... )
1256+ actual := sets . New (loadBalancer .SecurityGroups ... )
12561257
12571258 if ! expected .Equal (actual ) {
12581259 // This call just replaces the security groups, unlike e.g. subnets (!)
1259- request := & elb.ApplySecurityGroupsToLoadBalancerInput {}
1260- request .LoadBalancerName = aws .String (loadBalancerName )
1261- if securityGroupIDs == nil {
1262- request .SecurityGroups = nil
1263- } else {
1264- request .SecurityGroups = securityGroupIDs
1265- }
1266- klog .V (2 ).Info ("Applying updated security groups to load balancer" )
1267- _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , request )
1268- if err != nil {
1260+ klog .V (2 ).Infof ("Applying updated security groups to load balancer %q" , loadBalancerName )
1261+ if _ , err := c .elb .ApplySecurityGroupsToLoadBalancer (ctx , & elb.ApplySecurityGroupsToLoadBalancerInput {
1262+ LoadBalancerName : aws .String (loadBalancerName ),
1263+ SecurityGroups : securityGroupIDs ,
1264+ }); err != nil {
12691265 return nil , fmt .Errorf ("error applying AWS loadbalancer security groups: %q" , err )
12701266 }
12711267 dirty = true
1268+
1269+ // Ensure the replaced security groups are removed from AWS when owned by the controller.
1270+ if errs := c .removeOwnedSecurityGroups (ctx , loadBalancerName , actual .UnsortedList ()); len (errs ) > 0 {
1271+ return nil , fmt .Errorf ("error removing owned security groups: %v" , errs )
1272+ }
12721273 }
12731274 }
12741275
@@ -1879,3 +1880,143 @@ func ValidateHealthCheck(s *elbtypes.HealthCheck) error {
18791880
18801881 return nil
18811882}
1883+
1884+ // isOwnedSecurityGroup checks if the security group is owned by the controller
1885+ // by checking if the security group has the cluster ownership tag
1886+ // (kubernetes.io/cluster/<clusterID>=owned).
1887+ //
1888+ // Parameters:
1889+ // - `ctx`: The context for the operation.
1890+ // - `securityGroupID`: The ID of the security group to check.
1891+ //
1892+ // Returns:
1893+ // - `bool`: True if the security group is owned by the controller, false otherwise.
1894+ // - `error`: An error if the security group cannot be retrieved, is not found,
1895+ // or if multiple security groups are found with the same ID (unexpected).
1896+ func (c * Cloud ) isOwnedSecurityGroup (ctx context.Context , securityGroupID string ) (bool , error ) {
1897+ groups , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1898+ GroupIds : []string {securityGroupID },
1899+ })
1900+ if err != nil {
1901+ return false , fmt .Errorf ("error retrieving security group %q: %w" , securityGroupID , err )
1902+ }
1903+ if len (groups ) == 0 {
1904+ return false , fmt .Errorf ("security group %q not found" , securityGroupID )
1905+ }
1906+ if len (groups ) != 1 {
1907+ // This should not be possible - ids should be unique
1908+ return false , fmt .Errorf ("[BUG] multiple security groups(%d) found with same id %q" , len (groups ), securityGroupID )
1909+ }
1910+ return c .tagging .hasClusterTagOwned (groups [0 ].Tags )
1911+ }
1912+
1913+ // buildSecurityGroupRuleReferences finds all security groups that have ingress rules
1914+ // referencing the specified security group ID, and categorizes them based on cluster tagging.
1915+ // This is used to identify dependencies before removing a security group.
1916+ //
1917+ // Parameters:
1918+ // - ctx: The context for the request.
1919+ // - sgID: The ID of the security group to find references for.
1920+ //
1921+ // Returns:
1922+ // - map[*ec2types.SecurityGroup]bool: All security groups with ingress rules referencing sgID, mapped to their cluster tag status (true/false).
1923+ // - map[*ec2types.SecurityGroup]IPPermissionSet: Only cluster-tagged security groups mapped to their ingress rules that reference sgID.
1924+ // - error: An error if the AWS DescribeSecurityGroups API call fails.
1925+ func (c * Cloud ) buildSecurityGroupRuleReferences (ctx context.Context , sgID string ) (map [* ec2types.SecurityGroup ]bool , map [* ec2types.SecurityGroup ]IPPermissionSet , error ) {
1926+ groupsHasTags := make (map [* ec2types.SecurityGroup ]bool )
1927+ groupsLinkedPermissions := make (map [* ec2types.SecurityGroup ]IPPermissionSet )
1928+ sgsOut , err := c .ec2 .DescribeSecurityGroups (ctx , & ec2.DescribeSecurityGroupsInput {
1929+ Filters : []ec2types.Filter {
1930+ newEc2Filter ("ip-permission.group-id" , sgID ),
1931+ },
1932+ })
1933+ if err != nil {
1934+ return groupsHasTags , groupsLinkedPermissions , fmt .Errorf ("error querying security groups for ELB: %q" , err )
1935+ }
1936+
1937+ for _ , sg := range sgsOut {
1938+ groupsHasTags [& sg ] = c .tagging .hasClusterTag (sg .Tags )
1939+
1940+ groupsLinkedPermissions [& sg ] = NewIPPermissionSet ()
1941+ for _ , rule := range sg .IpPermissions {
1942+ if rule .UserIdGroupPairs != nil {
1943+ for _ , pair := range rule .UserIdGroupPairs {
1944+ if pair .GroupId != nil && aws .ToString (pair .GroupId ) == sgID {
1945+ groupsLinkedPermissions [& sg ].Insert (rule )
1946+ }
1947+ }
1948+ }
1949+ }
1950+
1951+ }
1952+ return groupsHasTags , groupsLinkedPermissions , nil
1953+ }
1954+
1955+ // removeOwnedSecurityGroups removes the CLB owned/managed security groups from AWS.
1956+ // It revokes ingress rules that reference the security groups to be removed,
1957+ // then deletes the security groups that are owned by the controller.
1958+ // This is used when updating load balancer security groups to clean up orphaned ones.
1959+ //
1960+ // Parameters:
1961+ // - `ctx`: The context for the operation.
1962+ // - `loadBalancerName`: The name of the load balancer (used for logging and deletion operations).
1963+ // - `securityGroups`: The list of security group IDs to process for removal.
1964+ //
1965+ // Returns:
1966+ // - `[]error`: Collection of all errors encountered during the removal process.
1967+ func (c * Cloud ) removeOwnedSecurityGroups (ctx context.Context , loadBalancerName string , securityGroups []string ) []error {
1968+ allErrs := []error {}
1969+ sgMap := make (map [string ]struct {})
1970+
1971+ // Validate each security group references building a reading list to be deleted.
1972+ for _ , sg := range securityGroups {
1973+ isOwned , err := c .isOwnedSecurityGroup (ctx , sg )
1974+ if err != nil {
1975+ allErrs = append (allErrs , fmt .Errorf ("unable to validate if security group %q is owned by the controller: %w" , sg , err ))
1976+ continue
1977+ }
1978+
1979+ groupsWithClusterTag , groupsLinkedPermissions , err := c .buildSecurityGroupRuleReferences (ctx , sg )
1980+ if err != nil {
1981+ allErrs = append (allErrs , fmt .Errorf ("error building security group rule references for %q: %w" , sg , err ))
1982+ continue
1983+ }
1984+
1985+ // Revoke ingress rules referencing the security group to be deleted
1986+ // from cluster-tagged security groups, when the referenced security
1987+ // group has no cluster tag, skip the revoke assuming it is user-managed.
1988+ for sgTarget , sgPerms := range groupsLinkedPermissions {
1989+ if ! groupsWithClusterTag [sgTarget ] {
1990+ klog .Warningf ("security group %q has no cluster tag, skipping remove lifecycle after update" , sg )
1991+ continue
1992+ }
1993+
1994+ klog .Infof ("revoking security group ingress references of %q from %q" , sg , aws .ToString (sgTarget .GroupId ))
1995+ if _ , err := c .ec2 .RevokeSecurityGroupIngress (ctx , & ec2.RevokeSecurityGroupIngressInput {
1996+ GroupId : sgTarget .GroupId ,
1997+ IpPermissions : sgPerms .List (),
1998+ }); err != nil {
1999+ allErrs = append (allErrs , fmt .Errorf ("error revoking security group ingress rules from %q: %w" , aws .ToString (sgTarget .GroupId ), err ))
2000+ continue
2001+ }
2002+ }
2003+
2004+ // Skip security group removal when the security group is not owned by the controller.
2005+ if ! isOwned {
2006+ klog .Warningf ("security group %q is not owned by the controller, skipping remove lifecycle after update" , sg )
2007+ continue
2008+ }
2009+
2010+ klog .Infof ("making loadbalancer owned security group %q ready for deletion" , sg )
2011+ sgMap [sg ] = struct {}{}
2012+ }
2013+ if len (sgMap ) == 0 {
2014+ return allErrs
2015+ }
2016+
2017+ if err := c .deleteSecurityGroupsWithBackoff (ctx , loadBalancerName , sgMap ); err != nil {
2018+ return append (allErrs , fmt .Errorf ("error deleting security groups %v: %v" , sgMap , err ))
2019+ }
2020+ klog .Infof ("loadbalancer owned security groups deleted!" )
2021+ return nil
2022+ }
0 commit comments