-
Notifications
You must be signed in to change notification settings - Fork 11
Pre-clean orphaned ENIs and SGs before VPC stack delete #85
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,146 @@ | ||
| package ec2 | ||
|
|
||
| import ( | ||
| "context" | ||
| "fmt" | ||
| "log" | ||
| "time" | ||
|
|
||
| "github.com/aws/aws-sdk-go-v2/aws" | ||
| "github.com/aws/aws-sdk-go-v2/service/ec2" | ||
| "github.com/aws/aws-sdk-go-v2/service/ec2/types" | ||
| ) | ||
|
|
||
| // CleanVPCForDeletion removes orphaned ENIs and non-default security groups | ||
| // from a VPC so that its CloudFormation stack can be deleted cleanly. | ||
| // Resources left behind by hosted cluster teardown (load balancer controllers, | ||
| // CNI plugin) cause CloudFormation delete-stack to fail on the first attempt. | ||
| func CleanVPCForDeletion(ctx context.Context, cfg aws.Config, vpcID string) error { | ||
| client := ec2.NewFromConfig(cfg) | ||
|
|
||
| if err := deleteOrphanedENIs(ctx, client, vpcID); err != nil { | ||
| return fmt.Errorf("cleaning ENIs: %w", err) | ||
| } | ||
|
|
||
| if err := deleteNonDefaultSecurityGroups(ctx, client, vpcID); err != nil { | ||
| return fmt.Errorf("cleaning security groups: %w", err) | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
|
|
||
| func deleteOrphanedENIs(ctx context.Context, client *ec2.Client, vpcID string) error { | ||
| out, err := client.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{ | ||
| Filters: []types.Filter{ | ||
| {Name: aws.String("vpc-id"), Values: []string{vpcID}}, | ||
| }, | ||
| }) | ||
|
Comment on lines
+33
to
+37
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
# Map the target file and inspect the relevant sections.
ast-grep outline internal/aws/ec2/cleanup.go --view expanded
printf '\n--- cleanup.go (selected lines) ---\n'
sed -n '1,220p' internal/aws/ec2/cleanup.go
printf '\n--- search for paginator usage in internal/aws/ec2 ---\n'
rg -n "New.*Paginator|Paginator\(" internal/aws/ec2 internal -g '*.go'
printf '\n--- inspect callers / surrounding cleanup flow ---\n'
rg -n "CleanVPCForDeletion|deleteOrphanedENIs|deleteNonDefaultSecurityGroups" internal -g '*.go'Repository: openshift-online/rosa-hyperfleet-cli Length of output: 6172 🌐 Web query:
💡 Result: In the AWS SDK for Go v2, you handle pagination for operations like DescribeNetworkInterfaces and DescribeSecurityGroups using the service-specific paginator helpers rather than manually managing the NextToken [1][2]. The SDK provides dedicated constructors for these paginators, which simplify the process by abstracting the request/response cycle [1][2]. To implement pagination, follow this pattern: 1. Use the appropriate constructor: ec2.NewDescribeNetworkInterfacesPaginator or ec2.NewDescribeSecurityGroupsPaginator [3][4]. 2. Create the paginator by passing your client and the input parameters [1][2]. You can optionally provide a functional argument to configure settings like the page limit [3][1]. 3. Iterate using the paginator's HasMorePages and NextPage methods [3][4][1]. Example for DescribeNetworkInterfaces: paginator:= ec2.NewDescribeNetworkInterfacesPaginator(ec2Client, &ec2.DescribeNetworkInterfacesInput{ MaxResults: aws.Int32(100), }) for paginator.HasMorePages { output, err:= paginator.NextPage(ctx) if err!= nil { log.Fatalf("failed to get page: %v", err) } for _, eni:= range output.NetworkInterfaces { fmt.Println(*eni.NetworkInterfaceId) } } The paginator automatically handles the NextToken internally, storing it and applying it to subsequent calls until no more pages remain [5][1]. This is the recommended approach over manual token handling, as it reduces error-prone boilerplate and ensures standard request lifecycle management [6][1][2]. Citations:
Use EC2 paginators for both listings. 🤖 Prompt for AI Agents |
||
| if err != nil { | ||
| return fmt.Errorf("describing ENIs: %w", err) | ||
| } | ||
|
|
||
| for _, eni := range out.NetworkInterfaces { | ||
| eniID := aws.ToString(eni.NetworkInterfaceId) | ||
|
|
||
| if eni.Attachment != nil && eni.Attachment.AttachmentId != nil { | ||
| // Only attempt detach for ENIs we can force-detach. | ||
| // ELB/Lambda-managed attachments will be cleaned up by their | ||
| // owning service; skip them to avoid API errors. | ||
| if eni.Attachment.DeleteOnTermination != nil && *eni.Attachment.DeleteOnTermination { | ||
| log.Printf(" skipping ENI %s (managed attachment, will be cleaned by owner)", eniID) | ||
| continue | ||
| } | ||
|
Comment on lines
+46
to
+52
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Trying to understand this part better. So for ELB/Lamba ENI we will as such not have I came across So in the previous comment , we could have the following filters instead?
This would filter out the ENI that can be cleaned up and are not eligible for automatic deletion. Thoughts? |
||
|
|
||
| log.Printf(" detaching ENI %s", eniID) | ||
| _, err := client.DetachNetworkInterface(ctx, &ec2.DetachNetworkInterfaceInput{ | ||
| AttachmentId: eni.Attachment.AttachmentId, | ||
| Force: aws.Bool(true), | ||
| }) | ||
| if err != nil { | ||
| log.Printf(" warning: failed to detach ENI %s: %v (will still try delete)", eniID, err) | ||
| } else { | ||
| waitForENIAvailable(ctx, client, eniID) | ||
| } | ||
| } | ||
|
|
||
| log.Printf(" deleting ENI %s", eniID) | ||
| _, err := client.DeleteNetworkInterface(ctx, &ec2.DeleteNetworkInterfaceInput{ | ||
| NetworkInterfaceId: aws.String(eniID), | ||
| }) | ||
| if err != nil { | ||
| log.Printf(" warning: failed to delete ENI %s: %v", eniID, err) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
|
Comment on lines
+32
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win Return partial cleanup failures to the caller. These helpers only return an error when the initial Possible fix+import "errors"
+
func deleteOrphanedENIs(ctx context.Context, client *ec2.Client, vpcID string) error {
+ var cleanupErr error
+
out, err := client.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{
Filters: []types.Filter{
{Name: aws.String("vpc-id"), Values: []string{vpcID}},
@@
_, err := client.DetachNetworkInterface(ctx, &ec2.DetachNetworkInterfaceInput{
AttachmentId: eni.Attachment.AttachmentId,
Force: aws.Bool(true),
})
if err != nil {
log.Printf(" warning: failed to detach ENI %s: %v (will still try delete)", eniID, err)
+ cleanupErr = errors.Join(cleanupErr, fmt.Errorf("detach ENI %s: %w", eniID, err))
} else {
waitForENIAvailable(ctx, client, eniID)
}
}
@@
_, err := client.DeleteNetworkInterface(ctx, &ec2.DeleteNetworkInterfaceInput{
NetworkInterfaceId: aws.String(eniID),
})
if err != nil {
log.Printf(" warning: failed to delete ENI %s: %v", eniID, err)
+ cleanupErr = errors.Join(cleanupErr, fmt.Errorf("delete ENI %s: %w", eniID, err))
}
}
- return nil
+ return cleanupErr
}Apply the same pattern in Also applies to: 93-145 🤖 Prompt for AI Agents |
||
| } | ||
|
|
||
| func waitForENIAvailable(ctx context.Context, client *ec2.Client, eniID string) { | ||
| for i := 0; i < 12; i++ { | ||
| time.Sleep(5 * time.Second) | ||
| out, err := client.DescribeNetworkInterfaces(ctx, &ec2.DescribeNetworkInterfacesInput{ | ||
| NetworkInterfaceIds: []string{eniID}, | ||
| }) | ||
| if err != nil { | ||
| return | ||
| } | ||
| if len(out.NetworkInterfaces) > 0 && out.NetworkInterfaces[0].Status == types.NetworkInterfaceStatusAvailable { | ||
| return | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func deleteNonDefaultSecurityGroups(ctx context.Context, client *ec2.Client, vpcID string) error { | ||
| out, err := client.DescribeSecurityGroups(ctx, &ec2.DescribeSecurityGroupsInput{ | ||
| Filters: []types.Filter{ | ||
| {Name: aws.String("vpc-id"), Values: []string{vpcID}}, | ||
| }, | ||
| }) | ||
| if err != nil { | ||
| return fmt.Errorf("describing security groups: %w", err) | ||
| } | ||
|
|
||
| // First pass: revoke all rules so cross-references don't block deletion. | ||
| for _, sg := range out.SecurityGroups { | ||
| if aws.ToString(sg.GroupName) == "default" { | ||
| continue | ||
| } | ||
| sgID := aws.ToString(sg.GroupId) | ||
|
|
||
| if len(sg.IpPermissions) > 0 { | ||
| _, err := client.RevokeSecurityGroupIngress(ctx, &ec2.RevokeSecurityGroupIngressInput{ | ||
| GroupId: aws.String(sgID), | ||
| IpPermissions: sg.IpPermissions, | ||
| }) | ||
| if err != nil { | ||
| log.Printf(" warning: failed to revoke ingress on SG %s: %v", sgID, err) | ||
| } | ||
| } | ||
| if len(sg.IpPermissionsEgress) > 0 { | ||
| _, err := client.RevokeSecurityGroupEgress(ctx, &ec2.RevokeSecurityGroupEgressInput{ | ||
| GroupId: aws.String(sgID), | ||
| IpPermissions: sg.IpPermissionsEgress, | ||
| }) | ||
| if err != nil { | ||
| log.Printf(" warning: failed to revoke egress on SG %s: %v", sgID, err) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Second pass: delete the security groups. | ||
| for _, sg := range out.SecurityGroups { | ||
| if aws.ToString(sg.GroupName) == "default" { | ||
| continue | ||
| } | ||
| sgID := aws.ToString(sg.GroupId) | ||
| log.Printf(" deleting security group %s (%s)", sgID, aws.ToString(sg.GroupName)) | ||
| _, err := client.DeleteSecurityGroup(ctx, &ec2.DeleteSecurityGroupInput{ | ||
| GroupId: aws.String(sgID), | ||
| }) | ||
| if err != nil { | ||
| log.Printf(" warning: failed to delete SG %s: %v", sgID, err) | ||
| } | ||
| } | ||
|
|
||
| return nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we add additional filter for
statusas well to ensure that the ENI being filtered is "available" only and not "in-use". That might simplify some of the additional checks being added.Reference - https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeNetworkInterfaces.html#API_DescribeNetworkInterfaces_RequestParameters