Skip to content

Commit 2b9d64e

Browse files
authored
feat: discover ami for unmanaged nodegroups (#707)
1 parent 108a0f7 commit 2b9d64e

File tree

5 files changed

+218
-8
lines changed

5 files changed

+218
-8
lines changed

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,6 @@ update-deps:
99
"$$SCRIPT" ; \
1010
done
1111

12+
.PHONY: test-integration
13+
test-integration: ## Run unit and integration tests
14+
go test -v -tags=integration ./...
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
package eksapi
2+
3+
import (
4+
"context"
5+
"fmt"
6+
7+
"github.com/aws/aws-sdk-go-v2/aws"
8+
"github.com/aws/aws-sdk-go-v2/service/ec2"
9+
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
10+
"github.com/aws/aws-sdk-go-v2/service/ssm"
11+
"k8s.io/klog/v2"
12+
)
13+
14+
func NewAMIResolver(awsClients *awsClients) *amiResolver {
15+
return &amiResolver{
16+
clients: awsClients,
17+
}
18+
}
19+
20+
type amiResolver struct {
21+
clients *awsClients
22+
}
23+
24+
func (r *amiResolver) Resolve(ctx context.Context, opts *deployerOptions) (string, error) {
25+
switch opts.UserDataFormat {
26+
case UserDataBootstrapSh:
27+
// TODO: AL2 is not a high priority, skipping for now.
28+
return "", fmt.Errorf("%s is not handled", opts.UserDataFormat)
29+
case UserDataNodeadm:
30+
return r.ResolveAL2023(ctx, opts)
31+
case UserDataBottlerocket:
32+
return r.ResolveBottlerocket(ctx, opts)
33+
default:
34+
return "", fmt.Errorf("unhandled userdata format: %s", opts.UserDataFormat)
35+
}
36+
}
37+
38+
func (r *amiResolver) ResolveAL2023(ctx context.Context, opts *deployerOptions) (string, error) {
39+
describeInstanceTypesResponse, err := r.clients.EC2().DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
40+
InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(r.getInstance(opts))},
41+
})
42+
if err != nil {
43+
return "", err
44+
}
45+
instanceTypeInfo := describeInstanceTypesResponse.InstanceTypes[0]
46+
47+
arch, err := r.resolveArch(instanceTypeInfo)
48+
if err != nil {
49+
return "", err
50+
}
51+
52+
variant := "standard"
53+
if instanceTypeInfo.NeuronInfo != nil {
54+
if len(instanceTypeInfo.NeuronInfo.NeuronDevices) > 0 {
55+
variant = "neuron"
56+
}
57+
} else if instanceTypeInfo.GpuInfo != nil {
58+
for _, gpu := range instanceTypeInfo.GpuInfo.Gpus {
59+
if aws.ToString(gpu.Manufacturer) == "NVIDIA" {
60+
variant = "nvidia"
61+
break
62+
}
63+
}
64+
}
65+
66+
getParameterReponse, err := r.clients.SSM().GetParameter(ctx, &ssm.GetParameterInput{
67+
Name: aws.String(fmt.Sprintf("/aws/service/eks/optimized-ami/%s/amazon-linux-2023/%s/%s/recommended/image_id", opts.KubernetesVersion, arch, variant)),
68+
})
69+
if err != nil {
70+
return "", err
71+
}
72+
73+
return aws.ToString(getParameterReponse.Parameter.Value), nil
74+
}
75+
76+
func (r *amiResolver) ResolveBottlerocket(ctx context.Context, opts *deployerOptions) (string, error) {
77+
describeInstanceTypesResponse, err := r.clients.EC2().DescribeInstanceTypes(ctx, &ec2.DescribeInstanceTypesInput{
78+
InstanceTypes: []ec2types.InstanceType{ec2types.InstanceType(r.getInstance(opts))},
79+
})
80+
if err != nil {
81+
return "", err
82+
}
83+
instanceTypeInfo := describeInstanceTypesResponse.InstanceTypes[0]
84+
85+
arch, err := r.resolveArch(instanceTypeInfo)
86+
if err != nil {
87+
return "", err
88+
}
89+
90+
// TODO: enable fips
91+
flavorSuffix := ""
92+
if instanceTypeInfo.GpuInfo != nil {
93+
for _, gpu := range instanceTypeInfo.GpuInfo.Gpus {
94+
if aws.ToString(gpu.Manufacturer) == "NVIDIA" {
95+
flavorSuffix = "-nvidia"
96+
break
97+
}
98+
}
99+
}
100+
101+
getParameterResponse, err := r.clients.SSM().GetParameter(ctx, &ssm.GetParameterInput{
102+
Name: aws.String(fmt.Sprintf("/aws/service/bottlerocket/aws-k8s-%s%s/%s/latest/image_id", opts.KubernetesVersion, flavorSuffix, arch)),
103+
})
104+
if err != nil {
105+
return "", err
106+
}
107+
108+
return aws.ToString(getParameterResponse.Parameter.Value), nil
109+
}
110+
111+
func (r *amiResolver) getInstance(opts *deployerOptions) string {
112+
instanceType := opts.InstanceTypes[0]
113+
if len(opts.InstanceTypes) > 1 {
114+
klog.Warningf("only resolving AMI based on first instance type: %s", instanceType)
115+
}
116+
return instanceType
117+
}
118+
119+
func (r *amiResolver) resolveArch(instanceTypeInfo ec2types.InstanceTypeInfo) (string, error) {
120+
// TODO: the ordering might be weird because old instances might support
121+
// both i386 and x8664.
122+
switch arch := instanceTypeInfo.ProcessorInfo.SupportedArchitectures[0]; arch {
123+
case ec2types.ArchitectureTypeArm64, ec2types.ArchitectureTypeX8664:
124+
return string(arch), nil
125+
default:
126+
return "", fmt.Errorf("unhandled arch: %s", arch)
127+
}
128+
}
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
//go:build integration
2+
3+
package eksapi
4+
5+
import (
6+
"context"
7+
"testing"
8+
9+
"github.com/aws/aws-sdk-go-v2/config"
10+
"github.com/stretchr/testify/assert"
11+
)
12+
13+
func TestAMIResolver(t *testing.T) {
14+
ctx := context.Background()
15+
awsCfg, err := config.LoadDefaultConfig(ctx)
16+
assert.NoError(t, err)
17+
18+
amiResolver := NewAMIResolver(newAWSClients(awsCfg, ""))
19+
20+
t.Run("AL2023-nvidia", func(t *testing.T) {
21+
opts := deployerOptions{
22+
UserDataFormat: UserDataNodeadm,
23+
KubernetesVersion: "1.33",
24+
}
25+
t.Run("nvidia", func(t *testing.T) {
26+
opts := opts
27+
opts.InstanceTypes = []string{"g5.xlarge"}
28+
29+
ami, err := amiResolver.Resolve(ctx, &opts)
30+
assert.NoError(t, err)
31+
assert.Regexp(t, "ami-.*", ami)
32+
})
33+
t.Run("standard", func(t *testing.T) {
34+
opts := opts
35+
opts.InstanceTypes = []string{"m5.xlarge"}
36+
37+
ami, err := amiResolver.Resolve(ctx, &opts)
38+
assert.NoError(t, err)
39+
assert.Regexp(t, "ami-.*", ami)
40+
})
41+
})
42+
43+
t.Run("Bottlerocket", func(t *testing.T) {
44+
opts := deployerOptions{
45+
UserDataFormat: UserDataBottlerocket,
46+
KubernetesVersion: "1.33",
47+
}
48+
t.Run("nvidia", func(t *testing.T) {
49+
opts := opts
50+
opts.InstanceTypes = []string{"g5.xlarge"}
51+
52+
ami, err := amiResolver.Resolve(ctx, &opts)
53+
assert.NoError(t, err)
54+
assert.Regexp(t, "ami-.*", ami)
55+
})
56+
t.Run("standard", func(t *testing.T) {
57+
opts := opts
58+
opts.InstanceTypes = []string{"m5.xlarge"}
59+
60+
ami, err := amiResolver.Resolve(ctx, &opts)
61+
assert.NoError(t, err)
62+
assert.Regexp(t, "ami-.*", ami)
63+
})
64+
})
65+
}

internal/deployers/eksapi/deployer.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package eksapi
22

33
import (
4+
"context"
45
"flag"
56
"fmt"
67
"path/filepath"
@@ -15,8 +16,8 @@ import (
1516

1617
"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
1718
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
18-
"github.com/urfave/sflags/gen/gpflag"
1919
"github.com/spf13/pflag"
20+
"github.com/urfave/sflags/gen/gpflag"
2021
"golang.org/x/exp/slices"
2122
"k8s.io/klog"
2223
"sigs.k8s.io/kubetest2/pkg/types"
@@ -299,9 +300,6 @@ func (d *deployer) verifyUpFlags() error {
299300
return fmt.Errorf("--instance-types and --instance-type-archs are mutually exclusive")
300301
}
301302
if d.UnmanagedNodes {
302-
if d.AMI == "" {
303-
return fmt.Errorf("--ami must be specified for --unmanaged-nodes")
304-
}
305303
if d.AMIType != "" {
306304
return fmt.Errorf("--ami-type should not be provided with --unmanaged-nodes")
307305
}
@@ -314,9 +312,19 @@ func (d *deployer) verifyUpFlags() error {
314312
}
315313
}
316314
if d.UserDataFormat == "" {
317-
d.UserDataFormat = "bootstrap.sh"
315+
d.UserDataFormat = UserDataBootstrapSh
318316
klog.Infof("Using default user data format: %s", d.UserDataFormat)
319317
}
318+
// AMI ID check must come after user-data format resolution because we
319+
// can try to infer the AMI type for unmanaged nodes.
320+
if d.AMI == "" {
321+
ami, err := NewAMIResolver(d.awsClients).Resolve(context.TODO(), &d.deployerOptions)
322+
if err != nil {
323+
return fmt.Errorf("failed to automatically resolve ami for unmanaged nodegroup (provide --ami to short circuit this): %w", err)
324+
}
325+
d.AMI = ami
326+
}
327+
320328
if d.EFA && len(d.InstanceTypes) != 1 {
321329
return fmt.Errorf("--efa requires a single instance type")
322330
}

internal/deployers/eksapi/userdata.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,22 @@ import (
1010
"github.com/aws/aws-k8s-tester/internal/deployers/eksapi/templates"
1111
)
1212

13+
// Recognized values for the user data format option.
const (
	// UserDataBootstrapSh selects the bootstrap.sh user data template.
	UserDataBootstrapSh = "bootstrap.sh"

	// UserDataNodeadm selects the nodeadm user data template.
	UserDataNodeadm = "nodeadm"

	// UserDataBottlerocket selects the Bottlerocket user data template.
	UserDataBottlerocket = "bottlerocket"
)
18+
1319
func generateUserData(cluster *Cluster, opts *deployerOptions) (string, bool, error) {
1420
userDataIsMimePart := true
1521
var t *template.Template
1622
switch opts.UserDataFormat {
17-
case "bootstrap.sh":
23+
case UserDataBootstrapSh:
1824
t = templates.UserDataBootstrapSh
19-
case "nodeadm":
25+
case UserDataNodeadm:
2026
// TODO: replace the YAML template with proper usage of the nodeadm API go types
2127
t = templates.UserDataNodeadm
22-
case "bottlerocket":
28+
case UserDataBottlerocket:
2329
t = templates.UserDataBottlerocket
2430
userDataIsMimePart = false
2531
default:

0 commit comments

Comments
 (0)