Skip to content

Commit b2086e9

Browse files
committed
Add validation for Hybrid node IAM role
1 parent 64ffe84 commit b2086e9

File tree

6 files changed

+145
-6
lines changed

6 files changed

+145
-6
lines changed

internal/node/hybrid/hybrid.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ const (
2727
apiServerEndpointResolution = "api-server-endpoint-resolution-validation"
2828
proxyValidation = "proxy-validation"
2929
nodeInactiveValidation = "node-inactive-validation"
30+
clusterAccessValidation = "cluster-access-validation"
3031
kubeletCurrentCertPath = "/var/lib/kubelet/pki/kubelet-server-current.pem"
3132
)
3233

@@ -145,6 +146,7 @@ func (hnp *HybridNodeProvider) Validate(ctx context.Context) error {
145146
validation.New(apiServerEndpointResolution, kubernetes.ValidateAPIServerEndpointResolution),
146147
validation.New(proxyValidation, network.NewProxyValidator().Run),
147148
validation.New(nodeInactiveValidation, hnp.ValidateNodeIsInactive),
149+
validation.New(clusterAccessValidation, hnp.ValidateClusterAccess),
148150
)
149151

150152
// Run all validations sequentially
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
package hybrid
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"strings"
7+
8+
"github.com/aws/aws-sdk-go-v2/aws"
9+
"github.com/aws/aws-sdk-go-v2/aws/arn"
10+
"github.com/aws/aws-sdk-go-v2/service/eks"
11+
"github.com/aws/aws-sdk-go-v2/service/sts"
12+
13+
"github.com/aws/eks-hybrid/internal/api"
14+
"github.com/aws/eks-hybrid/internal/validation"
15+
)
16+
17+
// accessEntryRemediation is the remediation hint attached to every failure
// reported by the cluster access validation.
const accessEntryRemediation = "Ensure your EKS cluster has at least one access entry of type HYBRID_LINUX with the hybrid node IAM role as principal."
20+
21+
// ValidateClusterAccess checks if the current IAM role has access to the EKS cluster
22+
// through an access entry
23+
func (hnp *HybridNodeProvider) ValidateClusterAccess(ctx context.Context, informer validation.Informer, _ *api.NodeConfig) error {
24+
var err error
25+
if hnp.awsConfig == nil {
26+
err = fmt.Errorf("AWS config not set")
27+
return err
28+
}
29+
30+
if hnp.cluster == nil || hnp.cluster.Name == nil {
31+
informer.Starting(ctx, clusterAccessValidation, "Skipping cluster access validation due to node IAM role missing EKS DescribeCluster permission")
32+
informer.Done(ctx, clusterAccessValidation, err)
33+
return nil
34+
}
35+
36+
informer.Starting(ctx, clusterAccessValidation, "Validating cluster access through EKS access entry")
37+
defer func() {
38+
informer.Done(ctx, clusterAccessValidation, err)
39+
}()
40+
41+
stsClient := sts.NewFromConfig(*hnp.awsConfig)
42+
eksClient := eks.NewFromConfig(*hnp.awsConfig)
43+
44+
getCallerIdentityOutput, err := stsClient.GetCallerIdentity(ctx, &sts.GetCallerIdentityInput{})
45+
if err != nil {
46+
err = validation.WithRemediation(fmt.Errorf("getting caller identity: %w", err), accessEntryRemediation)
47+
return err
48+
}
49+
50+
if getCallerIdentityOutput.Arn == nil {
51+
err = validation.WithRemediation(fmt.Errorf("caller identity ARN is nil"), accessEntryRemediation)
52+
return err
53+
}
54+
55+
roleArn := *getCallerIdentityOutput.Arn
56+
parsedARN, err := arn.Parse(roleArn)
57+
if err != nil {
58+
err = validation.WithRemediation(fmt.Errorf("parsing role ARN: %w", err), accessEntryRemediation)
59+
return err
60+
}
61+
62+
roleName, ok := extractRoleNameFromARN(parsedARN)
63+
if !ok || roleName == "" {
64+
err = validation.WithRemediation(fmt.Errorf("extracting role name from ARN: %s", roleArn), accessEntryRemediation)
65+
return err
66+
}
67+
68+
accessEntries, err := fetchAllAccessEntries(ctx, eksClient, hnp.cluster.Name)
69+
if err != nil {
70+
err = validation.WithRemediation(fmt.Errorf("fetching access entries from cluster: %w", err), accessEntryRemediation)
71+
return err
72+
}
73+
74+
foundRole := false
75+
for _, accessEntry := range accessEntries {
76+
if strings.Contains(accessEntry, fmt.Sprintf("role/%s", roleName)) ||
77+
strings.Contains(accessEntry, fmt.Sprintf("role/%s/", roleName)) ||
78+
strings.HasSuffix(accessEntry, roleName) {
79+
foundRole = true
80+
break
81+
}
82+
}
83+
84+
if !foundRole {
85+
err = validation.WithRemediation(
86+
fmt.Errorf("missing access entry of type HYBRID_LINUX with Hybrid Node role principal: %s", roleName),
87+
accessEntryRemediation,
88+
)
89+
return err
90+
}
91+
92+
return nil
93+
}
94+
95+
// extractRoleNameFromARN extracts the role name from an ARN
96+
// Returns the role name and a boolean indicating if extraction was successful
97+
func extractRoleNameFromARN(parsedARN arn.ARN) (string, bool) {
98+
splitArn := strings.Split(parsedARN.Resource, "/")
99+
100+
// Handle assumed role ARN format: arn:aws:sts::123456789012:assumed-role/RoleName/session
101+
if parsedARN.Service == "sts" && strings.HasPrefix(parsedARN.Resource, "assumed-role") && len(splitArn) >= 2 {
102+
return splitArn[1], true
103+
}
104+
105+
// Handle IAM role ARN format: arn:aws:iam::123456789012:role/RoleName
106+
if parsedARN.Service == "iam" && strings.HasPrefix(parsedARN.Resource, "role") && len(splitArn) >= 2 {
107+
return splitArn[len(splitArn)-1], true
108+
}
109+
110+
return "", false
111+
}
112+
113+
// fetchAllAccessEntries retrieves all access entries for a cluster with pagination handling
114+
func fetchAllAccessEntries(ctx context.Context, eksClient *eks.Client, clusterName *string) ([]string, error) {
115+
accessEntries := []string{}
116+
var nextToken *string
117+
118+
for {
119+
listAccessEntriesOutput, err := eksClient.ListAccessEntries(ctx, &eks.ListAccessEntriesInput{
120+
ClusterName: clusterName,
121+
NextToken: nextToken,
122+
})
123+
if err != nil {
124+
return nil, fmt.Errorf("failed to list access entries: %w", err)
125+
}
126+
127+
accessEntries = append(accessEntries, listAccessEntriesOutput.AccessEntries...)
128+
129+
if listAccessEntriesOutput.NextToken == nil || aws.ToString(listAccessEntriesOutput.NextToken) == "" {
130+
break
131+
}
132+
nextToken = listAccessEntriesOutput.NextToken
133+
}
134+
135+
return accessEntries, nil
136+
}

internal/node/hybrid/nodeinactive_validator_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ func TestHybridNodeProvider_ValidateNodeIsInactive(t *testing.T) {
101101
"kubelet-cert-validation",
102102
"api-server-endpoint-resolution-validation",
103103
"proxy-validation",
104+
"cluster-access-validation",
104105
},
105106
observedLogger,
106107
hybrid.WithDaemonManager(mockDaemon),

test/e2e/credentials/stack.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ func (s *Stack) Deploy(ctx context.Context, logger logr.Logger) (*StackOutput, e
9494
}
9595

9696
if !skipIRATest() {
97-
logger.Info("Creating access entry", "iamRoleArn", output.IRANodeRoleARN)
97+
logger.Info("Creating access entry", "iamRARoleArn", output.IRANodeRoleARN)
9898
_, err = s.EKS.CreateAccessEntry(ctx, &eks.CreateAccessEntryInput{
9999
ClusterName: &s.ClusterName,
100100
PrincipalArn: &output.IRANodeRoleARN,
@@ -404,7 +404,7 @@ func (s *Stack) Delete(ctx context.Context, logger logr.Logger, output *StackOut
404404
return fmt.Errorf("deleting SSM access entry: %w", err)
405405
}
406406
if !skipIRATest() {
407-
logger.Info("Deleting access entry", "iamRoleArn", output.IRANodeRoleARN)
407+
logger.Info("Deleting access entry", "iamRARoleArn", output.IRANodeRoleARN)
408408
if _, err := s.EKS.DeleteAccessEntry(ctx, &eks.DeleteAccessEntryInput{
409409
ClusterName: &s.ClusterName,
410410
PrincipalArn: &output.IRANodeRoleARN,

test/integration/cases/init-with-config-enrichment/run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ nodeadm install $CURRENT_VERSION --credential-provider iam-ra
2121
mock::aws_signing_helper
2222

2323
exit_code=0
24-
STDERR=$(nodeadm init --skip run,node-ip-validation,k8s-authentication-validation --config-source file://config.yaml 2>&1) || exit_code=$?
24+
STDERR=$(nodeadm init --skip run,node-ip-validation,k8s-authentication-validation,cluster-access-validation --config-source file://config.yaml 2>&1) || exit_code=$?
2525
if [ $exit_code -ne 0 ]; then
2626
assert::is-substring "$STDERR" "ResourceNotFoundException"
2727
else
@@ -37,7 +37,7 @@ aws eks create-cluster \
3737
--resources-vpc-config subnetIds=subnet-123456789012,subnet-123456789013,securityGroupIds=sg-123456789014,endpointPrivateAccess=true,endpointPublicAccess=false \
3838
--remote-network-config '{"remoteNodeNetworks":[{"cidrs":["10.100.0.0/16"]}],"remotePodNetworks":[{"cidrs":["10.101.0.0/16"]}]}'
3939

40-
if ! nodeadm init --skip run,node-ip-validation,k8s-authentication-validation --config-source file://config.yaml; then
40+
if ! nodeadm init --skip run,node-ip-validation,k8s-authentication-validation,cluster-access-validation --config-source file://config.yaml; then
4141
echo "nodeadm init should have succeeded after creating the cluster"
4242
exit 1
4343
fi

test/integration/cases/init-with-node-ip-validation/run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ nodeadm install $CURRENT_VERSION --credential-provider iam-ra
2828
mock::aws_signing_helper
2929

3030
# should fail when --node-ip set to ip not in remote node networks
31-
if nodeadm init --skip run,k8s-authentication-validation --config-source file://config-ip-out-of-range.yaml; then
31+
if nodeadm init --skip run,k8s-authentication-validation,cluster-access-validation --config-source file://config-ip-out-of-range.yaml; then
3232
echo "nodeadm init should have failed with ip out of range but succeeded unexpectedly"
3333
exit 1
3434
fi
3535

3636
# should succeed when --node-ip set to ip in remote node networks
37-
nodeadm init --skip run,k8s-authentication-validation --config-source file://config-ip-in-range.yaml
37+
nodeadm init --skip run,k8s-authentication-validation,cluster-access-validation --config-source file://config-ip-in-range.yaml

0 commit comments

Comments
 (0)