cleanup IAM resources for aws irsa (#860)

* cleanup IAM resources

Signed-off-by: Alex <alexchan2988@gmail.com>

* feat: fix go verify errors.

Signed-off-by: Ramesh Krishna <ramekris3163@gmail.com>

* feat: cleanup IAM resources only if managed cluster annotation is present.

Signed-off-by: Ramesh Krishna <ramekris3163@gmail.com>

---------

Signed-off-by: Alex <alexchan2988@gmail.com>
Signed-off-by: Ramesh Krishna <ramekris3163@gmail.com>
Co-authored-by: Alex <alexchan2988@gmail.com>
This commit is contained in:
Ramesh Krishna
2025-03-04 20:40:50 -05:00
committed by GitHub
parent c05247840a
commit 2cc250b13a
4 changed files with 604 additions and 17 deletions
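For readers skimming the diff, the gating rule introduced here is small: awsirsa IAM cleanup runs only when the registration annotations are present on the ManagedCluster. Below is a minimal, self-contained sketch of that guard; the constant names and the needsAwsIrsaCleanup helper are illustrative only (the real logic lives in Cleanup in the diff), while the annotation keys are the ones the change actually uses.

package main

import "fmt"

// Annotation keys used by the awsirsa registration flow (as in the diff below).
const (
	roleSuffixAnnotation = "agent.open-cluster-management.io/managed-cluster-iam-role-suffix"
	clusterArnAnnotation = "agent.open-cluster-management.io/managed-cluster-arn"
)

// needsAwsIrsaCleanup mirrors the guard in Cleanup: IAM cleanup is skipped only
// when neither awsirsa annotation is present on the ManagedCluster.
func needsAwsIrsaCleanup(annotations map[string]string) bool {
	_, hasSuffix := annotations[roleSuffixAnnotation]
	_, hasArn := annotations[clusterArnAnnotation]
	return hasSuffix || hasArn
}

func main() {
	fmt.Println(needsAwsIrsaCleanup(nil)) // false: nothing to clean up
	fmt.Println(needsAwsIrsaCleanup(map[string]string{
		roleSuffixAnnotation: "960c4e56c25ba0b571ddcdaa7edc943e",
		clusterArnAnnotation: "arn:aws:eks:us-west-2:123456789012:cluster/spoke-cluster",
	})) // true: the role, policy, and access entry are deleted
}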


@@ -25,6 +25,11 @@ import (
"open-cluster-management.io/ocm/pkg/registration/register"
)
const (
errNoSuchEntity = "NoSuchEntity"
errEntityAlreadyExists = "EntityAlreadyExists"
)
type AWSIRSAHubDriver struct {
hubClusterArn string
cfg aws.Config
@@ -50,8 +55,36 @@ func (a *AWSIRSAHubDriver) Accept(cluster *clusterv1.ManagedCluster) bool {
}
// Cleanup is run when the cluster is deleting or hubAcceptClient is set false
func (c *AWSIRSAHubDriver) Cleanup(_ context.Context, _ *clusterv1.ManagedCluster) error {
// noop
func (c *AWSIRSAHubDriver) Cleanup(ctx context.Context, managedCluster *clusterv1.ManagedCluster) error {
_, isManagedClusterIamRoleSuffixPresent :=
managedCluster.Annotations["agent.open-cluster-management.io/managed-cluster-iam-role-suffix"]
_, isManagedClusterArnPresent := managedCluster.Annotations["agent.open-cluster-management.io/managed-cluster-arn"]
logger := klog.FromContext(ctx)
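// Cleanup only applies when the awsirsa registration annotations were added to the ManagedCluster; otherwise it is a no-op.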
if !isManagedClusterArnPresent && !isManagedClusterIamRoleSuffixPresent {
logger.V(4).Info("No Op Cleanup since managedcluster annotations are not present for awsirsa.")
return nil
}
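// Derive the IAM role/policy names and ARNs from the annotation suffix and the current AWS account.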
roleName, _, roleArn, policyArn, err := getRoleAndPolicyArn(ctx, managedCluster, c.cfg)
if err != nil {
logger.V(4).Error(err, "Failed to getRoleAndPolicyArn")
return err
}
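// Delete the IAM role and its attached policy created for this managed cluster.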
err = deleteIAMRoleAndPolicy(ctx, c.cfg, roleName, policyArn)
if err != nil {
return err
}
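// Remove the EKS access entry on the hub cluster that was granted to the role.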
eksClient := eks.NewFromConfig(c.cfg)
_, hubClusterName := commonhelpers.GetAwsAccountIdAndClusterName(c.hubClusterArn)
err = deleteAccessEntry(ctx, eksClient, roleArn, hubClusterName)
if err != nil {
return err
}
return nil
}
@@ -106,19 +139,15 @@ func createIAMRoleAndPolicy(ctx context.Context, hubClusterArn string, managedCl
managedCluster.Annotations["agent.open-cluster-management.io/managed-cluster-iam-role-suffix"]
managedClusterArn, isManagedClusterArnPresent := managedCluster.Annotations["agent.open-cluster-management.io/managed-cluster-arn"]
roleName := fmt.Sprintf("ocm-hub-%s", managedClusterIamRoleSuffix)
policyName := roleName
creds, err := cfg.Credentials.Retrieve(ctx)
if err != nil {
klog.Errorf("Failed to get IAM Credentials")
return hubClusterName, "", err
}
awsAccountId := creds.AccountID
roleArn := fmt.Sprintf("arn:aws:iam::%s:role/%s", awsAccountId, roleName)
policyArn := fmt.Sprintf("arn:aws:iam::%s:policy/%s", awsAccountId, policyName)
hubAccountId, hubClusterName = commonhelpers.GetAwsAccountIdAndClusterName(hubClusterArn)
managedClusterAccountId, managedClusterName = commonhelpers.GetAwsAccountIdAndClusterName(managedClusterArn)
roleName, policyName, roleArn, policyArn, err := getRoleAndPolicyArn(ctx, managedCluster, cfg)
if err != nil {
logger.V(4).Error(err, "Failed to getRoleAndPolicyArn")
return hubClusterName, roleArn, err
}
if hubClusterArn != "" && isManagedClusterIamRoleSuffixPresent && isManagedClusterArnPresent {
// Check if hash is the same
@@ -149,7 +178,7 @@ func createIAMRoleAndPolicy(ctx context.Context, hubClusterArn string, managedCl
})
if err != nil {
// Ignore error when role already exists as we will always create the same role
if !(strings.Contains(err.Error(), "EntityAlreadyExists")) {
if !(strings.Contains(err.Error(), errEntityAlreadyExists)) {
logger.V(4).Error(err, "Failed to create IAM role %s for ManagedCluster", "IAMRole", roleName, "ManagedCluster", managedClusterName)
return hubClusterName, roleArn, err
} else {
@@ -164,7 +193,7 @@ func createIAMRoleAndPolicy(ctx context.Context, hubClusterArn string, managedCl
PolicyName: aws.String(policyName),
})
if err != nil {
if !(strings.Contains(err.Error(), "EntityAlreadyExists")) {
if !(strings.Contains(err.Error(), errEntityAlreadyExists)) {
logger.V(4).Error(err, "Failed to create IAM Policy for ManagedCluster", "IAMPolicy", policyName, "ManagedCluster", managedClusterName)
return hubClusterName, roleArn, err
} else {
@@ -249,6 +278,90 @@ func createAccessEntry(ctx context.Context, eksClient *eks.Client, roleArn strin
return nil
}
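// deleteIAMRoleAndPolicy detaches the policy from the role and deletes both; NoSuchEntity errors are ignored so cleanup stays idempotent.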
func deleteIAMRoleAndPolicy(ctx context.Context, cfg aws.Config, roleName string, policyArn string) error {
logger := klog.FromContext(ctx)
iamClient := iam.NewFromConfig(cfg)
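// A managed policy must be detached from the role before either can be deleted.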
_, err := iamClient.DetachRolePolicy(ctx, &iam.DetachRolePolicyInput{
RoleName: &roleName,
PolicyArn: &policyArn,
})
if err != nil {
if !strings.Contains(err.Error(), errNoSuchEntity) {
logger.V(4).Error(err, "Failed to detach Policy from Role", "Policy", policyArn, "Role", roleName)
return err
}
} else {
logger.V(4).Info("Policy detached successfully from Role", "Policy", policyArn, "Role", roleName)
}
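// Delete the policy; NoSuchEntity means it was already removed.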
_, err = iamClient.DeletePolicy(ctx, &iam.DeletePolicyInput{
PolicyArn: &policyArn,
})
if err != nil {
if !strings.Contains(err.Error(), errNoSuchEntity) {
logger.V(4).Error(err, "Failed to delete Policy", "Policy", policyArn)
return err
}
} else {
logger.V(4).Info("Policy deleted successfully", "Policy", policyArn)
}
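// Delete the role itself once the policy has been detached.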
_, err = iamClient.DeleteRole(ctx, &iam.DeleteRoleInput{
RoleName: &roleName,
})
if err != nil {
if !strings.Contains(err.Error(), errNoSuchEntity) {
logger.V(4).Error(err, "Failed to delete Role", "Role", roleName)
return err
}
} else {
logger.V(4).Info("Role deleted successfully", "Role", roleName)
}
return nil
}
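// getRoleAndPolicyArn derives the deterministic role/policy names (ocm-hub-<suffix>) and their ARNs from the managed cluster's awsirsa annotation and the caller's AWS account ID.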
func getRoleAndPolicyArn(ctx context.Context, managedCluster *v1.ManagedCluster, cfg aws.Config) (string, string, string, string, error) {
logger := klog.FromContext(ctx)
managedClusterIamRoleSuffix :=
managedCluster.Annotations["agent.open-cluster-management.io/managed-cluster-iam-role-suffix"]
roleName := fmt.Sprintf("ocm-hub-%s", managedClusterIamRoleSuffix)
policyName := roleName
creds, err := cfg.Credentials.Retrieve(ctx)
if err != nil {
logger.V(4).Error(err, "Failed to get IAM Credentials")
return "", "", "", "", err
}
awsAccountId := creds.AccountID
roleArn := fmt.Sprintf("arn:aws:iam::%s:role/%s", awsAccountId, roleName)
policyArn := fmt.Sprintf("arn:aws:iam::%s:policy/%s", awsAccountId, policyName)
return roleName, policyName, roleArn, policyArn, err
}
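// deleteAccessEntry removes the EKS access entry for the given principal role ARN from the hub cluster.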
func deleteAccessEntry(ctx context.Context, eksClient *eks.Client, roleArn string, hubClusterName string) error {
logger := klog.FromContext(ctx)
params := &eks.DeleteAccessEntryInput{
ClusterName: aws.String(hubClusterName),
PrincipalArn: aws.String(roleArn),
}
_, err := eksClient.DeleteAccessEntry(ctx, params)
if err != nil {
logger.V(4).Error(err, "Failed to delete Access Entry for HubClusterName", "HubClusterName", hubClusterName)
return err
} else {
logger.V(4).Info("Access Entry deleted successfully for HubClusterName", "HubClusterName", hubClusterName)
}
return nil
}
func NewAWSIRSAHubDriver(ctx context.Context, hubClusterArn string, autoApprovedIdentityPatterns []string) (register.HubDriver, error) {
logger := klog.FromContext(ctx)
cfg, err := config.LoadDefaultConfig(ctx)


@@ -4,10 +4,12 @@ import (
"context"
"fmt"
"os"
"regexp"
"strings"
"testing"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/aws/arn"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/eks"
ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types"
@@ -117,7 +119,7 @@ func TestAccept(t *testing.T) {
isAccepted: true,
},
}
AwsIrsaHubDriver, err := NewAWSIRSAHubDriver(context.Background(), "arn:aws:eks:us-west-2:123456789012:cluster/hub-cluster",
awsIrsaHubDriver, err := NewAWSIRSAHubDriver(context.Background(), "arn:aws:eks:us-west-2:123456789012:cluster/hub-cluster",
[]string{
"arn:aws:eks:us-west-2:123456789012:cluster/.*",
"arn:aws:eks:us-west-1:123456789012:cluster/.*",
@@ -129,7 +131,7 @@ func TestAccept(t *testing.T) {
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
isAccepted := AwsIrsaHubDriver.Accept(c.cluster)
isAccepted := awsIrsaHubDriver.Accept(c.cluster)
if c.isAccepted != isAccepted {
t.Errorf("expect %t, but %t", c.isAccepted, isAccepted)
}
@@ -206,6 +208,385 @@ func TestRenderTemplates(t *testing.T) {
}
}
func TestDeleteIAMRoleAndPolicy(t *testing.T) {
type args struct {
ctx context.Context
withAPIOptionsFunc func(*middleware.Stack) error
}
cases := []struct {
name string
args args
managedClusterAnnotations map[string]string
want error
wantErr bool
}{
{
name: "test delete IAM Role and policy",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: mockSuccessfulDeletionBehaviour,
},
managedClusterAnnotations: map[string]string{
"agent.open-cluster-management.io/managed-cluster-iam-role-suffix": "960c4e56c25ba0b571ddcdaa7edc943e",
"agent.open-cluster-management.io/managed-cluster-arn": "arn:aws:eks:us-west-2:123456789012:cluster/spoke-cluster",
},
want: nil,
wantErr: false,
},
{
name: "test delete IAM Role and policy with NoSuchEntity in DeleteRole",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: func(stack *middleware.Stack) error {
err := mockSuccessfulDeletionBehaviour(stack)
if err != nil {
return err
}
return stack.Finalize.Add(
middleware.FinalizeMiddlewareFunc(
"DeleteRoleOrDeletePolicyOrDetachPolicyMock3",
func(ctx context.Context, input middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
if middleware.GetOperationName(ctx) == "DetachRolePolicy" {
return middleware.FinalizeOutput{
Result: &iam.DetachRolePolicyOutput{},
}, middleware.Metadata{}, fmt.Errorf("failed to detach IAM policy from role, NoSuchEntity")
}
return next.HandleFinalize(ctx, input)
},
),
middleware.Before,
)
},
},
managedClusterAnnotations: map[string]string{
"agent.open-cluster-management.io/managed-cluster-iam-role-suffix": "960c4e56c25ba0b571ddcdaa7edc943e",
"agent.open-cluster-management.io/managed-cluster-arn": "arn:aws:eks:us-west-2:123456789012:cluster/spoke-cluster",
},
want: nil,
wantErr: false,
},
{
name: "test delete IAM Role and policy with NoSuchEntity in DeletePolicy",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: func(stack *middleware.Stack) error {
err := mockSuccessfulDeletionBehaviour(stack)
if err != nil {
return err
}
return stack.Finalize.Add(
middleware.FinalizeMiddlewareFunc(
"DeleteRoleOrDeletePolicyOrDetachPolicyMock3",
func(ctx context.Context, input middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
if middleware.GetOperationName(ctx) == "DeletePolicy" {
return middleware.FinalizeOutput{
Result: nil,
}, middleware.Metadata{}, fmt.Errorf("failed to delete IAM policy, NoSuchEntity")
}
return next.HandleFinalize(ctx, input)
},
),
middleware.Before,
)
},
},
managedClusterAnnotations: map[string]string{
"agent.open-cluster-management.io/managed-cluster-iam-role-suffix": "960c4e56c25ba0b571ddcdaa7edc943e",
"agent.open-cluster-management.io/managed-cluster-arn": "arn:aws:eks:us-west-2:123456789012:cluster/spoke-cluster",
},
want: nil,
wantErr: false,
},
{
name: "test delete IAM Role and policy with NoSuchEntity in DeleteRole",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: func(stack *middleware.Stack) error {
err := mockSuccessfulDeletionBehaviour(stack)
if err != nil {
return err
}
return stack.Finalize.Add(
middleware.FinalizeMiddlewareFunc(
"DeleteRoleOrDeletePolicyOrDetachPolicyMock3",
func(ctx context.Context, input middleware.FinalizeInput, next middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
if middleware.GetOperationName(ctx) == "DeleteRole" {
return middleware.FinalizeOutput{
Result: nil,
}, middleware.Metadata{}, fmt.Errorf("failed to delete IAM role, NoSuchEntity")
}
return next.HandleFinalize(ctx, input)
},
),
middleware.Before,
)
},
},
managedClusterAnnotations: map[string]string{
"agent.open-cluster-management.io/managed-cluster-iam-role-suffix": "960c4e56c25ba0b571ddcdaa7edc943e",
"agent.open-cluster-management.io/managed-cluster-arn": "arn:aws:eks:us-west-2:123456789012:cluster/spoke-cluster",
},
want: nil,
wantErr: false,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
os.Setenv("AWS_ACCESS_KEY_ID", "test")
os.Setenv("AWS_SECRET_ACCESS_KEY", "test")
os.Setenv("AWS_ACCOUNT_ID", "test")
cfg, err := config.LoadDefaultConfig(
tt.args.ctx,
config.WithAPIOptions([]func(*middleware.Stack) error{tt.args.withAPIOptionsFunc}),
)
if err != nil {
t.Fatal(err)
}
managedCluster := testinghelpers.NewManagedCluster()
managedCluster.Annotations = tt.managedClusterAnnotations
roleName, _, _, policyArn, err := getRoleAndPolicyArn(tt.args.ctx, managedCluster, cfg)
if err != nil {
t.Errorf("Error getting role and policy Arn.")
return
}
err = deleteIAMRoleAndPolicy(tt.args.ctx, cfg, roleName, policyArn)
if (err != nil) != tt.wantErr {
t.Errorf("error = %#v, wantErr %#v", err, tt.wantErr)
return
}
if tt.wantErr && err.Error() != tt.want.Error() {
t.Errorf("err = %#v, want %#v", err, tt.want)
}
})
}
}
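// mockSuccessfulDeletionBehaviour wires AWS SDK middleware that validates request parameters at the Initialize step and returns successful mock responses for the IAM/EKS delete operations at the Finalize step, so the tests never call AWS.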
func mockSuccessfulDeletionBehaviour(stack *middleware.Stack) error {
isValidRoleName := regexp.MustCompile(`^[A-Za-z0-9_+=,.@-]+$`).MatchString
isValidClusterName := regexp.MustCompile(`^[0-9A-Za-z][A-Za-z0-9-_]*$`).MatchString
err := stack.Initialize.Add(
middleware.InitializeMiddlewareFunc(
"DeleteRoleOrDeletePolicyOrDetachPolicyMock1",
func(ctx context.Context, input middleware.InitializeInput, next middleware.InitializeHandler) (middleware.InitializeOutput, middleware.Metadata, error) {
switch v := input.Parameters.(type) {
case *iam.DeleteRoleInput:
if !isValidRoleName(*v.RoleName) {
return middleware.InitializeOutput{Result: nil}, middleware.Metadata{}, fmt.Errorf("invalid role name")
}
case *iam.DeletePolicyInput:
if !arn.IsARN(*v.PolicyArn) {
return middleware.InitializeOutput{Result: nil}, middleware.Metadata{}, fmt.Errorf("invalid ARN")
}
case *eks.DeleteAccessEntryInput:
if !isValidClusterName(*v.ClusterName) {
return middleware.InitializeOutput{Result: nil}, middleware.Metadata{}, fmt.Errorf("invalid cluster name")
}
if !arn.IsARN(*v.PrincipalArn) {
return middleware.InitializeOutput{Result: nil}, middleware.Metadata{}, fmt.Errorf("invalid ARN")
}
}
return next.HandleInitialize(ctx, input)
},
),
middleware.Before,
)
if err != nil {
return err
}
err = stack.Finalize.Add(
middleware.FinalizeMiddlewareFunc(
"DeleteRoleOrDeletePolicyOrDetachPolicyMock2",
func(ctx context.Context, input middleware.FinalizeInput, handler middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
operationName := middleware.GetOperationName(ctx)
switch operationName {
case "DeleteRole":
return middleware.FinalizeOutput{
Result: &iam.DeleteRoleOutput{},
}, middleware.Metadata{}, nil
case "DeletePolicy":
return middleware.FinalizeOutput{
Result: &iam.DeletePolicyOutput{},
}, middleware.Metadata{}, nil
case "DetachRolePolicy":
return middleware.FinalizeOutput{
Result: &iam.DetachRolePolicyOutput{},
}, middleware.Metadata{}, nil
case "DeleteAccessEntry":
return middleware.FinalizeOutput{
Result: &eks.DeleteAccessEntryOutput{},
}, middleware.Metadata{}, nil
}
return middleware.FinalizeOutput{}, middleware.Metadata{}, nil
},
),
middleware.Before,
)
if err != nil {
return err
}
return nil
}
func TestDeleteAccessEntry(t *testing.T) {
type args struct {
ctx context.Context
withAPIOptionsFunc func(*middleware.Stack) error
}
cases := []struct {
name string
hubClusterName string
args args
want error
wantErr bool
}{
{
name: "test delete Access Entry",
hubClusterName: "hub",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: mockSuccessfulDeletionBehaviour,
},
want: nil,
wantErr: false,
},
{
name: "test delete Access Entry error due to cluster name being an ARN",
hubClusterName: "arn:aws:eks:us-west-2:123456789012:cluster/hub", // Not a cluster name, it is an ARN
args: args{
ctx: context.Background(),
withAPIOptionsFunc: mockSuccessfulDeletionBehaviour,
},
want: fmt.Errorf("operation error EKS: DeleteAccessEntry, invalid cluster name"),
wantErr: true,
},
{
name: "test delete Access Entry error",
hubClusterName: "hub",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: func(stack *middleware.Stack) error {
return stack.Finalize.Add(
middleware.FinalizeMiddlewareFunc(
"DeleteAccessEntryErrorMock",
func(ctx context.Context, input middleware.FinalizeInput, handler middleware.FinalizeHandler) (middleware.FinalizeOutput, middleware.Metadata, error) {
operationName := middleware.GetOperationName(ctx)
if operationName == "DeleteAccessEntry" {
return middleware.FinalizeOutput{
Result: nil,
}, middleware.Metadata{}, fmt.Errorf("failed to delete access entry")
}
return middleware.FinalizeOutput{}, middleware.Metadata{}, nil
},
),
middleware.Before,
)
},
},
want: fmt.Errorf("operation error EKS: DeleteAccessEntry, failed to delete access entry"),
wantErr: true,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
cfg, err := config.LoadDefaultConfig(
tt.args.ctx,
config.WithAPIOptions([]func(*middleware.Stack) error{tt.args.withAPIOptionsFunc}),
)
if err != nil {
t.Fatal(err)
}
eksClient := eks.NewFromConfig(cfg)
principalArn := "arn:aws:iam::123456789012:role/TestRole"
err = deleteAccessEntry(tt.args.ctx, eksClient, principalArn, tt.hubClusterName)
if (err != nil) != tt.wantErr {
t.Errorf("error = %#v, wantErr %#v", err, tt.wantErr)
return
}
if tt.wantErr && err.Error() != tt.want.Error() {
t.Errorf("err = %#v, want %#v", err, tt.want)
}
})
}
}
func TestCleanup(t *testing.T) {
type args struct {
ctx context.Context
withAPIOptionsFunc func(*middleware.Stack) error
}
cases := []struct {
name string
cluster *clusterv1.ManagedCluster
args args
want error
wantErr bool
}{
{
name: "test Cleanup",
cluster: &clusterv1.ManagedCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "managed-cluster",
Annotations: map[string]string{
"agent.open-cluster-management.io/managed-cluster-arn": "arn:aws:eks:us-west-2:123456789012:cluster/managed-cluster1",
"agent.open-cluster-management.io/managed-cluster-iam-role-suffix": "7f8141296c75f2871e3d030f85c35692",
},
},
},
args: args{
ctx: context.Background(),
withAPIOptionsFunc: mockSuccessfulDeletionBehaviour,
},
want: nil,
wantErr: false,
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
cfg, err := config.LoadDefaultConfig(
tt.args.ctx,
config.WithAPIOptions([]func(*middleware.Stack) error{tt.args.withAPIOptionsFunc}),
)
if err != nil {
t.Fatal(err)
}
awsIrsaHubDriver, err := NewAWSIRSAHubDriver(context.Background(), "arn:aws:eks:us-west-2:123456789012:cluster/hub-cluster", []string{})
if err != nil {
t.Errorf("error creating AWSIRSAHubDriver")
return
}
awsIrsaHubDriver.(*AWSIRSAHubDriver).cfg = cfg
err = awsIrsaHubDriver.Cleanup(tt.args.ctx, tt.cluster)
if (err != nil) != tt.wantErr {
t.Errorf("error = %#v, wantErr %#v", err, tt.wantErr)
return
}
if tt.wantErr && err.Error() != tt.want.Error() {
t.Errorf("err = %#v, want %#v", err, tt.want)
}
})
}
}
func TestCreateIAMRoleAndPolicy(t *testing.T) {
type args struct {
ctx context.Context
@@ -267,7 +648,7 @@ func TestCreateIAMRoleAndPolicy(t *testing.T) {
wantErr: false,
},
{
name: "test invalid hubclusrterarn passed during join.",
name: "test invalid hubclusterarn passed during join.",
args: args{
ctx: context.Background(),
withAPIOptionsFunc: func(stack *middleware.Stack) error {