🛠️ Improve AWS EKS support

- detect which EKS version to use
  (instead of hard-coding it in the TF config)
- do not issue a CSR on EKS
  (because EKS disables the kubernetes.io/kube-apiserver-client signer,
  so the CSR would never be signed; see the detection sketch below)
- automatically install a StorageClass on EKS
  (because the EBS CSI addon doesn't install one by default)
- put EKS clusters in the default VPC
  (instead of creating one VPC per cluster,
  since there is a default limit of 5 VPCs per region)
Author: Jérôme Petazzoni
Date: 2025-10-25 11:26:13 +02:00
Parent: 6d8ae7132d
Commit: f25abf663b
4 changed files with 136 additions and 62 deletions
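Both the "skip the CSR" and "install a StorageClass" changes hinge on the same EKS detection: the presence of the aws-ebs-csi-driver DaemonSet in kube-system (see the data source in the diff below). Here is a condensed sketch of that pattern, assuming a hypothetical local named is_eks_${index}; the diff itself inlines the length(...) expression in each count instead:

# Sketch only, not part of the diff: factor the EKS check into a local.
locals {
  is_eks_${index} = length(data.kubernetes_resources.ebs_csi_node_${index}.objects) > 0
}

# EKS only: create a default StorageClass for the EBS CSI driver.
resource "kubernetes_storage_class" "ebs_csi_${index}" {
  count = local.is_eks_${index} ? 1 : 0
  # ...
}

# Everywhere except EKS: issue the cluster-admin CSR.
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
  count = local.is_eks_${index} ? 0 : 1
  # ...
}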


@@ -2,7 +2,7 @@ terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "2.16.1"
version = "~> 2.38.0"
}
helm = {
source = "hashicorp/helm"
@@ -107,6 +107,31 @@ resource "helm_release" "metrics_server_${index}" {
]
}
# As of October 2025, the ebs-csi-driver addon (which is used on EKS
# to provision persistent volumes) doesn't automatically create a
# StorageClass. Here, we're trying to detect the DaemonSet created
# by the ebs-csi-driver, and if we find it, we create the corresponding
# StorageClass.
data "kubernetes_resources" "ebs_csi_node_${index}" {
provider = kubernetes.cluster_${index}
api_version = "apps/v1"
kind = "DaemonSet"
label_selector = "app.kubernetes.io/name=aws-ebs-csi-driver"
namespace = "kube-system"
}
resource "kubernetes_storage_class" "ebs_csi_${index}" {
count = (length(data.kubernetes_resources.ebs_csi_node_${index}.objects) > 0) ? 1 : 0
provider = kubernetes.cluster_${index}
metadata {
name = "ebs-csi"
annotations = {
"storageclass.kubernetes.io/is-default-class" = "true"
}
}
storage_provisioner = "ebs.csi.aws.com"
}
# This section here deserves a little explanation.
#
# When we access a cluster with shpod (either through SSH or code-server)
@@ -136,8 +161,14 @@ resource "helm_release" "metrics_server_${index}" {
# Lastly - in the ConfigMap we actually put both the original kubeconfig,
# and the one where we injected our new user (just in case we want to
# use or look at the original for any reason).
#
# One more thing: the kubernetes.io/kube-apiserver-client signer is
# disabled on EKS, so... we don't generate that ConfigMap on EKS.
# To detect if we're on EKS, we're looking for the ebs-csi-node DaemonSet.
# (Which means that the detection will break if the ebs-csi addon is missing.)
resource "kubernetes_config_map" "kubeconfig_${index}" {
count = (length(data.kubernetes_resources.ebs_csi_node_${index}.objects) > 0) ? 0 : 1
provider = kubernetes.cluster_${index}
metadata {
name = "kubeconfig"
@@ -163,7 +194,7 @@ resource "kubernetes_config_map" "kubeconfig_${index}" {
- name: cluster-admin
user:
client-key-data: $${base64encode(tls_private_key.cluster_admin_${index}.private_key_pem)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}[0].certificate)}
EOT
}
}
@@ -201,6 +232,7 @@ resource "kubernetes_cluster_role_binding" "shpod_cluster_admin_${index}" {
}
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
count = (length(data.kubernetes_resources.ebs_csi_node_${index}.objects) > 0) ? 0 : 1
provider = kubernetes.cluster_${index}
metadata {
name = "cluster-admin"

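Note the [0] added to the client-certificate-data line above: once the CSR resource is gated with count, Terraform treats it as a list, so its attributes must be referenced by index.

# Illustration only (not part of the diff):
#   without count:  kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate
#   with count:     kubernetes_certificate_signing_request_v1.cluster_admin_${index}[0].certificate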

@@ -1,60 +1,45 @@
# Taken from:
# https://github.com/hashicorp/learn-terraform-provision-eks-cluster/blob/main/main.tf
data "aws_availability_zones" "available" {}
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "3.19.0"
name = var.cluster_name
cidr = "10.0.0.0/16"
azs = slice(data.aws_availability_zones.available.names, 0, 3)
private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"]
enable_nat_gateway = true
single_nat_gateway = true
enable_dns_hostnames = true
public_subnet_tags = {
"kubernetes.io/cluster/${var.cluster_name}" = "shared"
"kubernetes.io/role/elb" = 1
}
private_subnet_tags = {
"kubernetes.io/cluster/${var.cluster_name}" = "shared"
"kubernetes.io/role/internal-elb" = 1
}
data "aws_eks_cluster_versions" "_" {
default_only = true
}
module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "19.5.1"
cluster_name = var.cluster_name
cluster_version = "1.24"
vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets
cluster_endpoint_public_access = true
eks_managed_node_group_defaults = {
ami_type = "AL2_x86_64"
source = "terraform-aws-modules/eks/aws"
version = "~> 21.0"
name = var.cluster_name
kubernetes_version = data.aws_eks_cluster_versions._.cluster_versions[0].cluster_version
vpc_id = local.vpc_id
subnet_ids = local.subnet_ids
endpoint_public_access = true
enable_cluster_creator_admin_permissions = true
upgrade_policy = {
# The default policy is EXTENDED, which incurs additional costs
# when running an old control plane. We don't advise running old
# control planes, but we also don't want to incur costs if an
# old version is chosen accidentally.
support_type = "STANDARD"
}
addons = {
coredns = {}
eks-pod-identity-agent = {
before_compute = true
}
kube-proxy = {}
vpc-cni = {
before_compute = true
}
aws-ebs-csi-driver = {
service_account_role_arn = module.irsa-ebs-csi.iam_role_arn
}
}
eks_managed_node_groups = {
one = {
name = "node-group-one"
x86 = {
name = "x86"
instance_types = [local.node_size]
min_size = var.min_nodes_per_pool
max_size = var.max_nodes_per_pool
desired_size = var.min_nodes_per_pool
min_size = var.min_nodes_per_pool
max_size = var.max_nodes_per_pool
desired_size = var.min_nodes_per_pool
}
}
}
@@ -66,7 +51,7 @@ data "aws_iam_policy" "ebs_csi_policy" {
module "irsa-ebs-csi" {
source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc"
version = "4.7.0"
version = "~> 5.39.0"
create_role = true
role_name = "AmazonEKSTFEBSCSIRole-${module.eks.cluster_name}"
@@ -75,13 +60,9 @@ module "irsa-ebs-csi" {
oidc_fully_qualified_subjects = ["system:serviceaccount:kube-system:ebs-csi-controller-sa"]
}
resource "aws_eks_addon" "ebs-csi" {
cluster_name = module.eks.cluster_name
addon_name = "aws-ebs-csi-driver"
addon_version = "v1.5.2-eksbuild.1"
service_account_role_arn = module.irsa-ebs-csi.iam_role_arn
tags = {
"eks_addon" = "ebs-csi"
"terraform" = "true"
}
resource "aws_vpc_security_group_ingress_rule" "_" {
security_group_id = module.eks.node_security_group_id
cidr_ipv4 = "0.0.0.0/0"
ip_protocol = -1
description = "Allow all traffic to Kubernetes nodes (so that we can use NodePorts, hostPorts, etc.)"
}
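For comparison, a more restrictive rule (a sketch, not what this commit does) could open only the default TCP NodePort range; that would no longer cover arbitrary hostPorts, which is why the commit opens everything instead:

# Hypothetical alternative: allow only the default NodePort range (TCP 30000-32767).
resource "aws_vpc_security_group_ingress_rule" "nodeports_only" {
  security_group_id = module.eks.node_security_group_id
  cidr_ipv4         = "0.0.0.0/0"
  ip_protocol       = "tcp"
  from_port         = 30000
  to_port           = 32767
  description       = "Allow TCP NodePorts only"
}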


@@ -2,7 +2,7 @@ terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 4.47.0"
version = "~> 6.17.0"
}
}
}
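As a reminder, the "~>" (pessimistic) constraints used in this diff only allow the rightmost version component to increase:

# Terraform version constraint semantics (for reference):
#   version = "~> 6.17.0"   # >= 6.17.0, < 6.18.0 (patch releases only)
#   version = "~> 6.17"     # >= 6.17.0, < 7.0.0  (minor releases too)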


@@ -0,0 +1,61 @@
# OK, we have two options here.
# 1. Create our own VPC
# - Pros: provides good isolation from other stuff deployed in the
# AWS account; makes sure that we don't interact with
# existing security groups, subnets, etc.
# - Cons: by default, there is a quota of 5 VPCs per region, so
# we can only deploy 5 clusters
# 2. Use the default VPC
# - Pros/cons: the opposite :)
variable "use_default_vpc" {
type = bool
default = true
}
data "aws_vpc" "default" {
default = true
}
data "aws_subnets" "default" {
filter {
name = "vpc-id"
values = [data.aws_vpc.default.id]
}
}
data "aws_availability_zones" "available" {}
module "vpc" {
count = var.use_default_vpc ? 0 : 1
source = "terraform-aws-modules/vpc/aws"
version = "~> 6.0"
name = var.cluster_name
cidr = "10.0.0.0/16"
azs = slice(data.aws_availability_zones.available.names, 0, 3)
private_subnets = ["10.0.11.0/24", "10.0.12.0/24", "10.0.13.0/24"]
public_subnets = ["10.0.21.0/24", "10.0.22.0/24", "10.0.23.0/24"]
enable_nat_gateway = true
single_nat_gateway = true
enable_dns_hostnames = true
map_public_ip_on_launch = true
public_subnet_tags = {
"kubernetes.io/cluster/${var.cluster_name}" = "shared"
"kubernetes.io/role/elb" = 1
}
private_subnet_tags = {
"kubernetes.io/cluster/${var.cluster_name}" = "shared"
"kubernetes.io/role/internal-elb" = 1
}
}
locals {
vpc_id = var.use_default_vpc ? data.aws_vpc.default.id : module.vpc[0].vpc_id
subnet_ids = var.use_default_vpc ? data.aws_subnets.default.ids : module.vpc[0].public_subnets
}