Compare commits

..

1 Commits

Author SHA1 Message Date
Jérôme Petazzoni
9bad3f26f3 ☸️ Kubernetes December 2021 content 2021-12-10 16:27:58 +01:00
193 changed files with 1324 additions and 5123 deletions

View File

@@ -3,12 +3,6 @@
# - no actual persistence
# - scaling down to 1 will break the cluster
# - pods may be colocated
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -34,6 +28,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -62,7 +61,7 @@ spec:
serviceAccountName: consul
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -2,12 +2,6 @@
# There is still no actual persistence, but:
# - podAntiaffinity prevents pod colocation
# - clusters works when scaling down to 1 (thanks to lifecycle hook)
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -33,6 +27,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -69,7 +68,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -1,11 +1,5 @@
# Even better Consul cluster.
# That one uses a volumeClaimTemplate to achieve true persistence.
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -31,6 +25,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -76,7 +75,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
volumeMounts:
- name: data
mountPath: /consul/data

View File

@@ -1,28 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
match:
resources:
kinds:
- Service
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
number: 80
path: /
pathType: Prefix

View File

@@ -1,32 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
match:
resources:
kinds:
- Service
preconditions:
- key: "{{request.object.spec.ports[0].name}}"
operator: Equals
value: http
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
name: http
path: /
pathType: Prefix

View File

@@ -1,37 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
context:
- name: configmap
configMap:
name: ingress-domain-name
namespace: "{{request.object.metadata.namespace}}"
match:
resources:
kinds:
- Service
preconditions:
- key: "{{request.object.spec.ports[0].name}}"
operator: Equals
value: http
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.{{configmap.data.domain}}"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
name: http
path: /
pathType: Prefix

View File

@@ -17,12 +17,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: blue
template:
metadata:
labels:
app: rainbow
app: color
color: blue
spec:
containers:
@@ -33,7 +33,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: blue
name: color
namespace: blue
@@ -44,7 +44,7 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: blue
type: ClusterIP
---
@@ -66,12 +66,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: green
template:
metadata:
labels:
app: rainbow
app: color
color: green
spec:
containers:
@@ -82,7 +82,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: green
name: color
namespace: green
@@ -93,7 +93,7 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: green
type: ClusterIP
---
@@ -115,12 +115,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: red
template:
metadata:
labels:
app: rainbow
app: color
color: red
spec:
containers:
@@ -131,7 +131,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: red
name: color
namespace: red
@@ -142,6 +142,6 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: red
type: ClusterIP

View File

@@ -1,107 +1,17 @@
⚠️ This is work in progress. The UX needs to be improved,
and the docs could be better.
This directory contains a Terraform configuration to deploy
a bunch of Kubernetes clusters on various cloud providers,
using their respective managed Kubernetes products.
a bunch of Kubernetes clusters on various cloud providers, using their respective managed Kubernetes products.
## With shell wrapper
This is the recommended use. It makes it easy to start N clusters
on any provider. It will create a directory with a name like
`tag-YYYY-MM-DD-HH-MM-SS-SEED-PROVIDER`, copy the Terraform configuration
to that directory, then create the clusters using that configuration.
1. One-time setup: configure provider authentication for the provider(s) that you wish to use.
- Digital Ocean:
```bash
doctl auth init
```
- Google Cloud Platform: you will need to create a project named `prepare-tf`
and enable the relevant APIs for this project (sorry, if you're new to GCP,
this sounds vague; but if you're familiar with it you know what to do; if you
want to change the project name you can edit the Terraform configuration)
- Linode:
```bash
linode-cli configure
```
- Oracle Cloud: FIXME
(set up `oci` through the `oci-cli` Python package)
- Scaleway: run `scw init`
2. Optional: set number of clusters, cluster size, and region.
By default, 1 cluster will be configured, with 2 nodes, and auto-scaling up to 5 nodes.
If you want, you can override these parameters, with the following variables.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
export TF_VAR_max_nodes_per_pool=4
export TF_VAR_location=xxx
```
The `location` variable is optional. Each provider should have a default value.
The value of the `location` variable is provider-specific. Examples:
| Provider | Example value | How to see possible values
|---------------|-------------------|---------------------------
| Digital Ocean | `ams3` | `doctl compute region list`
| Google Cloud | `europe-north1-a` | `gcloud compute zones list`
| Linode | `eu-central` | `linode-cli regions list`
| Oracle Cloud | `eu-stockholm-1` | `oci iam region list`
You can also specify multiple locations, and then they will be
used in round-robin fashion.
For example, with Google Cloud, since the default quotas are very
low (my account is limited to 8 public IP addresses per zone, and
my requests to increase that quota were denied) you can do the
following:
```bash
export TF_VAR_location=$(gcloud compute zones list --format=json | jq -r .[].name | grep ^europe)
```
Then when you apply, clusters will be created across all available
zones in Europe. (When I write this, there are 20+ zones in Europe,
so even with my quota, I can create 40 clusters.)
3. Run!
```bash
./run.sh <providername>
```
(If you don't specify a provider name, it will list available providers.)
4. Shutting down
Go to the directory that was created by the previous step (`tag-YYYY-MM...`)
and run `terraform destroy`.
You can also run `./clean.sh` which will destroy ALL clusters deployed by the previous run script.
## Without shell wrapper
Expert mode.
Useful to run steps sperarately, and/or when working on the Terraform configurations.
To use it:
1. Select the provider you wish to use.
Go to the `source` directory and edit `main.tf`.
Change the `source` attribute of the `module "clusters"` section.
Check the content of the `modules` directory to see available choices.
```bash
vim main.tf
```
2. Initialize the provider.
```bash
@@ -110,20 +20,24 @@ terraform init
3. Configure provider authentication.
See steps above, and add the following extra steps:
- Digital Coean:
```bash
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
```
- Linode:
```bash
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
```
- Digital Ocean: `export DIGITALOCEAN_ACCESS_TOKEN=...`
(check `~/.config/doctl/config.yaml` for the token)
- Linode: `export LINODE_TOKEN=...`
(check `~/.config/linode-cli` for the token)
- Oracle Cloud: it should use `~/.oci/config`
- Scaleway: run `scw init`
4. Decide how many clusters and how many nodes per clusters you want.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
# Optional (will enable autoscaler when available)
export TF_VAR_max_nodes_per_pool=4
# Optional (will only work on some providers)
export TF_VAR_enable_arm_pool=true
```
5. Provision clusters.
```bash
@@ -132,7 +46,7 @@ terraform apply
6. Perform second stage provisioning.
This will install an SSH server on the clusters.
This will install a SSH server on the clusters.
```bash
cd stage2
@@ -158,5 +72,5 @@ terraform destroy
9. Clean up stage2.
```bash
rm stage2/terraform.tfstate*
rm stage/terraform.tfstate*
```

View File

@@ -1,9 +0,0 @@
#!/bin/sh
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
for T in tag-*; do
(
cd $T
terraform apply -destroy -auto-approve && mv ../$T ../deleted$T
)
done

16
prepare-tf/locals.tf Normal file
View File

@@ -0,0 +1,16 @@
resource "random_string" "_" {
length = 5
special = false
upper = false
}
resource "time_static" "_" {}
locals {
tag = format("tf-%s-%s", formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339), random_string._.result)
# Common tags to be assigned to all resources
common_tags = [
"created-by=terraform",
"tag=${local.tag}"
]
}

View File

@@ -1,5 +1,5 @@
module "clusters" {
source = "./modules/PROVIDER"
source = "./modules/linode"
for_each = local.clusters
cluster_name = each.value.cluster_name
min_nodes_per_pool = var.min_nodes_per_pool
@@ -7,24 +7,22 @@ module "clusters" {
enable_arm_pool = var.enable_arm_pool
node_size = var.node_size
common_tags = local.common_tags
location = each.value.location
}
locals {
clusters = {
for i in range(101, 101 + var.how_many_clusters) :
i => {
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
#dashdash_kubeconfig = format("--kubeconfig=./stage2/kubeconfig.%03d", i)
externalips_path = format("./stage2/externalips.%03d", i)
flags_path = format("./stage2/flags.%03d", i)
location = local.locations[i % length(local.locations)]
}
}
}
resource "local_file" "stage2" {
filename = "./stage2/main.tf"
filename = "./stage2/main.tf"
file_permission = "0644"
content = templatefile(
"./stage2.tmpl",
@@ -32,15 +30,6 @@ resource "local_file" "stage2" {
)
}
resource "local_file" "flags" {
for_each = local.clusters
filename = each.value.flags_path
file_permission = "0600"
content = <<-EOT
has_metrics_server: ${module.clusters[each.key].has_metrics_server}
EOT
}
resource "local_file" "kubeconfig" {
for_each = local.clusters
filename = each.value.kubeconfig_path
@@ -70,8 +59,8 @@ resource "null_resource" "wait_for_nodes" {
}
data "external" "externalips" {
for_each = local.clusters
depends_on = [null_resource.wait_for_nodes]
for_each = local.clusters
depends_on = [ null_resource.wait_for_nodes ]
program = [
"sh",
"-c",

View File

@@ -1,13 +1,12 @@
resource "digitalocean_kubernetes_cluster" "_" {
name = var.cluster_name
tags = var.common_tags
# Region is mandatory, so let's provide a default value.
region = var.location != null ? var.location : "nyc1"
name = var.cluster_name
tags = local.common_tags
region = var.region
version = var.k8s_version
node_pool {
name = "x86"
tags = var.common_tags
name = "dok-x86"
tags = local.common_tags
size = local.node_type
auto_scale = true
min_nodes = var.min_nodes_per_pool

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = digitalocean_kubernetes_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -8,6 +8,10 @@ variable "common_tags" {
default = []
}
locals {
common_tags = [for tag in var.common_tags : replace(tag, "=", "-")]
}
variable "node_size" {
type = string
default = "M"
@@ -44,9 +48,9 @@ locals {
# To view supported regions, run:
# doctl compute region list
variable "location" {
variable "region" {
type = string
default = null
default = "nyc1"
}
# To view supported versions, run:

View File

@@ -1,8 +1,7 @@
resource "linode_lke_cluster" "_" {
label = var.cluster_name
tags = var.common_tags
# "region" is mandatory, so let's provide a default value if none was given.
region = var.location != null ? var.location : "eu-central"
label = var.cluster_name
tags = var.common_tags
region = var.region
k8s_version = var.k8s_version
pool {

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = linode_lke_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -42,11 +42,11 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported regions, run:
# To view supported versions, run:
# linode-cli regions list
variable "location" {
variable "region" {
type = string
default = null
default = "us-east"
}
# To view supported versions, run:

View File

@@ -1,7 +1,6 @@
resource "oci_identity_compartment" "_" {
name = var.cluster_name
description = var.cluster_name
enable_delete = true
name = var.cluster_name
description = var.cluster_name
}
locals {

View File

@@ -9,7 +9,3 @@ output "kubeconfig" {
output "cluster_id" {
value = oci_containerengine_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -70,13 +70,6 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported regions, run:
# oci iam region list | jq .data[].name
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# oci ce cluster-options get --cluster-option-id all | jq -r '.data["kubernetes-versions"][]'
variable "k8s_version" {

View File

@@ -1,6 +1,5 @@
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
region = var.location
tags = var.common_tags
version = var.k8s_version
cni = var.cni
@@ -9,7 +8,7 @@ resource "scaleway_k8s_cluster" "_" {
resource "scaleway_k8s_pool" "_" {
cluster_id = scaleway_k8s_cluster._.id
name = "x86"
name = "scw-x86"
tags = var.common_tags
node_type = local.node_type
size = var.min_nodes_per_pool

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = scaleway_k8s_cluster._.id
}
output "has_metrics_server" {
value = sort([var.k8s_version, "1.22"])[0] == "1.22"
}

View File

@@ -47,12 +47,7 @@ variable "cni" {
default = "cilium"
}
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# See supported versions with:
# scw k8s version list -o json | jq -r .[].name
variable "k8s_version" {
type = string

View File

@@ -1,49 +0,0 @@
#!/bin/sh
set -e
TIME=$(which time)
PROVIDER=$1
[ "$PROVIDER" ] || {
echo "Please specify a provider as first argument, or 'ALL' for parallel mode."
echo "Available providers:"
ls -1 source/modules
exit 1
}
[ "$TAG" ] || {
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
RANDOMTAG=$(base64 /dev/urandom | tr A-Z a-z | tr -d /+ | head -c5)
export TAG=tag-$TIMESTAMP-$RANDOMTAG
}
[ "$PROVIDER" = "ALL" ] && {
for PROVIDER in $(ls -1 source/modules); do
$TERMINAL -T $TAG-$PROVIDER -e sh -c "
export TAG=$TAG-$PROVIDER
$0 $PROVIDER
cd $TAG-$PROVIDER
bash
" &
done
exit 0
}
[ -d "source/modules/$PROVIDER" ] || {
echo "Provider '$PROVIDER' not found."
echo "Available providers:"
ls -1 source/modules
exit 1
}
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
cp -a source $TAG
cd $TAG
cp -r modules/$PROVIDER modules/PROVIDER
$TIME -o time.1.init terraform init
$TIME -o time.2.stage1 terraform apply -auto-approve
cd stage2
$TIME -o ../time.3.init terraform init
$TIME -o ../time.4.stage2 terraform apply -auto-approve

View File

@@ -1,19 +0,0 @@
resource "random_string" "_" {
length = 4
number = false
special = false
upper = false
}
resource "time_static" "_" {}
locals {
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
tag = random_string._.result
# Common tags to be assigned to all resources
common_tags = [
"created-by-terraform",
format("created-at-%s", local.timestamp),
format("created-for-%s", local.tag)
]
}

View File

@@ -1,65 +0,0 @@
resource "google_container_cluster" "_" {
name = var.cluster_name
project = local.project
location = local.location
min_master_version = var.k8s_version
# To deploy private clusters, uncomment the section below,
# and uncomment the block in network.tf.
# Private clusters require extra resources (Cloud NAT,
# router, network, subnet) and the quota for some of these
# resources is fairly low on GCP; so if you want to deploy
# a lot of private clusters (more than 10), you can use these
# blocks as a base but you will probably have to refactor
# things quite a bit (you will at least need to define a single
# shared router and use it across all the clusters).
/*
network = google_compute_network._.name
subnetwork = google_compute_subnetwork._.name
private_cluster_config {
enable_private_nodes = true
# This must be set to "false".
# (Otherwise, access to the public endpoint is disabled.)
enable_private_endpoint = false
# This must be set to a /28.
# I think it shouldn't collide with the pod network subnet.
master_ipv4_cidr_block = "10.255.255.0/28"
}
# Private clusters require "VPC_NATIVE" networking mode
# (as opposed to the legacy "ROUTES").
networking_mode = "VPC_NATIVE"
# ip_allocation_policy is required for VPC_NATIVE clusters.
ip_allocation_policy {
# This is the block that will be used for pods.
cluster_ipv4_cidr_block = "10.0.0.0/12"
# The services block is optional
# (GKE will pick one automatically).
#services_ipv4_cidr_block = ""
}
*/
node_pool {
name = "x86"
node_config {
tags = var.common_tags
machine_type = local.node_type
}
initial_node_count = var.min_nodes_per_pool
autoscaling {
min_node_count = var.min_nodes_per_pool
max_node_count = max(var.min_nodes_per_pool, var.max_nodes_per_pool)
}
}
# This is not strictly necessary.
# We'll see if we end up using it.
# (If it is removed, make sure to also remove the corresponding
# key+cert variables from outputs.tf!)
master_auth {
client_certificate_config {
issue_client_certificate = true
}
}
}

View File

@@ -1,38 +0,0 @@
/*
resource "google_compute_network" "_" {
name = var.cluster_name
project = local.project
# The default is to create subnets automatically.
# However, this creates one subnet per zone in all regions,
# which causes a quick exhaustion of the subnet quota.
auto_create_subnetworks = false
}
resource "google_compute_subnetwork" "_" {
name = var.cluster_name
ip_cidr_range = "10.254.0.0/16"
region = local.region
network = google_compute_network._.id
project = local.project
}
resource "google_compute_router" "_" {
name = var.cluster_name
region = local.region
network = google_compute_network._.name
project = local.project
}
resource "google_compute_router_nat" "_" {
name = var.cluster_name
router = google_compute_router._.name
region = local.region
project = local.project
# Everyone in the network is allowed to NAT out.
# (We would change this if we only wanted to allow specific subnets to NAT out.)
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
# Pick NAT addresses automatically.
# (We would change this if we wanted to use specific addresses to NAT out.)
nat_ip_allocate_option = "AUTO_ONLY"
}
*/

View File

@@ -1,35 +0,0 @@
data "google_client_config" "_" {}
output "kubeconfig" {
value = <<-EOT
apiVersion: v1
kind: Config
current-context: ${google_container_cluster._.name}
clusters:
- name: ${google_container_cluster._.name}
cluster:
server: https://${google_container_cluster._.endpoint}
certificate-authority-data: ${google_container_cluster._.master_auth[0].cluster_ca_certificate}
contexts:
- name: ${google_container_cluster._.name}
context:
cluster: ${google_container_cluster._.name}
user: client-token
users:
- name: client-cert
user:
client-key-data: ${google_container_cluster._.master_auth[0].client_key}
client-certificate-data: ${google_container_cluster._.master_auth[0].client_certificate}
- name: client-token
user:
token: ${data.google_client_config._.access_token}
EOT
}
output "cluster_id" {
value = google_container_cluster._.id
}
output "has_metrics_server" {
value = true
}

View File

@@ -1,8 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "4.5.0"
}
}
}

View File

@@ -1,68 +0,0 @@
variable "cluster_name" {
type = string
default = "deployed-with-terraform"
}
variable "common_tags" {
type = list(string)
default = []
}
variable "node_size" {
type = string
default = "M"
}
variable "min_nodes_per_pool" {
type = number
default = 2
}
variable "max_nodes_per_pool" {
type = number
default = 5
}
# FIXME
variable "enable_arm_pool" {
type = bool
default = false
}
variable "node_types" {
type = map(string)
default = {
"S" = "e2-small"
"M" = "e2-medium"
"L" = "e2-standard-2"
}
}
locals {
node_type = var.node_types[var.node_size]
}
# To view supported locations, run:
# gcloud compute zones list
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# gcloud container get-server-config --region=europe-north1 '--format=flattened(channels)'
# But it's also possible to just specify e.g. "1.20" and it figures it out.
variable "k8s_version" {
type = string
default = "1.21"
}
locals {
location = var.location != null ? var.location : "europe-north1-a"
region = replace(local.location, "/-[a-z]$/", "")
# Unfortunately, the following line doesn't work
# (that attribute just returns an empty string)
# so we have to hard-code the project name.
#project = data.google_client_config._.project
project = "prepare-tf"
}

View File

@@ -1,40 +0,0 @@
variable "how_many_clusters" {
type = number
default = 1
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# We map these values to different specific instance types for each provider,
# but the idea is that they shoudl correspond to the following sizes:
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = false
}
variable "location" {
type = string
default = null
}
# TODO: perhaps handle if it's space-separated instead of newline?
locals {
locations = var.location == null ? [null] : split("\n", var.location)
}

View File

@@ -2,7 +2,7 @@ terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "2.7.1"
version = "2.0.3"
}
}
}
@@ -119,11 +119,6 @@ resource "kubernetes_cluster_role_binding" "shpod_${index}" {
name = "shpod"
namespace = "shpod"
}
subject {
api_group = "rbac.authorization.k8s.io"
kind = "Group"
name = "shpod-cluster-admins"
}
}
resource "random_string" "shpod_${index}" {
@@ -140,10 +135,6 @@ provider "helm" {
}
resource "helm_release" "metrics_server_${index}" {
# Some providers pre-install metrics-server.
# Some don't. Let's install metrics-server,
# but only if it's not already installed.
count = yamldecode(file("./flags.${index}"))["has_metrics_server"] ? 0 : 1
provider = helm.cluster_${index}
repository = "https://charts.bitnami.com/bitnami"
chart = "metrics-server"
@@ -191,7 +182,7 @@ resource "kubernetes_config_map" "kubeconfig_${index}" {
- name: cluster-admin
user:
client-key-data: $${base64encode(tls_private_key.cluster_admin_${index}.private_key_pem)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request.cluster_admin_${index}.certificate)}
EOT
}
}
@@ -205,14 +196,11 @@ resource "tls_cert_request" "cluster_admin_${index}" {
private_key_pem = tls_private_key.cluster_admin_${index}.private_key_pem
subject {
common_name = "cluster-admin"
# Note: CSR API v1 doesn't allow issuing certs with "system:masters" anymore.
#organization = "system:masters"
# We'll use this custom group name instead.cluster-admin user.
organization = "shpod-cluster-admins"
organization = "system:masters"
}
}
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
resource "kubernetes_certificate_signing_request" "cluster_admin_${index}" {
provider = kubernetes.cluster_${index}
metadata {
name = "cluster-admin"
@@ -220,7 +208,6 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
spec {
usages = ["client auth"]
request = tls_cert_request.cluster_admin_${index}.cert_request_pem
signer_name = "kubernetes.io/kube-apiserver-client"
}
auto_approve = true
}

28
prepare-tf/variables.tf Normal file
View File

@@ -0,0 +1,28 @@
variable "how_many_clusters" {
type = number
default = 2
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = true
}

View File

@@ -14,9 +14,7 @@ These tools can help you to create VMs on:
- [Docker](https://docs.docker.com/engine/installation/)
- [Docker Compose](https://docs.docker.com/compose/install/)
- [Parallel SSH](https://github.com/lilydjwg/pssh)
(should be installable with `pip install git+https://github.com/lilydjwg/pssh`;
on a Mac, try `brew install pssh`)
- [Parallel SSH](https://code.google.com/archive/p/parallel-ssh/) (on a Mac: `brew install pssh`)
Depending on the infrastructure that you want to use, you also need to install
the CLI that is specific to that cloud. For OpenStack deployments, you will

View File

@@ -314,12 +314,11 @@ _cmd_kube() {
SETTINGS=tags/$TAG/settings.yaml
KUBEVERSION=$(awk '/^kubernetes_version:/ {print $2}' $SETTINGS)
if [ "$KUBEVERSION" ]; then
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION*
Pin-Priority: 1000
EOF"
EXTRA_APTGET="=$KUBEVERSION-00"
EXTRA_KUBEADM="kubernetesVersion: v$KUBEVERSION"
else
EXTRA_APTGET=""
EXTRA_KUBEADM=""
fi
# Install packages
@@ -330,8 +329,7 @@ EOF"
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
sudo apt-get install -qy kubelet kubeadm kubectl &&
sudo apt-mark hold kubelet kubeadm kubectl
sudo apt-get install -qy kubelet$EXTRA_APTGET kubeadm$EXTRA_APTGET kubectl$EXTRA_APTGET &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
@@ -343,11 +341,6 @@ EOF"
sudo swapoff -a"
fi
# Re-enable CRI interface in containerd
pssh "
echo '# Use default parameters for containerd.' | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd"
# Initialize kube control plane
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
@@ -357,38 +350,19 @@ kind: InitConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- token: \$(cat /tmp/token)
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
discovery:
bootstrapToken:
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
token: \$(cat /tmp/token)
unsafeSkipCAVerification: true
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
# The following line is necessary when using Docker.
# It doesn't seem necessary when using containerd.
#cgroupDriver: cgroupfs
cgroupDriver: cgroupfs
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
apiServer:
certSANs:
- \$(cat /tmp/ipv4)
$EXTRA_KUBEADM
EOF
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
sudo kubeadm init --config=/tmp/kubeadm-config.yaml --ignore-preflight-errors=NumCPU
fi"
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
@@ -412,8 +386,8 @@ EOF
pssh --timeout 200 "
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
sudo kubeadm join --config /tmp/kubeadm-config.yaml
TOKEN=\$(ssh $SSHOPTS \$FIRSTNODE cat /tmp/token) &&
sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
fi"
# Install metrics server
@@ -504,7 +478,7 @@ EOF
if [ ! -x /usr/local/bin/kustomize ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kustomize
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
echo complete -C /usr/local/bin/kustomize kustomize | sudo tee /etc/bash_completion.d/kustomize
kustomize version
fi"

View File

@@ -1,22 +1,22 @@
#!/bin/sh
# https://open-api.netlify.com/#tag/dnsZone
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training".
echo ""
echo "Syntax:"
echo "$0 list"
echo "$0 add <name> <ipaddr>"
echo "$0 del <recordid>"
echo "$0 <name> <ipaddr>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu 185.145.250.0"
echo "$0 eu 185.145.250.0"
echo ""
exit 1
}
NAME=$1.container.training
ADDR=$2
NETLIFY_USERID=$(jq .userId < ~/.config/netlify/config.json)
NETLIFY_TOKEN=$(jq -r .users[$NETLIFY_USERID].auth.token < ~/.config/netlify/config.json)
@@ -29,54 +29,19 @@ netlify() {
ZONE_ID=$(netlify dns_zones |
jq -r '.[] | select ( .name == "container.training" ) | .id')
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
_add() {
NAME=$1.container.training
ADDR=$2
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
}
_del() {
RECORD_ID=$1
# OK, since that one is dangerous, I'm putting the whole request explicitly here
http DELETE \
https://api.netlify.com/api/v1/dns_zones/$ZONE_ID/dns_records/$RECORD_ID \
"Authorization:Bearer $NETLIFY_TOKEN"
}
case "$1" in
list)
_list
;;
add)
_add $2 $3
;;
del)
_del $2
;;
*)
echo "Unknown command '$1'."
exit 1
;;
esac
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'

View File

@@ -14,9 +14,7 @@ paper_size: A4
user_login: k8s
user_password: training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
kubernetes_version: 1.18.20
kubernetes_version: 1.19.16
image:

View File

@@ -2,7 +2,7 @@
#/ /kube-halfday.yml.html 200!
#/ /kube-fullday.yml.html 200!
#/ /kube-twodays.yml.html 200!
/ /kube.yml.html 200!
/ /k8s.yml.html 200!
# And this allows to do "git clone https://container.training".
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack

View File

@@ -109,7 +109,7 @@ class: extra-details
- Example: [ctr.run](https://ctr.run/)
.lab[
.exercise[
- Use ctr.run to automatically build a container image and run it:
```bash

View File

@@ -28,7 +28,7 @@ class: self-paced
- Likewise, it will take more than merely *reading* these slides
to make you an expert
- These slides include *tons* of demos, exercises, and examples
- These slides include *tons* of exercises and examples
- They assume that you have access to a machine running Docker

View File

@@ -4,6 +4,8 @@
(we will use the `rng` service in the dockercoins app)
- See what happens when the load increses
- Observe the correct behavior of the readiness probe
(spoiler alert: it involves timeouts!)
(when deploying e.g. an invalid image)
- Observe the behavior of the liveness probe

View File

@@ -2,85 +2,34 @@
- We want to add healthchecks to the `rng` service in dockercoins
- The `rng` service exhibits an interesting behavior under load:
*its latency increases (which will cause probes to time out!)*
- We want to see:
- what happens when the readiness probe fails
- what happens when the liveness probe fails
- how to set "appropriate" probes and probe parameters
---
## Setup
- First, deploy a new copy of dockercoins
(for instance, in a brand new namespace)
- Then, add a readiness probe on the `rng` service
- Pro tip #1: ping (e.g. with `httping`) the `rng` service at all times
- it should initially show a few milliseconds latency
- that will increase when we scale up
- it will also let us detect when the service goes "boom"
- Pro tip #2: also keep an eye on the web UI
---
## Readiness
- Add a readiness probe to `rng`
- this requires editing the pod template in the Deployment manifest
- use a simple HTTP check on the `/` route of the service
- keep all other parameters (timeouts, thresholds...) at their default values
(using a simple HTTP check on the `/` route of the service)
- Check what happens when deploying an invalid image for `rng` (e.g. `alpine`)
*(If the probe was set up correctly, the app will continue to work,
because Kubernetes won't switch over the traffic to the `alpine` containers,
because they don't pass the readiness probe.)*
- Then roll back `rng` to the original image and add a liveness probe
(with the same parameters)
- Scale up the `worker` service (to 15+ workers) and observe
- What happens?
---
## Readiness under load
## Goal
- Then roll back `rng` to the original image
- *Before* adding the readiness probe:
- Check what happens when we scale up the `worker` Deployment to 15+ workers
updating the image of the `rng` service with `alpine` should break it
(get the latency above 1 second)
- *After* adding the readiness probe:
*(We should now observe intermittent unavailability of the service, i.e. every
30 seconds it will be unreachable for a bit, then come back, then go away again, etc.)*
updating the image of the `rng` service with `alpine` shouldn't break it
---
- When adding the liveness probe, nothing special should happen
## Liveness
- Now replace the readiness probe with a liveness probe
- What happens now?
*(At first the behavior looks the same as with the readiness probe:
service becomes unreachable, then reachable again, etc.; but there is
a significant difference behind the scenes. What is it?)*
---
## Readiness and liveness
- Bonus questions!
- What happens if we enable both probes at the same time?
- What strategies can we use so that both probes are useful?
- Scaling the `worker` service will then cause disruptions

View File

@@ -16,7 +16,7 @@
## Goal
- We want to be able to access the web app using a URL like:
- We want to be able to access the web app using an URL like:
http://webapp.localdev.me

View File

@@ -1,5 +1,3 @@
⚠️ BROKEN EXERCISE - DO NOT USE
## Exercise — Ingress Secret Policy
*Implement policy to limit impact of ingress controller vulnerabilities.*

View File

@@ -1,5 +1,3 @@
⚠️ BROKEN EXERCISE - DO NOT USE
# Exercise — Ingress Secret Policy
- Most ingress controllers have access to all Secrets

View File

@@ -1,9 +0,0 @@
## Exercise — Generating Ingress With Kyverno
- When a Service gets created, automatically generate an Ingress
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap

View File

@@ -1,33 +0,0 @@
# Exercise — Generating Ingress With Kyverno
When a Service gets created...
*(for instance, Service `blue` in Namespace `rainbow`)*
...Automatically generate an Ingress.
*(for instance, with host name `blue.rainbow.MYDOMAIN.COM`)*
---
## Goals
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap
(e.g. `kubectl create configmap ingress-domain-name --from-literal=domain=1.2.3.4.nip.io`)
---
## Hints
- We want to use a Kyverno `generate` ClusterPolicy
- For step 1, check [Generate Resources](https://kyverno.io/docs/writing-policies/generate/) documentation
- For step 2, check [Preconditions](https://kyverno.io/docs/writing-policies/preconditions/) documentation
- For step 3, check [External Data Sources](https://kyverno.io/docs/writing-policies/external-data-sources/) documentation

View File

@@ -1,9 +0,0 @@
## Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools

View File

@@ -1,69 +0,0 @@
# Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools
---
## Cluster deployment
- Write a Terraform configuration to deploy a cluster
- We want to have two node pools with autoscaling
- Example for sizing:
- 4 GB / 1 CPU per node
- pools of 1 to 4 nodes
---
## Cluster autoscaling
- Deploy an app on the cluster
(you can use `nginx`, `jpetazzo/color`...)
- Set a resource request (e.g. 1 GB RAM)
- Scale up and verify that the autoscaler kicks in
---
## Pool isolation
- We want to deploy two apps
- The first app should be deployed exclusively on the first pool
- The second app should be deployed exclusively on the second pool
- Check the next slide for hints!
---
## Hints
- One solution involves adding a `nodeSelector` to the pod templates
- Another solution involves adding:
- `taints` to the node pools
- matching `tolerations` to the pod templates
---
## Balancing
- Step 1: make sure that the pools are not balanced
- Step 2: deploy a new app, check that it goes to the emptiest pool
- Step 3: update the app so that it balances (as much as possible) between pools

View File

@@ -1,60 +0,0 @@
#!/bin/sh
# The materials for a given training live in their own branch.
# Sometimes, we write custom content (or simply new content) for a training,
# and that content doesn't get merged back to main. This script tries to
# detect that with the following heuristics:
# - list all remote branches
# - for each remote branch, list the changes that weren't merged into main
# (using "diff main...$BRANCH", three dots)
# - ignore a bunch of training-specific files that change all the time anyway
# - for the remaining files, compute the diff between main and the branch
# (using "diff main..$BRANCH", two dots)
# - ignore changes of less than 10 lines
# - also ignore a few red herrings
# - display whatever is left
# For "git diff" (in the filter function) to work correctly, we must be
# at the root of the repo.
cd $(git rev-parse --show-toplevel)
BRANCHES=$(git branch -r | grep -v origin/HEAD | grep origin/2)
filter() {
threshold=10
while read filename; do
case $filename in
# Generic training-specific files
slides/*.html) continue;;
slides/*.yml) continue;;
slides/logistics*.md) continue;;
# Specific content that can be ignored
#slides/containers/Local_Environment.md) threshold=100;;
# Content that was moved/refactored enough to confuse us
slides/containers/Local_Environment.md) threshold=100;;
slides/exercises.md) continue;;
slides/k8s/batch-jobs) threshold=20;;
# Renames
*/{*}*) continue;;
esac
git diff --find-renames --numstat main..$BRANCH -- "$filename" | {
# If the files are identical, the diff will be empty, and "read" will fail.
read plus minus filename || return
# Ignore binary files (FIXME though?)
if [ $plus = - ]; then
return
fi
diff=$((plus-minus))
if [ $diff -gt $threshold ]; then
echo git diff main..$BRANCH -- $filename
fi
}
done
}
for BRANCH in $BRANCHES; do
if FILES=$(git diff --find-renames --name-only main...$BRANCH | filter | grep .); then
echo "🌳 $BRANCH:"
echo "$FILES"
fi
done

View File

@@ -1,11 +1,13 @@
title: |
Kubernetes
Kubernetes Training
chat: "[Chat room](https://lumen.container.training/mattermost)"
#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
chat: "[Mattermost](https://ardanlive.container.training/mattermost/)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2022-01-lumen.container.training/
slides: https://2021-12-k8s.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -15,19 +17,25 @@ exclude:
content:
- shared/title.md
- logistics.md
- exercises/k8sfundamentals-brief.md
- exercises/localcluster-brief.md
- exercises/remotecluster-brief.md
- exercises/healthchecks-brief.md
- exercises/appconfig-brief.md
- exercises/ingress-brief.md
- k8s/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
-
- # DAY 1
- shared/prereqs.md
#- shared/webssh.md
- shared/connecting.md
#- k8s/versions-k8s.md
- shared/sampleapp.md
#- shared/composescale.md
#- shared/hastyconclusions.md
- shared/composedown.md
- k8s/concepts-k8s.md
- k8s/kubectlget.md
@@ -35,10 +43,9 @@ content:
- k8s/kubenet.md
- k8s/kubectlexpose.md
- k8s/shippingimages.md
#- k8s/buildshiprun-dockerhub.md
- exercises/k8sfundamentals-details.md
-
- k8s/ourapponkube.md
- # DAY 2
- shared/declarative.md
- k8s/declarative.md
- k8s/deploymentslideshow.md
@@ -48,50 +55,42 @@ content:
- k8s/namespaces.md
- k8s/yamldeploy.md
- k8s/authoring-yaml.md
- k8s/setup-overview.md
- k8s/setup-devel.md
- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
#- k8s/kubectlproxy.md
- exercises/localcluster-details.md
- exercises/remotecluster-details.md
- # DAY 3
- k8s/scalingdockercoins.md
- shared/hastyconclusions.md
- k8s/daemonset.md
- k8s/setup-overview.md
- k8s/setup-devel.md
#- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
#- k8s/dashboard.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
- exercises/localcluster-details.md
-
- k8s/rollout.md
- k8s/healthchecks.md
- exercises/healthchecks-details.md
- k8s/ingress.md
- exercises/ingress-details.md
#- k8s/ingress-tls.md
- k8s/kustomize.md
- k8s/k9s.md
- k8s/tilt.md
-
#- k8s/healthchecks-more.md
- exercises/healthchecks-details.md
- # DAY 4
- k8s/volumes.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/ingress.md
#- k8s/ingress-tls.md
- exercises/appconfig-details.md
- exercises/ingress-details.md
- # DAY 5
- k8s/netpol.md
- k8s/authn-authz.md
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/horizontal-pod-autoscaler.md
-
- k8s/volumes.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/statefulsets.md
- k8s/consul.md
- k8s/pv-pvc-sc.md
- k8s/volume-claim-templates.md
#- k8s/portworx.md
- k8s/openebs.md
- k8s/stateful-failover.md
#- k8s/batch-jobs.md
-
- |
# (Extra content)
- k8s/operators.md
- k8s/sealed-secrets.md
- k8s/eck.md
- shared/thankyou.md
#- k8s/horizontal-pod-autoscaler.md
#-
# - k8s/helm-intro.md
# - k8s/helm-chart-format.md
# - k8s/helm-create-basic-chart.md
# - k8s/helm-create-better-chart.md

View File

@@ -32,7 +32,7 @@
- You're welcome to use whatever you like (e.g. AWS profiles)
.lab[
.exercise[
- Set the AWS region, API access key, and secret key:
```bash
@@ -58,7 +58,7 @@
- register it in our kubeconfig file
.lab[
.exercise[
- Update our kubeconfig file:
```bash

View File

@@ -20,13 +20,13 @@
## Suspension of disbelief
The labs and demos in this section assume that we have set up `kubectl` on our
The exercises in this section assume that we have set up `kubectl` on our
local machine in order to access a remote cluster.
We will therefore show how to access services and pods of the remote cluster,
from our local machine.
You can also run these commands directly on the cluster (if you haven't
You can also run these exercises directly on the cluster (if you haven't
installed and set up `kubectl` locally).
Running commands locally will be less useful
@@ -58,7 +58,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access the `webui` service through `kubectl proxy`
.lab[
.exercise[
- Run an API proxy in the background:
```bash
@@ -101,7 +101,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access our remote Redis server
.lab[
.exercise[
- Forward connections from local port 10000 to remote port 6379:
```bash

View File

@@ -198,7 +198,7 @@ Some examples ...
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
.lab[
.exercise[
- Go to the webhook directory:
```bash
@@ -244,7 +244,7 @@ class: extra-details
- We need to update the configuration with the correct `url`
.lab[
.exercise[
- Edit the webhook configuration manifest:
```bash
@@ -271,7 +271,7 @@ class: extra-details
(so if the webhook server is down, we can still create pods)
.lab[
.exercise[
- Register the webhook:
```bash
@@ -288,7 +288,7 @@ It is strongly recommended to tail the logs of the API server while doing that.
- Let's create a pod and try to set a `color` label
.lab[
.exercise[
- Create a pod named `chroma`:
```bash
@@ -328,7 +328,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Update the webhook configuration
.lab[
.exercise[
- First, check the ngrok URL of the tunnel for the Flask app:
```bash
@@ -395,7 +395,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Let's get to work!
.lab[
.exercise[
- Make sure we're in the right directory:
```bash
@@ -424,7 +424,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
... we'll store it in a ConfigMap, and install dependencies on the fly
.lab[
.exercise[
- Load the webhook source in a ConfigMap:
```bash
@@ -446,7 +446,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
(of course, there are plenty others options; e.g. `cfssl`)
.lab[
.exercise[
- Generate a self-signed certificate:
```bash
@@ -470,7 +470,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
- Let's reconfigure the webhook to use our Service instead of ngrok
.lab[
.exercise[
- Edit the webhook configuration manifest:
```bash
@@ -504,7 +504,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
Shell to the rescue!
.lab[
.exercise[
- Load up our cert and encode it in base64:
```bash

View File

@@ -66,7 +66,7 @@
- We'll ask `kubectl` to show us the exacts requests that it's making
.lab[
.exercise[
- Check the URI for a cluster-scope, "core" resource, e.g. a Node:
```bash
@@ -122,7 +122,7 @@ class: extra-details
- What about namespaced resources?
.lab[
.exercise[
- Check the URI for a namespaced, "core" resource, e.g. a Service:
```bash
@@ -169,7 +169,7 @@ class: extra-details
## Accessing a subresource
.lab[
.exercise[
- List `kube-proxy` pods:
```bash
@@ -200,7 +200,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
- There are at least three useful commands to introspect the API server
.lab[
.exercise[
- List resources types, their group, kind, short names, and scope:
```bash
@@ -249,7 +249,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
The following assumes that `metrics-server` is deployed on your cluster.
.lab[
.exercise[
- Check that the metrics.k8s.io is registered with `metrics-server`:
```bash
@@ -271,7 +271,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- We can have multiple resources with the same name
.lab[
.exercise[
- Look for resources named `node`:
```bash
@@ -298,7 +298,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- But we can look at the raw data (with `-o json` or `-o yaml`)
.lab[
.exercise[
- Look at NodeMetrics objects with one of these commands:
```bash
@@ -320,7 +320,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
--
.lab[
.exercise[
- Display node metrics:
```bash
@@ -342,7 +342,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- Then we can register that server by creating an APIService resource
.lab[
.exercise[
- Check the definition used for the `metrics-server`:
```bash

View File

@@ -103,7 +103,7 @@ class: extra-details
---
## `WithWaitGroup`
## `WithWaitGroup`,
- When we shutdown, tells clients (with in-flight requests) to retry

View File

@@ -203,9 +203,9 @@ What does that mean?
## Let's experiment a bit!
- For this section, connect to the first node of the `test` cluster
- For the exercises in this section, connect to the first node of the `test` cluster
.lab[
.exercise[
- SSH to the first node of the test cluster
@@ -224,7 +224,7 @@ What does that mean?
- Let's create a simple object
.lab[
.exercise[
- Create a namespace with the following command:
```bash
@@ -246,7 +246,7 @@ This is equivalent to `kubectl create namespace hello`.
- Let's retrieve the object we just created
.lab[
.exercise[
- Read back our object:
```bash
@@ -354,7 +354,7 @@ class: extra-details
- The easiest way is to use `kubectl label`
.lab[
.exercise[
- In one terminal, watch namespaces:
```bash
@@ -402,7 +402,7 @@ class: extra-details
- DELETED resources
.lab[
.exercise[
- In one terminal, watch pods, displaying full events:
```bash

View File

@@ -361,7 +361,7 @@ class: extra-details
## Listing service accounts
.lab[
.exercise[
- The resource name is `serviceaccount` or `sa` for short:
```bash
@@ -378,7 +378,7 @@ class: extra-details
## Finding the secret
.lab[
.exercise[
- List the secrets for the `default` service account:
```bash
@@ -398,7 +398,7 @@ class: extra-details
- The token is stored in the secret, wrapped with base64 encoding
.lab[
.exercise[
- View the secret:
```bash
@@ -421,7 +421,7 @@ class: extra-details
- Let's send a request to the API, without and with the token
.lab[
.exercise[
- Find the ClusterIP for the `kubernetes` service:
```bash
@@ -616,7 +616,7 @@ class: extra-details
- Nixery automatically generates images with the requested packages
.lab[
.exercise[
- Run our pod:
```bash
@@ -632,7 +632,7 @@ class: extra-details
- Normally, at this point, we don't have any API permission
.lab[
.exercise[
- Check our permissions with `kubectl`:
```bash
@@ -658,7 +658,7 @@ class: extra-details
(but again, we could call it `view` or whatever we like)
.lab[
.exercise[
- Create the new role binding:
```bash
@@ -716,7 +716,7 @@ It's important to note a couple of details in these flags...
- We should be able to *view* things, but not to *edit* them
.lab[
.exercise[
- Check our permissions with `kubectl`:
```bash

View File

@@ -93,7 +93,7 @@
- We can use the `--dry-run=client` option
.lab[
.exercise[
- Generate the YAML for a Deployment without creating it:
```bash
@@ -128,7 +128,7 @@ class: extra-details
## The limits of `kubectl apply --dry-run=client`
.lab[
.exercise[
- Generate the YAML for a deployment:
```bash
@@ -161,7 +161,7 @@ class: extra-details
(all validation and mutation hooks will be executed)
.lab[
.exercise[
- Try the same YAML file as earlier, with server-side dry run:
```bash
@@ -200,7 +200,7 @@ class: extra-details
- `kubectl diff` does a server-side dry run, *and* shows differences
.lab[
.exercise[
- Try `kubectl diff` on the YAML that we tweaked earlier:
```bash

View File

@@ -1,693 +0,0 @@
# Amazon EKS
- Elastic Kubernetes Service
- AWS runs the Kubernetes control plane
(all we see is an API server endpoint)
- Pods can run on any combination of:
- EKS-managed nodes
- self-managed nodes
- Fargate
- Leverages and integrates with AWS services and APIs
---
## Some integrations
- Authenticate with IAM users and roles
- Associate IAM roles to Kubernetes ServiceAccounts
- Load balance traffic with ALB/ELB/NLB
- Persist data with EBS/EFS
- Label nodes with instance ID, instance type, region, AZ ...
- Pods can be "first class citizens" of VPC
---
## Pros/cons
- Fully managed control plane
- Handles deployment, upgrade, scaling of the control plane
- Available versions and features tend to lag a bit
- Doesn't fit the most demanding users
("demanding" starts somewhere between 100 and 1000 nodes)
---
## Good to know ...
- Some integrations are specific to EKS
(some authentication models)
- Many integrations are *not* specific to EKS
- The Cloud Controller Manager can run outside of EKS
(and provide LoadBalancer services, EBS volumes, and more)
---
# Provisioning clusters
- AWS console, API, CLI
- `eksctl`
- Infrastructure-as-Code
---
## AWS "native" provisioning
- AWS web console
- click-click-click!
- difficulty: low
- AWS API or CLI
- must provide subnets, ARNs
- difficulty: medium
---
## `eksctl`
- Originally developed by Weave
(back when AWS "native" provisioning wasn't very good)
- `eksctl create cluster` just works™
- Has been "adopted" by AWS
(is listed in official documentations)
---
## Infrastructure-as-Code
- Cloud Formation
- Terraform
[terraform-aws-eks](https://github.com/terraform-aws-modules/terraform-aws-eks)
by the community
([example](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/basic))
[terraform-provider-aws](https://github.com/hashicorp/terraform-provider-aws)
by Hashicorp
([example](https://github.com/hashicorp/terraform-provider-aws/tree/main/examples/eks-getting-started))
[Kubestack](https://www.kubestack.com/)
---
## Node groups
- Virtually all provisioning models have a concept of "node group"
- Node group = group of similar nodes in an ASG
- can span multiple AZ
- can have instances of different types¹
- A cluster will need at least one node group
.footnote[¹As I understand it, to specify fallbacks if one instance type is unavailable or out of capacity.]
---
# IAM → EKS authentication
- Access EKS clusters using IAM users and roles
- No special role, permission, or policy is needed in IAM
(but the `eks:DescribeCluster` permission can be useful, see later)
- Users and roles need to be explicitly listed in the cluster
- Configuration is done through a ConfigMap in the cluster
---
## Setting it up
- Nothing to do when creating the cluster
(feature is always enabled)
- Users and roles are *mapped* to Kubernetes users and groups
(through the `aws-auth` ConfigMap in `kube-system`)
- That's it!
---
## Mapping
- The `aws-auth` ConfigMap can contain two entries:
- `mapRoles` (map IAM roles)
- `mapUsers` (map IAM users)
- Each entry is a YAML file
- Each entry includes:
- `rolearn` or `userarn` to map
- `username` (as a string)
- `groups` (as a list; can be empty)
---
## Example
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
namespace: kube-system
name: aws-auth
data:
mapRoles: `|`
- rolearn: arn:aws:iam::111122223333:role/blah
username: blah
groups: [ devs, ops ]
mapUsers: `|`
- userarn: arn:aws:iam::111122223333:user/alice
username: alice
groups: [ system:masters ]
- userarn: arn:aws:iam::111122223333:user/bob
username: bob
groups: [ system:masters ]
```
---
## Client setup
- We need either the `aws` CLI or the `aws-iam-authenticator`
- We use them as `exec` plugins in `~/.kube/config`
- Done automatically by `eksctl`
- Or manually with `aws eks update-kubeconfig`
- Discovering the address of the API server requires one IAM permission
```json
"Action": [
"eks:DescribeCluster"
],
"Resource": "arn:aws:eks:<region>:<account>:cluster/<cluster-name>"
```
(wildcards can be used when specifying the resource)
---
class: extra-details
## How it works
- The helper generates a token
(with `aws eks get-token` or `aws-iam-authenticator token`)
- Note: these calls will always succeed!
(even if AWS API keys are invalid)
- The token is used to authenticate with the Kubernetes API
- AWS' Kubernetes API server will decode and validate the token
(and map the underlying user or role accordingly)
---
## Read The Fine Manual
https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
---
# EKS → IAM authentication
- Access AWS services from workloads running on EKS
(e.g.: access S3 bucket from code running in a Pod)
- This works by associating an IAM role to a K8S ServiceAccount
- There are also a few specific roles used internally by EKS
(e.g. to let the nodes establish network configurations)
- ... We won't talk about these
---
## The big picture
- One-time setup task
([create an OIDC provider associated to our EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html))
- Create (or update) a role with an appropriate *trust policy*
(more on that later)
- Annotate service accounts to map them to that role
`eks.amazonaws.com/role-arn=arn:aws:iam::111122223333:role/some-iam-role`
- Create (or re-create) pods using that ServiceAccount
- The pods can now use that role!
---
## Trust policies
- IAM roles have a *trust policy* (aka *assume role policy*)
(cf `aws iam create-role ... --assume-role-policy-document ...`)
- That policy contains a *statement* list
- This list indicates who/what is allowed to assume (use) the role
- In the current scenario, that policy will contain something saying:
*ServiceAccount S on EKS cluster C is allowed to use this role*
---
## Trust policy for a single ServiceAccount
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringEquals": {
"${OIDC_PROVIDER}:sub":
"system:serviceaccount:<namespace>:<service-account>"
}
}
}
]
}
```
---
## Trust policy for multiple ServiceAccounts
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringLike": {
"${OIDC_PROVIDER}:sub":
["system:serviceaccount:container-training:*"]
}
}
}
]
}
```
---
## The little details
- When pods are created, they are processed by a mutating webhook
(typically named `pod-identity-webhook`)
- Pods using a ServiceAccount with the right annotation get:
- an extra token
<br/>
(mounted in `/var/run/secrets/eks.amazonaws.com/serviceaccount/token`)
- a few env vars
<br/>
(including `AWS_WEB_IDENTITY_TOKEN_FILE` and `AWS_ROLE_ARN`)
- AWS client libraries and tooling will work this that
(see [this list](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-minimum-sdk.html) for supported versions)
---
# CNI
- EKS is a compliant Kubernetes implementation
(which means we can use a wide range of CNI plugins)
- However, the recommended CNI plugin is the "AWS VPC CNI"
(https://github.com/aws/amazon-vpc-cni-k8s)
- Pods are then "first class citizens" of AWS VPC
---
## AWS VPC CNI
- Each Pod gets an address in a VPC subnet
- No overlay network, no encapsulation, no overhead
(other than AWS network fabric, obviously)
- Probably the fastest network option when running on AWS
- Allows "direct" load balancing (more on that later)
- Can use security groups with Pod traffic
- But: limits the number of Pods per Node
- But: more complex configuration (more on that later)
---
## Number of Pods per Node
- Each Pod gets an IP address on an ENI
(Elastic Network Interface)
- EC2 instances can only have a limited number of ENIs
(the exact limit depends on the instance type)
- ENIs can only have a limited number of IP addresses
(with variations here as well)
- This gives limits of e.g. 35 pods on `t3.large`, 29 on `c5.large` ...
(see
[full list of limits per instance type](https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt
)
and
[ENI/IP details](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/pkg/awsutils/vpc_ip_resource_limit.go
))
---
## Limits?
- These limits might seem low
- They're not *that* low if you compute e.g. the RAM/Pod ratio
- Except if you're running lots if tiny pods
- Bottom line: do the math!
---
class: extra-details
## Pre-loading
- It can take a little while to allocate/attach an ENI
- The AWS VPC CNI can keep a few extra addresses on each Node
(by default, one ENI worth of IP addresses)
- This is tunable if needed
(see [the docs](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/docs/eni-and-ip-target.md
) for details)
---
## Better load balancing
- The default path for inbound traffic is:
Load balancer → NodePort → Pod
- With the AWS VPC CNI, it becomes possible to do:
Load balancer → Pod
- More on that in the load balancing section!
---
## Configuration complexity
- The AWS VPC CNI is a very good solution when running EKS
- It brings optimized solutions to various use-cases:
- direct load balancing
- user authentication
- interconnection with other infrastructure
- etc.
- Keep in mind that all these solutions are AWS-specific
- They can require a non-trivial amount of specific configuration
- Especially when moving from a simple POC to an IAC deployment!
---
# Load Balancers
- Here be dragons!
- Multiple options, each with different pros/cons
- It's necessary to know both AWS products and K8S concepts
---
## AWS load balancers
- CLB / Classic Load Balancer (formerly known as ELB)
- can work in L4 (TCP) or L7 (HTTP) mode
- can do TLS unrolling
- can't do websockets, HTTP/2, content-based routing ...
- NLB / Network Load Balancer
- high-performance L4 load balancer with TLS support
- ALB / Application Load Balancer
- HTTP load balancer
- can do TLS unrolling
- can do websockets, HTTP/2, content-based routing ...
---
## Load balancing modes
- "IP targets"
- send traffic directly from LB to Pods
- Pods must use the AWS VPC CNI
- compatible with Fargate Pods
- "Instance targets"
- send traffic to a NodePort (generally incurs an extra hop)
- Pods can use any CNI
- not compatible with Fargate Pods
- Each LB (Service) can use a different mode, if necessary
---
## Kubernetes load balancers
- Service (L4)
- ClusterIP: internal load balancing
- NodePort: external load balancing on ports >30000
- LoadBalancer: external load balancing on the port you want
- ExternalIP: external load balancing directly on nodes
- Ingress (L7 HTTP)
- partial content-based routing (`Host` header, request path)
- requires an Ingress Controller (in front)
- works with Services (in back)
---
## Two controllers are available
- Kubernetes "in-tree" load balancer controller
- always available
- used by default for LoadBalancer Services
- creates CLB by default; can also do NLB
- can only do "instance targets"
- can use extra CLB features (TLS, HTTP)
- AWS Load Balancer Controller (fka AWS ALB Ingress Controller)
- optional add-on (requires additional config)
- primarily meant to be an Ingress Controller
- creates NLB and ALB
- can do "instance targets" and "IP targets"
- can also be used for LoadBalancer Services with type `nlb-ip`
- They can run side by side
---
## Which one should we use?
- AWS Load Balancer Controller supports "IP targets"
(which means direct routing of traffic to Pods)
- It can be used as an Ingress controller
- It *seems* to be the perfect solution for EKS!
- However ...
---
## Caveats
- AWS Load Balancer Controller requires extensive configuration
- a few hours to a few days to get it to work in a POC ...
- a few days to a few weeks to industrialize that process?
- It's AWS-specific
- It still introduces an extra hop, even if that hop is invisible
- Other ingress controllers can have interesting features
(canary deployment, A/B testing ...)
---
## Noteworthy annotations and docs
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb-ip`
- LoadBalancer Service with "IP targets" ([docs](https://kubernetes-sigs.github.io/aws-load-balancer-controller/latest/guide/service/nlb_ip_mode/))
- requires AWS Load Balancer Controller
- `service.beta.kubernetes.io/aws-load-balancer-internal: "true"`
- internal load balancer (for private VPC)
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb`
- opt for NLB instead of CLB with in-tree controller
- `service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: "*"`
- use HAProxy [PROXY protocol](https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt)
---
## TLS-related annotations
- `service.beta.kubernetes.io/aws-load-balancer-ssl-cert`
- enable TLS and use that certificate
- example value: `arn:aws:acm:<region>:<account>:certificate/<cert-id>`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-ports`
- enable TLS *only* on the specified ports (when multiple ports are exposed)
- example value: `"443,8443"`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy`
- specify ciphers and other TLS parameters to use (see [that list](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/elb-security-policy-table.html))
- example value: `"ELBSecurityPolicy-TLS-1-2-2017-01"`
---
## To HTTP(S) or not to HTTP(S)
- `service.beta.kubernetes.io/aws-load-balancer-backend-protocol`
- can be either `http`, `https`, `ssl`, or `tcp`
- if `https` or `ssl`: enable TLS to the backend
- if `http` or `https`: enable HTTP `x-forwarded-for` headers (with `http` or `https`)
???
## Cluster autoscaling
## Logging
https://docs.aws.amazon.com/eks/latest/userguide/logging-using-cloudtrail.html
:EN:- Working with EKS
:EN:- Cluster and user provisioning
:EN:- Networking and load balancing
:FR:- Travailler avec EKS
:FR:- Outils de déploiement
:FR:- Intégration avec IAM
:FR:- Fonctionalités réseau

View File

@@ -30,7 +30,7 @@
- or we hit the *backoff limit* of the Job (default=6)
.lab[
.exercise[
- Create a Job that has a 50% chance of success:
```bash
@@ -49,7 +49,7 @@
- If the Pod fails, the Job creates another Pod
.lab[
.exercise[
- Check the status of the Pod(s) created by the Job:
```bash
@@ -108,7 +108,7 @@ class: extra-details
(The Cron Job will not hold if a previous job is still running)
.lab[
.exercise[
- Create the Cron Job:
```bash
@@ -135,7 +135,7 @@ class: extra-details
(re-creating another one if it fails, for instance if its node fails)
.lab[
.exercise[
- Check the Jobs that are created:
```bash

View File

@@ -98,7 +98,7 @@
- Let's list our bootstrap tokens on a cluster created with kubeadm
.lab[
.exercise[
- Log into node `test1`
@@ -145,7 +145,7 @@ class: extra-details
- The token we need to use has the form `abcdef.1234567890abcdef`
.lab[
.exercise[
- Check that it is accepted by the API server:
```bash
@@ -177,7 +177,7 @@ class: extra-details
- That information is stored in a public ConfigMap
.lab[
.exercise[
- Retrieve that ConfigMap:
```bash

View File

@@ -88,7 +88,7 @@ spec:
- Let's try this out!
.lab[
.exercise[
- Check the port used by our self-hosted registry:
```bash

View File

@@ -40,7 +40,7 @@
- Let's build the image for the DockerCoins `worker` service with Kaniko
.lab[
.exercise[
- Find the port number for our self-hosted registry:
```bash
@@ -160,7 +160,7 @@ spec:
- The YAML for the pod is in `k8s/kaniko-build.yaml`
.lab[
.exercise[
- Create the pod:
```bash

View File

@@ -37,7 +37,7 @@ so that your build pipeline is automated.*
- We will deploy a registry container, and expose it with a NodePort
.lab[
.exercise[
- Create the registry service:
```bash
@@ -57,7 +57,7 @@ so that your build pipeline is automated.*
- We need to find out which port has been allocated
.lab[
.exercise[
- View the service details:
```bash
@@ -78,7 +78,7 @@ so that your build pipeline is automated.*
- A convenient Docker registry API route to remember is `/v2/_catalog`
.lab[
.exercise[
<!-- ```hide kubectl wait deploy/registry --for condition=available```-->
@@ -102,7 +102,7 @@ We should see:
- We can retag a small image, and push it to the registry
.lab[
.exercise[
- Make sure we have the busybox image, and retag it:
```bash
@@ -123,7 +123,7 @@ We should see:
- Let's use the same endpoint as before
.lab[
.exercise[
- Ensure that our busybox image is now in the local registry:
```bash
@@ -143,7 +143,7 @@ The curl command should now output:
- We are going to use a convenient feature of Docker Compose
.lab[
.exercise[
- Go to the `stacks` directory:
```bash
@@ -217,7 +217,7 @@ class: extra-details
- All our images should now be in the registry
.lab[
.exercise[
- Re-run the same `curl` command as earlier:
```bash
@@ -232,4 +232,4 @@ variable, so that we can quickly switch from
the self-hosted registry to pre-built images
hosted on the Docker Hub. So make sure that
this $REGISTRY variable is set correctly when
running these commands!*
running the exercises!*

View File

@@ -56,7 +56,7 @@
- It can be installed with a YAML manifest, or with Helm
.lab[
.exercise[
- Let's install the cert-manager Helm chart with this one-liner:
```bash
@@ -86,7 +86,7 @@
- The manifest shown on the previous slide is in @@LINK[k8s/cm-clusterissuer.yaml]
.lab[
.exercise[
- Create the ClusterIssuer:
```bash
@@ -115,7 +115,7 @@
- The manifest shown on the previous slide is in @@LINK[k8s/cm-certificate.yaml]
.lab[
.exercise[
- Edit the Certificate to update the domain name
@@ -140,7 +140,7 @@
- then it waits for the challenge to complete
.lab[
.exercise[
- View the resources created by cert-manager:
```bash
@@ -158,7 +158,7 @@
`http://<our-domain>/.well-known/acme-challenge/<token>`
.lab[
.exercise[
- Check the *path* of the Ingress in particular:
```bash
@@ -176,7 +176,7 @@
An Ingress Controller! 😅
.lab[
.exercise[
- Install an Ingress Controller:
```bash

View File

@@ -1,445 +0,0 @@
# Cluster autoscaler
- When the cluster is full, we need to add more nodes
- This can be done manually:
- deploy new machines and add them to the cluster
- if using managed Kubernetes, use some API/CLI/UI
- Or automatically with the cluster autoscaler:
https://github.com/kubernetes/autoscaler
---
## Use-cases
- Batch job processing
"once in a while, we need to execute these 1000 jobs in parallel"
"...but the rest of the time there is almost nothing running on the cluster"
- Dynamic workload
"a few hours per day or a few days per week, we have a lot of traffic"
"...but the rest of the time, the load is much lower"
---
## Pay for what you use
- The point of the cloud is to "pay for what you use"
- If you have a fixed number of cloud instances running at all times:
*you're doing it wrong (except if your load is always the same)*
- If you're not using some kind of autoscaling, you're wasting money
(except if you like lining the pockets of your cloud provider)
---
## Running the cluster autoscaler
- We must run nodes on a supported infrastructure
- See [here] for a non-exhaustive list of supported providers
- Sometimes, the cluster autoscaler is installed automatically
(or by setting a flag / checking a box when creating the cluster)
- Sometimes, it requires additional work
(which is often non-trivial and highly provider-specific)
[here]: https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider
---
## Scaling up in theory
IF a Pod is `Pending`,
AND adding a Node would allow this Pod to be scheduled,
THEN add a Node.
---
## Fine print 1
*IF a Pod is `Pending`...*
- First of all, the Pod must exist
- Pod creation might be blocked by e.g. a namespace quota
- In that case, the cluster autoscaler will never trigger
---
## Fine print 2
*IF a Pod is `Pending`...*
- If our Pods do not have resource requests:
*they will be in the `BestEffort` class*
- Generally, Pods in the `BestEffort` class are schedulable
- except if they have anti-affinity placement constraints
- except if all Nodes already run the max number of pods (110 by default)
- Therefore, if we want to leverage cluster autoscaling:
*our Pods should have resource requests*
---
## Fine print 3
*AND adding a Node would allow this Pod to be scheduled...*
- The autoscaler won't act if:
- the Pod is too big to fit on a single Node
- the Pod has impossible placement constraints
- Examples:
- "run one Pod per datacenter" with 4 pods and 3 datacenters
- "use this nodeSelector" but no such Node exists
---
## Trying it out
- We're going to check how much capacity is available on the cluster
- Then we will create a basic deployment
- We will add resource requests to that deployment
- Then scale the deployment to exceed the available capacity
- **The following commands require a working cluster autoscaler!**
---
## Checking available resources
.lab[
- Check how much CPU is allocatable on the cluster:
```bash
kubectl get nodes -o jsonpath={..allocatable.cpu}
```
]
- If we see e.g. `2800m 2800m 2800m`, that means:
3 nodes with 2.8 CPUs allocatable each
- To trigger autoscaling, we will create 7 pods requesting 1 CPU each
(each node can fit 2 such pods)
---
## Creating our test Deployment
.lab[
- Create the Deployment:
```bash
kubectl create deployment blue --image=jpetazzo/color
```
- Add a request for 1 CPU:
```bash
kubectl patch deployment blue --patch='
spec:
template:
spec:
containers:
- name: color
resources:
requests:
cpu: 1
'
```
]
---
## Scaling up in practice
- This assumes that we have strictly less than 7 CPUs available
(adjust the numbers if necessary!)
.lab[
- Scale up the Deployment:
```bash
kubectl scale deployment blue --replicas=7
```
- Check that we have a new Pod, and that it's `Pending`:
```bash
kubectl get pods
```
]
---
## Cluster autoscaling
- After a few minutes, a new Node should appear
- When that Node becomes `Ready`, the Pod will be assigned to it
- The Pod will then be `Running`
- Reminder: the `AGE` of the Pod indicates when the Pod was *created*
(it doesn't indicate when the Pod was scheduled or started!)
- To see other state transitions, check the `status.conditions` of the Pod
---
## Scaling down in theory
IF a Node has less than 50% utilization for 10 minutes,
AND all its Pods can be scheduled on other Nodes,
AND all its Pods are *evictable*,
AND the Node doesn't have a "don't scale me down" annotation¹,
THEN drain the Node and shut it down.
.footnote[¹The annotation is: `cluster-autoscaler.kubernetes.io/scale-down-disabled=true`]
---
## When is a Pod "evictable"?
By default, Pods are evictable, except if any of the following is true.
- They have a restrictive Pod Disruption Budget
- They are "standalone" (not controlled by a ReplicaSet/Deployment, StatefulSet, Job...)
- They are in `kube-system` and don't have a Pod Disruption Budget
- They have local storage (that includes `EmptyDir`!)
This can be overridden by setting the annotation:
<br/>
`cluster-autoscaler.kubernetes.io/safe-to-evict`
<br/>(it can be set to `true` or `false`)
---
## Pod Disruption Budget
- Special resource to configure how many Pods can be *disrupted*
(i.e. shutdown/terminated)
- Applies to Pods matching a given selector
(typically matching the selector of a Deployment)
- Only applies to *voluntary disruption*
(e.g. cluster autoscaler draining a node, planned maintenance...)
- Can express `minAvailable` or `maxUnavailable`
- See [documentation] for details and examples
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
---
## Local storage
- If our Pods use local storage, they will prevent scaling down
- If we have e.g. an `EmptyDir` volume for caching/sharing:
make sure to set the `.../safe-to-evict` annotation to `true`!
- Even if the volume...
- ...only has a PID file or UNIX socket
- ...is empty
- ...is not mounted by any container in the Pod!
---
## Expensive batch jobs
- Careful if we have long-running batch jobs!
(e.g. jobs that take many hours/days to complete)
- These jobs could get evicted before they complete
(especially if they use less than 50% of the allocatable resources)
- Make sure to set the `.../safe-to-evict` annotation to `false`!
---
## Node groups
- Easy scenario: all nodes have the same size
- Realistic scenario: we have nodes of different sizes
- e.g. mix of CPU and GPU nodes
- e.g. small nodes for control plane, big nodes for batch jobs
- e.g. leveraging spot capacity
- The cluster autoscaler can handle it!
---
class: extra-details
## Leveraging spot capacity
- AWS, Azure, and Google Cloud are typically more expensive than their competitors
- However, they offer *spot* capacity (spot instances, spot VMs...)
- *Spot* capacity:
- has a much lower cost (see e.g. AWS [spot instance advisor][awsspot])
- has a cost that varies continuously depending on regions, instance type...
  - can be preempted at any time
- To be cost-effective, it is strongly recommended to leverage spot capacity
[awsspot]: https://aws.amazon.com/ec2/spot/instance-advisor/
---
## Node groups in practice
- The cluster autoscaler maps nodes to *node groups*
- this is an internal, provider-dependent mechanism
- the node group is sometimes visible through a proprietary label or annotation
- Each node group is scaled independently
- The cluster autoscaler uses [expanders] to decide which node group to scale up
(the default expander is "random", i.e. pick a node group at random!)
- Of course, only acceptable node groups will be considered
(i.e. node groups that could accommodate the `Pending` Pods)
[expanders]: https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-expanders
---
class: extra-details
## Scaling to zero
- *In general,* a node group needs to have at least one node at all times
(the cluster autoscaler uses that node to figure out the size, labels, taints... of the group)
- *On some providers,* there are special ways to specify labels and/or taints
(but if you want to scale to zero, check that the provider supports it!)
---
## Warning
- Autoscaling up is easy
- Autoscaling down is harder
- It might get stuck because Pods are not evictable
- Do at least a dry run to make sure that the cluster scales down correctly!
- Have alerts on cloud spend
- *Especially when using big/expensive nodes (e.g. with GPU!)*
---
## Preferred vs. Required
- Some Kubernetes mechanisms allow to express "soft preferences":
- affinity (`requiredDuringSchedulingIgnoredDuringExecution` vs `preferredDuringSchedulingIgnoredDuringExecution`)
- taints (`NoSchedule`/`NoExecute` vs `PreferNoSchedule`)
- Remember that these "soft preferences" can be ignored
(and given enough time and churn on the cluster, they will!)
---
## Troubleshooting
- The cluster autoscaler publishes its status on a ConfigMap
.lab[
- Check the cluster autoscaler status:
```bash
kubectl describe configmap --namespace kube-system cluster-autoscaler-status
```
]
- We can also check the logs of the autoscaler
(except on managed clusters where it's running internally, not visible to us)
---
## Acknowledgements
Special thanks to [@s0ulshake] for their help with this section!
If you need help to run your data science workloads on Kubernetes,
<br/>they're available for consulting.
(Get in touch with them through https://www.linkedin.com/in/ajbowen/)
[@s0ulshake]: https://twitter.com/s0ulshake

View File

@@ -18,9 +18,9 @@
- It's easy to check the version for the API server
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check the version of kubectl and of the API server:
```bash
@@ -39,7 +39,7 @@
- It's also easy to check the version of kubelet
.lab[
.exercise[
- Check node versions (includes kubelet, kernel, container engine):
```bash
@@ -60,7 +60,7 @@
- If the control plane is self-hosted (running in pods), we can check it
.lab[
.exercise[
- Show image versions for all pods in `kube-system` namespace:
```bash
@@ -81,7 +81,7 @@
## What version are we running anyway?
- When I say, "I'm running Kubernetes 1.18", is that the version of:
- When I say, "I'm running Kubernetes 1.15", is that the version of:
- kubectl
@@ -157,15 +157,15 @@
## Kubernetes uses semantic versioning
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.18.20:
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.17.2:
- MAJOR = 1
- MINOR = 18
- PATCH = 20
- MINOR = 17
- PATCH = 2
- It's always possible to mix and match different PATCH releases
(e.g. 1.18.20 and 1.18.15 are compatible)
(e.g. 1.16.1 and 1.16.6 are compatible)
- It is recommended to run the latest PATCH release
@@ -181,9 +181,9 @@
- All components support a difference of one¹ MINOR version
- This allows live upgrades (since we can mix e.g. 1.18 and 1.19)
- This allows live upgrades (since we can mix e.g. 1.15 and 1.16)
- It also means that going from 1.18 to 1.20 requires going through 1.19
- It also means that going from 1.14 to 1.16 requires going through 1.15
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
and kubectl, which can be one MINOR ahead or behind API server.]
@@ -214,7 +214,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- We will change the version of the API server
- We will work with cluster `oldversion` (nodes `oldversion1`, `oldversion2`, `oldversion3`)
- We will work with cluster `test` (nodes `test1`, `test2`, `test3`)
---
@@ -240,9 +240,9 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- We will edit the YAML file to use a different image version
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check API server version:
```bash
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
```
- Look for the `image:` line, and update it to e.g. `v1.19.0`
- Look for the `image:` line, and update it to e.g. `v1.16.0`
]
@@ -264,7 +264,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- The API server will be briefly unavailable while kubelet restarts it
.lab[
.exercise[
- Check the API server version:
```bash
@@ -299,7 +299,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
(note: this is possible only because the cluster was installed with kubeadm)
.lab[
.exercise[
- Check what will be upgraded:
```bash
@@ -308,11 +308,11 @@ and kubectl, which can be one MINOR ahead or behind API server.]
]
Note 1: kubeadm thinks that our cluster is running 1.19.0.
Note 1: kubeadm thinks that our cluster is running 1.16.0.
<br/>It is confused by our manual upgrade of the API server!
Note 2: kubeadm itself is still version 1.18.20.
<br/>It doesn't know how to upgrade to 1.19.X.
Note 2: kubeadm itself is still version 1.15.9.
<br/>It doesn't know how to upgrade to 1.16.X.
---
@@ -320,7 +320,7 @@ Note 2: kubeadm itself is still version 1.18.20..
- First things first: we need to upgrade kubeadm
.lab[
.exercise[
- Upgrade kubeadm:
```
@@ -335,28 +335,28 @@ Note 2: kubeadm itself is still version 1.18.20..
]
Problem: kubeadm doesn't know how to handle
upgrades from version 1.18.
upgrades from version 1.15.
This is because we installed version 1.22 (or even later).
This is because we installed version 1.17 (or even later).
We need to install kubeadm version 1.19.X.
We need to install kubeadm version 1.16.X.
---
## Downgrading kubeadm
- We need to go back to version 1.19.X.
- We need to go back to version 1.16.X (e.g. 1.16.6)
.lab[
.exercise[
- View available versions for package `kubeadm`:
```bash
apt show kubeadm -a | grep ^Version | grep 1.19
apt show kubeadm -a | grep ^Version | grep 1.16
```
- Downgrade kubeadm:
```
sudo apt install kubeadm=1.19.8-00
sudo apt install kubeadm=1.16.6-00
```
- Check what kubeadm tells us:
@@ -366,7 +366,7 @@ We need to install kubeadm version 1.19.X.
]
kubeadm should now agree to upgrade to 1.19.8.
kubeadm should now agree to upgrade to 1.16.6.
---
@@ -378,11 +378,11 @@ kubeadm should now agree to upgrade to 1.19.8.
- Or we can try the upgrade anyway
.lab[
.exercise[
- Perform the upgrade:
```bash
sudo kubeadm upgrade apply v1.19.8
sudo kubeadm upgrade apply v1.16.6
```
]
@@ -395,9 +395,9 @@ kubeadm should now agree to upgrade to 1.19.8.
- We can therefore use `apt` or `apt-get`
.lab[
.exercise[
- Log into node `oldversion3`
- Log into node `test3`
- View available versions for package `kubelet`:
```bash
@@ -406,7 +406,7 @@ kubeadm should now agree to upgrade to 1.19.8.
- Upgrade kubelet:
```bash
sudo apt install kubelet=1.19.8-00
sudo apt install kubelet=1.16.6-00
```
]
@@ -415,9 +415,9 @@ kubeadm should now agree to upgrade to 1.19.8.
## Checking what we've done
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check node versions:
```bash
@@ -458,15 +458,15 @@ kubeadm should now agree to upgrade to 1.19.8.
(after upgrading the control plane)
.lab[
.exercise[
- Download the configuration on each node, and upgrade kubelet:
```bash
for N in 1 2 3; do
ssh oldversion$N "
sudo apt install kubeadm=1.19.8-00 &&
ssh test$N "
sudo apt install kubeadm=1.16.6-00 &&
sudo kubeadm upgrade node &&
sudo apt install kubelet=1.19.8-00"
sudo apt install kubelet=1.16.6-00"
done
```
]
@@ -475,9 +475,9 @@ kubeadm should now agree to upgrade to 1.19.8.
## Checking what we've done
- All our nodes should now be updated to version 1.19.8
- All our nodes should now be updated to version 1.16.6
.lab[
.exercise[
- Check nodes versions:
```bash
@@ -492,13 +492,13 @@ class: extra-details
## Skipping versions
- This example worked because we went from 1.18 to 1.19
- This example worked because we went from 1.15 to 1.16
- If you are upgrading from e.g. 1.16, you will have to go through 1.17 first
- If you are upgrading from e.g. 1.14, you will have to go through 1.15 first
- This means upgrading kubeadm to 1.17.X, then using it to upgrade the cluster
- This means upgrading kubeadm to 1.15.X, then using it to upgrade the cluster
- Then upgrading kubeadm to 1.18.X, etc.
- Then upgrading kubeadm to 1.16.X, etc.
- **Make sure to read the release notes before upgrading!**

View File

@@ -204,7 +204,7 @@ class: extra-details
## Logging into the new cluster
.lab[
.exercise[
- Log into node `kuberouter1`
@@ -228,7 +228,7 @@ class: extra-details
- By default, kubelet gets the CNI configuration from `/etc/cni/net.d`
.lab[
.exercise[
- Check the content of `/etc/cni/net.d`
@@ -262,7 +262,7 @@ class: extra-details
(where `C` is our cluster number)
.lab[
.exercise[
- Edit the Compose file to set the Cluster CIDR:
```bash
@@ -298,7 +298,7 @@ class: extra-details
(where `A.B.C.D` is the public address of `kuberouter1`, running the control plane)
.lab[
.exercise[
- Edit the YAML file to set the API server address:
```bash
@@ -320,7 +320,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- This is similar to what we did for the `kubenet` cluster
.lab[
.exercise[
- Generate the kubeconfig file (replacing `X.X.X.X` with the address of `kuberouter1`):
```bash
@@ -338,7 +338,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- We need to copy that kubeconfig file to the other nodes
.lab[
.exercise[
- Copy `kubeconfig` to the other nodes:
```bash
@@ -359,7 +359,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- We need to pass `--network-plugin=cni`
.lab[
.exercise[
- Join the first node:
```bash
@@ -384,7 +384,7 @@ class: extra-details
(in `/etc/cni/net.d`)
.lab[
.exercise[
- Check the content of `/etc/cni/net.d`
@@ -400,7 +400,7 @@ class: extra-details
- Let's create a Deployment and expose it with a Service
.lab[
.exercise[
- Create a Deployment running a web server:
```bash
@@ -423,7 +423,7 @@ class: extra-details
## Checking that everything works
.lab[
.exercise[
- Get the ClusterIP address for the service:
```bash
@@ -449,7 +449,7 @@ class: extra-details
- What if we need to check that everything is working properly?
.lab[
.exercise[
- Check the IP addresses of our pods:
```bash
@@ -490,7 +490,7 @@ class: extra-details
## Trying `kubectl logs` / `kubectl exec`
.lab[
.exercise[
- Try to show the logs of a kube-router pod:
```bash

View File

@@ -384,7 +384,7 @@ We'll cover them just after!*
- We can create each Namespace, Deployment, and Service by hand, or...
.lab[
.exercise[
- We can deploy the app with a YAML manifest:
```bash
@@ -403,7 +403,7 @@ We'll cover them just after!*
- Since the `cluster.local` suffix can change, we'll use `x.y.svc`
.lab[
.exercise[
- Check that the app is up and running:
```bash
@@ -427,7 +427,7 @@ Here is the file that we will use, @@LINK[k8s/haproxy.cfg]:
## Creating the ConfigMap
.lab[
.exercise[
- Create a ConfigMap named `haproxy` and holding the configuration file:
```bash
@@ -455,7 +455,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
## Creating the Pod
.lab[
.exercise[
- Create the HAProxy Pod:
```bash
@@ -480,7 +480,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
(one request to `blue`, one request to `green`, one request to `blue`, etc.)
.lab[
.exercise[
- Send a few requests:
```bash
@@ -509,7 +509,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
## Creating the configmap
.lab[
.exercise[
- Our configmap will have a single key, `http.addr`:
```bash
@@ -539,7 +539,7 @@ We are going to use the following pod definition:
- The resource definition from the previous slide is in @@LINK[k8s/registry.yaml]
.lab[
.exercise[
- Create the registry pod:
```bash

View File

@@ -86,7 +86,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
- We'll use the provided YAML file
.lab[
.exercise[
- Create the stateful set and associated service:
```bash
@@ -177,7 +177,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
(pods will be replaced one by one)
.lab[
.exercise[
- Deploy a better Consul cluster:
```bash

View File

@@ -74,7 +74,7 @@
- Let's create the Custom Resource Definition for our Coffee resource
.lab[
.exercise[
- Load the CRD:
```bash
@@ -103,7 +103,7 @@ spec:
taste: strong
```
.lab[
.exercise[
- Create a few types of coffee beans:
```bash
@@ -118,7 +118,7 @@ spec:
- By default, `kubectl get` only shows name and age of custom resources
.lab[
.exercise[
- View the coffee beans that we just created:
```bash
@@ -195,7 +195,7 @@ There are many possibilities!
- Let's update our CRD using @@LINK[k8s/coffee-3.yaml]
.lab[
.exercise[
- Update the CRD:
```bash

View File

@@ -186,7 +186,7 @@ class: extra-details
.warning[If you want to use another name than `jean.doe`, update the YAML file!]
.lab[
.exercise[
- Create the global namespace for all users:
```bash
@@ -208,7 +208,7 @@ class: extra-details
(the token will be their password)
.lab[
.exercise[
- List the user's secrets:
```bash
@@ -228,7 +228,7 @@ class: extra-details
- Let's create a new context that will use that token to access the API
.lab[
.exercise[
- Add a new identity to our kubeconfig file:
```bash
@@ -254,7 +254,7 @@ class: extra-details
- Let's check that our access rights are set properly
.lab[
.exercise[
- Try to access any resource:
```bash
@@ -280,7 +280,7 @@ class: extra-details
(many people prefer cfssl, easyrsa, or other tools; that's fine too!)
.lab[
.exercise[
- Generate the key and certificate signing request:
```bash
@@ -313,7 +313,7 @@ The command above generates:
## Sending the CSR to Kubernetes
.lab[
.exercise[
- Generate and create the CSR resource:
```bash
@@ -344,7 +344,7 @@ The command above generates:
- For now, this is configured [through an experimental controller manager flag](https://github.com/kubernetes/kubernetes/issues/67324)
.lab[
.exercise[
- Edit the static pod definition for the controller manager:
```bash
@@ -366,7 +366,7 @@ The command above generates:
- Let's inspect the CSR, and if it is valid, approve it
.lab[
.exercise[
- Switch back to `cluster-admin`:
```bash
@@ -389,7 +389,7 @@ The command above generates:
## Obtaining the certificate
.lab[
.exercise[
- Switch back to the user's identity:
```bash
@@ -414,7 +414,7 @@ The command above generates:
## Using the certificate
.lab[
.exercise[
- Add the key and certificate to kubeconfig:
```bash

View File

@@ -83,7 +83,7 @@
- Let's start with the YAML file for the current `rng` resource
.lab[
.exercise[
- Dump the `rng` resource in YAML:
```bash
@@ -102,7 +102,7 @@
(It can't be that easy, right?)
.lab[
.exercise[
- Change `kind: Deployment` to `kind: DaemonSet`
@@ -169,7 +169,7 @@ We all knew this couldn't be that easy, right!
- The `--force` flag's actual name is `--validate=false`
.lab[
.exercise[
- Try to load our YAML file and ignore errors:
```bash
@@ -192,7 +192,7 @@ Wait ... Now, can it be *that* easy?
- Did we transform our `deployment` into a `daemonset`?
.lab[
.exercise[
- Look at the resources that we have now:
```bash
@@ -289,7 +289,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
- That set of pods is defined by the *selector* of the `rng` service
.lab[
.exercise[
- Check the *selector* in the `rng` service definition:
```bash
@@ -312,7 +312,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
- For instance, with `kubectl get`, `kubectl logs`, `kubectl delete` ... and more
.lab[
.exercise[
- Get the list of pods matching selector `app=rng`:
```bash
@@ -480,7 +480,7 @@ be any interruption.*
- `kubectl label` can use selectors itself
.lab[
.exercise[
- Add `active=yes` to all pods that have `app=rng`:
```bash
@@ -501,7 +501,7 @@ be any interruption.*
- the selector of the service (that's the one we want to change)
.lab[
.exercise[
- Update the service to add `active: yes` to its selector:
```bash
@@ -546,7 +546,7 @@ be any interruption.*
## Updating the service selector, take 2
.lab[
.exercise[
- Update the YAML manifest of the service
@@ -592,7 +592,7 @@ If we did everything correctly, the web UI shouldn't show any change.
## Removing a pod from the load balancer
.lab[
.exercise[
- In one window, check the logs of that pod:
```bash

View File

@@ -56,7 +56,7 @@
- The guest/admin account
.lab[
.exercise[
- Create all the dashboard resources, with the following command:
```bash
@@ -69,7 +69,7 @@
## Connecting to the dashboard
.lab[
.exercise[
- Check which port the dashboard is on:
```bash
@@ -81,7 +81,7 @@
You'll want the `3xxxx` port.
.lab[
.exercise[
- Connect to http://oneofournodes:3xxxx/
@@ -115,7 +115,7 @@ The dashboard will then ask you which authentication you want to use.
- Seriously, don't leave that thing running!
.lab[
.exercise[
- Remove what we just created:
```bash
@@ -160,7 +160,7 @@ The dashboard will then ask you which authentication you want to use.
(named `kubernetes-dashboard:cluster-admin`)
.lab[
.exercise[
- Create all the dashboard resources, with the following command:
```bash
@@ -177,7 +177,7 @@ The dashboard will then ask you which authentication you want to use.
- Kubernetes will automatically generate a token for that ServiceAccount
.lab[
.exercise[
- Display the token:
```bash
@@ -197,7 +197,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
## Connecting to the dashboard
.lab[
.exercise[
- Check which port the dashboard is on:
```bash
@@ -209,7 +209,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
You'll want the `3xxxx` port.
.lab[
.exercise[
- Connect to http://oneofournodes:3xxxx/

View File

@@ -1,157 +0,0 @@
# Our demo apps
- We are going to use a few demo apps for demos and labs
- Let's get acquainted with them before we dive in!
---
## The `color` app
- Image name: `jpetazzo/color`, `ghcr.io/jpetazzo/color`
- Available for linux/amd64, linux/arm64, linux/arm/v7 platforms
- HTTP server listening on port 80
- Serves a web page with a single line of text
- The background of the page is derived from the hostname
(e.g. if the hostname is `blue-xyz-123`, the background is `blue`)
- The web page is "curl-friendly"
(it contains `\r` characters to hide HTML tags and declutter the output)
---
## The `color` app in action
- Create a Deployment called `blue` using image `jpetazzo/color`
- Expose that Deployment with a Service
- Connect to the Service with a web browser
- Connect to the Service with `curl`
---
## Dockercoins
- App with 5 microservices:
- `worker` (runs an infinite loop connecting to the other services)
- `rng` (web service; generates random numbers)
- `hasher` (web service; computes SHA sums)
- `redis` (holds a single counter incremented by the `worker` at each loop)
- `webui` (web app; displays a graph showing the rate of increase of the counter)
- Uses a mix of Node, Python, Ruby
- Very simple components (approx. 50 lines of code for the most complicated one)
---
class: pic
![Dockercoins application diagram](images/dockercoins-diagram.png)
---
## Deploying Dockercoins
- Pre-built images available as `dockercoins/<component>:v0.1`
(e.g. `dockercoins/worker:v0.1`)
- Containers "discover" each other through DNS
(e.g. worker connects to `http://hasher/`)
- A Kubernetes YAML manifest is available in *the* repo
---
## The repository
- When we refer to "the" repository, it means:
https://github.com/jpetazzo/container.training
- It hosts slides, demo apps, deployment scripts...
- All the sample commands, labs, etc. will assume that it's available in:
`~/container.training`
- Let's clone the repo in our environment!
---
## Cloning the repo
.lab[
- There is a convenient shortcut to clone the repository:
```bash
git clone https://container.training
```
]
While the repository clones, fork it, star it ~~subscribe and hit the bell!~~
---
## Running Dockercoins
- All the Kubernetes manifests are in the `k8s` subdirectory
- This directory has a `dockercoins.yaml` manifest
.lab[
- Deploy Dockercoins:
```bash
kubectl apply -f ~/container.training/k8s/dockercoins.yaml
```
]
- The `webui` is exposed with a `NodePort` service
- Connect to it (through the `NodePort` or `port-forward`)
- Note, it might take a minute for the worker to start
---
## Details
- If the `worker` Deployment is scaled up, the graph should go up
- The `rng` Service is meant to be a bottleneck
(capping the graph to 10/second until `rng` is scaled up)
- There is artificial latency in the different services
(so that the app doesn't consume CPU/RAM/network)
---
## More colors
- The repository also contains a `rainbow.yaml` manifest
- It creates three namespaces (`blue`, `green`, `red`)
- In each namespace, there is an instance of the `color` app
(we can use that later to do *literal* blue-green deployment!)

View File

@@ -52,7 +52,7 @@
- Let's make sure we have everything we need first
.lab[
.exercise[
- Log into the `dmuc1` machine
@@ -101,7 +101,7 @@
## Starting API server
.lab[
.exercise[
- Try to start the API server:
```bash
@@ -118,7 +118,7 @@ it cannot start without it.
## Starting etcd
.lab[
.exercise[
- Try to start etcd:
```bash
@@ -144,7 +144,7 @@ serving insecure client requests on 127.0.0.1:2379, this is strongly discouraged
- That argument should be a comma-separated list of URLs
.lab[
.exercise[
- Start API server:
```bash
@@ -161,7 +161,7 @@ Success!
- Let's try a few "classic" commands
.lab[
.exercise[
- List nodes:
```bash
@@ -201,7 +201,7 @@ class: extra-details
- Let's run a web server!
.lab[
.exercise[
- Create a Deployment with NGINX:
```bash
@@ -216,7 +216,7 @@ Success?
## Checking our Deployment status
.lab[
.exercise[
- Look at pods, deployments, etc.:
```bash
@@ -249,7 +249,7 @@ And, there is no ReplicaSet, and no Pod.
## Starting the controller manager
.lab[
.exercise[
- Try to start the controller manager:
```bash
@@ -289,7 +289,7 @@ Using the inClusterConfig. This might not work.
## Starting the controller manager (for real)
.lab[
.exercise[
- Start the controller manager:
```bash
@@ -304,7 +304,7 @@ Success!
## Checking our Deployment status
.lab[
.exercise[
- Check all our resources again:
```bash
@@ -371,7 +371,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
- The ReplicaSet controller will no longer create pods referencing the (missing) token
.lab[
.exercise[
- Programmatically change the `default` ServiceAccount:
```bash
@@ -402,7 +402,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
- Once we patch the default service account, the ReplicaSet can create a Pod
.lab[
.exercise[
- Check that we now have a pod:
```bash
@@ -437,7 +437,7 @@ If we're impatient, we can restart the controller manager.
- We're going to use Docker (because it's the default option)
.lab[
.exercise[
- Start the Docker Engine:
```bash
@@ -479,7 +479,7 @@ docker run alpine echo hello world
- Or we can generate the file with `kubectl`
.lab[
.exercise[
- Create the file `~/.kube/config` with `kubectl`:
```bash
@@ -519,7 +519,7 @@ clusters:
## Starting kubelet
.lab[
.exercise[
- Start kubelet with that kubeconfig file:
```bash
@@ -536,7 +536,7 @@ Success!
- Let's check that our node registered correctly
.lab[
.exercise[
- List the nodes in our cluster:
```bash
@@ -555,7 +555,7 @@ Its name will be its hostname (it should be `dmuc1`).
- Let's check if our pod is running
.lab[
.exercise[
- List all resources:
```bash
@@ -594,7 +594,7 @@ Which is normal: it needs to be *scheduled*.
- Just like for controller manager, we can use `--kubeconfig` or `--master`
.lab[
.exercise[
- Start the scheduler:
```bash
@@ -613,7 +613,7 @@ Which is normal: it needs to be *scheduled*.
- Then it will be `Running`
.lab[
.exercise[
- Check pod status:
```bash
@@ -654,7 +654,7 @@ class: extra-details
- Let's check that our pod correctly runs NGINX
.lab[
.exercise[
- Check our pod's IP address:
```bash
@@ -676,7 +676,7 @@ We should see the `Welcome to nginx!` page.
- We can now create a Service associated with this Deployment
.lab[
.exercise[
- Expose the Deployment's port 80:
```bash
@@ -705,7 +705,7 @@ This won't work. We need kube-proxy to enable internal communication.
(although that will be deprecated in the future)
.lab[
.exercise[
- Start kube-proxy:
```bash
@@ -720,7 +720,7 @@ This won't work. We need kube-proxy to enable internal communication.
- Now that kube-proxy is running, we should be able to connect
.lab[
.exercise[
- Check the Service's ClusterIP again, and retry connecting:
```bash
@@ -742,7 +742,7 @@ class: extra-details
- When a Service is created or updated, kube-proxy creates iptables rules
.lab[
.exercise[
- Check out the `OUTPUT` chain in the `nat` table:
```bash
@@ -766,7 +766,7 @@ class: extra-details
- The last command showed a chain named `KUBE-SVC-...` corresponding to our service
.lab[
.exercise[
- Check that `KUBE-SVC-...` chain:
```bash

View File

@@ -28,7 +28,7 @@
- ... But losing a node = losing the volumes on that node!
.lab[
.exercise[
- Install the local path storage provisioner:
```bash
@@ -49,7 +49,7 @@
- Or we need to tag a StorageClass to be used as the default one
.lab[
.exercise[
- List StorageClasses:
```bash
@@ -68,7 +68,7 @@ We should see the `local-path` StorageClass.
`storageclass.kubernetes.io/is-default-class: true`
.lab[
.exercise[
- Tag the StorageClass so that it's the default one:
```bash
@@ -99,7 +99,7 @@ Now, the StorageClass should have `(default)` next to its name.
- All these resources are grouped in a convenient YAML file
.lab[
.exercise[
- Install the operator:
```bash
@@ -114,7 +114,7 @@ Now, the StorageClass should have `(default)` next to its name.
- Let's see which CRDs were created
.lab[
.exercise[
- List all CRDs:
```bash
@@ -135,7 +135,7 @@ This operator supports ElasticSearch, but also Kibana and APM. Cool!
- We need to create that namespace
.lab[
.exercise[
- Create the `eck-demo` namespace:
```bash
@@ -180,7 +180,7 @@ ServiceAccount is located.
- whether to use TLS or not
- etc.
.lab[
.exercise[
- Create our ElasticSearch cluster:
```bash
@@ -197,7 +197,7 @@ ServiceAccount is located.
- It will report our cluster status through the CRD
.lab[
.exercise[
- Check the logs of the operator:
```bash
@@ -231,7 +231,7 @@ ServiceAccount is located.
- But let's check at least if ElasticSearch is up!
.lab[
.exercise[
- Get the ClusterIP of our ES instance:
```bash
@@ -255,7 +255,7 @@ We get an authentication error. Our cluster is protected!
- It generates a random password and stores it in a Secret
.lab[
.exercise[
- Extract the password:
```bash
@@ -280,7 +280,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- We'll deploy a filebeat DaemonSet to collect node logs
.lab[
.exercise[
- Deploy filebeat:
```bash
@@ -314,7 +314,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- Let's give it a try!
.lab[
.exercise[
- Deploy a Kibana instance:
```bash
@@ -345,7 +345,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- It's using the same user/password as ElasticSearch
.lab[
.exercise[
- Get the NodePort allocated to Kibana:
```bash
@@ -364,7 +364,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
After the Kibana UI loads, we need to click around a bit
.lab[
.exercise[
- Pick "explore on my own"
@@ -404,7 +404,7 @@ After the Kibana UI loads, we need to click around a bit
- We prepared yet another manifest for that!
.lab[
.exercise[
- Deploy Cerebro:
```bash
@@ -428,7 +428,7 @@ After the Kibana UI loads, we need to click around a bit
- Let's change that!
.lab[
.exercise[
- Edit the ElasticSearch cluster manifest:
```bash

View File

@@ -41,7 +41,7 @@
- When we use `kubectl describe` on an object, `kubectl` retrieves the associated events
.lab[
.exercise[
- See the API requests happening when we use `kubectl describe`:
```bash
@@ -82,7 +82,7 @@
- Let's create an event related to a Node, based on @@LINK[k8s/event-node.yaml]
.lab[
.exercise[
- Edit `k8s/event-node.yaml`
@@ -100,7 +100,7 @@
- Let's create an event related to a Pod, based on @@LINK[k8s/event-pod.yaml]
.lab[
.exercise[
- Create a pod

View File

@@ -77,18 +77,17 @@
- Create a new branch in your fork; e.g. `prod`
(e.g. with "branch" dropdown through the GitHub web UI)
(e.g. by adding a line in the README through the GitHub web UI)
- This is the branch that we are going to use for deployment
---
## Setting up Flux with kustomize
## Setting up Flux
- Clone the Flux repository:
```bash
```
git clone https://github.com/fluxcd/flux
cd flux
```
- Edit `deploy/flux-deployment.yaml`
@@ -100,27 +99,8 @@
```
- Apply all the YAML:
```bash
kubectl apply -k deploy/
```
---
## Setting up Flux with Helm
- Add Flux helm repo:
```bash
helm repo add fluxcd https://charts.fluxcd.io
```
- Install Flux:
```bash
kubectl create namespace flux
helm upgrade --install flux \
--set git.url=git@github.com:your-git-username/kubercoins \
--set git.branch=prod \
--namespace flux \
fluxcd/flux
kubectl apply -f deploy/
```
---
@@ -130,8 +110,8 @@
- When it starts, Flux generates an SSH key
- Display that key:
```bash
kubectl -n flux logs deployment/flux | grep identity.pub | cut -d '"' -f2
```
kubectl logs deployment/flux | grep identity
```
- Then add that key to the repository, giving it **write** access
@@ -177,14 +157,14 @@
## Setting up Gitkube
- Install the CLI:
```bash
```
sudo curl -L -o /usr/local/bin/gitkube \
https://github.com/hasura/gitkube/releases/download/v0.2.1/gitkube_linux_amd64
sudo chmod +x /usr/local/bin/gitkube
```
- Install Gitkube on the cluster:
```bash
```
gitkube install --expose ClusterIP
```
@@ -216,20 +196,20 @@
## Pushing to our remote
- Get the `gitkubed` IP address:
```bash
```
kubectl -n kube-system get svc gitkubed
IP=$(kubectl -n kube-system get svc gitkubed -o json |
jq -r .spec.clusterIP)
```
- Get ourselves a sample repository with resource YAML files:
```bash
```
git clone git://github.com/jpetazzo/kubercoins
cd kubercoins
```
- Add the remote and push to it:
```bash
```
git remote add k8s ssh://default-example@$IP/~/git/default-example
git push k8s master
```

View File

@@ -79,9 +79,9 @@
## Creating a new namespace
- This will make sure that we don't collide / conflict with previous labs and exercises
- This will make sure that we don't collide / conflict with previous exercises
.lab[
.exercise[
- Create the yellow namespace:
```bash
@@ -103,7 +103,7 @@
https://github.com/jpetazzo/kubercoins
.lab[
.exercise[
- Clone that repository:
```bash
@@ -152,7 +152,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's add the liveness probe, then deploy DockerCoins
.lab[
.exercise[
- Edit `rng-deployment.yaml` and add the liveness probe
```bash
@@ -180,7 +180,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's generate traffic and see what happens!
.lab[
.exercise[
- Get the ClusterIP address of the rng service:
```bash
@@ -195,7 +195,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Each command below will show us what's happening on a different level
.lab[
.exercise[
- In one window, monitor cluster events:
```bash
@@ -220,7 +220,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's use `ab` to send concurrent requests to rng
.lab[
.exercise[
- In yet another window, generate traffic:
```bash

View File

@@ -1,18 +1,16 @@
# Healthchecks
- Containers can have *healthchecks*
- Kubernetes provides two kinds of healthchecks: liveness and readiness
- There are three kinds of healthchecks, corresponding to very different use-cases:
- Healthchecks are *probes* that apply to *containers* (not to pods)
- liveness = detect when a container is "dead" and needs to be restarted
- Each container can have two (optional) probes:
- readiness = detect when a container is ready to serve traffic
- liveness = is this container dead or alive?
- startup = detect if a container has finished booting
- readiness = is this container ready to serve traffic?
- These healthchecks are optional (we can use none, all, or some of them)
- Different probes are available (HTTP request, TCP connection, program execution)
- Different probes are available (HTTP, TCP, program execution)
- Let's see the difference and how to use them!
@@ -20,13 +18,11 @@
## Liveness probe
*This container is dead, we don't know how to fix it, other than restarting it.*
- Indicates if the container is dead or alive
- A dead container cannot come back to life
- If the liveness probe fails, the container is killed (destroyed)
- If the liveness probe fails, the container is killed
(to make really sure that it's really dead; no zombies or undeads!)
@@ -54,31 +50,9 @@
---
## Readiness probe (1)
## Readiness probe
*Make sure that a container is ready before continuing a rolling update.*
- Indicates if the container is ready to handle traffic
- When doing a rolling update, the Deployment controller waits for Pods to be ready
(a Pod is ready when all the containers in the Pod are ready)
- Improves reliability and safety of rolling updates:
- don't roll out a broken version (that doesn't pass readiness checks)
- don't lose processing capacity during a rolling update
---
## Readiness probe (2)
*Temporarily remove a container (overloaded or otherwise) from a Service load balancer.*
- A container can mark itself "not ready" temporarily
(e.g. if it's overloaded or needs to reload/restart/garbage collect...)
- Indicates if the container is ready to serve traffic
- If a container becomes "unready" it might be ready again soon
@@ -106,9 +80,9 @@
- runtime is busy doing garbage collection or initial data load
- To redirect new connections to other Pods
- For processes that take a long time to start
(e.g. fail the readiness probe when the Pod's load is too high)
(more on that later)
---
@@ -146,35 +120,27 @@
---
class: extra-details
## Startup probe
*The container takes too long to start, and is killed by the liveness probe!*
- Kubernetes 1.16 introduces a third type of probe: `startupProbe`
- By default, probes (including liveness) start immediately
(it is in `alpha` in Kubernetes 1.16)
- With the default probe interval and failure threshold:
- It can be used to indicate "container not ready *yet*"
*a container must respond in less than 30 seconds, or it will be killed!*
- process is still starting
- There are two ways to avoid that:
- loading external data, priming caches
- set `initialDelaySeconds` (a fixed, rigid delay)
- Before Kubernetes 1.16, we had to use the `initialDelaySeconds` parameter
- use a `startupProbe`
(available for both liveness and readiness probes)
- Kubernetes will run only the startup probe, and when it succeeds, run the other probes
- `initialDelaySeconds` is a rigid delay (always wait X before running probes)
---
## When to use a startup probe
- For containers that take a long time to start
(more than 30 seconds)
- Especially if that time can vary a lot
(e.g. fast in dev, slow in prod, or the other way around)
- `startupProbe` works better when a container start time can vary a lot
---
@@ -224,16 +190,17 @@ Here is a pod template for the `rng` web service of the DockerCoins app:
apiVersion: v1
kind: Pod
metadata:
name: healthy-app
name: rng-with-liveness
spec:
containers:
- name: myapp
image: myregistry.io/myapp:v1.0
- name: rng
image: dockercoins/rng:v0.1
livenessProbe:
httpGet:
path: /health
path: /
port: 80
periodSeconds: 5
initialDelaySeconds: 10
periodSeconds: 1
```
If the backend serves an error, or takes longer than 1s, 3 times in a row, it gets killed.
@@ -300,7 +267,7 @@ If the Redis process becomes unresponsive, it will be killed.
(In that context, worker = process that doesn't accept connections)
- Readiness is useful mostly for rolling updates
- Readiness isn't useful
(because workers aren't backends for a service)

View File

@@ -48,7 +48,7 @@
- If you haven't done it before, you need to add the repo for that chart
.lab[
.exercise[
- Add the repo that holds the chart for the OWASP Juice Shop:
```bash
@@ -63,7 +63,7 @@
- We can use `helm pull` to download a chart from a repo
.lab[
.exercise[
- Download the tarball for `juice/juice-shop`:
```bash
@@ -85,7 +85,7 @@
- Let's look at the files and directories in the `juice-shop` chart
.lab[
.exercise[
- Display the tree structure of the chart we just downloaded:
```bash
@@ -108,7 +108,7 @@ We see the components mentioned above: `Chart.yaml`, `templates/`, `values.yaml`
(using the standard Go template library)
.lab[
.exercise[
- Look at the template file for the Service resource:
```bash

View File

@@ -6,7 +6,7 @@
(Resource names, service types, number of replicas...)
.lab[
.exercise[
- Create a sample chart:
```bash
@@ -27,7 +27,7 @@
- There is a convenient `dockercoins.yml` in the repo
.lab[
.exercise[
- Copy the YAML file to the `templates` subdirectory in the chart:
```bash
@@ -50,7 +50,7 @@
(as surprising as it might seem!)
.lab[
.exercise[
- Let's try to install the chart:
```
@@ -79,7 +79,7 @@ kind: Service, namespace: default, name: hasher
- we can also tell Helm to use a different namespace
.lab[
.exercise[
- Create a new namespace:
```bash
@@ -99,7 +99,7 @@ kind: Service, namespace: default, name: hasher
- Let's try to see the release that we just deployed
.lab[
.exercise[
- List Helm releases:
```bash
@@ -118,7 +118,7 @@ We have to specify its namespace (or switch to that namespace).
- Try again, with the correct namespace
.lab[
.exercise[
- List Helm releases in `helmcoins`:
```bash
@@ -133,7 +133,7 @@ We have to specify its namespace (or switch to that namespace).
- We can check the worker logs, or the web UI
.lab[
.exercise[
- Retrieve the NodePort number of the web UI:
```bash
@@ -181,7 +181,7 @@ have details about recommended annotations and labels.
- Let's remove that chart before moving on
.lab[
.exercise[
- Delete the release (don't forget to specify the namespace):
```bash

View File

@@ -24,7 +24,7 @@
- This will give us a basic chart that we will customize
.lab[
.exercise[
- Create a basic chart:
```bash
@@ -81,7 +81,7 @@ This creates a basic chart in the directory `helmcoins`.
- Exception: for redis, we want to use the official image redis:latest
.lab[
.exercise[
- Write YAML files for the 5 components, with the following model:
```yaml
@@ -98,7 +98,7 @@ This creates a basic chart in the directory `helmcoins`.
- For convenience, let's work in a separate namespace
.lab[
.exercise[
- Create a new namespace (if it doesn't already exist):
```bash
@@ -126,7 +126,7 @@ This creates a basic chart in the directory `helmcoins`.
helm upgrade COMPONENT-NAME CHART-DIRECTORY --install
```
.lab[
.exercise[
- Install the 5 components of DockerCoins:
```bash
@@ -165,7 +165,7 @@ class: extra-details
- Let's see if DockerCoins is working!
.lab[
.exercise[
- Check the logs of the worker:
```bash
@@ -187,7 +187,7 @@ There are *many* issues to fix!
- It looks like our images can't be found
.lab[
.exercise[
- Use `kubectl describe` on any of the pods in error
@@ -205,7 +205,7 @@ There are *many* issues to fix!
(and try to find the one generating the Deployment resource)
.lab[
.exercise[
- Show the structure of the `helmcoins` chart that Helm generated:
```bash
@@ -228,7 +228,7 @@ There are *many* issues to fix!
- Let's look for `AppVersion` there!
.lab[
.exercise[
- Check the file `helmcoins/Chart.yaml`
@@ -250,7 +250,7 @@ There are *many* issues to fix!
(to match what we've specified in our values YAML files)
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`
@@ -266,7 +266,7 @@ There are *many* issues to fix!
- To use the new template, we need to *upgrade* the release to use that chart
.lab[
.exercise[
- Upgrade all components:
```bash
@@ -306,7 +306,7 @@ We should see all pods "Running". But ... not all of them are READY.
(`kubectl describe` will retrieve the events related to the object)
.lab[
.exercise[
- Check the events for the redis pods:
```bash
@@ -345,7 +345,7 @@ It's failing both its liveness and readiness probes!
`{{ end }}` at the end
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`
@@ -386,7 +386,7 @@ This is what the new YAML should look like (added lines in yellow):
- We need to upgrade all the services again to use the new chart
.lab[
.exercise[
- Upgrade all components:
```bash
@@ -410,7 +410,7 @@ Everything should now be running!
- Is this working now?
.lab[
.exercise[
- Let's check the logs of the worker:
```bash
@@ -429,7 +429,7 @@ Typically, that error means that the `redis` service doesn't exist.
- What about the services created by our chart?
.lab[
.exercise[
- Check the list of services:
```bash
@@ -452,7 +452,7 @@ We need to change that!
- `include` indicates a *template block* defined somewhere else
.lab[
.exercise[
- Find where that `fullname` thing is defined:
```bash
@@ -473,7 +473,7 @@ We can look at the definition, but it's fairly complex ...
- The name of the release is available as `{{ .Release.Name }}`
.lab[
.exercise[
- Edit `helmcoins/templates/service.yaml`
@@ -528,7 +528,7 @@ We can look at the definition, but it's fairly complex ...
- Let's add a `service.port` value to the redis release
.lab[
.exercise[
- Edit `redis.yaml` to add:
```yaml
@@ -563,7 +563,7 @@ We can look at the definition, but it's fairly complex ...
## Changing the deployment template
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`

View File

@@ -51,7 +51,7 @@
- First, let's edit `Chart.yaml`
.lab[
.exercise[
- In `Chart.yaml`, fill the `dependencies` section:
```yaml
@@ -93,7 +93,7 @@ use Bitnami's Redis chart.
- After adding the dependency, we ask Helm to pin and download it
.lab[
.exercise[
- Ask Helm:
```bash
@@ -262,7 +262,7 @@ class: extra-details
## Embedding a dependency
.lab[
.exercise[
- Decompress the chart:
```yaml

View File

@@ -203,7 +203,7 @@ class: extra-details
- If the `helm` CLI is not installed in your environment, install it
.lab[
.exercise[
- Check if `helm` is installed:
```bash
@@ -232,7 +232,7 @@ class: extra-details
- They can be managed (installed, upgraded...) with the `helm` CLI
.lab[
.exercise[
- Deploy Tiller:
```bash
@@ -258,7 +258,7 @@ class: extra-details
- In a more realistic deployment, you might create per-user or per-team
service accounts, roles, and role bindings
.lab[
.exercise[
- Grant `cluster-admin` role to `kube-system:default` service account:
```bash
@@ -329,7 +329,7 @@ class: extra-details
- We can use `helm search hub <keyword>`
.lab[
.exercise[
- Look for the OWASP Juice Shop app:
```bash
@@ -351,7 +351,7 @@ Then go to → https://artifacthub.io/packages/helm/seccurecodebox/juice-shop
- We can also use the Artifact Hub search feature
.lab[
.exercise[
- Go to https://artifacthub.io/
@@ -367,7 +367,7 @@ Then go to → https://artifacthub.io/packages/helm/seccurecodebox/juice-shop
- Click on the "Install" button, it will show instructions
.lab[
.exercise[
- First, add the repository for that chart:
```bash
@@ -393,7 +393,7 @@ Note: it is also possible to install directly a chart, with `--repo https://...`
- We can also use `--generate-name` to ask Helm to generate a name for us
.lab[
.exercise[
- List the releases:
```bash
@@ -433,7 +433,7 @@ class: extra-details
- We can use a selector to see these resources
.lab[
.exercise[
- List all the resources created by this release:
```bash
@@ -472,7 +472,7 @@ It is defined in that chart. In other words, not all charts will provide this la
- We can inspect a chart with `helm show` or `helm inspect`
.lab[
.exercise[
- Look at the README for the app:
```bash
@@ -500,7 +500,7 @@ The `readme` may or may not have (accurate) explanations for the values.
- We are going to update `my-juice-shop` to change the type of the service
.lab[
.exercise[
- Update `my-juice-shop`:
```bash
@@ -523,7 +523,7 @@ All unspecified values will take the default values defined in the chart.
- Let's check the app that we just installed
.lab[
.exercise[
- Check the node port allocated to the service:
```bash

View File

@@ -16,7 +16,7 @@
- If you haven't done it before, you need to add the repo for that chart
.lab[
.exercise[
- Add the repo that holds the chart for the OWASP Juice Shop:
```bash
@@ -33,7 +33,7 @@
- Let's use the `juice/juice-shop` chart as an example
.lab[
.exercise[
- Install a release called `orange` with the chart `juice/juice-shop`:
```bash
@@ -53,7 +53,7 @@
- Helm stores successive revisions of each release
.lab[
.exercise[
- View the history for that release:
```bash
@@ -76,7 +76,7 @@ Where does that come from?
- ConfigMaps, Secrets?
.lab[
.exercise[
- Look for ConfigMaps and Secrets:
```bash
@@ -95,7 +95,7 @@ We should see a number of secrets with TYPE `helm.sh/release.v1`.
- Let's find out what is in these Helm secrets
.lab[
.exercise[
- Examine the secret corresponding to the second release of `orange`:
```bash
@@ -113,7 +113,7 @@ There is a key named `release`.
- Let's see what's in this `release` thing!
.lab[
.exercise[
- Dump the secret:
```bash
@@ -131,7 +131,7 @@ Secrets are encoded in base64. We need to decode that!
- We can pipe the output through `base64 -d` or use go-template's `base64decode`
.lab[
.exercise[
- Decode the secret:
```bash
@@ -155,7 +155,7 @@ Let's try one more round of decoding!
- Just add one more base64 decode filter
.lab[
.exercise[
- Decode it twice:
```bash
@@ -175,7 +175,7 @@ Let's try one more round of decoding!
- We could use `file` to figure out the data type
.lab[
.exercise[
- Pipe the decoded release through `file -`:
```bash
@@ -196,7 +196,7 @@ Gzipped data! It can be decoded with `gunzip -c`.
- Let's uncompress the data and save it to a file
.lab[
.exercise[
- Rerun the previous command, but with `| gunzip -c > release-info` :
```bash

View File

@@ -119,7 +119,7 @@
- Let's try to install a couple releases with that schema!
.lab[
.exercise[
- Try an invalid `pullPolicy`:
```bash
@@ -147,7 +147,7 @@
- We can fix that with `"additionalProperties": false`
.lab[
.exercise[
- Edit `values.schema.json` to add `"additionalProperties": false`
```json
@@ -165,7 +165,7 @@
## Testing with unknown properties
.lab[
.exercise[
- Try to pass an extra property:
```bash

View File

@@ -76,7 +76,7 @@
(it is a web server that will use 1s of CPU for each HTTP request)
.lab[
.exercise[
- Deploy the web server:
```bash
@@ -101,7 +101,7 @@
- Let's start a bunch of commands to watch what is happening
.lab[
.exercise[
- Monitor pod CPU usage:
```bash
@@ -143,7 +143,7 @@
- We will use `ab` (Apache Bench) to send traffic
.lab[
.exercise[
- Send a lot of requests to the service, with a concurrency level of 3:
```bash
@@ -170,7 +170,7 @@ The CPU utilization should increase to 100%.
- There is a helper command to do that for us: `kubectl autoscale`
.lab[
.exercise[
- Create the HPA policy for the `busyhttp` deployment:
```bash
@@ -209,7 +209,7 @@ This can also be set with `--cpu-percent=`.
- Since our server can use up to 1 core, let's request 1 core
.lab[
.exercise[
- Edit the Deployment definition:
```bash
@@ -287,7 +287,7 @@ This can also be set with `--cpu-percent=`.
- Since `busyhttp` uses CPU cycles, let's stop it before moving on
.lab[
.exercise[
- Delete the `busyhttp` Deployment:
```bash

View File

@@ -62,7 +62,7 @@
- That's the easy part!
.lab[
.exercise[
- Create a new namespace and switch to it:
```bash
@@ -90,7 +90,7 @@
(by about 100ms per `worker` Pod after the 3rd worker)
.lab[
.exercise[
- Check the `webui` port and open it in your browser:
```bash
@@ -114,7 +114,7 @@
- It monitors exactly one URL, that must be passed as a command-line argument
.lab[
.exercise[
- Deploy `httplat`:
```bash
@@ -148,7 +148,7 @@ class: extra-details
(because we can configure it dynamically with annotations)
.lab[
.exercise[
- If it's not installed yet on the cluster, install Prometheus:
```bash
@@ -169,7 +169,7 @@ class: extra-details
- We can use annotations to tell Prometheus to collect the metrics
.lab[
.exercise[
- Tell Prometheus to "scrape" our latency exporter:
```bash
@@ -191,7 +191,7 @@ You'll need to instruct it to scrape http://httplat.customscaling.svc:9080/metri
- Before moving on, confirm that Prometheus has our metrics
.lab[
.exercise[
- Connect to Prometheus
@@ -407,7 +407,7 @@ Putting together @@LINK[k8s/hpa-v2-pa-httplat.yaml]:
- Of course, it won't quite work yet (we're missing the *Prometheus adapter*)
.lab[
.exercise[
- Create the HorizontalPodAutoscaler:
```bash
@@ -469,7 +469,7 @@ no custom metrics API (custom.metrics.k8s.io) registered
- There is ~~an app~~ a Helm chart for that
.lab[
.exercise[
- Install the Prometheus adapter:
```bash
@@ -534,7 +534,7 @@ Here is the rule that we need to add to the configuration:
## Editing the adapter's configuration
.lab[
.exercise[
- Edit the adapter's ConfigMap:
```bash

View File

@@ -1,181 +0,0 @@
## Optimizing request flow
- With most ingress controllers, requests follow this path:
HTTP client → load balancer → NodePort → ingress controller Pod → app Pod
- Sometimes, some of these components can be on the same machine
(e.g. ingress controller Pod and app Pod)
- But they can also be on different machines
(each arrow = a potential hop)
- This could add some unwanted latency!
(See following diagrams)
---
class: pic
![](images/kubernetes-services/61-ING.png)
---
class: pic
![](images/kubernetes-services/62-ING-path.png)
---
## External traffic policy
- The Service manifest has a field `spec.externalTrafficPolicy`
- Possible values are:
- `Cluster` (default) - load balance connections to all pods
- `Local` - only send connections to local pods (on the same node)
- When the policy is set to `Local`, we avoid one hop:
HTTP client → load balancer → NodePort .red[**→**] ingress controller Pod → app Pod
(See diagram on next slide)
---
class: pic
![](images/kubernetes-services/63-ING-policy.png)
---
## What if there is no Pod?
- If a connection for a Service arrives on a Node through a NodePort...
- ...And that Node doesn't host a Pod matching the selector of that Service...
(i.e. there is no local Pod)
- ...Then the connection is refused
- This can be detected from outside (by the external load balancer)
- The external load balancer won't send connections to these nodes
(See diagram on next slide)
---
class: pic
![](images/kubernetes-services/64-ING-nolocal.png)
---
class: extra-details
## Internal traffic policy
- Since Kubernetes 1.21, there is also `spec.internalTrafficPolicy`
- It works similarly but for internal traffic
- It's an *alpha* feature
(not available by default; needs special steps to be enabled on the control plane)
- See the [documentation] for more details
[documentation]: https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/
---
## Other ways to save hops
- Run the ingress controller as a DaemonSet, using port 80 on the nodes:
HTTP client → load balancer → ingress controller on Node port 80 → app Pod
- Then simplify further by setting a set of DNS records pointing to the nodes:
HTTP client → ingress controller on Node port 80 → app Pod
- Or run a combined load balancer / ingress controller at the edge of the cluster:
HTTP client → edge ingress controller → app Pod
---
## Source IP address
- Obtaining the IP address of the HTTP client (from the app Pod) can be tricky!
- We should consider (at least) two steps:
- obtaining the IP address of the HTTP client (from the ingress controller)
- passing that IP address from the ingress controller to the HTTP client
- The second step is usually done by injecting an HTTP header
(typically `x-forwarded-for`)
- Most ingress controllers do that out of the box
- But how does the ingress controller obtain the IP address of the HTTP client? 🤔
---
## Scenario 1, direct connection
- If the HTTP client connects directly to the ingress controller: easy!
- e.g. when running a combined load balancer / ingress controller
- or when running the ingress controller as a Daemon Set directly on port 80
---
## Scenario 2, external load balancer
- Most external load balancers running in TCP mode don't expose client addresses
(HTTP client connects to load balancer; load balancer connects to ingress controller)
- The ingress controller will "see" the IP address of the load balancer
(instead of the IP address of the client)
- Many external load balancers support the [Proxy Protocol]
- This enables the ingress controller to "see" the IP address of the HTTP client
- It needs to be enabled on both ends (ingress controller and load balancer)
[Proxy Protocol]: https://www.haproxy.com/blog/haproxy/proxy-protocol/
---
## Scenario 3, leveraging `externalTrafficPolicy`
- In some cases, the external load balancer will preserve the HTTP client address
- It is then possible to set `externalTrafficPolicy` to `Local`
- The ingress controller will then "see" the HTTP client address
- If `externalTrafficPolicy` is set to `Cluster`:
- sometimes the client address will be visible
- when bouncing the connection to another node, the address might be changed
- This is a big "it depends!"
- Bottom line: rely on the two other techniques instead?

View File

@@ -85,7 +85,7 @@ class: extra-details
- Let's set it now
.lab[
.exercise[
- Set the `DOMAIN` environment variable:
```bash
@@ -120,7 +120,7 @@ class: extra-details
- Thanks to `openssl`, generating a self-signed cert is just one command away!
.lab[
.exercise[
- Generate a key and certificate:
```bash
@@ -175,7 +175,7 @@ class: extra-details
- Let's use a volume to get easy access to the generated key and certificate
.lab[
.exercise[
- Obtain a certificate from Let's Encrypt:
```bash
@@ -203,7 +203,7 @@ Remove `--test-cert` to obtain a *real* certificate.
- they are owned by `root`
.lab[
.exercise[
- Grant ourselves permissions on these files:
```bash
@@ -265,7 +265,7 @@ Remove `--test-cert` to obtain a *real* certificate.
- However, the Endpoints needs to be adapted to put the current node's address
.lab[
.exercise[
- Edit `~/containers.training/k8s/certbot.yaml`
@@ -286,7 +286,7 @@ Remove `--test-cert` to obtain a *real* certificate.
(i.e. 8000)
.lab[
.exercise[
- Run `certbot`:
```bash
@@ -312,7 +312,7 @@ Remove `--test-cert` to get a production certificate.
(and owned by root)
.lab[
.exercise[
- Grant ourselves permissions on these files:
```bash
@@ -338,7 +338,7 @@ Remove `--test-cert` to get a production certificate.
- We can create a Secret to hold them
.lab[
.exercise[
- Create the Secret:
```bash
@@ -402,7 +402,7 @@ class: extra-details
## Using the certificate
.lab[
.exercise[
- Add the `tls` section to an existing Ingress

View File

@@ -37,19 +37,18 @@
- Service with `type: LoadBalancer`
- requires a particular controller (e.g. CCM, MetalLB)
- costs a bit of money for each service
- if TLS is desired, it has to be implemented by the app
- works for any TCP protocol (not just HTTP)
- doesn't interpret the HTTP protocol (no fancy routing)
- costs a bit of money for each service
- Ingress
- requires an ingress controller
- flat cost regardless of number of ingresses
- can implement TLS transparently for the app
- only supports HTTP
- can do content-based routing (e.g. per URI)
- lower cost per service
<br/>(exact pricing depends on provider's model)
---
@@ -123,46 +122,18 @@
class: extra-details
## Special cases
- GKE has "[GKE Ingress]", a custom ingress controller
(enabled by default)
- EKS has "AWS ALB Ingress Controller" as well
(not enabled by default, requires extra setup)
- They leverage cloud-specific HTTP load balancers
(GCP HTTP LB, AWS ALB)
- They typically have a cost *per ingress resource*
[GKE Ingress]: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress
---
class: extra-details
## Single or multiple LoadBalancer
- Most ingress controllers will create a LoadBalancer Service
(and will receive all HTTP/HTTPS traffic through it)
- We need to point our DNS entries to the IP address of that LB
- Some rare ingress controllers will allocate one LB per ingress resource
(example: the GKE Ingress and ALB Ingress mentioned previously)
(example: by default, the AWS ingress controller based on ALBs)
- This leads to increased costs
- Note that it's possible to have multiple "rules" per ingress resource
(this will reduce costs but may be less convenient to manage)
---
## Ingress in action
@@ -251,22 +222,15 @@ class: extra-details
## Running Traefik
- The [Traefik documentation][traefikdoc] recommends to use a Helm chart
- The [Traefik documentation](https://docs.traefik.io/user-guide/kubernetes/#deploy-trfik-using-a-deployment-or-daemonset) tells us to pick between Deployment and Daemon Set
- For simplicity, we're going to use a custom YAML manifest
- We are going to use a Daemon Set so that each node can accept connections
- Our manifest will:
- use a Daemon Set so that each node can accept connections
- We will do two minor changes to the [YAML provided by Traefik](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-ds.yaml):
- enable `hostNetwork`
- add a *toleration* so that Traefik also runs on all nodes
- We could do the same with the official [Helm chart][traefikchart]
[traefikdoc]: https://doc.traefik.io/traefik/getting-started/install-traefik/#use-the-helm-chart
[traefikchart]: https://artifacthub.io/packages/helm/traefik/traefik
- add a *toleration* so that Traefik also runs on `node1`
---
@@ -290,7 +254,7 @@ class: extra-details
## Checking taints on our nodes
.lab[
.exercise[
- Check our nodes specs:
```bash
@@ -341,7 +305,7 @@ class: extra-details
## Checking tolerations on the control plane
.lab[
.exercise[
- Check tolerations for CoreDNS:
```bash
@@ -367,7 +331,7 @@ class: extra-details
## Special tolerations
.lab[
.exercise[
- Check tolerations on `kube-proxy`:
```bash
@@ -396,7 +360,7 @@ This one is a special case that means "ignore all taints and run anyway."
- [Traefik's RBAC rules](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-rbac.yaml) allowing it to watch necessary API objects
.lab[
.exercise[
- Apply the YAML:
```bash
@@ -411,7 +375,7 @@ This one is a special case that means "ignore all taints and run anyway."
- If Traefik started correctly, we now have a web server listening on each node
.lab[
.exercise[
- Check that Traefik is serving 80/tcp:
```bash
@@ -430,7 +394,7 @@ This is normal: we haven't provided any ingress rule yet.
- To make our lives easier, we will use [nip.io](http://nip.io)
- Check out `http://red.A.B.C.D.nip.io`
- Check out `http://cheddar.A.B.C.D.nip.io`
(replacing A.B.C.D with the IP address of `node1`)
@@ -446,7 +410,7 @@ This is normal: we haven't provided any ingress rule yet.
- With the current install method, it's listening on port 8080
.lab[
.exercise[
- Go to `http://node1:8080` (replacing `node1` with its IP address)
@@ -458,36 +422,38 @@ This is normal: we haven't provided any ingress rule yet.
## Setting up host-based routing ingress rules
- We are going to use the `jpetazzo/color` image
- We are going to use `errm/cheese` images
- This image contains a simple static HTTP server on port 80
(there are [3 tags available](https://hub.docker.com/r/errm/cheese/tags/): wensleydale, cheddar, stilton)
- We will run 3 deployments (`red`, `green`, `blue`)
- These images contain a simple static HTTP server sending a picture of cheese
- We will run 3 deployments (one for each cheese)
- We will create 3 services (one for each deployment)
- Then we will create 3 ingress rules (one for each service)
- We will route `<color>.A.B.C.D.nip.io` to the corresponding deployment
- We will route `<name-of-cheese>.A.B.C.D.nip.io` to the corresponding deployment
---
## Running colorful web servers
## Running cheesy web servers
.lab[
.exercise[
- Run all three deployments:
```bash
kubectl create deployment red --image=jpetazzo/color
kubectl create deployment green --image=jpetazzo/color
kubectl create deployment blue --image=jpetazzo/color
kubectl create deployment cheddar --image=errm/cheese:cheddar
kubectl create deployment stilton --image=errm/cheese:stilton
kubectl create deployment wensleydale --image=errm/cheese:wensleydale
```
- Create a service for each of them:
```bash
kubectl expose deployment red --port=80
kubectl expose deployment green --port=80
kubectl expose deployment blue --port=80
kubectl expose deployment cheddar --port=80
kubectl expose deployment stilton --port=80
kubectl expose deployment wensleydale --port=80
```
]
@@ -503,17 +469,17 @@ This is normal: we haven't provided any ingress rule yet.
- Since Kubernetes 1.19, we can use `kubectl create ingress`
```bash
kubectl create ingress red \
--rule=red.`A.B.C.D`.nip.io/*=red:80
kubectl create ingress cheddar \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
```
- We can specify multiple rules per resource
```bash
kubectl create ingress rgb \
--rule=red.`A.B.C.D`.nip.io/*=red:80 \
--rule=green.`A.B.C.D`.nip.io/*=green:80 \
--rule=blue.`A.B.C.D`.nip.io/*=blue:80
kubectl create ingress cheeses \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80 \
--rule=stilton.`A.B.C.D`.nip.io/*=stilton:80 \
--rule=wensleydale.`A.B.C.D`.nip.io/*=wensleydale:80
```
---
@@ -523,14 +489,14 @@ This is normal: we haven't provided any ingress rule yet.
- The `*` is important:
```
--rule=red.A.B.C.D.nip.io/`*`=red:80
--rule=cheddar.A.B.C.D.nip.io/`*`=cheddar:80
```
- It means "all URIs below that path"
- Without the `*`, it means "only that exact path"
(if we omit it, requests for e.g. `red.A.B.C.D.nip.io/hello` will 404)
(and requests for e.g. images or other URIs won't work)
---
@@ -542,15 +508,15 @@ Here is a minimal host-based ingress resource:
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: red
name: cheddar
spec:
rules:
- host: red.`A.B.C.D`.nip.io
- host: cheddar.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
serviceName: cheddar
servicePort: 80
```
@@ -574,8 +540,8 @@ class: extra-details
- If we want to see "modern" YAML, we can use `-o yaml --dry-run=client`:
```bash
kubectl create ingress red -o yaml --dry-run=client \
--rule=red.`A.B.C.D`.nip.io/*=red:80
kubectl create ingress cheddar -o yaml --dry-run=client \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
```
@@ -643,21 +609,13 @@ class: extra-details
---
## Vendor-specific example
## A special feature in action
- Let's see how to implement *canary releases*
- We're going to see how to implement *canary releases* with Traefik
- The example here will use Traefik v1
- This feature is available on multiple ingress controllers
(which is obsolete)
- It won't work on your Kubernetes cluster!
(unless you're running an oooooold version of Kubernetes)
(and an equally oooooooold version of Traefik)
- We've left it here just as an example!
- ... But it is configured very differently on each of them
---
@@ -698,7 +656,7 @@ class: extra-details
---
## Canary releases with Traefik v1
## Canary releases with Traefik
- We need to deploy the canary and expose it with a separate service
@@ -710,6 +668,14 @@ class: extra-details
- If we want, we can send requests to more than 2 services
- Let's send requests to our 3 cheesy services!
.exercise[
- Create the resource shown on the next slide
]
---
## The Ingress resource
@@ -719,34 +685,63 @@ class: extra-details
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: rgb
name: cheeseplate
annotations:
traefik.ingress.kubernetes.io/service-weights: |
red: 50%
green: 25%
blue: 25%
cheddar: 50%
wensleydale: 25%
stilton: 25%
spec:
rules:
- host: rgb.`A.B.C.D`.nip.io
- host: cheeseplate.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
serviceName: cheddar
servicePort: 80
- path: /
backend:
serviceName: green
serviceName: wensleydale
servicePort: 80
- path: /
backend:
serviceName: blue
serviceName: stilton
servicePort: 80
```
]
---
## Testing the canary
- Let's check the percentage of requests going to each service
.exercise[
- Continuously send HTTP requests to the new ingress:
```bash
while sleep 0.1; do
curl -s http://cheeseplate.A.B.C.D.nip.io/
done
```
]
We should see a 50/25/25 request mix.
---
class: extra-details
## Load balancing fairness
Note: if we use odd request ratios, the load balancing algorithm might appear to be broken on a small scale (when sending a small number of requests), but on a large scale (with many requests) it will be fair.
For instance, with a 11%/89% ratio, we can see 79 requests going to the 89%-weighted service, and then requests alternating between the two services; then 79 requests again, etc.
---
class: extra-details
## Other ingress controllers

Some files were not shown because too many files have changed in this diff Show More