Compare commits

..

1 Commits

Author SHA1 Message Date
Jérôme Petazzoni
9bad3f26f3 ☸️ Kubernetes December 2021 content 2021-12-10 16:27:58 +01:00
193 changed files with 1324 additions and 5123 deletions

View File

@@ -3,12 +3,6 @@
# - no actual persistence
# - scaling down to 1 will break the cluster
# - pods may be colocated
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -34,6 +28,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -62,7 +61,7 @@ spec:
serviceAccountName: consul
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -2,12 +2,6 @@
# There is still no actual persistence, but:
# - podAntiaffinity prevents pod colocation
# - clusters works when scaling down to 1 (thanks to lifecycle hook)
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -33,6 +27,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -69,7 +68,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -1,11 +1,5 @@
# Even better Consul cluster.
# That one uses a volumeClaimTemplate to achieve true persistence.
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -31,6 +25,11 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -76,7 +75,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.11"
image: "consul:1.8"
volumeMounts:
- name: data
mountPath: /consul/data

View File

@@ -1,28 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
match:
resources:
kinds:
- Service
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
number: 80
path: /
pathType: Prefix

View File

@@ -1,32 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
match:
resources:
kinds:
- Service
preconditions:
- key: "{{request.object.spec.ports[0].name}}"
operator: Equals
value: http
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
name: http
path: /
pathType: Prefix

View File

@@ -1,37 +0,0 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
name: ingress-domain-name
spec:
rules:
- name: create-ingress
context:
- name: configmap
configMap:
name: ingress-domain-name
namespace: "{{request.object.metadata.namespace}}"
match:
resources:
kinds:
- Service
preconditions:
- key: "{{request.object.spec.ports[0].name}}"
operator: Equals
value: http
generate:
kind: Ingress
name: "{{request.object.metadata.name}}"
namespace: "{{request.object.metadata.namespace}}"
data:
spec:
rules:
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.{{configmap.data.domain}}"
http:
paths:
- backend:
service:
name: "{{request.object.metadata.name}}"
port:
name: http
path: /
pathType: Prefix

View File

@@ -17,12 +17,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: blue
template:
metadata:
labels:
app: rainbow
app: color
color: blue
spec:
containers:
@@ -33,7 +33,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: blue
name: color
namespace: blue
@@ -44,7 +44,7 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: blue
type: ClusterIP
---
@@ -66,12 +66,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: green
template:
metadata:
labels:
app: rainbow
app: color
color: green
spec:
containers:
@@ -82,7 +82,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: green
name: color
namespace: green
@@ -93,7 +93,7 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: green
type: ClusterIP
---
@@ -115,12 +115,12 @@ metadata:
spec:
selector:
matchLabels:
app: rainbow
app: color
color: red
template:
metadata:
labels:
app: rainbow
app: color
color: red
spec:
containers:
@@ -131,7 +131,7 @@ apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
app: color
color: red
name: color
namespace: red
@@ -142,6 +142,6 @@ spec:
protocol: TCP
targetPort: 80
selector:
app: rainbow
app: color
color: red
type: ClusterIP

View File

@@ -1,107 +1,17 @@
⚠️ This is work in progress. The UX needs to be improved,
and the docs could be better.
This directory contains a Terraform configuration to deploy
a bunch of Kubernetes clusters on various cloud providers,
using their respective managed Kubernetes products.
a bunch of Kubernetes clusters on various cloud providers, using their respective managed Kubernetes products.
## With shell wrapper
This is the recommended use. It makes it easy to start N clusters
on any provider. It will create a directory with a name like
`tag-YYYY-MM-DD-HH-MM-SS-SEED-PROVIDER`, copy the Terraform configuration
to that directory, then create the clusters using that configuration.
1. One-time setup: configure provider authentication for the provider(s) that you wish to use.
- Digital Ocean:
```bash
doctl auth init
```
- Google Cloud Platform: you will need to create a project named `prepare-tf`
and enable the relevant APIs for this project (sorry, if you're new to GCP,
this sounds vague; but if you're familiar with it you know what to do; if you
want to change the project name you can edit the Terraform configuration)
- Linode:
```bash
linode-cli configure
```
- Oracle Cloud: FIXME
(set up `oci` through the `oci-cli` Python package)
- Scaleway: run `scw init`
2. Optional: set number of clusters, cluster size, and region.
By default, 1 cluster will be configured, with 2 nodes, and auto-scaling up to 5 nodes.
If you want, you can override these parameters, with the following variables.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
export TF_VAR_max_nodes_per_pool=4
export TF_VAR_location=xxx
```
The `location` variable is optional. Each provider should have a default value.
The value of the `location` variable is provider-specific. Examples:
| Provider | Example value | How to see possible values
|---------------|-------------------|---------------------------
| Digital Ocean | `ams3` | `doctl compute region list`
| Google Cloud | `europe-north1-a` | `gcloud compute zones list`
| Linode | `eu-central` | `linode-cli regions list`
| Oracle Cloud | `eu-stockholm-1` | `oci iam region list`
You can also specify multiple locations, and then they will be
used in round-robin fashion.
For example, with Google Cloud, since the default quotas are very
low (my account is limited to 8 public IP addresses per zone, and
my requests to increase that quota were denied) you can do the
following:
```bash
export TF_VAR_location=$(gcloud compute zones list --format=json | jq -r .[].name | grep ^europe)
```
Then when you apply, clusters will be created across all available
zones in Europe. (When I write this, there are 20+ zones in Europe,
so even with my quota, I can create 40 clusters.)
3. Run!
```bash
./run.sh <providername>
```
(If you don't specify a provider name, it will list available providers.)
4. Shutting down
Go to the directory that was created by the previous step (`tag-YYYY-MM...`)
and run `terraform destroy`.
You can also run `./clean.sh` which will destroy ALL clusters deployed by the previous run script.
## Without shell wrapper
Expert mode.
Useful to run steps sperarately, and/or when working on the Terraform configurations.
To use it:
1. Select the provider you wish to use.
Go to the `source` directory and edit `main.tf`.
Change the `source` attribute of the `module "clusters"` section.
Check the content of the `modules` directory to see available choices.
```bash
vim main.tf
```
2. Initialize the provider.
```bash
@@ -110,20 +20,24 @@ terraform init
3. Configure provider authentication.
See steps above, and add the following extra steps:
- Digital Coean:
```bash
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
```
- Linode:
```bash
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
```
- Digital Ocean: `export DIGITALOCEAN_ACCESS_TOKEN=...`
(check `~/.config/doctl/config.yaml` for the token)
- Linode: `export LINODE_TOKEN=...`
(check `~/.config/linode-cli` for the token)
- Oracle Cloud: it should use `~/.oci/config`
- Scaleway: run `scw init`
4. Decide how many clusters and how many nodes per clusters you want.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
# Optional (will enable autoscaler when available)
export TF_VAR_max_nodes_per_pool=4
# Optional (will only work on some providers)
export TF_VAR_enable_arm_pool=true
```
5. Provision clusters.
```bash
@@ -132,7 +46,7 @@ terraform apply
6. Perform second stage provisioning.
This will install an SSH server on the clusters.
This will install a SSH server on the clusters.
```bash
cd stage2
@@ -158,5 +72,5 @@ terraform destroy
9. Clean up stage2.
```bash
rm stage2/terraform.tfstate*
rm stage/terraform.tfstate*
```

View File

@@ -1,9 +0,0 @@
#!/bin/sh
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
for T in tag-*; do
(
cd $T
terraform apply -destroy -auto-approve && mv ../$T ../deleted$T
)
done

16
prepare-tf/locals.tf Normal file
View File

@@ -0,0 +1,16 @@
resource "random_string" "_" {
length = 5
special = false
upper = false
}
resource "time_static" "_" {}
locals {
tag = format("tf-%s-%s", formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339), random_string._.result)
# Common tags to be assigned to all resources
common_tags = [
"created-by=terraform",
"tag=${local.tag}"
]
}

View File

@@ -1,5 +1,5 @@
module "clusters" {
source = "./modules/PROVIDER"
source = "./modules/linode"
for_each = local.clusters
cluster_name = each.value.cluster_name
min_nodes_per_pool = var.min_nodes_per_pool
@@ -7,24 +7,22 @@ module "clusters" {
enable_arm_pool = var.enable_arm_pool
node_size = var.node_size
common_tags = local.common_tags
location = each.value.location
}
locals {
clusters = {
for i in range(101, 101 + var.how_many_clusters) :
i => {
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
#dashdash_kubeconfig = format("--kubeconfig=./stage2/kubeconfig.%03d", i)
externalips_path = format("./stage2/externalips.%03d", i)
flags_path = format("./stage2/flags.%03d", i)
location = local.locations[i % length(local.locations)]
}
}
}
resource "local_file" "stage2" {
filename = "./stage2/main.tf"
filename = "./stage2/main.tf"
file_permission = "0644"
content = templatefile(
"./stage2.tmpl",
@@ -32,15 +30,6 @@ resource "local_file" "stage2" {
)
}
resource "local_file" "flags" {
for_each = local.clusters
filename = each.value.flags_path
file_permission = "0600"
content = <<-EOT
has_metrics_server: ${module.clusters[each.key].has_metrics_server}
EOT
}
resource "local_file" "kubeconfig" {
for_each = local.clusters
filename = each.value.kubeconfig_path
@@ -70,8 +59,8 @@ resource "null_resource" "wait_for_nodes" {
}
data "external" "externalips" {
for_each = local.clusters
depends_on = [null_resource.wait_for_nodes]
for_each = local.clusters
depends_on = [ null_resource.wait_for_nodes ]
program = [
"sh",
"-c",

View File

@@ -1,13 +1,12 @@
resource "digitalocean_kubernetes_cluster" "_" {
name = var.cluster_name
tags = var.common_tags
# Region is mandatory, so let's provide a default value.
region = var.location != null ? var.location : "nyc1"
name = var.cluster_name
tags = local.common_tags
region = var.region
version = var.k8s_version
node_pool {
name = "x86"
tags = var.common_tags
name = "dok-x86"
tags = local.common_tags
size = local.node_type
auto_scale = true
min_nodes = var.min_nodes_per_pool

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = digitalocean_kubernetes_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -8,6 +8,10 @@ variable "common_tags" {
default = []
}
locals {
common_tags = [for tag in var.common_tags : replace(tag, "=", "-")]
}
variable "node_size" {
type = string
default = "M"
@@ -44,9 +48,9 @@ locals {
# To view supported regions, run:
# doctl compute region list
variable "location" {
variable "region" {
type = string
default = null
default = "nyc1"
}
# To view supported versions, run:

View File

@@ -1,8 +1,7 @@
resource "linode_lke_cluster" "_" {
label = var.cluster_name
tags = var.common_tags
# "region" is mandatory, so let's provide a default value if none was given.
region = var.location != null ? var.location : "eu-central"
label = var.cluster_name
tags = var.common_tags
region = var.region
k8s_version = var.k8s_version
pool {

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = linode_lke_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -42,11 +42,11 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported regions, run:
# To view supported versions, run:
# linode-cli regions list
variable "location" {
variable "region" {
type = string
default = null
default = "us-east"
}
# To view supported versions, run:

View File

@@ -1,7 +1,6 @@
resource "oci_identity_compartment" "_" {
name = var.cluster_name
description = var.cluster_name
enable_delete = true
name = var.cluster_name
description = var.cluster_name
}
locals {

View File

@@ -9,7 +9,3 @@ output "kubeconfig" {
output "cluster_id" {
value = oci_containerengine_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -70,13 +70,6 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported regions, run:
# oci iam region list | jq .data[].name
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# oci ce cluster-options get --cluster-option-id all | jq -r '.data["kubernetes-versions"][]'
variable "k8s_version" {

View File

@@ -1,6 +1,5 @@
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
region = var.location
tags = var.common_tags
version = var.k8s_version
cni = var.cni
@@ -9,7 +8,7 @@ resource "scaleway_k8s_cluster" "_" {
resource "scaleway_k8s_pool" "_" {
cluster_id = scaleway_k8s_cluster._.id
name = "x86"
name = "scw-x86"
tags = var.common_tags
node_type = local.node_type
size = var.min_nodes_per_pool

View File

@@ -5,7 +5,3 @@ output "kubeconfig" {
output "cluster_id" {
value = scaleway_k8s_cluster._.id
}
output "has_metrics_server" {
value = sort([var.k8s_version, "1.22"])[0] == "1.22"
}

View File

@@ -47,12 +47,7 @@ variable "cni" {
default = "cilium"
}
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# See supported versions with:
# scw k8s version list -o json | jq -r .[].name
variable "k8s_version" {
type = string

View File

@@ -1,49 +0,0 @@
#!/bin/sh
set -e
TIME=$(which time)
PROVIDER=$1
[ "$PROVIDER" ] || {
echo "Please specify a provider as first argument, or 'ALL' for parallel mode."
echo "Available providers:"
ls -1 source/modules
exit 1
}
[ "$TAG" ] || {
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
RANDOMTAG=$(base64 /dev/urandom | tr A-Z a-z | tr -d /+ | head -c5)
export TAG=tag-$TIMESTAMP-$RANDOMTAG
}
[ "$PROVIDER" = "ALL" ] && {
for PROVIDER in $(ls -1 source/modules); do
$TERMINAL -T $TAG-$PROVIDER -e sh -c "
export TAG=$TAG-$PROVIDER
$0 $PROVIDER
cd $TAG-$PROVIDER
bash
" &
done
exit 0
}
[ -d "source/modules/$PROVIDER" ] || {
echo "Provider '$PROVIDER' not found."
echo "Available providers:"
ls -1 source/modules
exit 1
}
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
cp -a source $TAG
cd $TAG
cp -r modules/$PROVIDER modules/PROVIDER
$TIME -o time.1.init terraform init
$TIME -o time.2.stage1 terraform apply -auto-approve
cd stage2
$TIME -o ../time.3.init terraform init
$TIME -o ../time.4.stage2 terraform apply -auto-approve

View File

@@ -1,19 +0,0 @@
resource "random_string" "_" {
length = 4
number = false
special = false
upper = false
}
resource "time_static" "_" {}
locals {
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
tag = random_string._.result
# Common tags to be assigned to all resources
common_tags = [
"created-by-terraform",
format("created-at-%s", local.timestamp),
format("created-for-%s", local.tag)
]
}

View File

@@ -1,65 +0,0 @@
resource "google_container_cluster" "_" {
name = var.cluster_name
project = local.project
location = local.location
min_master_version = var.k8s_version
# To deploy private clusters, uncomment the section below,
# and uncomment the block in network.tf.
# Private clusters require extra resources (Cloud NAT,
# router, network, subnet) and the quota for some of these
# resources is fairly low on GCP; so if you want to deploy
# a lot of private clusters (more than 10), you can use these
# blocks as a base but you will probably have to refactor
# things quite a bit (you will at least need to define a single
# shared router and use it across all the clusters).
/*
network = google_compute_network._.name
subnetwork = google_compute_subnetwork._.name
private_cluster_config {
enable_private_nodes = true
# This must be set to "false".
# (Otherwise, access to the public endpoint is disabled.)
enable_private_endpoint = false
# This must be set to a /28.
# I think it shouldn't collide with the pod network subnet.
master_ipv4_cidr_block = "10.255.255.0/28"
}
# Private clusters require "VPC_NATIVE" networking mode
# (as opposed to the legacy "ROUTES").
networking_mode = "VPC_NATIVE"
# ip_allocation_policy is required for VPC_NATIVE clusters.
ip_allocation_policy {
# This is the block that will be used for pods.
cluster_ipv4_cidr_block = "10.0.0.0/12"
# The services block is optional
# (GKE will pick one automatically).
#services_ipv4_cidr_block = ""
}
*/
node_pool {
name = "x86"
node_config {
tags = var.common_tags
machine_type = local.node_type
}
initial_node_count = var.min_nodes_per_pool
autoscaling {
min_node_count = var.min_nodes_per_pool
max_node_count = max(var.min_nodes_per_pool, var.max_nodes_per_pool)
}
}
# This is not strictly necessary.
# We'll see if we end up using it.
# (If it is removed, make sure to also remove the corresponding
# key+cert variables from outputs.tf!)
master_auth {
client_certificate_config {
issue_client_certificate = true
}
}
}

View File

@@ -1,38 +0,0 @@
/*
resource "google_compute_network" "_" {
name = var.cluster_name
project = local.project
# The default is to create subnets automatically.
# However, this creates one subnet per zone in all regions,
# which causes a quick exhaustion of the subnet quota.
auto_create_subnetworks = false
}
resource "google_compute_subnetwork" "_" {
name = var.cluster_name
ip_cidr_range = "10.254.0.0/16"
region = local.region
network = google_compute_network._.id
project = local.project
}
resource "google_compute_router" "_" {
name = var.cluster_name
region = local.region
network = google_compute_network._.name
project = local.project
}
resource "google_compute_router_nat" "_" {
name = var.cluster_name
router = google_compute_router._.name
region = local.region
project = local.project
# Everyone in the network is allowed to NAT out.
# (We would change this if we only wanted to allow specific subnets to NAT out.)
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
# Pick NAT addresses automatically.
# (We would change this if we wanted to use specific addresses to NAT out.)
nat_ip_allocate_option = "AUTO_ONLY"
}
*/

View File

@@ -1,35 +0,0 @@
data "google_client_config" "_" {}
output "kubeconfig" {
value = <<-EOT
apiVersion: v1
kind: Config
current-context: ${google_container_cluster._.name}
clusters:
- name: ${google_container_cluster._.name}
cluster:
server: https://${google_container_cluster._.endpoint}
certificate-authority-data: ${google_container_cluster._.master_auth[0].cluster_ca_certificate}
contexts:
- name: ${google_container_cluster._.name}
context:
cluster: ${google_container_cluster._.name}
user: client-token
users:
- name: client-cert
user:
client-key-data: ${google_container_cluster._.master_auth[0].client_key}
client-certificate-data: ${google_container_cluster._.master_auth[0].client_certificate}
- name: client-token
user:
token: ${data.google_client_config._.access_token}
EOT
}
output "cluster_id" {
value = google_container_cluster._.id
}
output "has_metrics_server" {
value = true
}

View File

@@ -1,8 +0,0 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "4.5.0"
}
}
}

View File

@@ -1,68 +0,0 @@
variable "cluster_name" {
type = string
default = "deployed-with-terraform"
}
variable "common_tags" {
type = list(string)
default = []
}
variable "node_size" {
type = string
default = "M"
}
variable "min_nodes_per_pool" {
type = number
default = 2
}
variable "max_nodes_per_pool" {
type = number
default = 5
}
# FIXME
variable "enable_arm_pool" {
type = bool
default = false
}
variable "node_types" {
type = map(string)
default = {
"S" = "e2-small"
"M" = "e2-medium"
"L" = "e2-standard-2"
}
}
locals {
node_type = var.node_types[var.node_size]
}
# To view supported locations, run:
# gcloud compute zones list
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# gcloud container get-server-config --region=europe-north1 '--format=flattened(channels)'
# But it's also possible to just specify e.g. "1.20" and it figures it out.
variable "k8s_version" {
type = string
default = "1.21"
}
locals {
location = var.location != null ? var.location : "europe-north1-a"
region = replace(local.location, "/-[a-z]$/", "")
# Unfortunately, the following line doesn't work
# (that attribute just returns an empty string)
# so we have to hard-code the project name.
#project = data.google_client_config._.project
project = "prepare-tf"
}

View File

@@ -1,40 +0,0 @@
variable "how_many_clusters" {
type = number
default = 1
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# We map these values to different specific instance types for each provider,
# but the idea is that they shoudl correspond to the following sizes:
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = false
}
variable "location" {
type = string
default = null
}
# TODO: perhaps handle if it's space-separated instead of newline?
locals {
locations = var.location == null ? [null] : split("\n", var.location)
}

View File

@@ -2,7 +2,7 @@ terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "2.7.1"
version = "2.0.3"
}
}
}
@@ -119,11 +119,6 @@ resource "kubernetes_cluster_role_binding" "shpod_${index}" {
name = "shpod"
namespace = "shpod"
}
subject {
api_group = "rbac.authorization.k8s.io"
kind = "Group"
name = "shpod-cluster-admins"
}
}
resource "random_string" "shpod_${index}" {
@@ -140,10 +135,6 @@ provider "helm" {
}
resource "helm_release" "metrics_server_${index}" {
# Some providers pre-install metrics-server.
# Some don't. Let's install metrics-server,
# but only if it's not already installed.
count = yamldecode(file("./flags.${index}"))["has_metrics_server"] ? 0 : 1
provider = helm.cluster_${index}
repository = "https://charts.bitnami.com/bitnami"
chart = "metrics-server"
@@ -191,7 +182,7 @@ resource "kubernetes_config_map" "kubeconfig_${index}" {
- name: cluster-admin
user:
client-key-data: $${base64encode(tls_private_key.cluster_admin_${index}.private_key_pem)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request.cluster_admin_${index}.certificate)}
EOT
}
}
@@ -205,14 +196,11 @@ resource "tls_cert_request" "cluster_admin_${index}" {
private_key_pem = tls_private_key.cluster_admin_${index}.private_key_pem
subject {
common_name = "cluster-admin"
# Note: CSR API v1 doesn't allow issuing certs with "system:masters" anymore.
#organization = "system:masters"
# We'll use this custom group name instead.cluster-admin user.
organization = "shpod-cluster-admins"
organization = "system:masters"
}
}
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
resource "kubernetes_certificate_signing_request" "cluster_admin_${index}" {
provider = kubernetes.cluster_${index}
metadata {
name = "cluster-admin"
@@ -220,7 +208,6 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
spec {
usages = ["client auth"]
request = tls_cert_request.cluster_admin_${index}.cert_request_pem
signer_name = "kubernetes.io/kube-apiserver-client"
}
auto_approve = true
}

28
prepare-tf/variables.tf Normal file
View File

@@ -0,0 +1,28 @@
variable "how_many_clusters" {
type = number
default = 2
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = true
}

View File

@@ -14,9 +14,7 @@ These tools can help you to create VMs on:
- [Docker](https://docs.docker.com/engine/installation/)
- [Docker Compose](https://docs.docker.com/compose/install/)
- [Parallel SSH](https://github.com/lilydjwg/pssh)
(should be installable with `pip install git+https://github.com/lilydjwg/pssh`;
on a Mac, try `brew install pssh`)
- [Parallel SSH](https://code.google.com/archive/p/parallel-ssh/) (on a Mac: `brew install pssh`)
Depending on the infrastructure that you want to use, you also need to install
the CLI that is specific to that cloud. For OpenStack deployments, you will

View File

@@ -314,12 +314,11 @@ _cmd_kube() {
SETTINGS=tags/$TAG/settings.yaml
KUBEVERSION=$(awk '/^kubernetes_version:/ {print $2}' $SETTINGS)
if [ "$KUBEVERSION" ]; then
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION*
Pin-Priority: 1000
EOF"
EXTRA_APTGET="=$KUBEVERSION-00"
EXTRA_KUBEADM="kubernetesVersion: v$KUBEVERSION"
else
EXTRA_APTGET=""
EXTRA_KUBEADM=""
fi
# Install packages
@@ -330,8 +329,7 @@ EOF"
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
sudo apt-get install -qy kubelet kubeadm kubectl &&
sudo apt-mark hold kubelet kubeadm kubectl
sudo apt-get install -qy kubelet$EXTRA_APTGET kubeadm$EXTRA_APTGET kubectl$EXTRA_APTGET &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
@@ -343,11 +341,6 @@ EOF"
sudo swapoff -a"
fi
# Re-enable CRI interface in containerd
pssh "
echo '# Use default parameters for containerd.' | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd"
# Initialize kube control plane
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
@@ -357,38 +350,19 @@ kind: InitConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- token: \$(cat /tmp/token)
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
discovery:
bootstrapToken:
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
token: \$(cat /tmp/token)
unsafeSkipCAVerification: true
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
# The following line is necessary when using Docker.
# It doesn't seem necessary when using containerd.
#cgroupDriver: cgroupfs
cgroupDriver: cgroupfs
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
apiServer:
certSANs:
- \$(cat /tmp/ipv4)
$EXTRA_KUBEADM
EOF
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
sudo kubeadm init --config=/tmp/kubeadm-config.yaml --ignore-preflight-errors=NumCPU
fi"
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
@@ -412,8 +386,8 @@ EOF
pssh --timeout 200 "
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
sudo kubeadm join --config /tmp/kubeadm-config.yaml
TOKEN=\$(ssh $SSHOPTS \$FIRSTNODE cat /tmp/token) &&
sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
fi"
# Install metrics server
@@ -504,7 +478,7 @@ EOF
if [ ! -x /usr/local/bin/kustomize ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kustomize
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
echo complete -C /usr/local/bin/kustomize kustomize | sudo tee /etc/bash_completion.d/kustomize
kustomize version
fi"

View File

@@ -1,22 +1,22 @@
#!/bin/sh
# https://open-api.netlify.com/#tag/dnsZone
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training".
echo ""
echo "Syntax:"
echo "$0 list"
echo "$0 add <name> <ipaddr>"
echo "$0 del <recordid>"
echo "$0 <name> <ipaddr>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu 185.145.250.0"
echo "$0 eu 185.145.250.0"
echo ""
exit 1
}
NAME=$1.container.training
ADDR=$2
NETLIFY_USERID=$(jq .userId < ~/.config/netlify/config.json)
NETLIFY_TOKEN=$(jq -r .users[$NETLIFY_USERID].auth.token < ~/.config/netlify/config.json)
@@ -29,54 +29,19 @@ netlify() {
ZONE_ID=$(netlify dns_zones |
jq -r '.[] | select ( .name == "container.training" ) | .id')
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
_add() {
NAME=$1.container.training
ADDR=$2
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
}
_del() {
RECORD_ID=$1
# OK, since that one is dangerous, I'm putting the whole request explicitly here
http DELETE \
https://api.netlify.com/api/v1/dns_zones/$ZONE_ID/dns_records/$RECORD_ID \
"Authorization:Bearer $NETLIFY_TOKEN"
}
case "$1" in
list)
_list
;;
add)
_add $2 $3
;;
del)
_del $2
;;
*)
echo "Unknown command '$1'."
exit 1
;;
esac
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'

View File

@@ -14,9 +14,7 @@ paper_size: A4
user_login: k8s
user_password: training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
kubernetes_version: 1.18.20
kubernetes_version: 1.19.16
image:

View File

@@ -2,7 +2,7 @@
#/ /kube-halfday.yml.html 200!
#/ /kube-fullday.yml.html 200!
#/ /kube-twodays.yml.html 200!
/ /kube.yml.html 200!
/ /k8s.yml.html 200!
# And this allows to do "git clone https://container.training".
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack

View File

@@ -109,7 +109,7 @@ class: extra-details
- Example: [ctr.run](https://ctr.run/)
.lab[
.exercise[
- Use ctr.run to automatically build a container image and run it:
```bash

View File

@@ -28,7 +28,7 @@ class: self-paced
- Likewise, it will take more than merely *reading* these slides
to make you an expert
- These slides include *tons* of demos, exercises, and examples
- These slides include *tons* of exercises and examples
- They assume that you have access to a machine running Docker

View File

@@ -4,6 +4,8 @@
(we will use the `rng` service in the dockercoins app)
- See what happens when the load increses
- Observe the correct behavior of the readiness probe
(spoiler alert: it involves timeouts!)
(when deploying e.g. an invalid image)
- Observe the behavior of the liveness probe

View File

@@ -2,85 +2,34 @@
- We want to add healthchecks to the `rng` service in dockercoins
- The `rng` service exhibits an interesting behavior under load:
*its latency increases (which will cause probes to time out!)*
- We want to see:
- what happens when the readiness probe fails
- what happens when the liveness probe fails
- how to set "appropriate" probes and probe parameters
---
## Setup
- First, deploy a new copy of dockercoins
(for instance, in a brand new namespace)
- Then, add a readiness probe on the `rng` service
- Pro tip #1: ping (e.g. with `httping`) the `rng` service at all times
- it should initially show a few milliseconds latency
- that will increase when we scale up
- it will also let us detect when the service goes "boom"
- Pro tip #2: also keep an eye on the web UI
---
## Readiness
- Add a readiness probe to `rng`
- this requires editing the pod template in the Deployment manifest
- use a simple HTTP check on the `/` route of the service
- keep all other parameters (timeouts, thresholds...) at their default values
(using a simple HTTP check on the `/` route of the service)
- Check what happens when deploying an invalid image for `rng` (e.g. `alpine`)
*(If the probe was set up correctly, the app will continue to work,
because Kubernetes won't switch over the traffic to the `alpine` containers,
because they don't pass the readiness probe.)*
- Then roll back `rng` to the original image and add a liveness probe
(with the same parameters)
- Scale up the `worker` service (to 15+ workers) and observe
- What happens?
---
## Readiness under load
## Goal
- Then roll back `rng` to the original image
- *Before* adding the readiness probe:
- Check what happens when we scale up the `worker` Deployment to 15+ workers
updating the image of the `rng` service with `alpine` should break it
(get the latency above 1 second)
- *After* adding the readiness probe:
*(We should now observe intermittent unavailability of the service, i.e. every
30 seconds it will be unreachable for a bit, then come back, then go away again, etc.)*
updating the image of the `rng` service with `alpine` shouldn't break it
---
- When adding the liveness probe, nothing special should happen
## Liveness
- Now replace the readiness probe with a liveness probe
- What happens now?
*(At first the behavior looks the same as with the readiness probe:
service becomes unreachable, then reachable again, etc.; but there is
a significant difference behind the scenes. What is it?)*
---
## Readiness and liveness
- Bonus questions!
- What happens if we enable both probes at the same time?
- What strategies can we use so that both probes are useful?
- Scaling the `worker` service will then cause disruptions

View File

@@ -16,7 +16,7 @@
## Goal
- We want to be able to access the web app using a URL like:
- We want to be able to access the web app using an URL like:
http://webapp.localdev.me

View File

@@ -1,5 +1,3 @@
⚠️ BROKEN EXERCISE - DO NOT USE
## Exercise — Ingress Secret Policy
*Implement policy to limit impact of ingress controller vulnerabilities.*

View File

@@ -1,5 +1,3 @@
⚠️ BROKEN EXERCISE - DO NOT USE
# Exercise — Ingress Secret Policy
- Most ingress controllers have access to all Secrets

View File

@@ -1,9 +0,0 @@
## Exercise — Generating Ingress With Kyverno
- When a Service gets created, automatically generate an Ingress
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap

View File

@@ -1,33 +0,0 @@
# Exercise — Generating Ingress With Kyverno
When a Service gets created...
*(for instance, Service `blue` in Namespace `rainbow`)*
...Automatically generate an Ingress.
*(for instance, with host name `blue.rainbow.MYDOMAIN.COM`)*
---
## Goals
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap
(e.g. `kubectl create configmap ingress-domain-name --from-literal=domain=1.2.3.4.nip.io`)
---
## Hints
- We want to use a Kyverno `generate` ClusterPolicy
- For step 1, check [Generate Resources](https://kyverno.io/docs/writing-policies/generate/) documentation
- For step 2, check [Preconditions](https://kyverno.io/docs/writing-policies/preconditions/) documentation
- For step 3, check [External Data Sources](https://kyverno.io/docs/writing-policies/external-data-sources/) documentation

View File

@@ -1,9 +0,0 @@
## Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools

View File

@@ -1,69 +0,0 @@
# Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools
---
## Cluster deployment
- Write a Terraform configuration to deploy a cluster
- We want to have two node pools with autoscaling
- Example for sizing:
- 4 GB / 1 CPU per node
- pools of 1 to 4 nodes
---
## Cluster autoscaling
- Deploy an app on the cluster
(you can use `nginx`, `jpetazzo/color`...)
- Set a resource request (e.g. 1 GB RAM)
- Scale up and verify that the autoscaler kicks in
---
## Pool isolation
- We want to deploy two apps
- The first app should be deployed exclusively on the first pool
- The second app should be deployed exclusively on the second pool
- Check the next slide for hints!
---
## Hints
- One solution involves adding a `nodeSelector` to the pod templates
- Another solution involves adding:
- `taints` to the node pools
- matching `tolerations` to the pod templates
---
## Balancing
- Step 1: make sure that the pools are not balanced
- Step 2: deploy a new app, check that it goes to the emptiest pool
- Step 3: update the app so that it balances (as much as possible) between pools

View File

@@ -1,60 +0,0 @@
#!/bin/sh
# The materials for a given training live in their own branch.
# Sometimes, we write custom content (or simply new content) for a training,
# and that content doesn't get merged back to main. This script tries to
# detect that with the following heuristics:
# - list all remote branches
# - for each remote branch, list the changes that weren't merged into main
# (using "diff main...$BRANCH", three dots)
# - ignore a bunch of training-specific files that change all the time anyway
# - for the remaining files, compute the diff between main and the branch
# (using "diff main..$BRANCH", two dots)
# - ignore changes of less than 10 lines
# - also ignore a few red herrings
# - display whatever is left
# For "git diff" (in the filter function) to work correctly, we must be
# at the root of the repo.
cd $(git rev-parse --show-toplevel)
BRANCHES=$(git branch -r | grep -v origin/HEAD | grep origin/2)
filter() {
threshold=10
while read filename; do
case $filename in
# Generic training-specific files
slides/*.html) continue;;
slides/*.yml) continue;;
slides/logistics*.md) continue;;
# Specific content that can be ignored
#slides/containers/Local_Environment.md) threshold=100;;
# Content that was moved/refactored enough to confuse us
slides/containers/Local_Environment.md) threshold=100;;
slides/exercises.md) continue;;
slides/k8s/batch-jobs) threshold=20;;
# Renames
*/{*}*) continue;;
esac
git diff --find-renames --numstat main..$BRANCH -- "$filename" | {
# If the files are identical, the diff will be empty, and "read" will fail.
read plus minus filename || return
# Ignore binary files (FIXME though?)
if [ $plus = - ]; then
return
fi
diff=$((plus-minus))
if [ $diff -gt $threshold ]; then
echo git diff main..$BRANCH -- $filename
fi
}
done
}
for BRANCH in $BRANCHES; do
if FILES=$(git diff --find-renames --name-only main...$BRANCH | filter | grep .); then
echo "🌳 $BRANCH:"
echo "$FILES"
fi
done

View File

@@ -1,11 +1,13 @@
title: |
Kubernetes
Kubernetes Training
chat: "[Chat room](https://lumen.container.training/mattermost)"
#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
chat: "[Mattermost](https://ardanlive.container.training/mattermost/)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2022-01-lumen.container.training/
slides: https://2021-12-k8s.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -15,19 +17,25 @@ exclude:
content:
- shared/title.md
- logistics.md
- exercises/k8sfundamentals-brief.md
- exercises/localcluster-brief.md
- exercises/remotecluster-brief.md
- exercises/healthchecks-brief.md
- exercises/appconfig-brief.md
- exercises/ingress-brief.md
- k8s/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
-
- # DAY 1
- shared/prereqs.md
#- shared/webssh.md
- shared/connecting.md
#- k8s/versions-k8s.md
- shared/sampleapp.md
#- shared/composescale.md
#- shared/hastyconclusions.md
- shared/composedown.md
- k8s/concepts-k8s.md
- k8s/kubectlget.md
@@ -35,10 +43,9 @@ content:
- k8s/kubenet.md
- k8s/kubectlexpose.md
- k8s/shippingimages.md
#- k8s/buildshiprun-dockerhub.md
- exercises/k8sfundamentals-details.md
-
- k8s/ourapponkube.md
- # DAY 2
- shared/declarative.md
- k8s/declarative.md
- k8s/deploymentslideshow.md
@@ -48,50 +55,42 @@ content:
- k8s/namespaces.md
- k8s/yamldeploy.md
- k8s/authoring-yaml.md
- k8s/setup-overview.md
- k8s/setup-devel.md
- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
#- k8s/kubectlproxy.md
- exercises/localcluster-details.md
- exercises/remotecluster-details.md
- # DAY 3
- k8s/scalingdockercoins.md
- shared/hastyconclusions.md
- k8s/daemonset.md
- k8s/setup-overview.md
- k8s/setup-devel.md
#- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
#- k8s/dashboard.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
- exercises/localcluster-details.md
-
- k8s/rollout.md
- k8s/healthchecks.md
- exercises/healthchecks-details.md
- k8s/ingress.md
- exercises/ingress-details.md
#- k8s/ingress-tls.md
- k8s/kustomize.md
- k8s/k9s.md
- k8s/tilt.md
-
#- k8s/healthchecks-more.md
- exercises/healthchecks-details.md
- # DAY 4
- k8s/volumes.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/ingress.md
#- k8s/ingress-tls.md
- exercises/appconfig-details.md
- exercises/ingress-details.md
- # DAY 5
- k8s/netpol.md
- k8s/authn-authz.md
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/horizontal-pod-autoscaler.md
-
- k8s/volumes.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/statefulsets.md
- k8s/consul.md
- k8s/pv-pvc-sc.md
- k8s/volume-claim-templates.md
#- k8s/portworx.md
- k8s/openebs.md
- k8s/stateful-failover.md
#- k8s/batch-jobs.md
-
- |
# (Extra content)
- k8s/operators.md
- k8s/sealed-secrets.md
- k8s/eck.md
- shared/thankyou.md
#- k8s/horizontal-pod-autoscaler.md
#-
# - k8s/helm-intro.md
# - k8s/helm-chart-format.md
# - k8s/helm-create-basic-chart.md
# - k8s/helm-create-better-chart.md

View File

@@ -32,7 +32,7 @@
- You're welcome to use whatever you like (e.g. AWS profiles)
.lab[
.exercise[
- Set the AWS region, API access key, and secret key:
```bash
@@ -58,7 +58,7 @@
- register it in our kubeconfig file
.lab[
.exercise[
- Update our kubeconfig file:
```bash

View File

@@ -20,13 +20,13 @@
## Suspension of disbelief
The labs and demos in this section assume that we have set up `kubectl` on our
The exercises in this section assume that we have set up `kubectl` on our
local machine in order to access a remote cluster.
We will therefore show how to access services and pods of the remote cluster,
from our local machine.
You can also run these commands directly on the cluster (if you haven't
You can also run these exercises directly on the cluster (if you haven't
installed and set up `kubectl` locally).
Running commands locally will be less useful
@@ -58,7 +58,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access the `webui` service through `kubectl proxy`
.lab[
.exercise[
- Run an API proxy in the background:
```bash
@@ -101,7 +101,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access our remote Redis server
.lab[
.exercise[
- Forward connections from local port 10000 to remote port 6379:
```bash

View File

@@ -198,7 +198,7 @@ Some examples ...
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
.lab[
.exercise[
- Go to the webhook directory:
```bash
@@ -244,7 +244,7 @@ class: extra-details
- We need to update the configuration with the correct `url`
.lab[
.exercise[
- Edit the webhook configuration manifest:
```bash
@@ -271,7 +271,7 @@ class: extra-details
(so if the webhook server is down, we can still create pods)
.lab[
.exercise[
- Register the webhook:
```bash
@@ -288,7 +288,7 @@ It is strongly recommended to tail the logs of the API server while doing that.
- Let's create a pod and try to set a `color` label
.lab[
.exercise[
- Create a pod named `chroma`:
```bash
@@ -328,7 +328,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Update the webhook configuration
.lab[
.exercise[
- First, check the ngrok URL of the tunnel for the Flask app:
```bash
@@ -395,7 +395,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Let's get to work!
.lab[
.exercise[
- Make sure we're in the right directory:
```bash
@@ -424,7 +424,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
... we'll store it in a ConfigMap, and install dependencies on the fly
.lab[
.exercise[
- Load the webhook source in a ConfigMap:
```bash
@@ -446,7 +446,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
(of course, there are plenty others options; e.g. `cfssl`)
.lab[
.exercise[
- Generate a self-signed certificate:
```bash
@@ -470,7 +470,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
- Let's reconfigure the webhook to use our Service instead of ngrok
.lab[
.exercise[
- Edit the webhook configuration manifest:
```bash
@@ -504,7 +504,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
Shell to the rescue!
.lab[
.exercise[
- Load up our cert and encode it in base64:
```bash

View File

@@ -66,7 +66,7 @@
- We'll ask `kubectl` to show us the exacts requests that it's making
.lab[
.exercise[
- Check the URI for a cluster-scope, "core" resource, e.g. a Node:
```bash
@@ -122,7 +122,7 @@ class: extra-details
- What about namespaced resources?
.lab[
.exercise[
- Check the URI for a namespaced, "core" resource, e.g. a Service:
```bash
@@ -169,7 +169,7 @@ class: extra-details
## Accessing a subresource
.lab[
.exercise[
- List `kube-proxy` pods:
```bash
@@ -200,7 +200,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
- There are at least three useful commands to introspect the API server
.lab[
.exercise[
- List resources types, their group, kind, short names, and scope:
```bash
@@ -249,7 +249,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
The following assumes that `metrics-server` is deployed on your cluster.
.lab[
.exercise[
- Check that the metrics.k8s.io is registered with `metrics-server`:
```bash
@@ -271,7 +271,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- We can have multiple resources with the same name
.lab[
.exercise[
- Look for resources named `node`:
```bash
@@ -298,7 +298,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- But we can look at the raw data (with `-o json` or `-o yaml`)
.lab[
.exercise[
- Look at NodeMetrics objects with one of these commands:
```bash
@@ -320,7 +320,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
--
.lab[
.exercise[
- Display node metrics:
```bash
@@ -342,7 +342,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- Then we can register that server by creating an APIService resource
.lab[
.exercise[
- Check the definition used for the `metrics-server`:
```bash

View File

@@ -103,7 +103,7 @@ class: extra-details
---
## `WithWaitGroup`
## `WithWaitGroup`,
- When we shutdown, tells clients (with in-flight requests) to retry

View File

@@ -203,9 +203,9 @@ What does that mean?
## Let's experiment a bit!
- For this section, connect to the first node of the `test` cluster
- For the exercises in this section, connect to the first node of the `test` cluster
.lab[
.exercise[
- SSH to the first node of the test cluster
@@ -224,7 +224,7 @@ What does that mean?
- Let's create a simple object
.lab[
.exercise[
- Create a namespace with the following command:
```bash
@@ -246,7 +246,7 @@ This is equivalent to `kubectl create namespace hello`.
- Let's retrieve the object we just created
.lab[
.exercise[
- Read back our object:
```bash
@@ -354,7 +354,7 @@ class: extra-details
- The easiest way is to use `kubectl label`
.lab[
.exercise[
- In one terminal, watch namespaces:
```bash
@@ -402,7 +402,7 @@ class: extra-details
- DELETED resources
.lab[
.exercise[
- In one terminal, watch pods, displaying full events:
```bash

View File

@@ -361,7 +361,7 @@ class: extra-details
## Listing service accounts
.lab[
.exercise[
- The resource name is `serviceaccount` or `sa` for short:
```bash
@@ -378,7 +378,7 @@ class: extra-details
## Finding the secret
.lab[
.exercise[
- List the secrets for the `default` service account:
```bash
@@ -398,7 +398,7 @@ class: extra-details
- The token is stored in the secret, wrapped with base64 encoding
.lab[
.exercise[
- View the secret:
```bash
@@ -421,7 +421,7 @@ class: extra-details
- Let's send a request to the API, without and with the token
.lab[
.exercise[
- Find the ClusterIP for the `kubernetes` service:
```bash
@@ -616,7 +616,7 @@ class: extra-details
- Nixery automatically generates images with the requested packages
.lab[
.exercise[
- Run our pod:
```bash
@@ -632,7 +632,7 @@ class: extra-details
- Normally, at this point, we don't have any API permission
.lab[
.exercise[
- Check our permissions with `kubectl`:
```bash
@@ -658,7 +658,7 @@ class: extra-details
(but again, we could call it `view` or whatever we like)
.lab[
.exercise[
- Create the new role binding:
```bash
@@ -716,7 +716,7 @@ It's important to note a couple of details in these flags...
- We should be able to *view* things, but not to *edit* them
.lab[
.exercise[
- Check our permissions with `kubectl`:
```bash

View File

@@ -93,7 +93,7 @@
- We can use the `--dry-run=client` option
.lab[
.exercise[
- Generate the YAML for a Deployment without creating it:
```bash
@@ -128,7 +128,7 @@ class: extra-details
## The limits of `kubectl apply --dry-run=client`
.lab[
.exercise[
- Generate the YAML for a deployment:
```bash
@@ -161,7 +161,7 @@ class: extra-details
(all validation and mutation hooks will be executed)
.lab[
.exercise[
- Try the same YAML file as earlier, with server-side dry run:
```bash
@@ -200,7 +200,7 @@ class: extra-details
- `kubectl diff` does a server-side dry run, *and* shows differences
.lab[
.exercise[
- Try `kubectl diff` on the YAML that we tweaked earlier:
```bash

View File

@@ -1,693 +0,0 @@
# Amazon EKS
- Elastic Kubernetes Service
- AWS runs the Kubernetes control plane
(all we see is an API server endpoint)
- Pods can run on any combination of:
- EKS-managed nodes
- self-managed nodes
- Fargate
- Leverages and integrates with AWS services and APIs
---
## Some integrations
- Authenticate with IAM users and roles
- Associate IAM roles to Kubernetes ServiceAccounts
- Load balance traffic with ALB/ELB/NLB
- Persist data with EBS/EFS
- Label nodes with instance ID, instance type, region, AZ ...
- Pods can be "first class citizens" of VPC
---
## Pros/cons
- Fully managed control plane
- Handles deployment, upgrade, scaling of the control plane
- Available versions and features tend to lag a bit
- Doesn't fit the most demanding users
("demanding" starts somewhere between 100 and 1000 nodes)
---
## Good to know ...
- Some integrations are specific to EKS
(some authentication models)
- Many integrations are *not* specific to EKS
- The Cloud Controller Manager can run outside of EKS
(and provide LoadBalancer services, EBS volumes, and more)
---
# Provisioning clusters
- AWS console, API, CLI
- `eksctl`
- Infrastructure-as-Code
---
## AWS "native" provisioning
- AWS web console
- click-click-click!
- difficulty: low
- AWS API or CLI
- must provide subnets, ARNs
- difficulty: medium
---
## `eksctl`
- Originally developed by Weave
(back when AWS "native" provisioning wasn't very good)
- `eksctl create cluster` just works™
- Has been "adopted" by AWS
(is listed in official documentations)
---
## Infrastructure-as-Code
- Cloud Formation
- Terraform
[terraform-aws-eks](https://github.com/terraform-aws-modules/terraform-aws-eks)
by the community
([example](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/basic))
[terraform-provider-aws](https://github.com/hashicorp/terraform-provider-aws)
by Hashicorp
([example](https://github.com/hashicorp/terraform-provider-aws/tree/main/examples/eks-getting-started))
[Kubestack](https://www.kubestack.com/)
---
## Node groups
- Virtually all provisioning models have a concept of "node group"
- Node group = group of similar nodes in an ASG
- can span multiple AZ
- can have instances of different types¹
- A cluster will need at least one node group
.footnote[¹As I understand it, to specify fallbacks if one instance type is unavailable or out of capacity.]
---
# IAM → EKS authentication
- Access EKS clusters using IAM users and roles
- No special role, permission, or policy is needed in IAM
(but the `eks:DescribeCluster` permission can be useful, see later)
- Users and roles need to be explicitly listed in the cluster
- Configuration is done through a ConfigMap in the cluster
---
## Setting it up
- Nothing to do when creating the cluster
(feature is always enabled)
- Users and roles are *mapped* to Kubernetes users and groups
(through the `aws-auth` ConfigMap in `kube-system`)
- That's it!
---
## Mapping
- The `aws-auth` ConfigMap can contain two entries:
- `mapRoles` (map IAM roles)
- `mapUsers` (map IAM users)
- Each entry is a YAML file
- Each entry includes:
- `rolearn` or `userarn` to map
- `username` (as a string)
- `groups` (as a list; can be empty)
---
## Example
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
namespace: kube-system
name: aws-auth
data:
mapRoles: `|`
- rolearn: arn:aws:iam::111122223333:role/blah
username: blah
groups: [ devs, ops ]
mapUsers: `|`
- userarn: arn:aws:iam::111122223333:user/alice
username: alice
groups: [ system:masters ]
- userarn: arn:aws:iam::111122223333:user/bob
username: bob
groups: [ system:masters ]
```
---
## Client setup
- We need either the `aws` CLI or the `aws-iam-authenticator`
- We use them as `exec` plugins in `~/.kube/config`
- Done automatically by `eksctl`
- Or manually with `aws eks update-kubeconfig`
- Discovering the address of the API server requires one IAM permission
```json
"Action": [
"eks:DescribeCluster"
],
"Resource": "arn:aws:eks:<region>:<account>:cluster/<cluster-name>"
```
(wildcards can be used when specifying the resource)
---
class: extra-details
## How it works
- The helper generates a token
(with `aws eks get-token` or `aws-iam-authenticator token`)
- Note: these calls will always succeed!
(even if AWS API keys are invalid)
- The token is used to authenticate with the Kubernetes API
- AWS' Kubernetes API server will decode and validate the token
(and map the underlying user or role accordingly)
---
## Read The Fine Manual
https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
---
# EKS → IAM authentication
- Access AWS services from workloads running on EKS
(e.g.: access S3 bucket from code running in a Pod)
- This works by associating an IAM role to a K8S ServiceAccount
- There are also a few specific roles used internally by EKS
(e.g. to let the nodes establish network configurations)
- ... We won't talk about these
---
## The big picture
- One-time setup task
([create an OIDC provider associated to our EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html))
- Create (or update) a role with an appropriate *trust policy*
(more on that later)
- Annotate service accounts to map them to that role
`eks.amazonaws.com/role-arn=arn:aws:iam::111122223333:role/some-iam-role`
- Create (or re-create) pods using that ServiceAccount
- The pods can now use that role!
---
## Trust policies
- IAM roles have a *trust policy* (aka *assume role policy*)
(cf `aws iam create-role ... --assume-role-policy-document ...`)
- That policy contains a *statement* list
- This list indicates who/what is allowed to assume (use) the role
- In the current scenario, that policy will contain something saying:
*ServiceAccount S on EKS cluster C is allowed to use this role*
---
## Trust policy for a single ServiceAccount
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringEquals": {
"${OIDC_PROVIDER}:sub":
"system:serviceaccount:<namespace>:<service-account>"
}
}
}
]
}
```
---
## Trust policy for multiple ServiceAccounts
```json
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringLike": {
"${OIDC_PROVIDER}:sub":
["system:serviceaccount:container-training:*"]
}
}
}
]
}
```
---
## The little details
- When pods are created, they are processed by a mutating webhook
(typically named `pod-identity-webhook`)
- Pods using a ServiceAccount with the right annotation get:
- an extra token
<br/>
(mounted in `/var/run/secrets/eks.amazonaws.com/serviceaccount/token`)
- a few env vars
<br/>
(including `AWS_WEB_IDENTITY_TOKEN_FILE` and `AWS_ROLE_ARN`)
- AWS client libraries and tooling will work this that
(see [this list](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-minimum-sdk.html) for supported versions)
---
# CNI
- EKS is a compliant Kubernetes implementation
(which means we can use a wide range of CNI plugins)
- However, the recommended CNI plugin is the "AWS VPC CNI"
(https://github.com/aws/amazon-vpc-cni-k8s)
- Pods are then "first class citizens" of AWS VPC
---
## AWS VPC CNI
- Each Pod gets an address in a VPC subnet
- No overlay network, no encapsulation, no overhead
(other than AWS network fabric, obviously)
- Probably the fastest network option when running on AWS
- Allows "direct" load balancing (more on that later)
- Can use security groups with Pod traffic
- But: limits the number of Pods per Node
- But: more complex configuration (more on that later)
---
## Number of Pods per Node
- Each Pod gets an IP address on an ENI
(Elastic Network Interface)
- EC2 instances can only have a limited number of ENIs
(the exact limit depends on the instance type)
- ENIs can only have a limited number of IP addresses
(with variations here as well)
- This gives limits of e.g. 35 pods on `t3.large`, 29 on `c5.large` ...
(see
[full list of limits per instance type](https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt
)
and
[ENI/IP details](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/pkg/awsutils/vpc_ip_resource_limit.go
))
---
## Limits?
- These limits might seem low
- They're not *that* low if you compute e.g. the RAM/Pod ratio
- Except if you're running lots if tiny pods
- Bottom line: do the math!
---
class: extra-details
## Pre-loading
- It can take a little while to allocate/attach an ENI
- The AWS VPC CNI can keep a few extra addresses on each Node
(by default, one ENI worth of IP addresses)
- This is tunable if needed
(see [the docs](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/docs/eni-and-ip-target.md
) for details)
---
## Better load balancing
- The default path for inbound traffic is:
Load balancer → NodePort → Pod
- With the AWS VPC CNI, it becomes possible to do:
Load balancer → Pod
- More on that in the load balancing section!
---
## Configuration complexity
- The AWS VPC CNI is a very good solution when running EKS
- It brings optimized solutions to various use-cases:
- direct load balancing
- user authentication
- interconnection with other infrastructure
- etc.
- Keep in mind that all these solutions are AWS-specific
- They can require a non-trivial amount of specific configuration
- Especially when moving from a simple POC to an IAC deployment!
---
# Load Balancers
- Here be dragons!
- Multiple options, each with different pros/cons
- It's necessary to know both AWS products and K8S concepts
---
## AWS load balancers
- CLB / Classic Load Balancer (formerly known as ELB)
- can work in L4 (TCP) or L7 (HTTP) mode
- can do TLS unrolling
- can't do websockets, HTTP/2, content-based routing ...
- NLB / Network Load Balancer
- high-performance L4 load balancer with TLS support
- ALB / Application Load Balancer
- HTTP load balancer
- can do TLS unrolling
- can do websockets, HTTP/2, content-based routing ...
---
## Load balancing modes
- "IP targets"
- send traffic directly from LB to Pods
- Pods must use the AWS VPC CNI
- compatible with Fargate Pods
- "Instance targets"
- send traffic to a NodePort (generally incurs an extra hop)
- Pods can use any CNI
- not compatible with Fargate Pods
- Each LB (Service) can use a different mode, if necessary
---
## Kubernetes load balancers
- Service (L4)
- ClusterIP: internal load balancing
- NodePort: external load balancing on ports >30000
- LoadBalancer: external load balancing on the port you want
- ExternalIP: external load balancing directly on nodes
- Ingress (L7 HTTP)
- partial content-based routing (`Host` header, request path)
- requires an Ingress Controller (in front)
- works with Services (in back)
---
## Two controllers are available
- Kubernetes "in-tree" load balancer controller
- always available
- used by default for LoadBalancer Services
- creates CLB by default; can also do NLB
- can only do "instance targets"
- can use extra CLB features (TLS, HTTP)
- AWS Load Balancer Controller (fka AWS ALB Ingress Controller)
- optional add-on (requires additional config)
- primarily meant to be an Ingress Controller
- creates NLB and ALB
- can do "instance targets" and "IP targets"
- can also be used for LoadBalancer Services with type `nlb-ip`
- They can run side by side
---
## Which one should we use?
- AWS Load Balancer Controller supports "IP targets"
(which means direct routing of traffic to Pods)
- It can be used as an Ingress controller
- It *seems* to be the perfect solution for EKS!
- However ...
---
## Caveats
- AWS Load Balancer Controller requires extensive configuration
- a few hours to a few days to get it to work in a POC ...
- a few days to a few weeks to industrialize that process?
- It's AWS-specific
- It still introduces an extra hop, even if that hop is invisible
- Other ingress controllers can have interesting features
(canary deployment, A/B testing ...)
---
## Noteworthy annotations and docs
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb-ip`
- LoadBalancer Service with "IP targets" ([docs](https://kubernetes-sigs.github.io/aws-load-balancer-controller/latest/guide/service/nlb_ip_mode/))
- requires AWS Load Balancer Controller
- `service.beta.kubernetes.io/aws-load-balancer-internal: "true"`
- internal load balancer (for private VPC)
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb`
- opt for NLB instead of CLB with in-tree controller
- `service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: "*"`
- use HAProxy [PROXY protocol](https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt)
---
## TLS-related annotations
- `service.beta.kubernetes.io/aws-load-balancer-ssl-cert`
- enable TLS and use that certificate
- example value: `arn:aws:acm:<region>:<account>:certificate/<cert-id>`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-ports`
- enable TLS *only* on the specified ports (when multiple ports are exposed)
- example value: `"443,8443"`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy`
- specify ciphers and other TLS parameters to use (see [that list](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/elb-security-policy-table.html))
- example value: `"ELBSecurityPolicy-TLS-1-2-2017-01"`
---
## To HTTP(S) or not to HTTP(S)
- `service.beta.kubernetes.io/aws-load-balancer-backend-protocol`
- can be either `http`, `https`, `ssl`, or `tcp`
- if `https` or `ssl`: enable TLS to the backend
- if `http` or `https`: enable HTTP `x-forwarded-for` headers (with `http` or `https`)
???
## Cluster autoscaling
## Logging
https://docs.aws.amazon.com/eks/latest/userguide/logging-using-cloudtrail.html
:EN:- Working with EKS
:EN:- Cluster and user provisioning
:EN:- Networking and load balancing
:FR:- Travailler avec EKS
:FR:- Outils de déploiement
:FR:- Intégration avec IAM
:FR:- Fonctionalités réseau

View File

@@ -30,7 +30,7 @@
- or we hit the *backoff limit* of the Job (default=6)
.lab[
.exercise[
- Create a Job that has a 50% chance of success:
```bash
@@ -49,7 +49,7 @@
- If the Pod fails, the Job creates another Pod
.lab[
.exercise[
- Check the status of the Pod(s) created by the Job:
```bash
@@ -108,7 +108,7 @@ class: extra-details
(The Cron Job will not hold if a previous job is still running)
.lab[
.exercise[
- Create the Cron Job:
```bash
@@ -135,7 +135,7 @@ class: extra-details
(re-creating another one if it fails, for instance if its node fails)
.lab[
.exercise[
- Check the Jobs that are created:
```bash

View File

@@ -98,7 +98,7 @@
- Let's list our bootstrap tokens on a cluster created with kubeadm
.lab[
.exercise[
- Log into node `test1`
@@ -145,7 +145,7 @@ class: extra-details
- The token we need to use has the form `abcdef.1234567890abcdef`
.lab[
.exercise[
- Check that it is accepted by the API server:
```bash
@@ -177,7 +177,7 @@ class: extra-details
- That information is stored in a public ConfigMap
.lab[
.exercise[
- Retrieve that ConfigMap:
```bash

View File

@@ -88,7 +88,7 @@ spec:
- Let's try this out!
.lab[
.exercise[
- Check the port used by our self-hosted registry:
```bash

View File

@@ -40,7 +40,7 @@
- Let's build the image for the DockerCoins `worker` service with Kaniko
.lab[
.exercise[
- Find the port number for our self-hosted registry:
```bash
@@ -160,7 +160,7 @@ spec:
- The YAML for the pod is in `k8s/kaniko-build.yaml`
.lab[
.exercise[
- Create the pod:
```bash

View File

@@ -37,7 +37,7 @@ so that your build pipeline is automated.*
- We will deploy a registry container, and expose it with a NodePort
.lab[
.exercise[
- Create the registry service:
```bash
@@ -57,7 +57,7 @@ so that your build pipeline is automated.*
- We need to find out which port has been allocated
.lab[
.exercise[
- View the service details:
```bash
@@ -78,7 +78,7 @@ so that your build pipeline is automated.*
- A convenient Docker registry API route to remember is `/v2/_catalog`
.lab[
.exercise[
<!-- ```hide kubectl wait deploy/registry --for condition=available```-->
@@ -102,7 +102,7 @@ We should see:
- We can retag a small image, and push it to the registry
.lab[
.exercise[
- Make sure we have the busybox image, and retag it:
```bash
@@ -123,7 +123,7 @@ We should see:
- Let's use the same endpoint as before
.lab[
.exercise[
- Ensure that our busybox image is now in the local registry:
```bash
@@ -143,7 +143,7 @@ The curl command should now output:
- We are going to use a convenient feature of Docker Compose
.lab[
.exercise[
- Go to the `stacks` directory:
```bash
@@ -217,7 +217,7 @@ class: extra-details
- All our images should now be in the registry
.lab[
.exercise[
- Re-run the same `curl` command as earlier:
```bash
@@ -232,4 +232,4 @@ variable, so that we can quickly switch from
the self-hosted registry to pre-built images
hosted on the Docker Hub. So make sure that
this $REGISTRY variable is set correctly when
running these commands!*
running the exercises!*

View File

@@ -56,7 +56,7 @@
- It can be installed with a YAML manifest, or with Helm
.lab[
.exercise[
- Let's install the cert-manager Helm chart with this one-liner:
```bash
@@ -86,7 +86,7 @@
- The manifest shown on the previous slide is in @@LINK[k8s/cm-clusterissuer.yaml]
.lab[
.exercise[
- Create the ClusterIssuer:
```bash
@@ -115,7 +115,7 @@
- The manifest shown on the previous slide is in @@LINK[k8s/cm-certificate.yaml]
.lab[
.exercise[
- Edit the Certificate to update the domain name
@@ -140,7 +140,7 @@
- then it waits for the challenge to complete
.lab[
.exercise[
- View the resources created by cert-manager:
```bash
@@ -158,7 +158,7 @@
`http://<our-domain>/.well-known/acme-challenge/<token>`
.lab[
.exercise[
- Check the *path* of the Ingress in particular:
```bash
@@ -176,7 +176,7 @@
An Ingress Controller! 😅
.lab[
.exercise[
- Install an Ingress Controller:
```bash

View File

@@ -1,445 +0,0 @@
# Cluster autoscaler
- When the cluster is full, we need to add more nodes
- This can be done manually:
- deploy new machines and add them to the cluster
- if using managed Kubernetes, use some API/CLI/UI
- Or automatically with the cluster autoscaler:
https://github.com/kubernetes/autoscaler
---
## Use-cases
- Batch job processing
"once in a while, we need to execute these 1000 jobs in parallel"
"...but the rest of the time there is almost nothing running on the cluster"
- Dynamic workload
"a few hours per day or a few days per week, we have a lot of traffic"
"...but the rest of the time, the load is much lower"
---
## Pay for what you use
- The point of the cloud is to "pay for what you use"
- If you have a fixed number of cloud instances running at all times:
*you're doing it wrong (except if your load is always the same)*
- If you're not using some kind of autoscaling, you're wasting money
(except if you like lining the pockets of your cloud provider)
---
## Running the cluster autoscaler
- We must run nodes on a supported infrastructure
- See [here] for a non-exhaustive list of supported providers
- Sometimes, the cluster autoscaler is installed automatically
(or by setting a flag / checking a box when creating the cluster)
- Sometimes, it requires additional work
(which is often non-trivial and highly provider-specific)
[here]: https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider
---
## Scaling up in theory
IF a Pod is `Pending`,
AND adding a Node would allow this Pod to be scheduled,
THEN add a Node.
---
## Fine print 1
*IF a Pod is `Pending`...*
- First of all, the Pod must exist
- Pod creation might be blocked by e.g. a namespace quota
- In that case, the cluster autoscaler will never trigger
---
## Fine print 2
*IF a Pod is `Pending`...*
- If our Pods do not have resource requests:
*they will be in the `BestEffort` class*
- Generally, Pods in the `BestEffort` class are schedulable
- except if they have anti-affinity placement constraints
- except if all Nodes already run the max number of pods (110 by default)
- Therefore, if we want to leverage cluster autoscaling:
*our Pods should have resource requests*
---
## Fine print 3
*AND adding a Node would allow this Pod to be scheduled...*
- The autoscaler won't act if:
- the Pod is too big to fit on a single Node
- the Pod has impossible placement constraints
- Examples:
- "run one Pod per datacenter" with 4 pods and 3 datacenters
- "use this nodeSelector" but no such Node exists
---
## Trying it out
- We're going to check how much capacity is available on the cluster
- Then we will create a basic deployment
- We will add resource requests to that deployment
- Then scale the deployment to exceed the available capacity
- **The following commands require a working cluster autoscaler!**
---
## Checking available resources
.lab[
- Check how much CPU is allocatable on the cluster:
```bash
kubectl get nodes -o jsonpath={..allocatable.cpu}
```
]
- If we see e.g. `2800m 2800m 2800m`, that means:
3 nodes with 2.8 CPUs allocatable each
- To trigger autoscaling, we will create 7 pods requesting 1 CPU each
(each node can fit 2 such pods)
---
## Creating our test Deployment
.lab[
- Create the Deployment:
```bash
kubectl create deployment blue --image=jpetazzo/color
```
- Add a request for 1 CPU:
```bash
kubectl patch deployment blue --patch='
spec:
template:
spec:
containers:
- name: color
resources:
requests:
cpu: 1
'
```
]
---
## Scaling up in practice
- This assumes that we have strictly less than 7 CPUs available
(adjust the numbers if necessary!)
.lab[
- Scale up the Deployment:
```bash
kubectl scale deployment blue --replicas=7
```
- Check that we have a new Pod, and that it's `Pending`:
```bash
kubectl get pods
```
]
---
## Cluster autoscaling
- After a few minutes, a new Node should appear
- When that Node becomes `Ready`, the Pod will be assigned to it
- The Pod will then be `Running`
- Reminder: the `AGE` of the Pod indicates when the Pod was *created*
(it doesn't indicate when the Pod was scheduled or started!)
- To see other state transitions, check the `status.conditions` of the Pod
---
## Scaling down in theory
IF a Node has less than 50% utilization for 10 minutes,
AND all its Pods can be scheduled on other Nodes,
AND all its Pods are *evictable*,
AND the Node doesn't have a "don't scale me down" annotation¹,
THEN drain the Node and shut it down.
.footnote[¹The annotation is: `cluster-autoscaler.kubernetes.io/scale-down-disabled=true`]
---
## When is a Pod "evictable"?
By default, Pods are evictable, except if any of the following is true.
- They have a restrictive Pod Disruption Budget
- They are "standalone" (not controlled by a ReplicaSet/Deployment, StatefulSet, Job...)
- They are in `kube-system` and don't have a Pod Disruption Budget
- They have local storage (that includes `EmptyDir`!)
This can be overridden by setting the annotation:
<br/>
`cluster-autoscaler.kubernetes.io/safe-to-evict`
<br/>(it can be set to `true` or `false`)
---
## Pod Disruption Budget
- Special resource to configure how many Pods can be *disrupted*
(i.e. shutdown/terminated)
- Applies to Pods matching a given selector
(typically matching the selector of a Deployment)
- Only applies to *voluntary disruption*
(e.g. cluster autoscaler draining a node, planned maintenance...)
- Can express `minAvailable` or `maxUnavailable`
- See [documentation] for details and examples
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
---
## Local storage
- If our Pods use local storage, they will prevent scaling down
- If we have e.g. an `EmptyDir` volume for caching/sharing:
make sure to set the `.../safe-to-evict` annotation to `true`!
- Even if the volume...
- ...only has a PID file or UNIX socket
- ...is empty
- ...is not mounted by any container in the Pod!
---
## Expensive batch jobs
- Careful if we have long-running batch jobs!
(e.g. jobs that take many hours/days to complete)
- These jobs could get evicted before they complete
(especially if they use less than 50% of the allocatable resources)
- Make sure to set the `.../safe-to-evict` annotation to `false`!
---
## Node groups
- Easy scenario: all nodes have the same size
- Realistic scenario: we have nodes of different sizes
- e.g. mix of CPU and GPU nodes
- e.g. small nodes for control plane, big nodes for batch jobs
- e.g. leveraging spot capacity
- The cluster autoscaler can handle it!
---
class: extra-details
## Leveraging spot capacity
- AWS, Azure, and Google Cloud are typically more expensive than their competitors
- However, they offer *spot* capacity (spot instances, spot VMs...)
- *Spot* capacity:
- has a much lower cost (see e.g. AWS [spot instance advisor][awsspot])
- has a cost that varies continuously depending on regions, instance type...
  - can be preempted at any time
- To be cost-effective, it is strongly recommended to leverage spot capacity
[awsspot]: https://aws.amazon.com/ec2/spot/instance-advisor/
---
## Node groups in practice
- The cluster autoscaler maps nodes to *node groups*
- this is an internal, provider-dependent mechanism
- the node group is sometimes visible through a proprietary label or annotation
- Each node group is scaled independently
- The cluster autoscaler uses [expanders] to decide which node group to scale up
(the default expander is "random", i.e. pick a node group at random!)
- Of course, only acceptable node groups will be considered
(i.e. node groups that could accommodate the `Pending` Pods)
[expanders]: https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-expanders
---
class: extra-details
## Scaling to zero
- *In general,* a node group needs to have at least one node at all times
(the cluster autoscaler uses that node to figure out the size, labels, taints... of the group)
- *On some providers,* there are special ways to specify labels and/or taints
(but if you want to scale to zero, check that the provider supports it!)
---
## Warning
- Autoscaling up is easy
- Autoscaling down is harder
- It might get stuck because Pods are not evictable
- Do at least a dry run to make sure that the cluster scales down correctly!
- Have alerts on cloud spend
- *Especially when using big/expensive nodes (e.g. with GPU!)*
---
## Preferred vs. Required
- Some Kubernetes mechanisms allow to express "soft preferences":
- affinity (`requiredDuringSchedulingIgnoredDuringExecution` vs `preferredDuringSchedulingIgnoredDuringExecution`)
- taints (`NoSchedule`/`NoExecute` vs `PreferNoSchedule`)
- Remember that these "soft preferences" can be ignored
(and given enough time and churn on the cluster, they will!)
---
## Troubleshooting
- The cluster autoscaler publishes its status on a ConfigMap
.lab[
- Check the cluster autoscaler status:
```bash
kubectl describe configmap --namespace kube-system cluster-autoscaler-status
```
]
- We can also check the logs of the autoscaler
(except on managed clusters where it's running internally, not visible to us)
---
## Acknowledgements
Special thanks to [@s0ulshake] for their help with this section!
If you need help to run your data science workloads on Kubernetes,
<br/>they're available for consulting.
(Get in touch with them through https://www.linkedin.com/in/ajbowen/)
[@s0ulshake]: https://twitter.com/s0ulshake

View File

@@ -18,9 +18,9 @@
- It's easy to check the version for the API server
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check the version of kubectl and of the API server:
```bash
@@ -39,7 +39,7 @@
- It's also easy to check the version of kubelet
.lab[
.exercise[
- Check node versions (includes kubelet, kernel, container engine):
```bash
@@ -60,7 +60,7 @@
- If the control plane is self-hosted (running in pods), we can check it
.lab[
.exercise[
- Show image versions for all pods in `kube-system` namespace:
```bash
@@ -81,7 +81,7 @@
## What version are we running anyway?
- When I say, "I'm running Kubernetes 1.18", is that the version of:
- When I say, "I'm running Kubernetes 1.15", is that the version of:
- kubectl
@@ -157,15 +157,15 @@
## Kubernetes uses semantic versioning
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.18.20:
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.17.2:
- MAJOR = 1
- MINOR = 18
- PATCH = 20
- MINOR = 17
- PATCH = 2
- It's always possible to mix and match different PATCH releases
(e.g. 1.18.20 and 1.18.15 are compatible)
(e.g. 1.16.1 and 1.16.6 are compatible)
- It is recommended to run the latest PATCH release
@@ -181,9 +181,9 @@
- All components support a difference of one¹ MINOR version
- This allows live upgrades (since we can mix e.g. 1.18 and 1.19)
- This allows live upgrades (since we can mix e.g. 1.15 and 1.16)
- It also means that going from 1.18 to 1.20 requires going through 1.19
- It also means that going from 1.14 to 1.16 requires going through 1.15
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
and kubectl, which can be one MINOR ahead or behind API server.]
@@ -214,7 +214,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- We will change the version of the API server
- We will work with cluster `oldversion` (nodes `oldversion1`, `oldversion2`, `oldversion3`)
- We will work with cluster `test` (nodes `test1`, `test2`, `test3`)
---
@@ -240,9 +240,9 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- We will edit the YAML file to use a different image version
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check API server version:
```bash
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
```
- Look for the `image:` line, and update it to e.g. `v1.19.0`
- Look for the `image:` line, and update it to e.g. `v1.16.0`
]
@@ -264,7 +264,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
- The API server will be briefly unavailable while kubelet restarts it
.lab[
.exercise[
- Check the API server version:
```bash
@@ -299,7 +299,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
(note: this is possible only because the cluster was installed with kubeadm)
.lab[
.exercise[
- Check what will be upgraded:
```bash
@@ -308,11 +308,11 @@ and kubectl, which can be one MINOR ahead or behind API server.]
]
Note 1: kubeadm thinks that our cluster is running 1.19.0.
Note 1: kubeadm thinks that our cluster is running 1.16.0.
<br/>It is confused by our manual upgrade of the API server!
Note 2: kubeadm itself is still version 1.18.20.
<br/>It doesn't know how to upgrade to 1.19.X.
Note 2: kubeadm itself is still version 1.15.9.
<br/>It doesn't know how to upgrade to 1.16.X.
---
@@ -320,7 +320,7 @@ Note 2: kubeadm itself is still version 1.18.20..
- First things first: we need to upgrade kubeadm
.lab[
.exercise[
- Upgrade kubeadm:
```
@@ -335,28 +335,28 @@ Note 2: kubeadm itself is still version 1.18.20..
]
Problem: kubeadm doesn't know how to handle
upgrades from version 1.18.
upgrades from version 1.15.
This is because we installed version 1.22 (or even later).
This is because we installed version 1.17 (or even later).
We need to install kubeadm version 1.19.X.
We need to install kubeadm version 1.16.X.
---
## Downgrading kubeadm
- We need to go back to version 1.19.X.
- We need to go back to version 1.16.X (e.g. 1.16.6)
.lab[
.exercise[
- View available versions for package `kubeadm`:
```bash
apt show kubeadm -a | grep ^Version | grep 1.19
apt show kubeadm -a | grep ^Version | grep 1.16
```
- Downgrade kubeadm:
```
sudo apt install kubeadm=1.19.8-00
sudo apt install kubeadm=1.16.6-00
```
- Check what kubeadm tells us:
@@ -366,7 +366,7 @@ We need to install kubeadm version 1.19.X.
]
kubeadm should now agree to upgrade to 1.19.8.
kubeadm should now agree to upgrade to 1.16.6.
---
@@ -378,11 +378,11 @@ kubeadm should now agree to upgrade to 1.19.8.
- Or we can try the upgrade anyway
.lab[
.exercise[
- Perform the upgrade:
```bash
sudo kubeadm upgrade apply v1.19.8
sudo kubeadm upgrade apply v1.16.6
```
]
@@ -395,9 +395,9 @@ kubeadm should now agree to upgrade to 1.19.8.
- We can therefore use `apt` or `apt-get`
.lab[
.exercise[
- Log into node `oldversion3`
- Log into node `test3`
- View available versions for package `kubelet`:
```bash
@@ -406,7 +406,7 @@ kubeadm should now agree to upgrade to 1.19.8.
- Upgrade kubelet:
```bash
sudo apt install kubelet=1.19.8-00
sudo apt install kubelet=1.16.6-00
```
]
@@ -415,9 +415,9 @@ kubeadm should now agree to upgrade to 1.19.8.
## Checking what we've done
.lab[
.exercise[
- Log into node `oldversion1`
- Log into node `test1`
- Check node versions:
```bash
@@ -458,15 +458,15 @@ kubeadm should now agree to upgrade to 1.19.8.
(after upgrading the control plane)
.lab[
.exercise[
- Download the configuration on each node, and upgrade kubelet:
```bash
for N in 1 2 3; do
ssh oldversion$N "
sudo apt install kubeadm=1.19.8-00 &&
ssh test$N "
sudo apt install kubeadm=1.16.6-00 &&
sudo kubeadm upgrade node &&
sudo apt install kubelet=1.19.8-00"
sudo apt install kubelet=1.16.6-00"
done
```
]
@@ -475,9 +475,9 @@ kubeadm should now agree to upgrade to 1.19.8.
## Checking what we've done
- All our nodes should now be updated to version 1.19.8
- All our nodes should now be updated to version 1.16.6
.lab[
.exercise[
- Check nodes versions:
```bash
@@ -492,13 +492,13 @@ class: extra-details
## Skipping versions
- This example worked because we went from 1.18 to 1.19
- This example worked because we went from 1.15 to 1.16
- If you are upgrading from e.g. 1.16, you will have to go through 1.17 first
- If you are upgrading from e.g. 1.14, you will have to go through 1.15 first
- This means upgrading kubeadm to 1.17.X, then using it to upgrade the cluster
- This means upgrading kubeadm to 1.15.X, then using it to upgrade the cluster
- Then upgrading kubeadm to 1.18.X, etc.
- Then upgrading kubeadm to 1.16.X, etc.
- **Make sure to read the release notes before upgrading!**

View File

@@ -204,7 +204,7 @@ class: extra-details
## Logging into the new cluster
.lab[
.exercise[
- Log into node `kuberouter1`
@@ -228,7 +228,7 @@ class: extra-details
- By default, kubelet gets the CNI configuration from `/etc/cni/net.d`
.lab[
.exercise[
- Check the content of `/etc/cni/net.d`
@@ -262,7 +262,7 @@ class: extra-details
(where `C` is our cluster number)
.lab[
.exercise[
- Edit the Compose file to set the Cluster CIDR:
```bash
@@ -298,7 +298,7 @@ class: extra-details
(where `A.B.C.D` is the public address of `kuberouter1`, running the control plane)
.lab[
.exercise[
- Edit the YAML file to set the API server address:
```bash
@@ -320,7 +320,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- This is similar to what we did for the `kubenet` cluster
.lab[
.exercise[
- Generate the kubeconfig file (replacing `X.X.X.X` with the address of `kuberouter1`):
```bash
@@ -338,7 +338,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- We need to copy that kubeconfig file to the other nodes
.lab[
.exercise[
- Copy `kubeconfig` to the other nodes:
```bash
@@ -359,7 +359,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
- We need to pass `--network-plugin=cni`
.lab[
.exercise[
- Join the first node:
```bash
@@ -384,7 +384,7 @@ class: extra-details
(in `/etc/cni/net.d`)
.lab[
.exercise[
- Check the content of `/etc/cni/net.d`
@@ -400,7 +400,7 @@ class: extra-details
- Let's create a Deployment and expose it with a Service
.lab[
.exercise[
- Create a Deployment running a web server:
```bash
@@ -423,7 +423,7 @@ class: extra-details
## Checking that everything works
.lab[
.exercise[
- Get the ClusterIP address for the service:
```bash
@@ -449,7 +449,7 @@ class: extra-details
- What if we need to check that everything is working properly?
.lab[
.exercise[
- Check the IP addresses of our pods:
```bash
@@ -490,7 +490,7 @@ class: extra-details
## Trying `kubectl logs` / `kubectl exec`
.lab[
.exercise[
- Try to show the logs of a kube-router pod:
```bash

View File

@@ -384,7 +384,7 @@ We'll cover them just after!*
- We can create each Namespace, Deployment, and Service by hand, or...
.lab[
.exercise[
- We can deploy the app with a YAML manifest:
```bash
@@ -403,7 +403,7 @@ We'll cover them just after!*
- Since the `cluster.local` suffix can change, we'll use `x.y.svc`
.lab[
.exercise[
- Check that the app is up and running:
```bash
@@ -427,7 +427,7 @@ Here is the file that we will use, @@LINK[k8s/haproxy.cfg]:
## Creating the ConfigMap
.lab[
.exercise[
- Create a ConfigMap named `haproxy` and holding the configuration file:
```bash
@@ -455,7 +455,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
## Creating the Pod
.lab[
.exercise[
- Create the HAProxy Pod:
```bash
@@ -480,7 +480,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
(one request to `blue`, one request to `green`, one request to `blue`, etc.)
.lab[
.exercise[
- Send a few requests:
```bash
@@ -509,7 +509,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
## Creating the configmap
.lab[
.exercise[
- Our configmap will have a single key, `http.addr`:
```bash
@@ -539,7 +539,7 @@ We are going to use the following pod definition:
- The resource definition from the previous slide is in @@LINK[k8s/registry.yaml]
.lab[
.exercise[
- Create the registry pod:
```bash

View File

@@ -86,7 +86,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
- We'll use the provided YAML file
.lab[
.exercise[
- Create the stateful set and associated service:
```bash
@@ -177,7 +177,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
(pods will be replaced one by one)
.lab[
.exercise[
- Deploy a better Consul cluster:
```bash

View File

@@ -74,7 +74,7 @@
- Let's create the Custom Resource Definition for our Coffee resource
.lab[
.exercise[
- Load the CRD:
```bash
@@ -103,7 +103,7 @@ spec:
taste: strong
```
.lab[
.exercise[
- Create a few types of coffee beans:
```bash
@@ -118,7 +118,7 @@ spec:
- By default, `kubectl get` only shows name and age of custom resources
.lab[
.exercise[
- View the coffee beans that we just created:
```bash
@@ -195,7 +195,7 @@ There are many possibilities!
- Let's update our CRD using @@LINK[k8s/coffee-3.yaml]
.lab[
.exercise[
- Update the CRD:
```bash

View File

@@ -186,7 +186,7 @@ class: extra-details
.warning[If you want to use another name than `jean.doe`, update the YAML file!]
.lab[
.exercise[
- Create the global namespace for all users:
```bash
@@ -208,7 +208,7 @@ class: extra-details
(the token will be their password)
.lab[
.exercise[
- List the user's secrets:
```bash
@@ -228,7 +228,7 @@ class: extra-details
- Let's create a new context that will use that token to access the API
.lab[
.exercise[
- Add a new identity to our kubeconfig file:
```bash
@@ -254,7 +254,7 @@ class: extra-details
- Let's check that our access rights are set properly
.lab[
.exercise[
- Try to access any resource:
```bash
@@ -280,7 +280,7 @@ class: extra-details
(many people prefer cfssl, easyrsa, or other tools; that's fine too!)
.lab[
.exercise[
- Generate the key and certificate signing request:
```bash
@@ -313,7 +313,7 @@ The command above generates:
## Sending the CSR to Kubernetes
.lab[
.exercise[
- Generate and create the CSR resource:
```bash
@@ -344,7 +344,7 @@ The command above generates:
- For now, this is configured [through an experimental controller manager flag](https://github.com/kubernetes/kubernetes/issues/67324)
.lab[
.exercise[
- Edit the static pod definition for the controller manager:
```bash
@@ -366,7 +366,7 @@ The command above generates:
- Let's inspect the CSR, and if it is valid, approve it
.lab[
.exercise[
- Switch back to `cluster-admin`:
```bash
@@ -389,7 +389,7 @@ The command above generates:
## Obtaining the certificate
.lab[
.exercise[
- Switch back to the user's identity:
```bash
@@ -414,7 +414,7 @@ The command above generates:
## Using the certificate
.lab[
.exercise[
- Add the key and certificate to kubeconfig:
```bash

View File

@@ -83,7 +83,7 @@
- Let's start with the YAML file for the current `rng` resource
.lab[
.exercise[
- Dump the `rng` resource in YAML:
```bash
@@ -102,7 +102,7 @@
(It can't be that easy, right?)
.lab[
.exercise[
- Change `kind: Deployment` to `kind: DaemonSet`
@@ -169,7 +169,7 @@ We all knew this couldn't be that easy, right!
- The `--force` flag's actual name is `--validate=false`
.lab[
.exercise[
- Try to load our YAML file and ignore errors:
```bash
@@ -192,7 +192,7 @@ Wait ... Now, can it be *that* easy?
- Did we transform our `deployment` into a `daemonset`?
.lab[
.exercise[
- Look at the resources that we have now:
```bash
@@ -289,7 +289,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
- That set of pods is defined by the *selector* of the `rng` service
.lab[
.exercise[
- Check the *selector* in the `rng` service definition:
```bash
@@ -312,7 +312,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
- For instance, with `kubectl get`, `kubectl logs`, `kubectl delete` ... and more
.lab[
.exercise[
- Get the list of pods matching selector `app=rng`:
```bash
@@ -480,7 +480,7 @@ be any interruption.*
- `kubectl label` can use selectors itself
.lab[
.exercise[
- Add `active=yes` to all pods that have `app=rng`:
```bash
@@ -501,7 +501,7 @@ be any interruption.*
- the selector of the service (that's the one we want to change)
.lab[
.exercise[
- Update the service to add `active: yes` to its selector:
```bash
@@ -546,7 +546,7 @@ be any interruption.*
## Updating the service selector, take 2
.lab[
.exercise[
- Update the YAML manifest of the service
@@ -592,7 +592,7 @@ If we did everything correctly, the web UI shouldn't show any change.
## Removing a pod from the load balancer
.lab[
.exercise[
- In one window, check the logs of that pod:
```bash

View File

@@ -56,7 +56,7 @@
- The guest/admin account
.lab[
.exercise[
- Create all the dashboard resources, with the following command:
```bash
@@ -69,7 +69,7 @@
## Connecting to the dashboard
.lab[
.exercise[
- Check which port the dashboard is on:
```bash
@@ -81,7 +81,7 @@
You'll want the `3xxxx` port.
.lab[
.exercise[
- Connect to http://oneofournodes:3xxxx/
@@ -115,7 +115,7 @@ The dashboard will then ask you which authentication you want to use.
- Seriously, don't leave that thing running!
.lab[
.exercise[
- Remove what we just created:
```bash
@@ -160,7 +160,7 @@ The dashboard will then ask you which authentication you want to use.
(named `kubernetes-dashboard:cluster-admin`)
.lab[
.exercise[
- Create all the dashboard resources, with the following command:
```bash
@@ -177,7 +177,7 @@ The dashboard will then ask you which authentication you want to use.
- Kubernetes will automatically generate a token for that ServiceAccount
.lab[
.exercise[
- Display the token:
```bash
@@ -197,7 +197,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
## Connecting to the dashboard
.lab[
.exercise[
- Check which port the dashboard is on:
```bash
@@ -209,7 +209,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
You'll want the `3xxxx` port.
.lab[
.exercise[
- Connect to http://oneofournodes:3xxxx/

View File

@@ -1,157 +0,0 @@
# Our demo apps
- We are going to use a few demo apps for demos and labs
- Let's get acquainted with them before we dive in!
---
## The `color` app
- Image name: `jpetazzo/color`, `ghcr.io/jpetazzo/color`
- Available for linux/amd64, linux/arm64, linux/arm/v7 platforms
- HTTP server listening on port 80
- Serves a web page with a single line of text
- The background of the page is derived from the hostname
(e.g. if the hostname is `blue-xyz-123`, the background is `blue`)
- The web page is "curl-friendly"
(it contains `\r` characters to hide HTML tags and declutter the output)
---
## The `color` app in action
- Create a Deployment called `blue` using image `jpetazzo/color`
- Expose that Deployment with a Service
- Connect to the Service with a web browser
- Connect to the Service with `curl`
---
## Dockercoins
- App with 5 microservices:
- `worker` (runs an infinite loop connecting to the other services)
- `rng` (web service; generates random numbers)
- `hasher` (web service; computes SHA sums)
- `redis` (holds a single counter incremented by the `worker` at each loop)
- `webui` (web app; displays a graph showing the rate of increase of the counter)
- Uses a mix of Node, Python, Ruby
- Very simple components (approx. 50 lines of code for the most complicated one)
---
class: pic
![Dockercoins application diagram](images/dockercoins-diagram.png)
---
## Deploying Dockercoins
- Pre-built images available as `dockercoins/<component>:v0.1`
(e.g. `dockercoins/worker:v0.1`)
- Containers "discover" each other through DNS
(e.g. worker connects to `http://hasher/`)
- A Kubernetes YAML manifest is available in *the* repo
---
## The repository
- When we refer to "the" repository, it means:
https://github.com/jpetazzo/container.training
- It hosts slides, demo apps, deployment scripts...
- All the sample commands, labs, etc. will assume that it's available in:
`~/container.training`
- Let's clone the repo in our environment!
---
## Cloning the repo
.lab[
- There is a convenient shortcut to clone the repository:
```bash
git clone https://container.training
```
]
While the repository clones, fork it, star it ~~subscribe and hit the bell!~~
---
## Running Dockercoins
- All the Kubernetes manifests are in the `k8s` subdirectory
- This directory has a `dockercoins.yaml` manifest
.lab[
- Deploy Dockercoins:
```bash
kubectl apply -f ~/container.training/k8s/dockercoins.yaml
```
]
- The `webui` is exposed with a `NodePort` service
- Connect to it (through the `NodePort` or `port-forward`)
- Note, it might take a minute for the worker to start
---
## Details
- If the `worker` Deployment is scaled up, the graph should go up
- The `rng` Service is meant to be a bottleneck
(capping the graph to 10/second until `rng` is scaled up)
- There is artificial latency in the different services
(so that the app doesn't consume CPU/RAM/network)
---
## More colors
- The repository also contains a `rainbow.yaml` manifest
- It creates three namespaces (`blue`, `green`, `red`)
- In each namespace, there is an instance of the `color` app
(we can use that later to do *literal* blue-green deployment!)

View File

@@ -52,7 +52,7 @@
- Let's make sure we have everything we need first
.lab[
.exercise[
- Log into the `dmuc1` machine
@@ -101,7 +101,7 @@
## Starting API server
.lab[
.exercise[
- Try to start the API server:
```bash
@@ -118,7 +118,7 @@ it cannot start without it.
## Starting etcd
.lab[
.exercise[
- Try to start etcd:
```bash
@@ -144,7 +144,7 @@ serving insecure client requests on 127.0.0.1:2379, this is strongly discouraged
- That argument should be a comma-separated list of URLs
.lab[
.exercise[
- Start API server:
```bash
@@ -161,7 +161,7 @@ Success!
- Let's try a few "classic" commands
.lab[
.exercise[
- List nodes:
```bash
@@ -201,7 +201,7 @@ class: extra-details
- Let's run a web server!
.lab[
.exercise[
- Create a Deployment with NGINX:
```bash
@@ -216,7 +216,7 @@ Success?
## Checking our Deployment status
.lab[
.exercise[
- Look at pods, deployments, etc.:
```bash
@@ -249,7 +249,7 @@ And, there is no ReplicaSet, and no Pod.
## Starting the controller manager
.lab[
.exercise[
- Try to start the controller manager:
```bash
@@ -289,7 +289,7 @@ Using the inClusterConfig. This might not work.
## Starting the controller manager (for real)
.lab[
.exercise[
- Start the controller manager:
```bash
@@ -304,7 +304,7 @@ Success!
## Checking our Deployment status
.lab[
.exercise[
- Check all our resources again:
```bash
@@ -371,7 +371,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
- The ReplicaSet controller will no longer create pods referencing the (missing) token
.lab[
.exercise[
- Programmatically change the `default` ServiceAccount:
```bash
@@ -402,7 +402,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
- Once we patch the default service account, the ReplicaSet can create a Pod
.lab[
.exercise[
- Check that we now have a pod:
```bash
@@ -437,7 +437,7 @@ If we're impatient, we can restart the controller manager.
- We're going to use Docker (because it's the default option)
.lab[
.exercise[
- Start the Docker Engine:
```bash
@@ -479,7 +479,7 @@ docker run alpine echo hello world
- Or we can generate the file with `kubectl`
.lab[
.exercise[
- Create the file `~/.kube/config` with `kubectl`:
```bash
@@ -519,7 +519,7 @@ clusters:
## Starting kubelet
.lab[
.exercise[
- Start kubelet with that kubeconfig file:
```bash
@@ -536,7 +536,7 @@ Success!
- Let's check that our node registered correctly
.lab[
.exercise[
- List the nodes in our cluster:
```bash
@@ -555,7 +555,7 @@ Its name will be its hostname (it should be `dmuc1`).
- Let's check if our pod is running
.lab[
.exercise[
- List all resources:
```bash
@@ -594,7 +594,7 @@ Which is normal: it needs to be *scheduled*.
- Just like for controller manager, we can use `--kubeconfig` or `--master`
.lab[
.exercise[
- Start the scheduler:
```bash
@@ -613,7 +613,7 @@ Which is normal: it needs to be *scheduled*.
- Then it will be `Running`
.lab[
.exercise[
- Check pod status:
```bash
@@ -654,7 +654,7 @@ class: extra-details
- Let's check that our pod correctly runs NGINX
.lab[
.exercise[
- Check our pod's IP address:
```bash
@@ -676,7 +676,7 @@ We should see the `Welcome to nginx!` page.
- We can now create a Service associated with this Deployment
.lab[
.exercise[
- Expose the Deployment's port 80:
```bash
@@ -705,7 +705,7 @@ This won't work. We need kube-proxy to enable internal communication.
(although that will be deprecated in the future)
.lab[
.exercise[
- Start kube-proxy:
```bash
@@ -720,7 +720,7 @@ This won't work. We need kube-proxy to enable internal communication.
- Now that kube-proxy is running, we should be able to connect
.lab[
.exercise[
- Check the Service's ClusterIP again, and retry connecting:
```bash
@@ -742,7 +742,7 @@ class: extra-details
- When a Service is created or updated, kube-proxy creates iptables rules
.lab[
.exercise[
- Check out the `OUTPUT` chain in the `nat` table:
```bash
@@ -766,7 +766,7 @@ class: extra-details
- The last command showed a chain named `KUBE-SVC-...` corresponding to our service
.lab[
.exercise[
- Check that `KUBE-SVC-...` chain:
```bash

View File

@@ -28,7 +28,7 @@
- ... But losing a node = losing the volumes on that node!
.lab[
.exercise[
- Install the local path storage provisioner:
```bash
@@ -49,7 +49,7 @@
- Or we need to tag a StorageClass to be used as the default one
.lab[
.exercise[
- List StorageClasses:
```bash
@@ -68,7 +68,7 @@ We should see the `local-path` StorageClass.
`storageclass.kubernetes.io/is-default-class: true`
.lab[
.exercise[
- Tag the StorageClass so that it's the default one:
```bash
@@ -99,7 +99,7 @@ Now, the StorageClass should have `(default)` next to its name.
- All these resources are grouped in a convenient YAML file
.lab[
.exercise[
- Install the operator:
```bash
@@ -114,7 +114,7 @@ Now, the StorageClass should have `(default)` next to its name.
- Let's see which CRDs were created
.lab[
.exercise[
- List all CRDs:
```bash
@@ -135,7 +135,7 @@ This operator supports ElasticSearch, but also Kibana and APM. Cool!
- We need to create that namespace
.lab[
.exercise[
- Create the `eck-demo` namespace:
```bash
@@ -180,7 +180,7 @@ ServiceAccount is located.
- whether to use TLS or not
- etc.
.lab[
.exercise[
- Create our ElasticSearch cluster:
```bash
@@ -197,7 +197,7 @@ ServiceAccount is located.
- It will report our cluster status through the CRD
.lab[
.exercise[
- Check the logs of the operator:
```bash
@@ -231,7 +231,7 @@ ServiceAccount is located.
- But let's check at least if ElasticSearch is up!
.lab[
.exercise[
- Get the ClusterIP of our ES instance:
```bash
@@ -255,7 +255,7 @@ We get an authentication error. Our cluster is protected!
- It generates a random password and stores it in a Secret
.lab[
.exercise[
- Extract the password:
```bash
@@ -280,7 +280,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- We'll deploy a filebeat DaemonSet to collect node logs
.lab[
.exercise[
- Deploy filebeat:
```bash
@@ -314,7 +314,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- Let's give it a try!
.lab[
.exercise[
- Deploy a Kibana instance:
```bash
@@ -345,7 +345,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
- It's using the same user/password as ElasticSearch
.lab[
.exercise[
- Get the NodePort allocated to Kibana:
```bash
@@ -364,7 +364,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
After the Kibana UI loads, we need to click around a bit
.lab[
.exercise[
- Pick "explore on my own"
@@ -404,7 +404,7 @@ After the Kibana UI loads, we need to click around a bit
- We prepared yet another manifest for that!
.lab[
.exercise[
- Deploy Cerebro:
```bash
@@ -428,7 +428,7 @@ After the Kibana UI loads, we need to click around a bit
- Let's change that!
.lab[
.exercise[
- Edit the ElasticSearch cluster manifest:
```bash

View File

@@ -41,7 +41,7 @@
- When we use `kubectl describe` on an object, `kubectl` retrieves the associated events
.lab[
.exercise[
- See the API requests happening when we use `kubectl describe`:
```bash
@@ -82,7 +82,7 @@
- Let's create an event related to a Node, based on @@LINK[k8s/event-node.yaml]
.lab[
.exercise[
- Edit `k8s/event-node.yaml`
@@ -100,7 +100,7 @@
- Let's create an event related to a Pod, based on @@LINK[k8s/event-pod.yaml]
.lab[
.exercise[
- Create a pod

View File

@@ -77,18 +77,17 @@
- Create a new branch in your fork; e.g. `prod`
(e.g. with "branch" dropdown through the GitHub web UI)
(e.g. by adding a line in the README through the GitHub web UI)
- This is the branch that we are going to use for deployment
---
## Setting up Flux with kustomize
## Setting up Flux
- Clone the Flux repository:
```bash
```
git clone https://github.com/fluxcd/flux
cd flux
```
- Edit `deploy/flux-deployment.yaml`
@@ -100,27 +99,8 @@
```
- Apply all the YAML:
```bash
kubectl apply -k deploy/
```
---
## Setting up Flux with Helm
- Add Flux helm repo:
```bash
helm repo add fluxcd https://charts.fluxcd.io
```
- Install Flux:
```bash
kubectl create namespace flux
helm upgrade --install flux \
--set git.url=git@github.com:your-git-username/kubercoins \
--set git.branch=prod \
--namespace flux \
fluxcd/flux
kubectl apply -f deploy/
```
---
@@ -130,8 +110,8 @@
- When it starts, Flux generates an SSH key
- Display that key:
```bash
kubectl -n flux logs deployment/flux | grep identity.pub | cut -d '"' -f2
```
kubectl logs deployment/flux | grep identity
```
- Then add that key to the repository, giving it **write** access
@@ -177,14 +157,14 @@
## Setting up Gitkube
- Install the CLI:
```bash
```
sudo curl -L -o /usr/local/bin/gitkube \
https://github.com/hasura/gitkube/releases/download/v0.2.1/gitkube_linux_amd64
sudo chmod +x /usr/local/bin/gitkube
```
- Install Gitkube on the cluster:
```bash
```
gitkube install --expose ClusterIP
```
@@ -216,20 +196,20 @@
## Pushing to our remote
- Get the `gitkubed` IP address:
```bash
```
kubectl -n kube-system get svc gitkubed
IP=$(kubectl -n kube-system get svc gitkubed -o json |
jq -r .spec.clusterIP)
```
- Get ourselves a sample repository with resource YAML files:
```bash
```
git clone git://github.com/jpetazzo/kubercoins
cd kubercoins
```
- Add the remote and push to it:
```bash
```
git remote add k8s ssh://default-example@$IP/~/git/default-example
git push k8s master
```

View File

@@ -79,9 +79,9 @@
## Creating a new namespace
- This will make sure that we don't collide / conflict with previous labs and exercises
- This will make sure that we don't collide / conflict with previous exercises
.lab[
.exercise[
- Create the yellow namespace:
```bash
@@ -103,7 +103,7 @@
https://github.com/jpetazzo/kubercoins
.lab[
.exercise[
- Clone that repository:
```bash
@@ -152,7 +152,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's add the liveness probe, then deploy DockerCoins
.lab[
.exercise[
- Edit `rng-deployment.yaml` and add the liveness probe
```bash
@@ -180,7 +180,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's generate traffic and see what happens!
.lab[
.exercise[
- Get the ClusterIP address of the rng service:
```bash
@@ -195,7 +195,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Each command below will show us what's happening on a different level
.lab[
.exercise[
- In one window, monitor cluster events:
```bash
@@ -220,7 +220,7 @@ It will use the default success threshold (1 successful attempt = alive).
- Let's use `ab` to send concurrent requests to rng
.lab[
.exercise[
- In yet another window, generate traffic:
```bash

View File

@@ -1,18 +1,16 @@
# Healthchecks
- Containers can have *healthchecks*
- Kubernetes provides two kinds of healthchecks: liveness and readiness
- There are three kinds of healthchecks, corresponding to very different use-cases:
- Healthchecks are *probes* that apply to *containers* (not to pods)
- liveness = detect when a container is "dead" and needs to be restarted
- Each container can have two (optional) probes:
- readiness = detect when a container is ready to serve traffic
- liveness = is this container dead or alive?
- startup = detect if a container has finished booting
- readiness = is this container ready to serve traffic?
- These healthchecks are optional (we can use none, all, or some of them)
- Different probes are available (HTTP request, TCP connection, program execution)
- Different probes are available (HTTP, TCP, program execution)
- Let's see the difference and how to use them!
@@ -20,13 +18,11 @@
## Liveness probe
*This container is dead, we don't know how to fix it, other than restarting it.*
- Indicates if the container is dead or alive
- A dead container cannot come back to life
- If the liveness probe fails, the container is killed (destroyed)
- If the liveness probe fails, the container is killed
(to make really sure that it's really dead; no zombies or undeads!)
@@ -54,31 +50,9 @@
---
## Readiness probe (1)
## Readiness probe
*Make sure that a container is ready before continuing a rolling update.*
- Indicates if the container is ready to handle traffic
- When doing a rolling update, the Deployment controller waits for Pods to be ready
(a Pod is ready when all the containers in the Pod are ready)
- Improves reliability and safety of rolling updates:
- don't roll out a broken version (that doesn't pass readiness checks)
- don't lose processing capacity during a rolling update
---
## Readiness probe (2)
*Temporarily remove a container (overloaded or otherwise) from a Service load balancer.*
- A container can mark itself "not ready" temporarily
(e.g. if it's overloaded or needs to reload/restart/garbage collect...)
- Indicates if the container is ready to serve traffic
- If a container becomes "unready" it might be ready again soon
@@ -106,9 +80,9 @@
- runtime is busy doing garbage collection or initial data load
- To redirect new connections to other Pods
- For processes that take a long time to start
(e.g. fail the readiness probe when the Pod's load is too high)
(more on that later)
---
@@ -146,35 +120,27 @@
---
class: extra-details
## Startup probe
*The container takes too long to start, and is killed by the liveness probe!*
- Kubernetes 1.16 introduces a third type of probe: `startupProbe`
- By default, probes (including liveness) start immediately
(it is in `alpha` in Kubernetes 1.16)
- With the default probe interval and failure threshold:
- It can be used to indicate "container not ready *yet*"
*a container must respond in less than 30 seconds, or it will be killed!*
- process is still starting
- There are two ways to avoid that:
- loading external data, priming caches
- set `initialDelaySeconds` (a fixed, rigid delay)
- Before Kubernetes 1.16, we had to use the `initialDelaySeconds` parameter
- use a `startupProbe`
(available for both liveness and readiness probes)
- Kubernetes will run only the startup probe, and when it succeeds, run the other probes
- `initialDelaySeconds` is a rigid delay (always wait X before running probes)
---
## When to use a startup probe
- For containers that take a long time to start
(more than 30 seconds)
- Especially if that time can vary a lot
(e.g. fast in dev, slow in prod, or the other way around)
- `startupProbe` works better when a container start time can vary a lot
---
@@ -224,16 +190,17 @@ Here is a pod template for the `rng` web service of the DockerCoins app:
apiVersion: v1
kind: Pod
metadata:
name: healthy-app
name: rng-with-liveness
spec:
containers:
- name: myapp
image: myregistry.io/myapp:v1.0
- name: rng
image: dockercoins/rng:v0.1
livenessProbe:
httpGet:
path: /health
path: /
port: 80
periodSeconds: 5
initialDelaySeconds: 10
periodSeconds: 1
```
If the backend serves an error, or takes longer than 1s, 3 times in a row, it gets killed.
@@ -300,7 +267,7 @@ If the Redis process becomes unresponsive, it will be killed.
(In that context, worker = process that doesn't accept connections)
- Readiness is useful mostly for rolling updates
- Readiness isn't useful
(because workers aren't backends for a service)

View File

@@ -48,7 +48,7 @@
- If you haven't done it before, you need to add the repo for that chart
.lab[
.exercise[
- Add the repo that holds the chart for the OWASP Juice Shop:
```bash
@@ -63,7 +63,7 @@
- We can use `helm pull` to download a chart from a repo
.lab[
.exercise[
- Download the tarball for `juice/juice-shop`:
```bash
@@ -85,7 +85,7 @@
- Let's look at the files and directories in the `juice-shop` chart
.lab[
.exercise[
- Display the tree structure of the chart we just downloaded:
```bash
@@ -108,7 +108,7 @@ We see the components mentioned above: `Chart.yaml`, `templates/`, `values.yaml`
(using the standard Go template library)
.lab[
.exercise[
- Look at the template file for the Service resource:
```bash

View File

@@ -6,7 +6,7 @@
(Resource names, service types, number of replicas...)
.lab[
.exercise[
- Create a sample chart:
```bash
@@ -27,7 +27,7 @@
- There is a convenient `dockercoins.yml` in the repo
.lab[
.exercise[
- Copy the YAML file to the `templates` subdirectory in the chart:
```bash
@@ -50,7 +50,7 @@
(as surprising as it might seem!)
.lab[
.exercise[
- Let's try to install the chart:
```
@@ -79,7 +79,7 @@ kind: Service, namespace: default, name: hasher
- we can also tell Helm to use a different namespace
.lab[
.exercise[
- Create a new namespace:
```bash
@@ -99,7 +99,7 @@ kind: Service, namespace: default, name: hasher
- Let's try to see the release that we just deployed
.lab[
.exercise[
- List Helm releases:
```bash
@@ -118,7 +118,7 @@ We have to specify its namespace (or switch to that namespace).
- Try again, with the correct namespace
.lab[
.exercise[
- List Helm releases in `helmcoins`:
```bash
@@ -133,7 +133,7 @@ We have to specify its namespace (or switch to that namespace).
- We can check the worker logs, or the web UI
.lab[
.exercise[
- Retrieve the NodePort number of the web UI:
```bash
@@ -181,7 +181,7 @@ have details about recommended annotations and labels.
- Let's remove that chart before moving on
.lab[
.exercise[
- Delete the release (don't forget to specify the namespace):
```bash

View File

@@ -24,7 +24,7 @@
- This will give us a basic chart that we will customize
.lab[
.exercise[
- Create a basic chart:
```bash
@@ -81,7 +81,7 @@ This creates a basic chart in the directory `helmcoins`.
- Exception: for redis, we want to use the official image redis:latest
.lab[
.exercise[
- Write YAML files for the 5 components, with the following model:
```yaml
@@ -98,7 +98,7 @@ This creates a basic chart in the directory `helmcoins`.
- For convenience, let's work in a separate namespace
.lab[
.exercise[
- Create a new namespace (if it doesn't already exist):
```bash
@@ -126,7 +126,7 @@ This creates a basic chart in the directory `helmcoins`.
helm upgrade COMPONENT-NAME CHART-DIRECTORY --install
```
.lab[
.exercise[
- Install the 5 components of DockerCoins:
```bash
@@ -165,7 +165,7 @@ class: extra-details
- Let's see if DockerCoins is working!
.lab[
.exercise[
- Check the logs of the worker:
```bash
@@ -187,7 +187,7 @@ There are *many* issues to fix!
- It looks like our images can't be found
.lab[
.exercise[
- Use `kubectl describe` on any of the pods in error
@@ -205,7 +205,7 @@ There are *many* issues to fix!
(and try to find the one generating the Deployment resource)
.lab[
.exercise[
- Show the structure of the `helmcoins` chart that Helm generated:
```bash
@@ -228,7 +228,7 @@ There are *many* issues to fix!
- Let's look for `AppVersion` there!
.lab[
.exercise[
- Check the file `helmcoins/Chart.yaml`
@@ -250,7 +250,7 @@ There are *many* issues to fix!
(to match what we've specified in our values YAML files)
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`
@@ -266,7 +266,7 @@ There are *many* issues to fix!
- To use the new template, we need to *upgrade* the release to use that chart
.lab[
.exercise[
- Upgrade all components:
```bash
@@ -306,7 +306,7 @@ We should see all pods "Running". But ... not all of them are READY.
(`kubectl describe` will retrieve the events related to the object)
.lab[
.exercise[
- Check the events for the redis pods:
```bash
@@ -345,7 +345,7 @@ It's failing both its liveness and readiness probes!
`{{ end }}` at the end
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`
@@ -386,7 +386,7 @@ This is what the new YAML should look like (added lines in yellow):
- We need to upgrade all the services again to use the new chart
.lab[
.exercise[
- Upgrade all components:
```bash
@@ -410,7 +410,7 @@ Everything should now be running!
- Is this working now?
.lab[
.exercise[
- Let's check the logs of the worker:
```bash
@@ -429,7 +429,7 @@ Typically, that error means that the `redis` service doesn't exist.
- What about the services created by our chart?
.lab[
.exercise[
- Check the list of services:
```bash
@@ -452,7 +452,7 @@ We need to change that!
- `include` indicates a *template block* defined somewhere else
.lab[
.exercise[
- Find where that `fullname` thing is defined:
```bash
@@ -473,7 +473,7 @@ We can look at the definition, but it's fairly complex ...
- The name of the release is available as `{{ .Release.Name }}`
.lab[
.exercise[
- Edit `helmcoins/templates/service.yaml`
@@ -528,7 +528,7 @@ We can look at the definition, but it's fairly complex ...
- Let's add a `service.port` value to the redis release
.lab[
.exercise[
- Edit `redis.yaml` to add:
```yaml
@@ -563,7 +563,7 @@ We can look at the definition, but it's fairly complex ...
## Changing the deployment template
.lab[
.exercise[
- Edit `helmcoins/templates/deployment.yaml`

View File

@@ -51,7 +51,7 @@
- First, let's edit `Chart.yaml`
.lab[
.exercise[
- In `Chart.yaml`, fill the `dependencies` section:
```yaml
@@ -93,7 +93,7 @@ use Bitnami's Redis chart.
- After adding the dependency, we ask Helm to pin and download it
.lab[
.exercise[
- Ask Helm:
```bash
@@ -262,7 +262,7 @@ class: extra-details
## Embedding a dependency
.lab[
.exercise[
- Decompress the chart:
```yaml

View File

@@ -203,7 +203,7 @@ class: extra-details
- If the `helm` CLI is not installed in your environment, install it
.lab[
.exercise[
- Check if `helm` is installed:
```bash
@@ -232,7 +232,7 @@ class: extra-details
- They can be managed (installed, upgraded...) with the `helm` CLI
.lab[
.exercise[
- Deploy Tiller:
```bash
@@ -258,7 +258,7 @@ class: extra-details
- In a more realistic deployment, you might create per-user or per-team
service accounts, roles, and role bindings
.lab[
.exercise[
- Grant `cluster-admin` role to `kube-system:default` service account:
```bash
@@ -329,7 +329,7 @@ class: extra-details
- We can use `helm search hub <keyword>`
.lab[
.exercise[
- Look for the OWASP Juice Shop app:
```bash
@@ -351,7 +351,7 @@ Then go to → https://artifacthub.io/packages/helm/seccurecodebox/juice-shop
- We can also use the Artifact Hub search feature
.lab[
.exercise[
- Go to https://artifacthub.io/
@@ -367,7 +367,7 @@ Then go to → https://artifacthub.io/packages/helm/seccurecodebox/juice-shop
- Click on the "Install" button, it will show instructions
.lab[
.exercise[
- First, add the repository for that chart:
```bash
@@ -393,7 +393,7 @@ Note: it is also possible to install directly a chart, with `--repo https://...`
- We can also use `--generate-name` to ask Helm to generate a name for us
.lab[
.exercise[
- List the releases:
```bash
@@ -433,7 +433,7 @@ class: extra-details
- We can use a selector to see these resources
.lab[
.exercise[
- List all the resources created by this release:
```bash
@@ -472,7 +472,7 @@ It is defined in that chart. In other words, not all charts will provide this la
- We can inspect a chart with `helm show` or `helm inspect`
.lab[
.exercise[
- Look at the README for the app:
```bash
@@ -500,7 +500,7 @@ The `readme` may or may not have (accurate) explanations for the values.
- We are going to update `my-juice-shop` to change the type of the service
.lab[
.exercise[
- Update `my-juice-shop`:
```bash
@@ -523,7 +523,7 @@ All unspecified values will take the default values defined in the chart.
- Let's check the app that we just installed
.lab[
.exercise[
- Check the node port allocated to the service:
```bash

View File

@@ -16,7 +16,7 @@
- If you haven't done it before, you need to add the repo for that chart
.lab[
.exercise[
- Add the repo that holds the chart for the OWASP Juice Shop:
```bash
@@ -33,7 +33,7 @@
- Let's use the `juice/juice-shop` chart as an example
.lab[
.exercise[
- Install a release called `orange` with the chart `juice/juice-shop`:
```bash
@@ -53,7 +53,7 @@
- Helm stores successive revisions of each release
.lab[
.exercise[
- View the history for that release:
```bash
@@ -76,7 +76,7 @@ Where does that come from?
- ConfigMaps, Secrets?
.lab[
.exercise[
- Look for ConfigMaps and Secrets:
```bash
@@ -95,7 +95,7 @@ We should see a number of secrets with TYPE `helm.sh/release.v1`.
- Let's find out what is in these Helm secrets
.lab[
.exercise[
- Examine the secret corresponding to the second release of `orange`:
```bash
@@ -113,7 +113,7 @@ There is a key named `release`.
- Let's see what's in this `release` thing!
.lab[
.exercise[
- Dump the secret:
```bash
@@ -131,7 +131,7 @@ Secrets are encoded in base64. We need to decode that!
- We can pipe the output through `base64 -d` or use go-template's `base64decode`
.lab[
.exercise[
- Decode the secret:
```bash
@@ -155,7 +155,7 @@ Let's try one more round of decoding!
- Just add one more base64 decode filter
.lab[
.exercise[
- Decode it twice:
```bash
@@ -175,7 +175,7 @@ Let's try one more round of decoding!
- We could use `file` to figure out the data type
.lab[
.exercise[
- Pipe the decoded release through `file -`:
```bash
@@ -196,7 +196,7 @@ Gzipped data! It can be decoded with `gunzip -c`.
- Let's uncompress the data and save it to a file
.lab[
.exercise[
- Rerun the previous command, but with `| gunzip -c > release-info` :
```bash

View File

@@ -119,7 +119,7 @@
- Let's try to install a couple releases with that schema!
.lab[
.exercise[
- Try an invalid `pullPolicy`:
```bash
@@ -147,7 +147,7 @@
- We can fix that with `"additionalProperties": false`
.lab[
.exercise[
- Edit `values.schema.json` to add `"additionalProperties": false`
```json
@@ -165,7 +165,7 @@
## Testing with unknown properties
.lab[
.exercise[
- Try to pass an extra property:
```bash

View File

@@ -76,7 +76,7 @@
(it is a web server that will use 1s of CPU for each HTTP request)
.lab[
.exercise[
- Deploy the web server:
```bash
@@ -101,7 +101,7 @@
- Let's start a bunch of commands to watch what is happening
.lab[
.exercise[
- Monitor pod CPU usage:
```bash
@@ -143,7 +143,7 @@
- We will use `ab` (Apache Bench) to send traffic
.lab[
.exercise[
- Send a lot of requests to the service, with a concurrency level of 3:
```bash
@@ -170,7 +170,7 @@ The CPU utilization should increase to 100%.
- There is a helper command to do that for us: `kubectl autoscale`
.lab[
.exercise[
- Create the HPA policy for the `busyhttp` deployment:
```bash
@@ -209,7 +209,7 @@ This can also be set with `--cpu-percent=`.
- Since our server can use up to 1 core, let's request 1 core
.lab[
.exercise[
- Edit the Deployment definition:
```bash
@@ -287,7 +287,7 @@ This can also be set with `--cpu-percent=`.
- Since `busyhttp` uses CPU cycles, let's stop it before moving on
.lab[
.exercise[
- Delete the `busyhttp` Deployment:
```bash

View File

@@ -62,7 +62,7 @@
- That's the easy part!
.lab[
.exercise[
- Create a new namespace and switch to it:
```bash
@@ -90,7 +90,7 @@
(by about 100ms per `worker` Pod after the 3rd worker)
.lab[
.exercise[
- Check the `webui` port and open it in your browser:
```bash
@@ -114,7 +114,7 @@
- It monitors exactly one URL, that must be passed as a command-line argument
.lab[
.exercise[
- Deploy `httplat`:
```bash
@@ -148,7 +148,7 @@ class: extra-details
(because we can configure it dynamically with annotations)
.lab[
.exercise[
- If it's not installed yet on the cluster, install Prometheus:
```bash
@@ -169,7 +169,7 @@ class: extra-details
- We can use annotations to tell Prometheus to collect the metrics
.lab[
.exercise[
- Tell Prometheus to "scrape" our latency exporter:
```bash
@@ -191,7 +191,7 @@ You'll need to instruct it to scrape http://httplat.customscaling.svc:9080/metri
- Before moving on, confirm that Prometheus has our metrics
.lab[
.exercise[
- Connect to Prometheus
@@ -407,7 +407,7 @@ Putting together @@LINK[k8s/hpa-v2-pa-httplat.yaml]:
- Of course, it won't quite work yet (we're missing the *Prometheus adapter*)
.lab[
.exercise[
- Create the HorizontalPodAutoscaler:
```bash
@@ -469,7 +469,7 @@ no custom metrics API (custom.metrics.k8s.io) registered
- There is ~~an app~~ a Helm chart for that
.lab[
.exercise[
- Install the Prometheus adapter:
```bash
@@ -534,7 +534,7 @@ Here is the rule that we need to add to the configuration:
## Editing the adapter's configuration
.lab[
.exercise[
- Edit the adapter's ConfigMap:
```bash

View File

@@ -1,181 +0,0 @@
## Optimizing request flow
- With most ingress controllers, requests follow this path:
HTTP client → load balancer → NodePort → ingress controller Pod → app Pod
- Sometimes, some of these components can be on the same machine
(e.g. ingress controller Pod and app Pod)
- But they can also be on different machines
(each arrow = a potential hop)
- This could add some unwanted latency!
(See following diagrams)
---
class: pic
![](images/kubernetes-services/61-ING.png)
---
class: pic
![](images/kubernetes-services/62-ING-path.png)
---
## External traffic policy
- The Service manifest has a field `spec.externalTrafficPolicy`
- Possible values are:
- `Cluster` (default) - load balance connections to all pods
- `Local` - only send connections to local pods (on the same node)
- When the policy is set to `Local`, we avoid one hop:
HTTP client → load balancer → NodePort .red[**→**] ingress controller Pod → app Pod
(See diagram on next slide)
---
class: pic
![](images/kubernetes-services/63-ING-policy.png)
---
## What if there is no Pod?
- If a connection for a Service arrives on a Node through a NodePort...
- ...And that Node doesn't host a Pod matching the selector of that Service...
(i.e. there is no local Pod)
- ...Then the connection is refused
- This can be detected from outside (by the external load balancer)
- The external load balancer won't send connections to these nodes
(See diagram on next slide)
---
class: pic
![](images/kubernetes-services/64-ING-nolocal.png)
---
class: extra-details
## Internal traffic policy
- Since Kubernetes 1.21, there is also `spec.internalTrafficPolicy`
- It works similarly but for internal traffic
- It's an *alpha* feature
(not available by default; needs special steps to be enabled on the control plane)
- See the [documentation] for more details
[documentation]: https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/
---
## Other ways to save hops
- Run the ingress controller as a DaemonSet, using port 80 on the nodes:
HTTP client → load balancer → ingress controller on Node port 80 → app Pod
- Then simplify further by setting a set of DNS records pointing to the nodes:
HTTP client → ingress controller on Node port 80 → app Pod
- Or run a combined load balancer / ingress controller at the edge of the cluster:
HTTP client → edge ingress controller → app Pod
---
## Source IP address
- Obtaining the IP address of the HTTP client (from the app Pod) can be tricky!
- We should consider (at least) two steps:
- obtaining the IP address of the HTTP client (from the ingress controller)
- passing that IP address from the ingress controller to the HTTP client
- The second step is usually done by injecting an HTTP header
(typically `x-forwarded-for`)
- Most ingress controllers do that out of the box
- But how does the ingress controller obtain the IP address of the HTTP client? 🤔
---
## Scenario 1, direct connection
- If the HTTP client connects directly to the ingress controller: easy!
- e.g. when running a combined load balancer / ingress controller
- or when running the ingress controller as a Daemon Set directly on port 80
---
## Scenario 2, external load balancer
- Most external load balancers running in TCP mode don't expose client addresses
(HTTP client connects to load balancer; load balancer connects to ingress controller)
- The ingress controller will "see" the IP address of the load balancer
(instead of the IP address of the client)
- Many external load balancers support the [Proxy Protocol]
- This enables the ingress controller to "see" the IP address of the HTTP client
- It needs to be enabled on both ends (ingress controller and load balancer)
[Proxy Protocol]: https://www.haproxy.com/blog/haproxy/proxy-protocol/
---
## Scenario 3, leveraging `externalTrafficPolicy`
- In some cases, the external load balancer will preserve the HTTP client address
- It is then possible to set `externalTrafficPolicy` to `Local`
- The ingress controller will then "see" the HTTP client address
- If `externalTrafficPolicy` is set to `Cluster`:
- sometimes the client address will be visible
- when bouncing the connection to another node, the address might be changed
- This is a big "it depends!"
- Bottom line: rely on the two other techniques instead?

View File

@@ -85,7 +85,7 @@ class: extra-details
- Let's set it now
.lab[
.exercise[
- Set the `DOMAIN` environment variable:
```bash
@@ -120,7 +120,7 @@ class: extra-details
- Thanks to `openssl`, generating a self-signed cert is just one command away!
.lab[
.exercise[
- Generate a key and certificate:
```bash
@@ -175,7 +175,7 @@ class: extra-details
- Let's use a volume to get easy access to the generated key and certificate
.lab[
.exercise[
- Obtain a certificate from Let's Encrypt:
```bash
@@ -203,7 +203,7 @@ Remove `--test-cert` to obtain a *real* certificate.
- they are owned by `root`
.lab[
.exercise[
- Grant ourselves permissions on these files:
```bash
@@ -265,7 +265,7 @@ Remove `--test-cert` to obtain a *real* certificate.
- However, the Endpoints needs to be adapted to put the current node's address
.lab[
.exercise[
- Edit `~/containers.training/k8s/certbot.yaml`
@@ -286,7 +286,7 @@ Remove `--test-cert` to obtain a *real* certificate.
(i.e. 8000)
.lab[
.exercise[
- Run `certbot`:
```bash
@@ -312,7 +312,7 @@ Remove `--test-cert` to get a production certificate.
(and owned by root)
.lab[
.exercise[
- Grant ourselves permissions on these files:
```bash
@@ -338,7 +338,7 @@ Remove `--test-cert` to get a production certificate.
- We can create a Secret to hold them
.lab[
.exercise[
- Create the Secret:
```bash
@@ -402,7 +402,7 @@ class: extra-details
## Using the certificate
.lab[
.exercise[
- Add the `tls` section to an existing Ingress

View File

@@ -37,19 +37,18 @@
- Service with `type: LoadBalancer`
- requires a particular controller (e.g. CCM, MetalLB)
- costs a bit of money for each service
- if TLS is desired, it has to be implemented by the app
- works for any TCP protocol (not just HTTP)
- doesn't interpret the HTTP protocol (no fancy routing)
- costs a bit of money for each service
- Ingress
- requires an ingress controller
- flat cost regardless of number of ingresses
- can implement TLS transparently for the app
- only supports HTTP
- can do content-based routing (e.g. per URI)
- lower cost per service
<br/>(exact pricing depends on provider's model)
---
@@ -123,46 +122,18 @@
class: extra-details
## Special cases
- GKE has "[GKE Ingress]", a custom ingress controller
(enabled by default)
- EKS has "AWS ALB Ingress Controller" as well
(not enabled by default, requires extra setup)
- They leverage cloud-specific HTTP load balancers
(GCP HTTP LB, AWS ALB)
- They typically have a cost *per ingress resource*
[GKE Ingress]: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress
---
class: extra-details
## Single or multiple LoadBalancer
- Most ingress controllers will create a LoadBalancer Service
(and will receive all HTTP/HTTPS traffic through it)
- We need to point our DNS entries to the IP address of that LB
- Some rare ingress controllers will allocate one LB per ingress resource
(example: the GKE Ingress and ALB Ingress mentioned previously)
(example: by default, the AWS ingress controller based on ALBs)
- This leads to increased costs
- Note that it's possible to have multiple "rules" per ingress resource
(this will reduce costs but may be less convenient to manage)
---
## Ingress in action
@@ -251,22 +222,15 @@ class: extra-details
## Running Traefik
- The [Traefik documentation][traefikdoc] recommends to use a Helm chart
- The [Traefik documentation](https://docs.traefik.io/user-guide/kubernetes/#deploy-trfik-using-a-deployment-or-daemonset) tells us to pick between Deployment and Daemon Set
- For simplicity, we're going to use a custom YAML manifest
- We are going to use a Daemon Set so that each node can accept connections
- Our manifest will:
- use a Daemon Set so that each node can accept connections
- We will do two minor changes to the [YAML provided by Traefik](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-ds.yaml):
- enable `hostNetwork`
- add a *toleration* so that Traefik also runs on all nodes
- We could do the same with the official [Helm chart][traefikchart]
[traefikdoc]: https://doc.traefik.io/traefik/getting-started/install-traefik/#use-the-helm-chart
[traefikchart]: https://artifacthub.io/packages/helm/traefik/traefik
- add a *toleration* so that Traefik also runs on `node1`
---
@@ -290,7 +254,7 @@ class: extra-details
## Checking taints on our nodes
.lab[
.exercise[
- Check our nodes specs:
```bash
@@ -341,7 +305,7 @@ class: extra-details
## Checking tolerations on the control plane
.lab[
.exercise[
- Check tolerations for CoreDNS:
```bash
@@ -367,7 +331,7 @@ class: extra-details
## Special tolerations
.lab[
.exercise[
- Check tolerations on `kube-proxy`:
```bash
@@ -396,7 +360,7 @@ This one is a special case that means "ignore all taints and run anyway."
- [Traefik's RBAC rules](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-rbac.yaml) allowing it to watch necessary API objects
.lab[
.exercise[
- Apply the YAML:
```bash
@@ -411,7 +375,7 @@ This one is a special case that means "ignore all taints and run anyway."
- If Traefik started correctly, we now have a web server listening on each node
.lab[
.exercise[
- Check that Traefik is serving 80/tcp:
```bash
@@ -430,7 +394,7 @@ This is normal: we haven't provided any ingress rule yet.
- To make our lives easier, we will use [nip.io](http://nip.io)
- Check out `http://red.A.B.C.D.nip.io`
- Check out `http://cheddar.A.B.C.D.nip.io`
(replacing A.B.C.D with the IP address of `node1`)
@@ -446,7 +410,7 @@ This is normal: we haven't provided any ingress rule yet.
- With the current install method, it's listening on port 8080
.lab[
.exercise[
- Go to `http://node1:8080` (replacing `node1` with its IP address)
@@ -458,36 +422,38 @@ This is normal: we haven't provided any ingress rule yet.
## Setting up host-based routing ingress rules
- We are going to use the `jpetazzo/color` image
- We are going to use `errm/cheese` images
- This image contains a simple static HTTP server on port 80
(there are [3 tags available](https://hub.docker.com/r/errm/cheese/tags/): wensleydale, cheddar, stilton)
- We will run 3 deployments (`red`, `green`, `blue`)
- These images contain a simple static HTTP server sending a picture of cheese
- We will run 3 deployments (one for each cheese)
- We will create 3 services (one for each deployment)
- Then we will create 3 ingress rules (one for each service)
- We will route `<color>.A.B.C.D.nip.io` to the corresponding deployment
- We will route `<name-of-cheese>.A.B.C.D.nip.io` to the corresponding deployment
---
## Running colorful web servers
## Running cheesy web servers
.lab[
.exercise[
- Run all three deployments:
```bash
kubectl create deployment red --image=jpetazzo/color
kubectl create deployment green --image=jpetazzo/color
kubectl create deployment blue --image=jpetazzo/color
kubectl create deployment cheddar --image=errm/cheese:cheddar
kubectl create deployment stilton --image=errm/cheese:stilton
kubectl create deployment wensleydale --image=errm/cheese:wensleydale
```
- Create a service for each of them:
```bash
kubectl expose deployment red --port=80
kubectl expose deployment green --port=80
kubectl expose deployment blue --port=80
kubectl expose deployment cheddar --port=80
kubectl expose deployment stilton --port=80
kubectl expose deployment wensleydale --port=80
```
]
@@ -503,17 +469,17 @@ This is normal: we haven't provided any ingress rule yet.
- Since Kubernetes 1.19, we can use `kubectl create ingress`
```bash
kubectl create ingress red \
--rule=red.`A.B.C.D`.nip.io/*=red:80
kubectl create ingress cheddar \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
```
- We can specify multiple rules per resource
```bash
kubectl create ingress rgb \
--rule=red.`A.B.C.D`.nip.io/*=red:80 \
--rule=green.`A.B.C.D`.nip.io/*=green:80 \
--rule=blue.`A.B.C.D`.nip.io/*=blue:80
kubectl create ingress cheeses \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80 \
--rule=stilton.`A.B.C.D`.nip.io/*=stilton:80 \
--rule=wensleydale.`A.B.C.D`.nip.io/*=wensleydale:80
```
---
@@ -523,14 +489,14 @@ This is normal: we haven't provided any ingress rule yet.
- The `*` is important:
```
--rule=red.A.B.C.D.nip.io/`*`=red:80
--rule=cheddar.A.B.C.D.nip.io/`*`=cheddar:80
```
- It means "all URIs below that path"
- Without the `*`, it means "only that exact path"
(if we omit it, requests for e.g. `red.A.B.C.D.nip.io/hello` will 404)
(and requests for e.g. images or other URIs won't work)
---
@@ -542,15 +508,15 @@ Here is a minimal host-based ingress resource:
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: red
name: cheddar
spec:
rules:
- host: red.`A.B.C.D`.nip.io
- host: cheddar.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
serviceName: cheddar
servicePort: 80
```
@@ -574,8 +540,8 @@ class: extra-details
- If we want to see "modern" YAML, we can use `-o yaml --dry-run=client`:
```bash
kubectl create ingress red -o yaml --dry-run=client \
--rule=red.`A.B.C.D`.nip.io/*=red:80
kubectl create ingress cheddar -o yaml --dry-run=client \
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
```
@@ -643,21 +609,13 @@ class: extra-details
---
## Vendor-specific example
## A special feature in action
- Let's see how to implement *canary releases*
- We're going to see how to implement *canary releases* with Traefik
- The example here will use Traefik v1
- This feature is available on multiple ingress controllers
(which is obsolete)
- It won't work on your Kubernetes cluster!
(unless you're running an oooooold version of Kubernetes)
(and an equally oooooooold version of Traefik)
- We've left it here just as an example!
- ... But it is configured very differently on each of them
---
@@ -698,7 +656,7 @@ class: extra-details
---
## Canary releases with Traefik v1
## Canary releases with Traefik
- We need to deploy the canary and expose it with a separate service
@@ -710,6 +668,14 @@ class: extra-details
- If we want, we can send requests to more than 2 services
- Let's send requests to our 3 cheesy services!
.exercise[
- Create the resource shown on the next slide
]
---
## The Ingress resource
@@ -719,34 +685,63 @@ class: extra-details
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: rgb
name: cheeseplate
annotations:
traefik.ingress.kubernetes.io/service-weights: |
red: 50%
green: 25%
blue: 25%
cheddar: 50%
wensleydale: 25%
stilton: 25%
spec:
rules:
- host: rgb.`A.B.C.D`.nip.io
- host: cheeseplate.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
serviceName: cheddar
servicePort: 80
- path: /
backend:
serviceName: green
serviceName: wensleydale
servicePort: 80
- path: /
backend:
serviceName: blue
serviceName: stilton
servicePort: 80
```
]
---
## Testing the canary
- Let's check the percentage of requests going to each service
.exercise[
- Continuously send HTTP requests to the new ingress:
```bash
while sleep 0.1; do
curl -s http://cheeseplate.A.B.C.D.nip.io/
done
```
]
We should see a 50/25/25 request mix.
---
class: extra-details
## Load balancing fairness
Note: if we use odd request ratios, the load balancing algorithm might appear to be broken on a small scale (when sending a small number of requests), but on a large scale (with many requests) it will be fair.
For instance, with a 11%/89% ratio, we can see 79 requests going to the 89%-weighted service, and then requests alternating between the two services; then 79 requests again, etc.
---
class: extra-details
## Other ingress controllers

Some files were not shown because too many files have changed in this diff Show More