🚧 WIP NR adv k8s Fall 2022

2026-03-02 01:10:20 +00:00 · 2022-08-31 13:33:02 +02:00
15 changed files with 561 additions and 827 deletions
--- a/k8s/dashboard-insecure.yaml
+++ b/k8s/dashboard-insecure.yaml
@@ -17,8 +17,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 ---
@@ -30,8 +30,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-certs
  namespace: kubernetes-dashboard
 type: Opaque
@@ -43,8 +43,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-csrf
  namespace: kubernetes-dashboard
 type: Opaque
@@ -56,8 +56,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-key-holder
  namespace: kubernetes-dashboard
 type: Opaque
@@ -71,8 +71,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-settings
  namespace: kubernetes-dashboard
 ---
@@ -84,8 +84,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 rules:
 - apiGroups:
@@ -106,8 +106,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 roleRef:
  apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 rules:
@@ -182,8 +182,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 roleRef:
@@ -204,8 +204,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
    kubernetes.io/cluster-service: "true"
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 spec:
@@ -253,8 +253,8 @@ spec:
        app.kubernetes.io/instance: kubernetes-dashboard
        app.kubernetes.io/managed-by: Helm
        app.kubernetes.io/name: kubernetes-dashboard
-        app.kubernetes.io/version: 2.6.1
-        helm.sh/chart: kubernetes-dashboard-5.10.0
+        app.kubernetes.io/version: 2.5.0
+        helm.sh/chart: kubernetes-dashboard-5.2.0
    spec:
      containers:
      - args:
@@ -262,7 +262,7 @@ spec:
        - --sidecar-host=http://127.0.0.1:8000
        - --enable-skip-login
        - --enable-insecure-login
-        image: kubernetesui/dashboard:v2.6.1
+        image: kubernetesui/dashboard:v2.5.0
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
@@ -293,7 +293,7 @@ spec:
          name: kubernetes-dashboard-certs
        - mountPath: /tmp
          name: tmp-volume
-      - image: kubernetesui/metrics-scraper:v1.0.8
+      - image: kubernetesui/metrics-scraper:v1.0.7
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
--- a/k8s/dashboard-recommended.yaml
+++ b/k8s/dashboard-recommended.yaml
@@ -17,8 +17,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 ---
@@ -30,8 +30,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-certs
  namespace: kubernetes-dashboard
 type: Opaque
@@ -43,8 +43,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-csrf
  namespace: kubernetes-dashboard
 type: Opaque
@@ -56,8 +56,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-key-holder
  namespace: kubernetes-dashboard
 type: Opaque
@@ -71,8 +71,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-settings
  namespace: kubernetes-dashboard
 ---
@@ -84,8 +84,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 rules:
 - apiGroups:
@@ -106,8 +106,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 roleRef:
  apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 rules:
@@ -182,8 +182,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 roleRef:
@@ -204,8 +204,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
    kubernetes.io/cluster-service: "true"
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 spec:
@@ -253,15 +253,15 @@ spec:
        app.kubernetes.io/instance: kubernetes-dashboard
        app.kubernetes.io/managed-by: Helm
        app.kubernetes.io/name: kubernetes-dashboard
-        app.kubernetes.io/version: 2.6.1
-        helm.sh/chart: kubernetes-dashboard-5.10.0
+        app.kubernetes.io/version: 2.5.0
+        helm.sh/chart: kubernetes-dashboard-5.2.0
    spec:
      containers:
      - args:
        - --namespace=kubernetes-dashboard
        - --auto-generate-certificates
        - --sidecar-host=http://127.0.0.1:8000
-        image: kubernetesui/dashboard:v2.6.1
+        image: kubernetesui/dashboard:v2.5.0
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
@@ -292,7 +292,7 @@ spec:
          name: kubernetes-dashboard-certs
        - mountPath: /tmp
          name: tmp-volume
-      - image: kubernetesui/metrics-scraper:v1.0.8
+      - image: kubernetesui/metrics-scraper:v1.0.7
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
--- a/k8s/dashboard-with-token.yaml
+++ b/k8s/dashboard-with-token.yaml
@@ -17,8 +17,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 ---
@@ -30,8 +30,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-certs
  namespace: kubernetes-dashboard
 type: Opaque
@@ -43,8 +43,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-csrf
  namespace: kubernetes-dashboard
 type: Opaque
@@ -56,8 +56,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-key-holder
  namespace: kubernetes-dashboard
 type: Opaque
@@ -71,8 +71,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-settings
  namespace: kubernetes-dashboard
 ---
@@ -84,8 +84,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 rules:
 - apiGroups:
@@ -106,8 +106,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard-metrics
 roleRef:
  apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 rules:
@@ -182,8 +182,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 roleRef:
@@ -204,8 +204,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
    kubernetes.io/cluster-service: "true"
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
    app.kubernetes.io/instance: kubernetes-dashboard
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: kubernetes-dashboard
-    app.kubernetes.io/version: 2.6.1
-    helm.sh/chart: kubernetes-dashboard-5.10.0
+    app.kubernetes.io/version: 2.5.0
+    helm.sh/chart: kubernetes-dashboard-5.2.0
  name: kubernetes-dashboard
  namespace: kubernetes-dashboard
 spec:
@@ -253,15 +253,15 @@ spec:
        app.kubernetes.io/instance: kubernetes-dashboard
        app.kubernetes.io/managed-by: Helm
        app.kubernetes.io/name: kubernetes-dashboard
-        app.kubernetes.io/version: 2.6.1
-        helm.sh/chart: kubernetes-dashboard-5.10.0
+        app.kubernetes.io/version: 2.5.0
+        helm.sh/chart: kubernetes-dashboard-5.2.0
    spec:
      containers:
      - args:
        - --namespace=kubernetes-dashboard
        - --auto-generate-certificates
        - --sidecar-host=http://127.0.0.1:8000
-        image: kubernetesui/dashboard:v2.6.1
+        image: kubernetesui/dashboard:v2.5.0
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
@@ -292,7 +292,7 @@ spec:
          name: kubernetes-dashboard-certs
        - mountPath: /tmp
          name: tmp-volume
-      - image: kubernetesui/metrics-scraper:v1.0.8
+      - image: kubernetesui/metrics-scraper:v1.0.7
        imagePullPolicy: IfNotPresent
        livenessProbe:
          httpGet:
--- a/prepare-vms/lib/commands.sh
+++ b/prepare-vms/lib/commands.sh
@@ -276,14 +276,13 @@ EOF
    "

    ##VERSION## https://github.com/docker/compose/releases
-    COMPOSE_VERSION=v2.11.1
-    COMPOSE_PLATFORM='linux-$(uname -m)'
-    
-    # Just in case you need Compose 1.X, you can use the following lines.
-    # (But it will probably only work for x86_64 machines.)
-    #COMPOSE_VERSION=1.29.2
-    #COMPOSE_PLATFORM='Linux-$(uname -m)'
-
+    if [ "$ARCHITECTURE" ]; then
+        COMPOSE_VERSION=v2.2.3
+        COMPOSE_PLATFORM='linux-$(uname -m)'
+    else
+        COMPOSE_VERSION=1.29.2
+        COMPOSE_PLATFORM='Linux-$(uname -m)'
+    fi
    pssh "
    set -e
    ### Install docker-compose.
@@ -493,7 +492,7 @@ _cmd_kubetools() {
    # Install kube-ps1
    pssh "
    set -e
-    if ! [ -d /opt/kube-ps1 ]; then
+    if ! [ -f /opt/kube-ps1 ]; then
      cd /tmp
      git clone https://github.com/jonmosco/kube-ps1
      sudo mv kube-ps1 /opt/kube-ps1
--- a/slides/exercises/healthchecks-brief.md
+++ b/slides/exercises/healthchecks-brief.md
@@ -4,6 +4,6 @@

  (we will use the `rng` service in the dockercoins app)

- See what happens when the load increases
+- See what happens when the load increases

  (spoiler alert: it involves timeouts!)
--- a/slides/interlude-form.md
+++ b/slides/interlude-form.md
@@ -1,17 +0,0 @@
-# Interlude
-
- As mentioned earlier:
-
-  *the content of this course will be adapted to suit your needs!*
-
- Please take a look at the form that we just shared in Slack
-
-  (you don't need to fill it *right now*)
-
- If there are parts that you are curious about, ask us now!
-
- We'll ask you to fill the form after today's session
-
-  (before the end of the day, basically, so we can process the results by tomorrow)
-
- Thank you!
--- a/slides/k8s/architecture.md
+++ b/slides/k8s/architecture.md
@@ -203,12 +203,12 @@ What does that mean?

 ## Let's experiment a bit!

- The examples in this section require a Kubernetes cluster
-
-  (any local development cluster will suffice)
+- For this section, connect to the first node of the `test` cluster

 .lab[

+- SSH to the first node of the test cluster
+
 - Check that the cluster is operational:
  ```bash
  kubectl get nodes
--- a/slides/k8s/healthchecks.md
+++ b/slides/k8s/healthchecks.md
@@ -1,62 +1,46 @@
 # Healthchecks

- Containers can have *healthchecks* (also called "probes")
+- Containers can have *healthchecks*

- There are three kinds of healthchecks, corresponding to different use-cases:
+- There are three kinds of healthchecks, corresponding to very different use-cases:

-  `startupProbe`, `readinessProbe`, `livenessProbe`
+  - liveness  = detect when a container is "dead" and needs to be restarted
+
+  - readiness = detect when a container is ready to serve traffic
+
+  - startup = detect if a container has finished booting

 - These healthchecks are optional (we can use none, all, or some of them)

- Different probes are available:
+- Different probes are available (HTTP request, TCP connection, program execution)

-  HTTP GET, TCP connection, arbitrary program execution, GRPC
-
- All these probes have a binary result (success/failure)
-
- Probes that aren't defined will default to a "success" result
+- Let's see the difference and how to use them!

 ---

-## Use-cases in brief
-
-*My container takes a long time to boot before being able to serve traffic.*
-
-→ use a `startupProbe` (but often a `readinessProbe` can also do the job)
-
-*Sometimes, my container is unavailable or overloaded, and needs to e.g. be taken temporarily out of load balancer rotation.*
-
-→ use a `readinessProbe`
-
-*Sometimes, my container enters a broken state which can only be fixed by a restart.*
-
-→ use a `livenessProbe`
-
---
-
-## Liveness probes
+## Liveness probe

 *This container is dead, we don't know how to fix it, other than restarting it.*

- Check if the container is dead or alive
+- Indicates if the container is dead or alive

- If Kubernetes determines that the container is dead:
+- A dead container cannot come back to life

-  - it terminates the container gracefully
+- If the liveness probe fails, the container is killed (destroyed)

-  - it restarts the container (unless the Pod's `restartPolicy` is `Never`)
+  (to make really sure that it's really dead; no zombies or undeads!)

- With the default parameters, it takes:
+- What happens next depends on the pod's `restartPolicy`:

-  - up to 30 seconds to determine that the container is dead
+  - `Never`: the container is not restarted

-  - up to 30 seconds to terminate it
+  - `OnFailure` or `Always`: the container is restarted

 ---

 ## When to use a liveness probe

- To detect failures that can't be recovered
+- To indicate failures that can't be recovered

  - deadlocks (causing all requests to time out)

@@ -64,45 +48,47 @@

 - Anything where our incident response would be "just restart/reboot it"

---
-
-## Liveness probes gotchas
-
 .warning[**Do not** use liveness probes for problems that can't be fixed by a restart]

 - Otherwise we just restart our pods for no reason, creating useless load

-.warning[**Do not** depend on other services within a liveness probe]
+---

- Otherwise we can experience cascading failures
+## Readiness probe (1)

-  (example: web server liveness probe that makes a requests to a database)
+*Make sure that a container is ready before continuing a rolling update.*

-.warning[**Make sure** that liveness probes respond quickly]
+- Indicates if the container is ready to handle traffic

- The default probe timeout is 1 second (this can be tuned!)
+- When doing a rolling update, the Deployment controller waits for Pods to be ready

- If the probe takes longer than that, it will eventually cause a restart
+  (a Pod is ready when all the containers in the Pod are ready)
+
+- Improves reliability and safety of rolling updates:
+
+  - don't roll out a broken version (that doesn't pass readiness checks)
+
+  - don't lose processing capacity during a rolling update

 ---

-## Readiness probes
+## Readiness probe (2)

-*Sometimes, my container "needs a break".*
+*Temporarily remove a container (overloaded or otherwise) from a Service load balancer.*

- Check if the container is ready or not
+- A container can mark itself "not ready" temporarily

- If the container is not ready, its Pod is not ready
+  (e.g. if it's overloaded or needs to reload/restart/garbage collect...)

- If the Pod belongs to a Service, it is removed from its Endpoints
+- If a container becomes "unready" it might be ready again soon

-  (it stops receiving new connections but existing ones are not affected)
+- If the readiness probe fails:

- If there is a rolling update in progress, it might pause
+  - the container is *not* killed

-  (Kubernetes will try to respect the MaxUnavailable parameter)
+  - if the pod is a member of a service, it is temporarily removed

- As soon as the readiness probe suceeds again, everything goes back to normal
+  - it is re-added as soon as the readiness probe passes again

 ---

@@ -116,31 +102,67 @@

 - To indicate temporary failure or unavailability

-  - runtime is busy doing garbage collection or (re)loading data
-
  - application can only service *N* parallel connections

-  - new connections will be directed to other Pods
+  - runtime is busy doing garbage collection or initial data load
+
+- To redirect new connections to other Pods
+
+  (e.g. fail the readiness probe when the Pod's load is too high)

 ---

-## Startup probes
+## Dependencies

-*My container takes a long time to boot before being able to serve traffic.*
+- If a web server depends on a database to function, and the database is down:

- After creating a container, Kubernetes runs its startup probe
+  - the web server's liveness probe should succeed

- The container will be considered "unhealthy" until the probe succeeds
+  - the web server's readiness probe should fail

- As long as the container is "unhealthy", its Pod...:
+- Same thing for any hard dependency (without which the container can't work)

-  - is not added to Services' endpoints
+.warning[**Do not** fail liveness probes for problems that are external to the container]

-  - is not considered as "available" for rolling update purposes
+---

- Readiness and liveness probes are enabled *after* startup probe reports success
+## Timing and thresholds

-  (if there is no startup probe, readiness and liveness probes are enabled right away)
+- Probes are executed at intervals of `periodSeconds` (default: 10)
+
+- The timeout for a probe is set with `timeoutSeconds` (default: 1)
+
+.warning[If a probe takes longer than that, it is considered as a FAIL]
+
+- A probe is considered successful after `successThreshold` successes (default: 1)
+
+- A probe is considered failing after `failureThreshold` failures (default: 3)
+
+- A probe can have an `initialDelaySeconds` parameter (default: 0)
+
+- Kubernetes will wait that amount of time before running the probe for the first time
+
+  (this is important to avoid killing services that take a long time to start)
+
+---
+
+## Startup probe
+
+*The container takes too long to start, and is killed by the liveness probe!*
+
+- By default, probes (including liveness) start immediately
+
+- With the default probe interval and failure threshold:
+
+  *a container must respond in less than 30 seconds, or it will be killed!*
+
+- There are two ways to avoid that:
+
+  - set `initialDelaySeconds` (a fixed, rigid delay)
+
+  - use a `startupProbe`
+
+- Kubernetes will run only the startup probe, and when it succeeds, run the other probes

 ---

@@ -156,296 +178,121 @@

 ---

-## Startup probes gotchas
-
- When defining a `startupProbe`, we almost always want to adjust its parameters
-
-  (specifically, its `failureThreshold` - this is explained in next slide)
-
- Otherwise, if the container fails to start within 30 seconds...
-
-  *Kubernetes terminates the container and restarts it!*
-
- Sometimes, it's easier/simpler to use a `readinessProbe` instead
-
-  (except when also using a `livenessProbe`)
-
---
-
-## Timing and thresholds
-
- Probes are executed at intervals of `periodSeconds` (default: 10)
-
- The timeout for a probe is set with `timeoutSeconds` (default: 1)
-
-.warning[If a probe takes longer than that, it is considered as a FAIL]
-
-.warning[For liveness probes **and startup probes** this terminates and restarts the container]
-
- A probe is considered successful after `successThreshold` successes (default: 1)
-
- A probe is considered failing after `failureThreshold` failures (default: 3)
-
- All these parameters can be set independently for each probe
-
---
-
-class: extra-details
-
-## `initialDelaySeconds`
-
- A probe can have an `initialDelaySeconds` parameter (default: 0)
-
- Kubernetes will wait that amount of time before running the probe for the first time
-
- It is generally better to use a `startupProbe` instead
-
-  (but this parameter did exist before startup probes were implemented)
-
---
-
-class: extra-details
-
-## `readinessProbe` vs `startupProbe`
-
- A lot of blog posts / documentations / tutorials recommend readiness probes...
-
- ...even in scenarios where a startup probe would seem more appropriate!
-
- This is because startup probes are relatively recent
-
-  (they reached GA status in Kubernetes 1.20)
-
- When there is no `livenessProbe`, using a `readinessProbe` is simpler:
-
-  - a `startupProbe` generally requires to change the `failureThreshold`
-
-  - a `startupProbe` generally also requires a `readinessProbe`
-
-  - a single `readinessProbe` can fulfill both roles
-
---
-
 ## Different types of probes

- Kubernetes supports the following mechanisms:
+- HTTP request

-  - `exec` (arbitrary program execution)
+  - specify URL of the request (and optional headers)

-  - `httpGet` (HTTP GET request)
+  - any status code between 200 and 399 indicates success

-  - `tcpSocket` (check if a TCP port is accepting connections)
+- TCP connection

-  - `grpc` (standard [GRPC Health Checking Protocol][grpc])
+  - the probe succeeds if the TCP port is open

- All probes give binary results ("it works" or "it doesn't")
+- arbitrary exec

- Let's see the specific details for each of them!
+  - a command is executed in the container

-[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html
+  - exit status of zero indicates success

 ---

-## `exec`
+## Benefits of using probes

- Runs an arbitrary program *inside* the container
+- Rolling updates proceed when containers are *actually ready*

-  (like with `kubectl exec` or `docker exec`)
+  (as opposed to merely started)

- The program must be available in the container image
+- Containers in a broken state get killed and restarted

- Kubernetes uses the exit status of the program
+  (instead of serving errors or timeouts)

-  (standard UNIX convention: 0 = success, anything else = failure)
+- Unavailable backends get removed from load balancer rotation
+
+  (thus improving response times across the board)
+
+- If a probe is not defined, it's as if there was an "always successful" probe

 ---

-## `exec` example
+## Example: HTTP probe

-When the worker is ready, it should create `/tmp/ready`.
-<br/>
-The following probe will give it 5 minutes to do so.
+Here is a pod template for the `rng` web service of the DockerCoins app:

 ```yaml
 apiVersion: v1
 kind: Pod
 metadata:
-  name: queueworker
+  name: healthy-app
 spec:
  containers:
-  - name: worker
-    image: myregistry.../worker:v1.0
-    startupProbe:
-      exec:
-        command:
-        - test
-        - -f
-        - /tmp/ready
-      failureThreshold: 30
-```
-
---
-
-## Using shell constructs
-
- If we want to use pipes, conditionals, etc. we should invoke a shell
-
- Example:
-  ```yaml
-    exec:
-      command:
-      - sh
-      - -c
-      - "curl http://localhost:5000/status | jq .ready | grep true"
-  ```
-
---
-
-## `httpGet`
-
- Make an HTTP GET request to the container
-
- The request will be made by Kubelet
-
-  (doesn't require extra binaries in the container image)
-
- `port` must be specified
-
- `path` and extra `httpHeaders` can be specified optionally
-
- Kubernetes uses HTTP status code of the response:
-
-  - 200-399 = success
-
-  - anything else = failure
-
---
-
-## `httpGet` example
-
-The following liveness probe restarts the container if it stops responding on `/healthz`:
-
-```yaml
-apiVersion: v1
-kind: Pod
-metadata:
-  name: frontend
-spec:
-  containers:
-  - name: frontend
-    image: myregistry.../frontend:v1.0
+  - name: myapp
+    image: myregistry.io/myapp:v1.0
    livenessProbe:
      httpGet:
+        path: /health
        port: 80
-        path: /healthz
+      periodSeconds: 5
 ```

---
-
-## `tcpSocket`
-
- Kubernetes checks if the indicated TCP port accepts connections
-
- There is no additional check
-
-.warning[It's quite possible for a process to be broken, but still accept TCP connections!]
+If the backend serves an error, or takes longer than 1s, 3 times in a row, it gets killed.

 ---

-## `grpc`
+## Example: exec probe

-<!-- ##VERSION## -->
+Here is a pod template for a Redis server:

- Available in beta since Kubernetes 1.24
+```yaml
+apiVersion: v1
+kind: Pod
+metadata:
+  name: redis-with-liveness
+spec:
+  containers:
+  - name: redis
+    image: redis
+    livenessProbe:
+      exec:
+        command: ["redis-cli", "ping"]
+```

- Leverages standard [GRPC Health Checking Protocol][grpc]
-
-[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html
+If the Redis process becomes unresponsive, it will be killed.

 ---

-## Best practices for healthchecks
+## Questions to ask before adding healthchecks

- Readiness probes are almost always beneficial
+- Do we want liveness, readiness, both?

-  - don't hesitate to add them early!
+  (sometimes, we can use the same check, but with different failure thresholds)

-  - we can even make them *mandatory*
+- Do we have existing HTTP endpoints that we can use?

- Be more careful with liveness and startup probes
+- Do we need to add new endpoints, or perhaps use something else?

-  - they aren't always necessary
+- Are our healthchecks likely to use resources and/or slow down the app?

-  - they can even cause harm
+- Do they depend on additional services?
+
+  (this can be particularly tricky, see next slide)

 ---

-## Readiness probes
+## Healthchecks and dependencies

- Almost always beneficial
+- Liveness checks should not be influenced by the state of external services

- Exceptions:
+- All checks should reply quickly (by default, less than 1 second)

-  - web service that doesn't have a dedicated "health" or "ping" route
+- Otherwise, they are considered to fail

-  - ...and all requests are "expensive" (e.g. lots of external calls)
+- This might require checking the health of dependencies asynchronously

---
-
-## Liveness probes
-
- If we're not careful, we end up restarting containers for no reason
-
-  (which can cause additional load on the cluster, cascading failures, data loss, etc.)
-
- Suggestion:
-
-  - don't add liveness probes immediately
-
-  - wait until you have a bit of production experience with that code
-
-  - then add narrow-scoped healthchecks to detect specific failure modes
-
- Readiness and liveness probes should be different
-
-  (different check *or* different timeouts *or* different thresholds)
-
---
-
-## Startup probes
-
- Only beneficial for containers that need a long time to start
-
-  (more than 30 seconds)
-
- If there is no liveness probe, it's simpler to just use a readiness probe
-
-  (since we probably want to have a readiness probe anyway)
-
- In other words, startup probes are useful in one situation:
-
-  *we have a liveness probe, AND the container needs a lot of time to start*
-
- Don't forget to change the `failureThreshold`
-
-  (otherwise the container will fail to start and be killed)
-
---
-
-## Recap of the gotchas
-
- The default timeout is 1 second
-
-  - if a probe takes longer than 1 second to reply, Kubernetes considers that it fails
-
-  - this can be changed by setting the `timeoutSeconds` parameter
-    <br/>(or refactoring the probe)
-
- Liveness probes should not be influenced by the state of external services
-
- Liveness probes and readiness probes should have different paramters
-
- For startup probes, remember to increase the `failureThreshold`
+  (e.g. if a database or API might be healthy but still take more than
+  1 second to reply, we should check the status asynchronously and report
+  a cached status)

 ---

@@ -453,21 +300,21 @@ spec:

 (In that context, worker = process that doesn't accept connections)

- A relatively easy solution is to use files
+- Readiness is useful mostly for rolling updates

- For a startup or readiness probe:
+  (because workers aren't backends for a service)

-  - worker creates `/tmp/ready` when it's ready
-  - probe checks the existence of `/tmp/ready`
+- Liveness may help us restart a broken worker, but how can we check it?

- For a liveness probe:
+- Embedding an HTTP server is a (potentially expensive) option

-  - worker touches `/tmp/alive` regularly
-    <br/>(e.g. just before starting to work on a job)
-  - probe checks that the timestamp on `/tmp/alive` is recent
-  - if the timestamp is old, it means that the worker is stuck
+- Using a "lease" file can be relatively easy:

- Sometimes it can also make sense to embed a web server in the worker
+  - touch a file during each iteration of the main loop
+
+  - check the timestamp of that file from an exec probe
+
+- Writing logs (and checking them from the probe) also works

 ???

--- a/slides/k8s/ingress-canary.md
+++ b/slides/k8s/ingress-canary.md
@@ -1,148 +0,0 @@
-## Ingress and canary releases
-
- Let's see how to implement *canary releases*
-
- The example here will use Traefik v1
-
-  (which is obsolete)
-
- It won't work on your Kubernetes cluster!
-
-  (unless you're running an oooooold version of Kubernetes)
-
-  (and an equally oooooooold version of Traefik)
-
- We've left it here just as an example!
-
---
-
-## Canary releases
-
- A *canary release* (or canary launch or canary deployment) is a release that will process only a small fraction of the workload
-
- After deploying the canary, we compare its metrics to the normal release
-
- If the metrics look good, the canary will progressively receive more traffic
-
-  (until it gets 100% and becomes the new normal release)
-
- If the metrics aren't good, the canary is automatically removed
-
- When we deploy a bad release, only a tiny fraction of traffic is affected
-
---
-
-## Various ways to implement canary
-
- Example 1: canary for a microservice
-
-  - 1% of all requests (sampled randomly) are sent to the canary
-  - the remaining 99% are sent to the normal release
-
- Example 2: canary for a web app
-
-  - 1% of users are sent to the canary web site
-  - the remaining 99% are sent to the normal release
-
- Example 3: canary for shipping physical goods
-
-  - 1% of orders are shipped with the canary process
-  - the remaining 99% are shipped with the normal process
-
- We're going to implement example 1 (per-request routing)
-
---
-
-## Canary releases with Traefik v1
-
- We need to deploy the canary and expose it with a separate service
-
- Then, in the Ingress resource, we need:
-
-  - multiple `paths` entries (one for each service, canary and normal)
-
-  - an extra annotation indicating the weight of each service
-
- If we want, we can send requests to more than 2 services
-
---
-
-## The Ingress resource
-
-.small[
-```yaml
-apiVersion: networking.k8s.io/v1beta1
-kind: Ingress
-metadata:
-  name: rgb
-  annotations:
-    traefik.ingress.kubernetes.io/service-weights: |
-      red: 50%
-      green: 25%
-      blue: 25%
-spec:
-  rules:
-  - host: rgb.`A.B.C.D`.nip.io
-    http:
-      paths:
-      - path: /
-        backend:
-          serviceName: red
-          servicePort: 80
-      - path: /
-        backend:
-          serviceName: green
-          servicePort: 80
-      - path: /
-        backend:
-          serviceName: blue
-          servicePort: 80
-```
-]
-
---
-
-class: extra-details
-
-## Other ingress controllers
-
-*Just to illustrate how different things are ...*
-
- With the NGINX ingress controller:
-
-  - define two ingress resources
-    <br/>
-    (specifying rules with the same host+path)
-
-  - add `nginx.ingress.kubernetes.io/canary` annotations on each
-
-
- With Linkerd2:
-
-  - define two services
-
-  - define an extra service for the weighted aggregate of the two
-
-  - define a TrafficSplit (this is a CRD introduced by the SMI spec)
-
---
-
-class: extra-details
-
-## We need more than that
-
-What we saw is just one of the multiple building blocks that we need to achieve a canary release.
-
-We also need:
-
- metrics (latency, performance ...) for our releases
-
- automation to alter canary weights
-
-  (increase canary weight if metrics look good; decrease otherwise)
-
- a mechanism to manage the lifecycle of the canary releases
-
-  (create them, promote them, delete them ...)
-
-For inspiration, check [flagger by Weave](https://github.com/weaveworks/flagger).
--- a/slides/k8s/ingress.md
+++ b/slides/k8s/ingress.md
@@ -1,36 +1,34 @@
 # Exposing HTTP services with Ingress resources

- Service = layer 4 (TCP, UDP, SCTP)
+- HTTP services are typically exposed on port 80

-  - works with every TCP/UDP/SCTP protocol
+  (and 443 for HTTPS)

-  - doesn't "see" or interpret HTTP
+- `NodePort` services are great, but they are *not* on port 80

- Ingress = layer 7 (HTTP)
+  (by default, they use port range 30000-32767)

-  - only for HTTP
-
-  - can route requests depending on URI or host header
-
-  - can handle TLS
+- How can we get *many* HTTP services on port 80? 🤔

 ---

-## Why should we use Ingress resources?
+## Various ways to expose something on port 80

-A few use-cases:
+- Service with `type: LoadBalancer`

- URI routing (e.g. for single page apps)
+  *costs a little bit of money; not always available*

-  `/api` → service `api:5000`
+- Service with one (or multiple) `ExternalIP`

-  everything else → service `static:80`
+  *requires public nodes; limited by number of nodes*

- Cost optimization
+- Service with `hostPort` or `hostNetwork`

-  (because individual `LoadBalancer` services typically cost money)
+  *same limitations as `ExternalIP`; even harder to manage*

- Automatic handling of TLS certificates
+- Ingress resources
+
+  *addresses all these limitations, yay!*

 ---

@@ -183,70 +181,20 @@ class: extra-details

 ---

-## Accepting connections on port 80 (and 443)
-
- Web site users don't want to specify port numbers
-
-  (e.g. "connect to https://blahblah.whatever:31550")
-
- Our ingress controller needs to actually be exposed on port 80
-
-  (and 443 if we want to handle HTTPS)
-
- Let's see how we can achieve that!
-
---
-
-## Various ways to expose something on port 80
-
- Service with `type: LoadBalancer`
-
-  *costs a little bit of money; not always available*
-
- Service with one (or multiple) `ExternalIP`
-
-  *requires public nodes; limited by number of nodes*
-
- Service with `hostPort` or `hostNetwork`
-
-  *same limitations as `ExternalIP`; even harder to manage*
-
---
-
 ## Deploying pods listening on port 80

- We are going to run Traefik in Pods with `hostNetwork: true`
+- We want our ingress load balancer to be available on port 80

-  (so that our load balancer can use the "real" port 80 of our nodes)
+- The best way to do that would be with a `LoadBalancer` service

- Traefik Pods will be created by a DaemonSet
+  ... but it requires support from the underlying infrastructure

-  (so that we get one instance of Traefik on every node of the cluster)
+- Instead, we are going to use the `hostNetwork` mode on the Traefik pods

- This means that we will be able to connect to any node of the cluster on port 80
-
-.warning[This is not typical of a production setup!]
+- Let's see what this `hostNetwork` mode is about ...

 ---

-## Doing it in production
-
- When running "on cloud", the easiest option is a `LoadBalancer` service
-
- When running "on prem", it depends:
-
-  - [MetalLB] is a good option if a pool of public IP addresses is available
-
-  - otherwise, using `externalIPs` on a few nodes (2-3 for redundancy)
-
- Many variations/optimizations are possible depending on our exact scenario!
-
-[MetalLB]: https://metallb.org/
-
---
-
-class: extra-details
-
 ## Without `hostNetwork`

 - Normally, each pod gets its own *network namespace*
@@ -263,8 +211,6 @@ class: extra-details

 ---

-class: extra-details
-
 ## With `hostNetwork: true`

 - No network namespace gets created
@@ -283,6 +229,26 @@ class: extra-details

 ---

+class: extra-details
+
+## Other techniques to expose port 80
+
+- We could use pods specifying `hostPort: 80` 
+
+  ... but with most CNI plugins, this [doesn't work or requires additional setup](https://github.com/kubernetes/kubernetes/issues/23920)
+
+- We could use a `NodePort` service
+
+  ... but that requires [changing the `--service-node-port-range` flag in the API server](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/)
+
+- We could create a service with an external IP
+
+  ... this would work, but would require a few extra steps
+
+  (figuring out the IP address and adding it to the service)
+
+---
+
 ## Running Traefik

 - The [Traefik documentation][traefikdoc] recommends using a Helm chart
@@ -304,8 +270,6 @@ class: extra-details

 ---

-class: extra-details
-
 ## Taints and tolerations

 - A *taint* is an attribute added to a node
@@ -532,6 +496,10 @@ This is normal: we haven't provided any ingress rule yet.

 ## Creating ingress resources

+- Before Kubernetes 1.19, we must use YAML manifests
+
+  (see example on next slide)
+
 - Since Kubernetes 1.19, we can use `kubectl create ingress`

  ```bash
@@ -566,21 +534,7 @@ This is normal: we haven't provided any ingress rule yet.

 ---

-## Before Kubernetes 1.19
-
- Before Kubernetes 1.19:
-
-  - `kubectl create ingress` wasn't available
-
-  - `apiVersion: networking.k8s.io/v1` wasn't supported
-
- It was necessary to use YAML, and `apiVersion: networking.k8s.io/v1beta1`
-
-  (see example on next slide)
-
---
-
-## YAML for old ingress resources
+## Ingress resources in YAML

 Here is a minimal host-based ingress resource:

@@ -601,15 +555,23 @@ spec:

 ```

+(It is in `k8s/ingress.yaml`.)
+
 ---

-## YAML for new ingress resources
+class: extra-details
+
+## Ingress API version
+
+- The YAML on the previous slide uses `apiVersion: networking.k8s.io/v1beta1`

 - Starting with Kubernetes 1.19, `networking.k8s.io/v1` is available

- And we can use `kubectl create ingress` 🎉
+- However, with Kubernetes 1.19 (and later), we can use `kubectl create ingress`

- We can see "modern" YAML with `-o yaml --dry-run=client`:
+- We chose to keep an "old" (deprecated!) YAML example for folks still using older versions of Kubernetes
+
+- If we want to see "modern" YAML, we can use `-o yaml --dry-run=client`:

  ```bash
  kubectl create ingress red -o yaml --dry-run=client \
@@ -679,6 +641,157 @@ class: extra-details

 - It is still in alpha stage

+---
+
+## Vendor-specific example
+
+- Let's see how to implement *canary releases*
+
+- The example here will use Traefik v1
+
+  (which is obsolete)
+
+- It won't work on your Kubernetes cluster!
+
+  (unless you're running an oooooold version of Kubernetes)
+
+  (and an equally oooooooold version of Traefik)
+
+- We've left it here just as an example!
+
+---
+
+## Canary releases
+
+- A *canary release* (or canary launch or canary deployment) is a release that will process only a small fraction of the workload
+
+- After deploying the canary, we compare its metrics to the normal release
+
+- If the metrics look good, the canary will progressively receive more traffic
+
+  (until it gets 100% and becomes the new normal release)
+
+- If the metrics aren't good, the canary is automatically removed
+
+- When we deploy a bad release, only a tiny fraction of traffic is affected
+
+---
+
+## Various ways to implement canary
+
+- Example 1: canary for a microservice
+
+  - 1% of all requests (sampled randomly) are sent to the canary
+  - the remaining 99% are sent to the normal release
+
+- Example 2: canary for a web app
+
+  - 1% of users are sent to the canary web site
+  - the remaining 99% are sent to the normal release
+
+- Example 3: canary for shipping physical goods
+
+  - 1% of orders are shipped with the canary process
+  - the remaining 99% are shipped with the normal process
+
+- We're going to implement example 1 (per-request routing)
+
+---
+
+## Canary releases with Traefik v1
+
+- We need to deploy the canary and expose it with a separate service
+
+- Then, in the Ingress resource, we need:
+
+  - multiple `paths` entries (one for each service, canary and normal)
+
+  - an extra annotation indicating the weight of each service
+
+- If we want, we can send requests to more than 2 services
+
+---
+
+## The Ingress resource
+
+.small[
+```yaml
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+  name: rgb
+  annotations:
+    traefik.ingress.kubernetes.io/service-weights: |
+      red: 50%
+      green: 25%
+      blue: 25%
+spec:
+  rules:
+  - host: rgb.`A.B.C.D`.nip.io
+    http:
+      paths:
+      - path: /
+        backend:
+          serviceName: red
+          servicePort: 80
+      - path: /
+        backend:
+          serviceName: green
+          servicePort: 80
+      - path: /
+        backend:
+          serviceName: blue
+          servicePort: 80
+```
+]
+
+---
+
+class: extra-details
+
+## Other ingress controllers
+
+*Just to illustrate how different things are ...*
+
+- With the NGINX ingress controller:
+
+  - define two ingress resources
+    <br/>
+    (specifying rules with the same host+path)
+
+  - add `nginx.ingress.kubernetes.io/canary` annotations on each
+
+
+- With Linkerd2:
+
+  - define two services
+
+  - define an extra service for the weighted aggregate of the two
+
+  - define a TrafficSplit (this is a CRD introduced by the SMI spec)
+
+---
+
+class: extra-details
+
+## We need more than that
+
+What we saw is just one of the multiple building blocks that we need to achieve a canary release.
+
+We also need:
+
+- metrics (latency, performance ...) for our releases
+
+- automation to alter canary weights
+
+  (increase canary weight if metrics look good; decrease otherwise)
+
+- a mechanism to manage the lifecycle of the canary releases
+
+  (create them, promote them, delete them ...)
+
+For inspiration, check [flagger by Weave](https://github.com/weaveworks/flagger).
+
 ???

 :EN:- The Ingress resource
--- a/slides/k8s/prereqs-admin.md
+++ b/slides/k8s/prereqs-admin.md
@@ -40,9 +40,7 @@

 - Each person gets their own private set of VMs

- The connection information is on a shared spreadsheet
-
-  (URL to be shared through portal and chat)
+- Each person should have a printed card with connection information

 - We will connect to these VMs with SSH

--- a/slides/k8s/resource-limits.md
+++ b/slides/k8s/resource-limits.md
@@ -14,20 +14,32 @@

 - CPU is a *compressible resource*

-  - it can be preempted immediately without adverse effect
-
-  - if we have N CPU and need 2N, we run at 50% speed
+  (it can be preempted immediately without adverse effect)

 - Memory is an *incompressible resource*

-  - it needs to be swapped out to be reclaimed; and this is costly
-
-  - if we have N GB RAM and need 2N, we might run at... 0.1% speed!
+  (it needs to be swapped out to be reclaimed; and this is costly)

 - As a result, exceeding limits will have different consequences for CPU and memory

 ---

+## Exceeding CPU limits
+
+- CPU can be reclaimed instantaneously
+
+  (in fact, it is preempted hundreds of times per second, at each context switch)
+
+- If a container uses too much CPU, it can be throttled
+
+  (it will be scheduled less often)
+
+- The processes in that container will run slower
+
+  (or rather: they will not run faster)
+
+---
+
 class: extra-details

 ## CPU limits implementation details
@@ -134,59 +146,39 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f

 ---

-## Running low on memory
+## Exceeding memory limits

- When the system runs low on memory, it starts to reclaim used memory
+- Memory needs to be swapped out before being reclaimed

-  (we talk about "memory pressure")
+- "Swapping" means writing memory pages to disk, which is very slow

- Option 1: free up some buffers and caches
+- On a classic system, a process that swaps can get 1000x slower

-  (fastest option; might affect performance if cache memory runs very low)
+  (because disk I/O is 1000x slower than memory I/O)

- Option 2: swap, i.e. write to disk some memory of one process to give it to another
+- Exceeding the memory limit (even by a small amount) can reduce performance *a lot*

-  (can have a huge negative impact on performance because disks are slow)
+- Kubernetes *does not support swap* (more on that later!)

- Option 3: terminate a process and reclaim all its memory
-
-  (OOM or Out Of Memory Killer on Linux)
+- Exceeding the memory limit will cause the container to be killed

 ---

-## Memory limits on Kubernetes
+## Limits vs requests

- Kubernetes *does not support swap*
-
-  (but it may support it in the future, thanks to [KEP 2400])
-
- If a container exceeds its memory *limit*, it gets killed immediately
-
- If a node is overcommitted and under memory pressure, it will terminate some pods
-
-  (see next slide for some details about what "overcommit" means here!)
-
-[KEP 2400]: https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2400-node-swap/README.md#implementation-history
-
---
-
-## Overcommitting resources
-
- *Limits* are "hard limits" (a container *cannot* exceed its limits)
+- Limits are "hard limits" (they can't be exceeded)

  - a container exceeding its memory limit is killed

  - a container exceeding its CPU limit is throttled

- On a given node, the sum of pod *limits* can be higher than the node size
+- Requests are used for scheduling purposes

- *Requests* are used for scheduling purposes
+  - a container using *less* than what it requested will never be killed or throttled

-  - a container can use more than its requested CPU or RAM amounts
+  - the scheduler uses the requested sizes to determine placement

-  - a container using *less* than what it requested should never be killed or throttled
-
- On a given node, the sum of pod *requests* cannot be higher than the node size
+  - the resources requested by all pods on a node will never exceed the node size

 ---

@@ -230,31 +222,9 @@ Each pod is assigned a QoS class (visible in `status.qosClass`).

 ---

-class: extra-details
+## Where is my swap?

-## CPU and RAM reservation
-
- Kubernetes passes resources requests and limits to the container engine
-
- The container engine applies these requests and limits with specific mechanisms
-
- Example: on Linux, this is typically done with control groups aka cgroups
-
- Most systems use cgroups v1, but cgroups v2 are slowly being rolled out
-
-  (e.g. available in Ubuntu 22.04 LTS)
-
- Cgroups v2 have new, interesting features for memory control:
-
-  - ability to set "minimum" memory amounts (to effectively reserve memory)
-
-  - better control on the amount of swap used by a container
-
---
-
-class: extra-details
-
-## What's the deal with swap?
+- The semantics of memory and swap limits on Linux cgroups are complex

 - With cgroups v1, it's not possible to disable swap for a cgroup

@@ -268,8 +238,6 @@ class: extra-details

 - The simplest solution was to disable swap entirely

- Kubelet will refuse to start if it detects that swap is enabled!
-
 ---

 ## Alternative point of view
@@ -300,7 +268,7 @@ class: extra-details

 - You will need to add the flag `--fail-swap-on=false` to kubelet

-  (remember: it won't otherwise start if it detects that swap is enabled)
+  (otherwise, it won't start!)

 ---

@@ -698,18 +666,6 @@ class: extra-details

 ---

-## Underutilization
-
- Remember: when assigning a pod to a node, the scheduler looks at *requests*
-
-  (not at current utilization on the node)
-
- If pods request resources but don't use them, this can lead to underutilization
-
-  (because the scheduler will consider that the node is full and can't fit new pods)
-
---
-
 ## Viewing a namespace limits and quotas

 - `kubectl describe namespace` will display resource limits and quotas
--- a/slides/k8s/setup-devel.md
+++ b/slides/k8s/setup-devel.md
@@ -20,17 +20,13 @@

 ## Docker Desktop

- Available on Linux, Mac, and Windows
-
- Free for personal use and small businesses
-
-  (less than 250 employees and less than $10 million in annual revenue)
+- Available on Mac and Windows

 - Gives you one cluster with one node

- Streamlined installation and user experience
+- Very easy to use if you are already using Docker Desktop:

- Great integration with various network stacks and e.g. corporate VPNs
+  go to Docker Desktop preferences and enable Kubernetes

 - Ideal for Docker users who need good integration between both platforms

@@ -44,11 +40,13 @@

 - Runs Kubernetes nodes in Docker containers

- Can deploy multiple clusters, with multiple nodes
+- Can deploy multiple clusters, with multiple nodes, and multiple master nodes

- Runs the control plane on Kubernetes nodes
+- As of June 2020, two versions co-exist: stable (1.7) and beta (3.0)

- Control plane can also run on multiple nodes
+- They have different syntax and options, this can be confusing
+
+  (but don't let that stop you!)

 ---

@@ -86,7 +84,7 @@

 - More advanced scenarios require writing a short [config file](https://kind.sigs.k8s.io/docs/user/quick-start#configuring-your-kind-cluster)

-  (to define multiple nodes, multiple control plane nodes, set Kubernetes versions ...)
+  (to define multiple nodes, multiple master nodes, set Kubernetes versions ...)
 
 - Can deploy multiple clusters

@@ -126,9 +124,7 @@

 ## [Rancher Desktop](https://rancherdesktop.io/)

- Available on Linux, Mac, and Windows
-
- Free and open-source
+- Available on Mac and Windows

 - Runs a single cluster with a single node

@@ -138,7 +134,7 @@

 - Emphasis on ease of use (like Docker Desktop)

- Relatively young product (first release in May 2021)
+- Very young product (first release in May 2021)

 - Based on k3s and other proven components

--- a/slides/kube-adv.yml
+++ b/slides/kube-adv.yml
@@ -2,11 +2,11 @@ title: |
  Advanced
  Kubernetes

-chat: "[Slack](https://newrelic.slack.com/archives/C0438EFM97F)"
+chat: "[Slack](#FIXME)"

 gitrepo: github.com/jpetazzo/container.training

-slides: https://2022-09-nr2.container.training/
+slides: https://2022-09-nr.container.training/

 #slidenumberprefix: "#SomeHashTag &mdash; "

@@ -26,84 +26,59 @@ content:
 - #1
  - k8s/prereqs-admin.md
  - k8s/architecture.md
+  - k8s/internal-apis.md
  - k8s/deploymentslideshow.md
  - k8s/dmuc.md
-  - interlude-form.md
  - k8s/multinode.md
  - k8s/cni.md
  - k8s/interco.md
  - k8s/cni-internals.md
-  - k8s/apilb.md
 - #2
-  - k8s/demo-apps.md
-  - k8s/netpol.md
-  - k8s/authn-authz.md
+  - k8s/apilb.md
+  - k8s/internal-apis.md
+  - k8s/staticpods.md
+  #- k8s/cluster-upgrade.md
+  - k8s/control-plane-auth.md
  - k8s/user-cert.md
  - k8s/csr-api.md
  - k8s/openid-connect.md
  - k8s/pod-security-intro.md
  - k8s/pod-security-policies.md
  - k8s/pod-security-admission.md
-  - exercises/netpol-details.md
-  - exercises/rbac-details.md
 - #3
-  - k8s/helm-intro.md
-  - k8s/ingress.md
+  - k8s/extending-api.md
+  - k8s/crd.md
+  - k8s/operators.md
+  - k8s/sealed-secrets.md
  - k8s/cert-manager.md
  - k8s/ingress-tls.md
  - k8s/ingress-advanced.md
+  #- k8s/eck.md
+- #4
+  - k8s/admission.md
+  - k8s/cainjector.md
+  - k8s/kyverno.md
+  - k8s/aggregation-layer.md
+  - k8s/metrics-server.md
+  - k8s/hpa-v2.md
+- #5
+  - k8s/operators-design.md
+  - k8s/operators-example.md
+  - k8s/owners-and-dependents.md
+  #- k8s/kubebuilder.md
+  - k8s/events.md
+  - k8s/finalizers.md
+  - shared/thankyou.md
+- #6
+  - |
+    # (Extra content)
  - k8s/kustomize.md
+  - k8s/helm-intro.md
  - k8s/helm-chart-format.md
  - k8s/helm-create-basic-chart.md
  - k8s/helm-create-better-chart.md
  - k8s/helm-dependencies.md
  - k8s/helm-values-schema-validation.md
  - k8s/helm-secrets.md
-  - exercises/helm-generic-chart-details.md
-  - exercises/helm-umbrella-chart-details.md
- #4
-  - k8s/extending-api.md
-  - k8s/admission.md
-  - k8s/cainjector.md
-  - k8s/kyverno.md
-  - k8s/crd.md
-  - k8s/operators.md
-  - k8s/sealed-secrets.md
-  - k8s/operators-design.md
-  - k8s/operators-example.md
-  - k8s/owners-and-dependents.md
-  - k8s/events.md
-  - k8s/finalizers.md
-  - exercises/sealed-secrets-details.md
- #5
-  - k8s/resource-limits.md
-  - k8s/cluster-sizing.md
-  - k8s/cluster-autoscaler.md
-  - k8s/horizontal-pod-autoscaler.md
-  - k8s/aggregation-layer.md
-  - k8s/metrics-server.md
-  - k8s/hpa-v2.md
-  - k8s/batch-jobs.md
-  - k8s/statefulsets.md
-  - k8s/consul.md
-  - k8s/pv-pvc-sc.md
-  - k8s/volume-claim-templates.md
-  - k8s/stateful-failover.md
-  - shared/thankyou.md
-
-  - |
-    # (Extra content I)
-  - k8s/setup-devel.md
-  - k8s/accessinternal.md
-  - k8s/kubectlproxy.md  
-  - k8s/k9s.md
-  - k8s/tilt.md
  - k8s/ytt.md
-
-  - |
-    # (Extra content II)
-  - k8s/internal-apis.md
-  - k8s/staticpods.md
-  - k8s/cluster-upgrade.md
-  - k8s/control-plane-auth.md
-  - k8s/kubebuilder.md
+  - k8s/apiserver-deepdive.md
--- a/slides/logistics-template.md
+++ b/slides/logistics-template.md
@@ -1,27 +1,38 @@
-## Intros
+## Intros & disclaimers

- Hello! We are:
+- Hello! I'm Jérôme Petazzoni ([@jpetazzo])

-  - Jérôme Petazzoni ([@jpetazzo])
+- I have ...

-  - Dana Engebretson ([@bigdana])
+  - extensive experience running *containers* in production
+  - limited experience running *Kubernetes* in production

- The training will run from 8am to noon, Monday to Friday
+  - taught Docker and Kubernetes many times, to large audiences
+  - less frequently taught operators and API internals

- There will be breaks every hour
+  - written a lot of Python code during my career; but much less Go
+
+  - learned way more than I expected just by writing some chapters of this course (!)
+
+---
+
+## Logistics
+
+- The training will run from ... to ..., Monday to Friday
+
+- There will be short breaks every hour, and a longer break in the middle

 - Feel free to interrupt for questions at any time

 - *Especially when you see full screen container pictures!*

-  (I will watch them in awkward silence while I wait for your questions)
+  (I will watch them in silence while I wait for your questions)

 - Live feedback, questions, help: @@CHAT@@

 <!-- -->

 [@alexbuisine]: https://twitter.com/alexbuisine
-[@bigdana]: https://twitter.com/bigdana
 [EphemeraSearch]: https://ephemerasearch.com/
 [@jpetazzo]: https://twitter.com/jpetazzo
 [@s0ulshake]: https://twitter.com/s0ulshake
@@ -29,12 +40,16 @@

 ---

-## Dynamic content
+## Exercises

- The content of this course will be adapted to suit your needs!
+- At the end of each day, there is a series of exercises

- We'll share a link to a form so you can tell us what we should work on
+- To make the most out of the training, please try the exercises!

- Expect the content of the deck to change between Monday and Tuesday
+  (it will help to practice and memorize the content of the day)

-  (we will reorder/reorganize the content accordingly)
+- We recommend taking at least one hour to work on the exercises
+
+  (if you understood the content of the day, it will be much faster)
+
+- Each day will start with a quick review of the exercises of the previous day