Compare commits


97 Commits

Author SHA1 Message Date
Jérôme Petazzoni
0cb13b62cb Update README.md 2022-02-02 17:01:23 +01:00
Jérôme Petazzoni
e1290c5b84 Add some info about profiles and .env 2022-01-31 19:48:12 +01:00
Jérôme Petazzoni
2c2574fece ♻️ Improve PriorityClass slides 2022-01-27 13:14:26 +01:00
Jérôme Petazzoni
5c96b40bbd 🐞 Fix kustomize completion 2022-01-27 13:14:16 +01:00
Jérôme Petazzoni
5aa20362eb ♻️ Update healthcheck content 2022-01-27 11:23:43 +01:00
Jérôme Petazzoni
a01fecf679 ♻️ Bump Consul version and move SA at the beginning of the YAML
It's a tiny bit easier to run through the YAML when it starts with
the ServiceAccount, I find.
2022-01-27 10:40:37 +01:00
Jérôme Petazzoni
b75d6562b5 🏭️ Rewrite kubectl-run chapter 2022-01-27 10:36:52 +01:00
Jérôme Petazzoni
7f5944b157 📍 Correctly pin+hold package versions with APT preferences 2022-01-27 08:59:12 +01:00
Jérôme Petazzoni
21287d16bf ♻️ Switch to containerd 2022-01-26 21:05:01 +01:00
Jérôme Petazzoni
9434b40b58 🐞 Fix a couple of search-and-replace mistakes 2022-01-23 10:39:54 +01:00
Jérôme Petazzoni
b59f5dd00d Merge pull request #606 from sebgl/fix-pvc-link
Update link to the PersistentVolumeClaimBinder design doc
2022-01-23 09:08:11 +01:00
sebgl
d8ad0021cc Update link to the PersistentVolumeClaimBinder design doc
It looks like that doc has been moved elsewhere. This commit updates the link to (what I think is) the intended page.
2022-01-21 10:34:35 +01:00
Jérôme Petazzoni
8dbd6d54a0 🐞 Add warning about initial_node_count 2022-01-20 11:49:28 +01:00
Jérôme Petazzoni
b454749e92 🐞 Add info about Terraform provider version pinning 2022-01-20 09:29:11 +01:00
Jérôme Petazzoni
9a71d0e260 📃 Add gcloud auth application-default login 2022-01-19 11:24:00 +01:00
Jérôme Petazzoni
25e844fdf4 Bump up version numbers in upgrade labs 2022-01-18 12:16:46 +01:00
Jérôme Petazzoni
c40f4f5f2a 📝 Update ingress chapter
Replace cheese images with jpetazz/color.
Add details on GKE Ingress and clarify cost for cloud ingress.
Mention that Traefik canary v1 is obsolete.
2022-01-18 12:09:33 +01:00
Jérôme Petazzoni
cfa89b3ab5 📃 Update AJ's affiliation 2022-01-17 19:18:09 +01:00
Jérôme Petazzoni
a10cf8d9c3 Add GKE networking; kubernetes resource creation in TF 2022-01-17 18:18:49 +01:00
Jérôme Petazzoni
749e5da20b Add command to remove a DNS record 2022-01-17 11:08:11 +01:00
Jérôme Petazzoni
69c7ac2371 Add Terraform workshop with GKE and node pools 2022-01-17 00:00:49 +01:00
Jérôme Petazzoni
de0ad83686 Add quick intro to demo apps 2022-01-16 16:01:58 +01:00
Jérôme Petazzoni
f630f08713 🔧 Uniformize labels in rainbow demo app 2022-01-16 16:01:03 +01:00
Jérôme Petazzoni
920a075afe 🔧 Pin old cluster to an even older version 2022-01-15 18:36:16 +01:00
Jérôme Petazzoni
a47c51618b 🔧 Improve GKE config to spread across multiple locations
GCP quotas are fairly limited (on my account, I can only
use 8 public IP addresses per zone, which means that I cannot
deploy many public clusters in a single zone). I tried to
use private clusters, but that causes other problems.
This refactoring makes it possible to spread clusters
across multiple zones. Since I have access to 20+ zones
in Europe and 20+ zones in the US, this lets me create a
lot of public clusters and simplifies the module quite a bit.
2022-01-14 12:30:55 +01:00
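(Editorial note: concretely, the idea sketched below — the prepare-tf README further down shows an equivalent command using jq, and the provider name passed to `run.sh` depends on the module directory names.)

```bash
# Give Terraform a newline-separated list of zones; clusters are assigned to
# them round-robin (see the "locations" local in prepare-tf), so the per-zone
# public IP quota applies to each zone rather than to the whole batch.
export TF_VAR_location="$(gcloud compute zones list --format='value(name)' | grep ^europe-)"
./run.sh <provider>   # use the name of the GKE module directory
```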
Jérôme Petazzoni
f3156513b8 🏭️ Add wrapper script for 'prepare-tf'
This should make it easy to start a bunch of clusters
(using the new Terraform provisioning method) on various
providers.
2022-01-11 10:11:42 +01:00
Jérôme Petazzoni
96de30ca78 🐞 Minor typo fix in help line 2022-01-10 21:05:34 +01:00
Jérôme Petazzoni
8de9e6e868 🏭️ Refactor prepare-tf
- fix tags so that they don't contain '='
- install metrics-server only if necessary
- set a maximum size to GKE node pool
- change tags to be shorter
2022-01-09 20:51:58 +01:00
Jérôme Petazzoni
7eb90b9d6f Merge pull request #555 from barpilot/gitops
update gitops slides
2022-01-09 17:31:22 +01:00
Jérôme Petazzoni
931455ba31 📃 Add GCP to doc and tweak them a bit 2022-01-07 15:40:56 +01:00
Jérôme Petazzoni
f02cef0351 Add content about externalTrafficPolicy
Describe impact of extra hops when using an ingress controller.
Also discuss how to preserve the HTTP client IP address.
2022-01-06 20:44:36 +01:00
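(Editorial note: for reference, the setting discussed in this commit can be inspected and toggled as sketched below; `my-ingress-controller` is a placeholder Service name.)

```bash
# Default is "Cluster": traffic may take an extra hop to a node that actually
# has an endpoint, and the client IP gets SNATed along the way.
kubectl get service my-ingress-controller \
        -o jsonpath='{.spec.externalTrafficPolicy}{"\n"}'

# "Local" only routes to nodes with a local endpoint, removing the extra hop
# and preserving the HTTP client IP address.
kubectl patch service my-ingress-controller \
        -p '{"spec":{"externalTrafficPolicy":"Local"}}'
```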
Jérôme Petazzoni
9054fd58ea 🙏🏻 Add acknowledgements+thanks to @soulshake 2022-01-06 13:32:04 +01:00
Jérôme Petazzoni
24aa1ae9f7 More tweaks on the cluster autoscaler content 2022-01-06 12:52:28 +01:00
Jérôme Petazzoni
c1c4e48457 Tweaks on the cluster autoscaler content 2022-01-06 12:05:12 +01:00
Jérôme Petazzoni
0614087b2f Update CSR API to v1 in Terraform deployment configs 2022-01-06 11:54:43 +01:00
Jérôme Petazzoni
3745d0e12a Add cluster autoscaler section 2022-01-06 11:49:36 +01:00
Jérôme Petazzoni
90885e49cf Add Terraform configurations for GKE 2022-01-04 18:51:35 +01:00
Jérôme Petazzoni
07d02e345e 🛠️ Add script to find unmerged changes 2022-01-04 12:50:20 +01:00
Jérôme Petazzoni
f2311545cd 🔙 Backport EKS section from flatiron training 2022-01-04 11:30:46 +01:00
Jérôme Petazzoni
e902962f3a 🩺 Update healthcheck exercise 2022-01-03 19:36:16 +01:00
Jérôme Petazzoni
ee7547999c ♻️ Update pssh install instructions 2022-01-03 18:06:11 +01:00
Jérôme Petazzoni
34fd6c0393 🔒️ Move slides links to HTTPS 2022-01-03 13:20:55 +01:00
Jérôme Petazzoni
e67fca695e 🛠️ Add 'list' function to Netlify helper script 2022-01-03 13:18:31 +01:00
Jérôme Petazzoni
b56e54eaec ♻️ s/exercise/lab/
Now that we have a good number of longer exercises, it makes
sense to rename the shorter demos/labs into 'labs' to avoid
confusion between the two.
2021-12-29 17:18:07 +01:00
Jérôme Petazzoni
2669eae49b Merge pull request #599 from soulshake/patch-1
Fix typo "an URL"
2021-12-15 16:21:51 +01:00
AJ Bowen
c26e51d69c Fix typo "an URL" 2021-12-15 05:44:09 -06:00
Jérôme Petazzoni
c9518631e5 🧹 Delete OCI compartments 2021-12-14 17:35:36 +01:00
Jérôme Petazzoni
164651c461 Add new Kyverno exercise 2021-12-14 16:39:06 +01:00
Jérôme Petazzoni
1d8062f1dc 📃 Improve README to show how to set token variables 2021-12-14 15:46:00 +01:00
Jérôme Petazzoni
98671ea756 🔑 Minor tweaks in netpol section 2021-12-10 16:27:50 +01:00
Jérôme Petazzoni
f0c0266c06 📦️ Add info to use kubectl-build in Tiltfile 2021-12-10 15:16:20 +01:00
Jérôme Petazzoni
1c48145cc4 Add ConfigMap exercise 2021-12-07 14:02:54 +01:00
Jérôme Petazzoni
eced9b46d6 🔑 Increase MaxAuthTries in SSH for folks with many keys 2021-12-07 12:08:44 +01:00
Jérôme Petazzoni
74947e8265 Add exercises with remote clusters 2021-12-06 15:16:38 +01:00
Jérôme Petazzoni
9f9016de0c 🧹 Delete Load Balancers etc. when deleting Kapsule clusters 2021-12-06 12:10:59 +01:00
Jérôme Petazzoni
cd9751a765 📍 Pin metrics-server version (0.5 seems broken on my clusters?) 2021-12-03 12:03:54 +01:00
Jérôme Petazzoni
e48448128d ♻️ Update Stern information 2021-12-03 12:03:26 +01:00
Jérôme Petazzoni
66b161d8ec 🔧 Tweak ingress exercise 2021-12-01 16:53:47 +01:00
Jérôme Petazzoni
031a2f7019 🔧 Tweak healthcheck exercise 2021-12-01 16:49:00 +01:00
Jérôme Petazzoni
6fb446b900 🔧 Improve explanations of the Dockercoins exercise 2021-11-30 10:47:37 +01:00
Jérôme Petazzoni
ba45fe932f 🐚 Add script to configure Netlify DNS 2021-11-29 12:25:14 +01:00
Jérôme Petazzoni
4adb75f0cb 🌈 Update HAProxy example to use literal blue/green deployment 2021-11-28 20:30:38 +01:00
Jérôme Petazzoni
c9507b4c83 📍 Pin Redis version 2021-11-28 13:12:33 +01:00
Jérôme Petazzoni
c9e7dd6dfa 🌉 Add ngrok tunnel in Tiltfile 2021-11-28 13:07:17 +01:00
Jérôme Petazzoni
bc761d512a Add commands to list verbs, resources, subresources 2021-11-28 11:36:44 +01:00
Jérôme Petazzoni
a368a3c21b ♻️ Update information about bootkube 2021-11-25 15:01:44 +01:00
Jérôme Petazzoni
607158bda3 🐞 Fix two small typos 2021-11-21 21:47:35 +01:00
Jérôme Petazzoni
52015b81fe 🏭️ Refactor stateful apps content 2021-11-20 22:00:50 +01:00
Jérôme Petazzoni
93d8a23c81 Add 'oldversion' cluster in admin course deployment script 2021-11-19 15:49:55 +01:00
Jérôme Petazzoni
5e50f2a3a4 Add Pod Security Admission 2021-11-18 18:24:43 +01:00
Jérôme Petazzoni
5d3ab6b61f Add dmuc exercise 2021-11-18 09:09:40 +01:00
Jérôme Petazzoni
ff260c2731 Minor improvements 2021-11-17 22:15:01 +01:00
Jérôme Petazzoni
2fc6d23d1e ♻️ Prepare for upcoming Terraform updates 2021-11-17 20:13:34 +01:00
Jérôme Petazzoni
bbbcadeb26 🐞 Typo fix 2021-11-15 15:58:20 +01:00
Jérôme Petazzoni
fe46b62f14 🐞 Fix missing directory (thanks @tianon) 2021-11-13 19:28:41 +01:00
Jérôme Petazzoni
60e5d5627b Merge pull request #598 from tianon/whitespace
Fix very minor whitespace typo
2021-11-13 19:27:44 +01:00
Tianon Gravi
be1bf50a43 Fix very minor whitespace typo 2021-11-12 17:00:16 -08:00
Jérôme Petazzoni
2893ec8c7f 🖼️ Add mirror.gcr.io as Docker registry mirror 2021-11-12 16:21:04 +01:00
Jérôme Petazzoni
dc89be170a Merge pull request #597 from tianon/exemple
Fix "exemple" typo
2021-11-12 14:27:38 +01:00
Jérôme Petazzoni
8f03ce674a 🐞 Fix secret names in exercise 2021-11-12 08:31:07 +01:00
Jérôme Petazzoni
23eb0ed771 📃 Add command to list regions in DO provider 2021-11-12 08:28:25 +01:00
Tianon Gravi
cc62e19274 Fix "exemple" typo 2021-11-11 16:17:11 -08:00
Jérôme Petazzoni
92cd81b170 Update DOK version slug 2021-11-11 09:22:00 +01:00
Jérôme Petazzoni
d9e29eb4a4 ♻️ Update and clarify Ingress+Kyverno+RBAC exercise 2021-11-09 08:38:14 +01:00
Jérôme Petazzoni
00b167207d 🐞 Fix a few download URLs 2021-11-08 17:12:54 +01:00
Jérôme Petazzoni
d34017cff1 Upgrade OpenStack Terraform config to Terraform 1.0 2021-11-08 17:12:31 +01:00
Jérôme Petazzoni
d53ba51a9f 🐞 Fix Sealed Secrets Helm release name 2021-11-08 15:03:24 +01:00
Jérôme Petazzoni
90ce84ace3 ♻️ Update sealed secrets + RBAC + YAML exercise 2021-11-08 08:41:53 +01:00
Jérôme Petazzoni
bcbfc747a2 📝 Update YAML authoring section; add linters 2021-11-07 19:29:33 +01:00
Jérôme Petazzoni
2f83b7f256 ✏️ Add IngressClass in Traefik YAML
This is necessary with recent versions of Traefik, because
it won't pick up Ingress resources that don't have an
IngressClass. So let's add an IngressClass and make it
the default.
2021-11-07 18:33:21 +01:00
Jérôme Petazzoni
753324cc89 🔑 Update RBAC section
We won't always have a kubeconfig with a TLS cert in it.
Let's break down different methods to analyze kubeconfig
depending on whether there is a TLS cert or a token in it.
2021-11-07 16:23:29 +01:00
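(Editorial note: one way to check which method a given kubeconfig uses — a sketch assuming a single user entry and the usual base64/openssl tools.)

```bash
# Show whether the user entry carries a client certificate or a token.
kubectl config view --raw -o yaml | grep -E 'client-certificate-data|client-key-data|token:'

# If there is a certificate, decode it to see the CN (user) and O (groups)
# that the API server's RBAC will evaluate.
kubectl config view --raw -o jsonpath='{.users[0].user.client-certificate-data}' \
  | base64 -d | openssl x509 -noout -subject -dates
```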
Guilhem Lettron
3d724d87db gitops: update create branch method 2020-04-29 22:09:52 +02:00
Guilhem Lettron
8c04154430 gitops: update Flux log for identity.pub 2020-04-29 22:07:02 +02:00
Guilhem Lettron
66b7d118ba gitops: add Flux helm install method 2020-04-29 22:04:41 +02:00
Guilhem Lettron
a772fff88e gitops: flux use kustomize 2020-04-29 21:57:54 +02:00
Guilhem Lettron
57af933c2d gitops: add missing cd 2020-04-29 21:55:56 +02:00
Guilhem Lettron
4888ec1f5b gitops: add bash highlight 2020-04-29 21:54:27 +02:00
238 changed files with 8934 additions and 3161 deletions

View File

@@ -1,5 +1,7 @@
# Container Training
(Test for livecycle)
This repository (formerly known as `orchestration-workshop`)
contains materials (slides, scripts, demo app, and other
code samples) used for various workshops, tutorials, and

View File

@@ -1,14 +1,67 @@
# (1) Setting up a registry, and telling Tilt to use it.
# Tilt needs a registry to store images.
# The following manifest defines a Deployment to run a basic Docker registry,
# and a NodePort Service to access it. Using a NodePort means that we don't
# need to obtain a TLS certificate, because we will be accessing the registry
# through localhost.
k8s_yaml('../k8s/tilt-registry.yaml')
# Tell Tilt to use the registry that we just deployed instead of whatever
# is defined in our Kubernetes resources. Tilt will patch image names to
# use our registry.
default_registry('localhost:30555')
# Create a port forward so that we can access the registry from our local
# environment, too. Note that if you run Tilt directly from a Kubernetes node
# (which is not typical, but might happen in some lab/training environments)
# the following might cause an error because port 30555 is already taken.
k8s_resource(workload='tilt-registry', port_forwards='30555:5000')
# (2) Telling Tilt how to build and run our app.
# The following two lines will use the kubectl-build plugin
# to leverage buildkit and build the images in our Kubernetes
# cluster. This is not enabled by default, because it requires
# the plugin to be installed.
# See https://github.com/vmware-tanzu/buildkit-cli-for-kubectl
# for more information about this plugin.
#load('ext://kubectl_build', 'kubectl_build')
#docker_build = kubectl_build
# Our Kubernetes manifests use images 'dockercoins/...' so we tell Tilt
# how each of these images should be built. The first argument is the name
# of the image, the second argument is the directory containing the build
# context (i.e. the Dockerfile to build the image).
docker_build('dockercoins/hasher', 'hasher')
docker_build('dockercoins/rng', 'rng')
docker_build('dockercoins/webui', 'webui')
docker_build('dockercoins/worker', 'worker')
# The following manifests defines five Deployments and four Services for
# our application.
k8s_yaml('../k8s/dockercoins.yaml')
# Uncomment the following line to let tilt run with the default kubeadm cluster-admin context.
#allow_k8s_contexts('kubernetes-admin@kubernetes')
# (3) Finishing touches.
# While we're here: if you're controlling a remote cluster, uncomment that line.
# It will create a port forward so that you can access the remote registry.
#k8s_resource(workload='registry', port_forwards='30555:5000')
# The following line lets Tilt run with the default kubeadm cluster-admin context.
allow_k8s_contexts('kubernetes-admin@kubernetes')
# This will run an ngrok tunnel to expose Tilt to the outside world.
# This is intended to be used when Tilt runs on a remote machine.
local_resource(name='ngrok:tunnel', serve_cmd='ngrok http 10350')
# This will wait until the ngrok tunnel is up, and show its URL to the user.
# We send the output to /dev/tty so that it doesn't get intercepted by
# Tilt, and gets displayed to the user's terminal instead.
# Note: this assumes that the ngrok instance will be running on port 4040.
# If you have other ngrok instances running on the machine, this might not work.
local_resource(name='ngrok:showurl', cmd='''
while sleep 1; do
TUNNELS=$(curl -fsSL http://localhost:4040/api/tunnels | jq -r .tunnels[].public_url)
[ "$TUNNELS" ] && break
done
printf "\nYou should be able to connect to the Tilt UI with the following URL(s): %s\n" "$TUNNELS" >/dev/tty
'''
)

View File

@@ -1,6 +1,6 @@
FROM node:4-slim
RUN npm install express
RUN npm install redis
RUN npm install redis@3
COPY files/ /files/
COPY webui.js /
CMD ["node", "webui.js"]

View File

@@ -0,0 +1,16 @@
apiVersion: apiserver.config.k8s.io/v1
kind: AdmissionConfiguration
plugins:
- name: PodSecurity
  configuration:
    apiVersion: pod-security.admission.config.k8s.io/v1alpha1
    kind: PodSecurityConfiguration
    defaults:
      enforce: baseline
      audit: baseline
      warn: baseline
    exemptions:
      usernames:
      - cluster-admin
      namespaces:
      - kube-system
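(Editorial note, not part of this diff: an AdmissionConfiguration like this is typically handed to the API server via its `--admission-control-config-file` flag, and the same levels can be applied per namespace with Pod Security Standards labels, as sketched below.)

```bash
# Cluster-wide defaults: reference the file above in the kube-apiserver
# manifest (the path is only an example):
#   --admission-control-config-file=/etc/kubernetes/podsecurity.yaml

# Per-namespace levels via labels (no API server restart needed):
kubectl label namespace dev \
  pod-security.kubernetes.io/enforce=baseline \
  pod-security.kubernetes.io/warn=restricted
```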

View File

@@ -3,6 +3,12 @@
# - no actual persistence
# - scaling down to 1 will break the cluster
# - pods may be colocated
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -28,11 +34,6 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -61,7 +62,7 @@ spec:
serviceAccountName: consul
containers:
- name: consul
image: "consul:1.8"
image: "consul:1.11"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -2,6 +2,12 @@
# There is still no actual persistence, but:
# - podAntiaffinity prevents pod colocation
# - cluster works when scaling down to 1 (thanks to lifecycle hook)
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -27,11 +33,6 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -68,7 +69,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.8"
image: "consul:1.11"
env:
- name: NAMESPACE
valueFrom:

View File

@@ -1,5 +1,11 @@
# Even better Consul cluster.
# That one uses a volumeClaimTemplate to achieve true persistence.
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
@@ -25,11 +31,6 @@ subjects:
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
@@ -75,7 +76,7 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.8"
image: "consul:1.11"
volumeMounts:
- name: data
mountPath: /consul/data

View File

@@ -1,18 +1,16 @@
global
daemon
maxconn 256
defaults
mode tcp
timeout connect 5000ms
timeout client 50000ms
timeout server 50000ms
timeout connect 5s
timeout client 50s
timeout server 50s
frontend the-frontend
listen very-basic-load-balancer
bind *:80
default_backend the-backend
backend the-backend
server google.com-80 google.com:80 maxconn 32 check
server ibm.fr-80 ibm.fr:80 maxconn 32 check
server blue color.blue.svc:80
server green color.green.svc:80
# Note: the services above must exist,
# otherwise HAproxy won't start.

View File

@@ -0,0 +1,28 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: ingress-domain-name
spec:
  rules:
  - name: create-ingress
    match:
      resources:
        kinds:
        - Service
    generate:
      kind: Ingress
      name: "{{request.object.metadata.name}}"
      namespace: "{{request.object.metadata.namespace}}"
      data:
        spec:
          rules:
          - host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
            http:
              paths:
              - backend:
                  service:
                    name: "{{request.object.metadata.name}}"
                    port:
                      number: 80
                path: /
                pathType: Prefix

View File

@@ -0,0 +1,32 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: ingress-domain-name
spec:
  rules:
  - name: create-ingress
    match:
      resources:
        kinds:
        - Service
    preconditions:
    - key: "{{request.object.spec.ports[0].name}}"
      operator: Equals
      value: http
    generate:
      kind: Ingress
      name: "{{request.object.metadata.name}}"
      namespace: "{{request.object.metadata.namespace}}"
      data:
        spec:
          rules:
          - host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
            http:
              paths:
              - backend:
                  service:
                    name: "{{request.object.metadata.name}}"
                    port:
                      name: http
                path: /
                pathType: Prefix

View File

@@ -0,0 +1,37 @@
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: ingress-domain-name
spec:
  rules:
  - name: create-ingress
    context:
    - name: configmap
      configMap:
        name: ingress-domain-name
        namespace: "{{request.object.metadata.namespace}}"
    match:
      resources:
        kinds:
        - Service
    preconditions:
    - key: "{{request.object.spec.ports[0].name}}"
      operator: Equals
      value: http
    generate:
      kind: Ingress
      name: "{{request.object.metadata.name}}"
      namespace: "{{request.object.metadata.namespace}}"
      data:
        spec:
          rules:
          - host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.{{configmap.data.domain}}"
            http:
              paths:
              - backend:
                  service:
                    name: "{{request.object.metadata.name}}"
                    port:
                      name: http
                path: /
                pathType: Prefix
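(Editorial note: a way to try one of these generate policies — a sketch assuming Kyverno is installed, the policy is applied, and, for this last variant, a ConfigMap named `ingress-domain-name` with a `domain` key exists in the namespace.)

```bash
# The precondition requires the Service's first port to be named "http",
# so create the Service from a manifest rather than with "kubectl expose".
kubectl create deployment web --image=nginx
kubectl apply -f - <<EOF
apiVersion: v1
kind: Service
metadata:
  name: web
spec:
  selector:
    app: web
  ports:
  - name: http
    port: 80
EOF

# Kyverno should then generate an Ingress named after the Service.
kubectl get ingress web
```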

20
k8s/mounter.yaml Normal file
View File

@@ -0,0 +1,20 @@
kind: Pod
apiVersion: v1
metadata:
  generateName: mounter-
  labels:
    container.training/mounter: ""
spec:
  volumes:
  - name: pvc
    persistentVolumeClaim:
      claimName: my-pvc-XYZ45
  containers:
  - name: mounter
    image: alpine
    stdin: true
    tty: true
    volumeMounts:
    - name: pvc
      mountPath: /pvc
    workingDir: /pvc

View File

@@ -3,8 +3,7 @@ apiVersion: networking.k8s.io/v1
metadata:
name: deny-from-other-namespaces
spec:
podSelector:
matchLabels:
podSelector: {}
ingress:
- from:
- podSelector: {}

20
k8s/pv.yaml Normal file
View File

@@ -0,0 +1,20 @@
kind: PersistentVolume
apiVersion: v1
metadata:
  generateName: my-pv-
  labels:
    container.training/pv: ""
spec:
  accessModes:
  - ReadWriteOnce
  - ReadWriteMany
  capacity:
    storage: 1G
  hostPath:
    path: /tmp/my-pv
  #storageClassName: my-sc
  #claimRef:
  # kind: PersistentVolumeClaim
  # apiVersion: v1
  # namespace: default
  # name: my-pvc-XYZ45

13
k8s/pvc.yaml Normal file
View File

@@ -0,0 +1,13 @@
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  generateName: my-pvc-
  labels:
    container.training/pvc: ""
spec:
  accessModes:
  - ReadWriteOnce
  resources:
    requests:
      storage: 1G
  #storageClassName: my-sc
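(Editorial note: these three manifests rely on `generateName`, so they are meant to be instantiated with `kubectl create` rather than `kubectl apply`; a possible sequence is sketched below.)

```bash
# Each "create" produces a new object with a random name suffix.
kubectl create -f k8s/pv.yaml
kubectl create -f k8s/pvc.yaml
kubectl get pv,pvc

# The mounter Pod references the claim "my-pvc-XYZ45"; replace that placeholder
# with the generated name of your claim, then create the Pod and attach to it.
kubectl create -f k8s/mounter.yaml
kubectl attach -ti mounter-xxxxx   # interactive alpine shell, working dir /pvc
```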

147
k8s/rainbow.yaml Normal file
View File

@@ -0,0 +1,147 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: blue
labels:
app: rainbow
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: rainbow
color: blue
name: color
namespace: blue
spec:
selector:
matchLabels:
app: rainbow
color: blue
template:
metadata:
labels:
app: rainbow
color: blue
spec:
containers:
- image: jpetazzo/color
name: color
---
apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
color: blue
name: color
namespace: blue
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
selector:
app: rainbow
color: blue
type: ClusterIP
---
apiVersion: v1
kind: Namespace
metadata:
name: green
labels:
app: rainbow
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: rainbow
color: green
name: color
namespace: green
spec:
selector:
matchLabels:
app: rainbow
color: green
template:
metadata:
labels:
app: rainbow
color: green
spec:
containers:
- image: jpetazzo/color
name: color
---
apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
color: green
name: color
namespace: green
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
selector:
app: rainbow
color: green
type: ClusterIP
---
apiVersion: v1
kind: Namespace
metadata:
name: red
labels:
app: rainbow
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: rainbow
color: red
name: color
namespace: red
spec:
selector:
matchLabels:
app: rainbow
color: red
template:
metadata:
labels:
app: rainbow
color: red
spec:
containers:
- image: jpetazzo/color
name: color
---
apiVersion: v1
kind: Service
metadata:
labels:
app: rainbow
color: red
name: color
namespace: red
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 80
selector:
app: rainbow
color: red
type: ClusterIP

View File

@@ -35,6 +35,9 @@ spec:
- name: http
containerPort: 80
hostPort: 80
- name: https
containerPort: 443
hostPort: 443
- name: admin
containerPort: 8080
hostPort: 8080
@@ -100,3 +103,12 @@ subjects:
- kind: ServiceAccount
name: traefik-ingress-controller
namespace: kube-system
---
kind: IngressClass
apiVersion: networking.k8s.io/v1
metadata:
  name: traefik
  annotations:
    ingressclass.kubernetes.io/is-default-class: "true"
spec:
  controller: traefik.io/ingress-controller

View File

@@ -1,16 +1,106 @@
This directory contains a Terraform configuration to deploy
a bunch of Kubernetes clusters on various cloud providers, using their respective managed Kubernetes products.
⚠️ This is work in progress. The UX needs to be improved,
and the docs could be better.
To use it:
This directory contains a Terraform configuration to deploy
a bunch of Kubernetes clusters on various cloud providers,
using their respective managed Kubernetes products.
## With shell wrapper
This is the recommended use. It makes it easy to start N clusters
on any provider. It will create a directory with a name like
`tag-YYYY-MM-DD-HH-MM-SS-SEED-PROVIDER`, copy the Terraform configuration
to that directory, then create the clusters using that configuration.
1. One-time setup: configure provider authentication for the provider(s) that you wish to use.
- Digital Ocean:
```bash
doctl auth init
```
- Google Cloud Platform: you will need to create a project named `prepare-tf`
and enable the relevant APIs for this project (sorry, if you're new to GCP,
this sounds vague; but if you're familiar with it you know what to do; if you
want to change the project name you can edit the Terraform configuration)
- Linode:
```bash
linode-cli configure
```
- Oracle Cloud: FIXME
(set up `oci` through the `oci-cli` Python package)
- Scaleway: run `scw init`
2. Optional: set number of clusters, cluster size, and region.
By default, 1 cluster will be configured, with 2 nodes, and auto-scaling up to 5 nodes.
If you want, you can override these parameters, with the following variables.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
export TF_VAR_max_nodes_per_pool=4
export TF_VAR_location=xxx
```
The `location` variable is optional. Each provider should have a default value.
The value of the `location` variable is provider-specific. Examples:
| Provider | Example value | How to see possible values
|---------------|-------------------|---------------------------
| Digital Ocean | `ams3` | `doctl compute region list`
| Google Cloud | `europe-north1-a` | `gcloud compute zones list`
| Linode | `eu-central` | `linode-cli regions list`
| Oracle Cloud | `eu-stockholm-1` | `oci iam region list`
You can also specify multiple locations, and then they will be
used in round-robin fashion.
For example, with Google Cloud, since the default quotas are very
low (my account is limited to 8 public IP addresses per zone, and
my requests to increase that quota were denied) you can do the
following:
```bash
export TF_VAR_location=$(gcloud compute zones list --format=json | jq -r .[].name | grep ^europe)
```
Then when you apply, clusters will be created across all available
zones in Europe. (When I write this, there are 20+ zones in Europe,
so even with my quota, I can create 40 clusters.)
3. Run!
```bash
./run.sh <providername>
```
(If you don't specify a provider name, it will list available providers.)
4. Shutting down
Go to the directory that was created by the previous step (`tag-YYYY-MM...`)
and run `terraform destroy`.
You can also run `./clean.sh` which will destroy ALL clusters deployed by the previous run script.
## Without shell wrapper
Expert mode.
Useful to run steps separately, and/or when working on the Terraform configurations.
1. Select the provider you wish to use.
Change the `source` attribute of the `module "clusters"` section.
Check the content of the `modules` directory to see available choices.
Go to the `source` directory and edit `main.tf`.
```bash
vim main.tf
```
Change the `source` attribute of the `module "clusters"` section.
Check the content of the `modules` directory to see available choices.
2. Initialize the provider.
@@ -20,24 +110,20 @@ terraform init
3. Configure provider authentication.
- Digital Ocean: `export DIGITALOCEAN_ACCESS_TOKEN=...`
(check `~/.config/doctl/config.yaml` for the token)
- Linode: `export LINODE_TOKEN=...`
(check `~/.config/linode-cli` for the token)
- Oracle Cloud: it should use `~/.oci/config`
- Scaleway: run `scw init`
See steps above, and add the following extra steps:
- Digital Ocean:
```bash
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
```
- Linode:
```bash
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
```
4. Decide how many clusters and how many nodes per clusters you want.
```bash
export TF_VAR_how_many_clusters=5
export TF_VAR_min_nodes_per_pool=2
# Optional (will enable autoscaler when available)
export TF_VAR_max_nodes_per_pool=4
# Optional (will only work on some providers)
export TF_VAR_enable_arm_pool=true
```
5. Provision clusters.
```bash
@@ -46,7 +132,7 @@ terraform apply
6. Perform second stage provisioning.
This will install a SSH server on the clusters.
This will install an SSH server on the clusters.
```bash
cd stage2
@@ -72,5 +158,5 @@ terraform destroy
9. Clean up stage2.
```bash
rm stage/terraform.tfstate*
rm stage2/terraform.tfstate*
```

9
prepare-tf/cleanup.sh Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/sh
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
for T in tag-*; do
(
cd $T
terraform apply -destroy -auto-approve && mv ../$T ../deleted$T
)
done

View File

@@ -1,16 +0,0 @@
resource "random_string" "_" {
length = 5
special = false
upper = false
}
resource "time_static" "_" {}
locals {
tag = format("tf-%s-%s", formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339), random_string._.result)
# Common tags to be assigned to all resources
common_tags = [
"created-by=terraform",
"tag=${local.tag}"
]
}

49
prepare-tf/run.sh Executable file
View File

@@ -0,0 +1,49 @@
#!/bin/sh
set -e
TIME=$(which time)
PROVIDER=$1
[ "$PROVIDER" ] || {
echo "Please specify a provider as first argument, or 'ALL' for parallel mode."
echo "Available providers:"
ls -1 source/modules
exit 1
}
[ "$TAG" ] || {
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
RANDOMTAG=$(base64 /dev/urandom | tr A-Z a-z | tr -d /+ | head -c5)
export TAG=tag-$TIMESTAMP-$RANDOMTAG
}
[ "$PROVIDER" = "ALL" ] && {
for PROVIDER in $(ls -1 source/modules); do
$TERMINAL -T $TAG-$PROVIDER -e sh -c "
export TAG=$TAG-$PROVIDER
$0 $PROVIDER
cd $TAG-$PROVIDER
bash
" &
done
exit 0
}
[ -d "source/modules/$PROVIDER" ] || {
echo "Provider '$PROVIDER' not found."
echo "Available providers:"
ls -1 source/modules
exit 1
}
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
cp -a source $TAG
cd $TAG
cp -r modules/$PROVIDER modules/PROVIDER
$TIME -o time.1.init terraform init
$TIME -o time.2.stage1 terraform apply -auto-approve
cd stage2
$TIME -o ../time.3.init terraform init
$TIME -o ../time.4.stage2 terraform apply -auto-approve

View File

@@ -0,0 +1,19 @@
resource "random_string" "_" {
length = 4
number = false
special = false
upper = false
}
resource "time_static" "_" {}
locals {
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
tag = random_string._.result
# Common tags to be assigned to all resources
common_tags = [
"created-by-terraform",
format("created-at-%s", local.timestamp),
format("created-for-%s", local.tag)
]
}

View File

@@ -1,5 +1,5 @@
module "clusters" {
source = "./modules/linode"
source = "./modules/PROVIDER"
for_each = local.clusters
cluster_name = each.value.cluster_name
min_nodes_per_pool = var.min_nodes_per_pool
@@ -7,22 +7,24 @@ module "clusters" {
enable_arm_pool = var.enable_arm_pool
node_size = var.node_size
common_tags = local.common_tags
location = each.value.location
}
locals {
clusters = {
for i in range(101, 101 + var.how_many_clusters) :
i => {
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
#dashdash_kubeconfig = format("--kubeconfig=./stage2/kubeconfig.%03d", i)
cluster_name = format("%s-%03d", local.tag, i)
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
externalips_path = format("./stage2/externalips.%03d", i)
flags_path = format("./stage2/flags.%03d", i)
location = local.locations[i % length(local.locations)]
}
}
}
resource "local_file" "stage2" {
filename = "./stage2/main.tf"
filename = "./stage2/main.tf"
file_permission = "0644"
content = templatefile(
"./stage2.tmpl",
@@ -30,6 +32,15 @@ resource "local_file" "stage2" {
)
}
resource "local_file" "flags" {
for_each = local.clusters
filename = each.value.flags_path
file_permission = "0600"
content = <<-EOT
has_metrics_server: ${module.clusters[each.key].has_metrics_server}
EOT
}
resource "local_file" "kubeconfig" {
for_each = local.clusters
filename = each.value.kubeconfig_path
@@ -59,8 +70,8 @@ resource "null_resource" "wait_for_nodes" {
}
data "external" "externalips" {
for_each = local.clusters
depends_on = [ null_resource.wait_for_nodes ]
for_each = local.clusters
depends_on = [null_resource.wait_for_nodes]
program = [
"sh",
"-c",

View File

@@ -1,12 +1,13 @@
resource "digitalocean_kubernetes_cluster" "_" {
name = var.cluster_name
tags = local.common_tags
region = var.region
name = var.cluster_name
tags = var.common_tags
# Region is mandatory, so let's provide a default value.
region = var.location != null ? var.location : "nyc1"
version = var.k8s_version
node_pool {
name = "dok-x86"
tags = local.common_tags
name = "x86"
tags = var.common_tags
size = local.node_type
auto_scale = true
min_nodes = var.min_nodes_per_pool

View File

@@ -5,3 +5,7 @@ output "kubeconfig" {
output "cluster_id" {
value = digitalocean_kubernetes_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -8,10 +8,6 @@ variable "common_tags" {
default = []
}
locals {
common_tags = [for tag in var.common_tags : replace(tag, "=", "-")]
}
variable "node_size" {
type = string
default = "M"
@@ -46,14 +42,16 @@ locals {
node_type = var.node_types[var.node_size]
}
variable "region" {
# To view supported regions, run:
# doctl compute region list
variable "location" {
type = string
default = "ams3"
default = null
}
# To view supported versions, run:
# doctl kubernetes options versions -o json | jq -r .[].slug
variable "k8s_version" {
type = string
default = "1.21.3-do.0"
default = "1.21.5-do.0"
}

View File

@@ -0,0 +1,65 @@
resource "google_container_cluster" "_" {
name = var.cluster_name
project = local.project
location = local.location
min_master_version = var.k8s_version
# To deploy private clusters, uncomment the section below,
# and uncomment the block in network.tf.
# Private clusters require extra resources (Cloud NAT,
# router, network, subnet) and the quota for some of these
# resources is fairly low on GCP; so if you want to deploy
# a lot of private clusters (more than 10), you can use these
# blocks as a base but you will probably have to refactor
# things quite a bit (you will at least need to define a single
# shared router and use it across all the clusters).
/*
network = google_compute_network._.name
subnetwork = google_compute_subnetwork._.name
private_cluster_config {
enable_private_nodes = true
# This must be set to "false".
# (Otherwise, access to the public endpoint is disabled.)
enable_private_endpoint = false
# This must be set to a /28.
# I think it shouldn't collide with the pod network subnet.
master_ipv4_cidr_block = "10.255.255.0/28"
}
# Private clusters require "VPC_NATIVE" networking mode
# (as opposed to the legacy "ROUTES").
networking_mode = "VPC_NATIVE"
# ip_allocation_policy is required for VPC_NATIVE clusters.
ip_allocation_policy {
# This is the block that will be used for pods.
cluster_ipv4_cidr_block = "10.0.0.0/12"
# The services block is optional
# (GKE will pick one automatically).
#services_ipv4_cidr_block = ""
}
*/
node_pool {
name = "x86"
node_config {
tags = var.common_tags
machine_type = local.node_type
}
initial_node_count = var.min_nodes_per_pool
autoscaling {
min_node_count = var.min_nodes_per_pool
max_node_count = max(var.min_nodes_per_pool, var.max_nodes_per_pool)
}
}
# This is not strictly necessary.
# We'll see if we end up using it.
# (If it is removed, make sure to also remove the corresponding
# key+cert variables from outputs.tf!)
master_auth {
client_certificate_config {
issue_client_certificate = true
}
}
}

View File

@@ -0,0 +1,38 @@
/*
resource "google_compute_network" "_" {
name = var.cluster_name
project = local.project
# The default is to create subnets automatically.
# However, this creates one subnet per zone in all regions,
# which causes a quick exhaustion of the subnet quota.
auto_create_subnetworks = false
}
resource "google_compute_subnetwork" "_" {
name = var.cluster_name
ip_cidr_range = "10.254.0.0/16"
region = local.region
network = google_compute_network._.id
project = local.project
}
resource "google_compute_router" "_" {
name = var.cluster_name
region = local.region
network = google_compute_network._.name
project = local.project
}
resource "google_compute_router_nat" "_" {
name = var.cluster_name
router = google_compute_router._.name
region = local.region
project = local.project
# Everyone in the network is allowed to NAT out.
# (We would change this if we only wanted to allow specific subnets to NAT out.)
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
# Pick NAT addresses automatically.
# (We would change this if we wanted to use specific addresses to NAT out.)
nat_ip_allocate_option = "AUTO_ONLY"
}
*/

View File

@@ -0,0 +1,35 @@
data "google_client_config" "_" {}
output "kubeconfig" {
value = <<-EOT
apiVersion: v1
kind: Config
current-context: ${google_container_cluster._.name}
clusters:
- name: ${google_container_cluster._.name}
cluster:
server: https://${google_container_cluster._.endpoint}
certificate-authority-data: ${google_container_cluster._.master_auth[0].cluster_ca_certificate}
contexts:
- name: ${google_container_cluster._.name}
context:
cluster: ${google_container_cluster._.name}
user: client-token
users:
- name: client-cert
user:
client-key-data: ${google_container_cluster._.master_auth[0].client_key}
client-certificate-data: ${google_container_cluster._.master_auth[0].client_certificate}
- name: client-token
user:
token: ${data.google_client_config._.access_token}
EOT
}
output "cluster_id" {
value = google_container_cluster._.id
}
output "has_metrics_server" {
value = true
}

View File

@@ -0,0 +1,8 @@
terraform {
required_providers {
google = {
source = "hashicorp/google"
version = "4.5.0"
}
}
}

View File

@@ -0,0 +1,68 @@
variable "cluster_name" {
type = string
default = "deployed-with-terraform"
}
variable "common_tags" {
type = list(string)
default = []
}
variable "node_size" {
type = string
default = "M"
}
variable "min_nodes_per_pool" {
type = number
default = 2
}
variable "max_nodes_per_pool" {
type = number
default = 5
}
# FIXME
variable "enable_arm_pool" {
type = bool
default = false
}
variable "node_types" {
type = map(string)
default = {
"S" = "e2-small"
"M" = "e2-medium"
"L" = "e2-standard-2"
}
}
locals {
node_type = var.node_types[var.node_size]
}
# To view supported locations, run:
# gcloud compute zones list
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# gcloud container get-server-config --region=europe-north1 '--format=flattened(channels)'
# But it's also possible to just specify e.g. "1.20" and it figures it out.
variable "k8s_version" {
type = string
default = "1.21"
}
locals {
location = var.location != null ? var.location : "europe-north1-a"
region = replace(local.location, "/-[a-z]$/", "")
# Unfortunately, the following line doesn't work
# (that attribute just returns an empty string)
# so we have to hard-code the project name.
#project = data.google_client_config._.project
project = "prepare-tf"
}

View File

@@ -1,7 +1,8 @@
resource "linode_lke_cluster" "_" {
label = var.cluster_name
tags = var.common_tags
region = var.region
label = var.cluster_name
tags = var.common_tags
# "region" is mandatory, so let's provide a default value if none was given.
region = var.location != null ? var.location : "eu-central"
k8s_version = var.k8s_version
pool {

View File

@@ -5,3 +5,7 @@ output "kubeconfig" {
output "cluster_id" {
value = linode_lke_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -42,11 +42,11 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported versions, run:
# To view supported regions, run:
# linode-cli regions list
variable "region" {
variable "location" {
type = string
default = "us-east"
default = null
}
# To view supported versions, run:

View File

@@ -1,6 +1,7 @@
resource "oci_identity_compartment" "_" {
name = var.cluster_name
description = var.cluster_name
name = var.cluster_name
description = var.cluster_name
enable_delete = true
}
locals {

View File

@@ -9,3 +9,7 @@ output "kubeconfig" {
output "cluster_id" {
value = oci_containerengine_cluster._.id
}
output "has_metrics_server" {
value = false
}

View File

@@ -70,6 +70,13 @@ locals {
node_type = var.node_types[var.node_size]
}
# To view supported regions, run:
# oci iam region list | jq .data[].name
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# oci ce cluster-options get --cluster-option-id all | jq -r '.data["kubernetes-versions"][]'
variable "k8s_version" {

View File

@@ -1,13 +1,15 @@
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
tags = var.common_tags
version = var.k8s_version
cni = var.cni
name = var.cluster_name
region = var.location
tags = var.common_tags
version = var.k8s_version
cni = var.cni
delete_additional_resources = true
}
resource "scaleway_k8s_pool" "_" {
cluster_id = scaleway_k8s_cluster._.id
name = "scw-x86"
name = "x86"
tags = var.common_tags
node_type = local.node_type
size = var.min_nodes_per_pool

View File

@@ -5,3 +5,7 @@ output "kubeconfig" {
output "cluster_id" {
value = scaleway_k8s_cluster._.id
}
output "has_metrics_server" {
value = sort([var.k8s_version, "1.22"])[0] == "1.22"
}

View File

@@ -47,7 +47,12 @@ variable "cni" {
default = "cilium"
}
# See supported versions with:
variable "location" {
type = string
default = null
}
# To view supported versions, run:
# scw k8s version list -o json | jq -r .[].name
variable "k8s_version" {
type = string

View File

@@ -2,7 +2,7 @@ terraform {
required_providers {
kubernetes = {
source = "hashicorp/kubernetes"
version = "2.0.3"
version = "2.7.1"
}
}
}
@@ -119,6 +119,11 @@ resource "kubernetes_cluster_role_binding" "shpod_${index}" {
name = "shpod"
namespace = "shpod"
}
subject {
api_group = "rbac.authorization.k8s.io"
kind = "Group"
name = "shpod-cluster-admins"
}
}
resource "random_string" "shpod_${index}" {
@@ -135,9 +140,14 @@ provider "helm" {
}
resource "helm_release" "metrics_server_${index}" {
# Some providers pre-install metrics-server.
# Some don't. Let's install metrics-server,
# but only if it's not already installed.
count = yamldecode(file("./flags.${index}"))["has_metrics_server"] ? 0 : 1
provider = helm.cluster_${index}
repository = "https://charts.bitnami.com/bitnami"
chart = "metrics-server"
version = "5.8.8"
name = "metrics-server"
namespace = "metrics-server"
create_namespace = true
@@ -181,7 +191,7 @@ resource "kubernetes_config_map" "kubeconfig_${index}" {
- name: cluster-admin
user:
client-key-data: $${base64encode(tls_private_key.cluster_admin_${index}.private_key_pem)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request.cluster_admin_${index}.certificate)}
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate)}
EOT
}
}
@@ -195,11 +205,14 @@ resource "tls_cert_request" "cluster_admin_${index}" {
private_key_pem = tls_private_key.cluster_admin_${index}.private_key_pem
subject {
common_name = "cluster-admin"
organization = "system:masters"
# Note: CSR API v1 doesn't allow issuing certs with "system:masters" anymore.
#organization = "system:masters"
# We'll use this custom group name for the cluster-admin user instead.
organization = "shpod-cluster-admins"
}
}
resource "kubernetes_certificate_signing_request" "cluster_admin_${index}" {
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
provider = kubernetes.cluster_${index}
metadata {
name = "cluster-admin"
@@ -207,6 +220,7 @@ resource "kubernetes_certificate_signing_request" "cluster_admin_${index}" {
spec {
usages = ["client auth"]
request = tls_cert_request.cluster_admin_${index}.cert_request_pem
signer_name = "kubernetes.io/kube-apiserver-client"
}
auto_approve = true
}

View File

@@ -0,0 +1,40 @@
variable "how_many_clusters" {
type = number
default = 1
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# We map these values to different specific instance types for each provider,
# but the idea is that they should correspond to the following sizes:
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = false
}
variable "location" {
type = string
default = null
}
# TODO: perhaps handle if it's space-separated instead of newline?
locals {
locations = var.location == null ? [null] : split("\n", var.location)
}

View File

@@ -1,28 +0,0 @@
variable "how_many_clusters" {
type = number
default = 2
}
variable "node_size" {
type = string
default = "M"
# Can be S, M, L.
# S = 2 GB RAM
# M = 4 GB RAM
# L = 8 GB RAM
}
variable "min_nodes_per_pool" {
type = number
default = 1
}
variable "max_nodes_per_pool" {
type = number
default = 0
}
variable "enable_arm_pool" {
type = bool
default = true
}

View File

@@ -14,7 +14,9 @@ These tools can help you to create VMs on:
- [Docker](https://docs.docker.com/engine/installation/)
- [Docker Compose](https://docs.docker.com/compose/install/)
- [Parallel SSH](https://code.google.com/archive/p/parallel-ssh/) (on a Mac: `brew install pssh`)
- [Parallel SSH](https://github.com/lilydjwg/pssh)
(should be installable with `pip install git+https://github.com/lilydjwg/pssh`;
on a Mac, try `brew install pssh`)
Depending on the infrastructure that you want to use, you also need to install
the CLI that is specific to that cloud. For OpenStack deployments, you will

View File

@@ -75,9 +75,11 @@ _cmd_createuser() {
echo '$USER_LOGIN ALL=(ALL) NOPASSWD:ALL' | sudo tee /etc/sudoers.d/$USER_LOGIN
"
# The MaxAuthTries is here to help with folks who have many SSH keys.
pssh "
set -e
sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
sudo sed -i 's/#MaxAuthTries 6/MaxAuthTries 42/' /etc/ssh/sshd_config
sudo service ssh restart
"
@@ -236,6 +238,12 @@ _cmd_docker() {
sudo add-apt-repository 'deb https://download.docker.com/linux/ubuntu bionic stable'
sudo apt-get -q update
sudo apt-get -qy install docker-ce
# Add registry mirror configuration.
if ! [ -f /etc/docker/daemon.json ]; then
echo '{\"registry-mirrors\": [\"https://mirror.gcr.io\"]}' | sudo tee /etc/docker/daemon.json
sudo systemctl restart docker
fi
"
##VERSION## https://github.com/docker/compose/releases
@@ -246,7 +254,7 @@ _cmd_docker() {
COMPOSE_VERSION=1.29.2
COMPOSE_PLATFORM='Linux-$(uname -m)'
fi
pssh -i "
pssh "
set -e
### Install docker-compose.
sudo curl -fsSL -o /usr/local/bin/docker-compose \
@@ -303,13 +311,15 @@ _cmd_kube() {
need_login_password
# Optional version, e.g. 1.13.5
KUBEVERSION=$2
SETTINGS=tags/$TAG/settings.yaml
KUBEVERSION=$(awk '/^kubernetes_version:/ {print $2}' $SETTINGS)
if [ "$KUBEVERSION" ]; then
EXTRA_APTGET="=$KUBEVERSION-00"
EXTRA_KUBEADM="kubernetesVersion: v$KUBEVERSION"
else
EXTRA_APTGET=""
EXTRA_KUBEADM=""
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION*
Pin-Priority: 1000
EOF"
fi
# Install packages
@@ -320,7 +330,8 @@ _cmd_kube() {
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
sudo apt-get install -qy kubelet$EXTRA_APTGET kubeadm$EXTRA_APTGET kubectl$EXTRA_APTGET &&
sudo apt-get install -qy kubelet kubeadm kubectl &&
sudo apt-mark hold kubelet kubeadm kubectl
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
@@ -332,6 +343,11 @@ _cmd_kube() {
sudo swapoff -a"
fi
# Re-enable CRI interface in containerd
pssh "
echo '# Use default parameters for containerd.' | sudo tee /etc/containerd/config.toml
sudo systemctl restart containerd"
# Initialize kube control plane
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
@@ -341,19 +357,38 @@ kind: InitConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
bootstrapTokens:
- token: \$(cat /tmp/token)
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
discovery:
bootstrapToken:
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
token: \$(cat /tmp/token)
unsafeSkipCAVerification: true
nodeRegistration:
# Comment out the next line to switch back to Docker.
criSocket: /run/containerd/containerd.sock
ignorePreflightErrors:
- NumCPU
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
cgroupDriver: cgroupfs
# The following line is necessary when using Docker.
# It doesn't seem necessary when using containerd.
#cgroupDriver: cgroupfs
---
kind: ClusterConfiguration
apiVersion: kubeadm.k8s.io/v1beta2
apiServer:
certSANs:
- \$(cat /tmp/ipv4)
$EXTRA_KUBEADM
EOF
sudo kubeadm init --config=/tmp/kubeadm-config.yaml --ignore-preflight-errors=NumCPU
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
fi"
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
@@ -377,8 +412,8 @@ EOF
pssh --timeout 200 "
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
TOKEN=\$(ssh $SSHOPTS \$FIRSTNODE cat /tmp/token) &&
sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
sudo kubeadm join --config /tmp/kubeadm-config.yaml
fi"
# Install metrics server
@@ -442,7 +477,7 @@ EOF
# Install stern
##VERSION## https://github.com/stern/stern/releases
STERN_VERSION=1.20.1
FILENAME=stern_${STERN_VERSION}_linux_${HERP_DERP_ARCH}
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
pssh "
if [ ! -x /usr/local/bin/stern ]; then
@@ -464,12 +499,12 @@ EOF
# Install kustomize
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
KUSTOMIZE_VERSION=v4.4.0
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${HERP_DERP_ARCH}.tar.gz
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kustomize ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kustomize
echo complete -C /usr/local/bin/kustomize kustomize | sudo tee /etc/bash_completion.d/kustomize
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
kustomize version
fi"
@@ -529,8 +564,9 @@ EOF
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
pssh "
if [ ! -x /usr/local/bin/tilt ]; then
FILENAME=tilt.0.22.13.linux.$TILT_ARCH.tar.gz
curl -fsSL https://github.com/tilt-dev/tilt/releases/latest/download/\$FILENAME |
TILT_VERSION=0.22.15
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin tilt
tilt version
fi"
@@ -677,7 +713,7 @@ _cmd_tailhist () {
ARCH=${ARCHITECTURE-amd64}
[ "$ARCH" = "aarch64" ] && ARCH=arm64
pssh -i "
pssh "
set -e
wget https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd

View File

@@ -1,7 +1,7 @@
infra_start() {
COUNT=$1
cp terraform/*.tf tags/$TAG
cp terraform-openstack/*.tf tags/$TAG
(
cd tags/$TAG
if ! terraform init; then
@@ -14,9 +14,9 @@ infra_start() {
die "Aborting."
fi
echo prefix = \"$TAG\" >> terraform.tfvars
echo count = \"$COUNT\" >> terraform.tfvars
echo how_many_nodes = \"$COUNT\" >> terraform.tfvars
terraform apply -auto-approve
terraform output ip_addresses > ips.txt
terraform output -raw ip_addresses > ips.txt
)
}

View File

@@ -26,6 +26,7 @@ pssh() {
$PSSH -h $HOSTFILE -l $LOGIN \
--par 100 \
--timeout 300 \
-O LogLevel=ERROR \
-O UserKnownHostsFile=/dev/null \
-O StrictHostKeyChecking=no \

82
prepare-vms/netlify-dns.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/bin/sh
# https://open-api.netlify.com/#tag/dnsZone
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training".
echo ""
echo "Syntax:"
echo "$0 list"
echo "$0 add <name> <ipaddr>"
echo "$0 del <recordid>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu 185.145.250.0"
echo ""
exit 1
}
NETLIFY_USERID=$(jq .userId < ~/.config/netlify/config.json)
NETLIFY_TOKEN=$(jq -r .users[$NETLIFY_USERID].auth.token < ~/.config/netlify/config.json)
netlify() {
URI=$1
shift
http https://api.netlify.com/api/v1/$URI "$@" "Authorization:Bearer $NETLIFY_TOKEN"
}
ZONE_ID=$(netlify dns_zones |
jq -r '.[] | select ( .name == "container.training" ) | .id')
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
_add() {
NAME=$1.container.training
ADDR=$2
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
}
_del() {
RECORD_ID=$1
# OK, since that one is dangerous, I'm putting the whole request explicitly here
http DELETE \
https://api.netlify.com/api/v1/dns_zones/$ZONE_ID/dns_records/$RECORD_ID \
"Authorization:Bearer $NETLIFY_TOKEN"
}
case "$1" in
list)
_list
;;
add)
_add $2 $3
;;
del)
_del $2
;;
*)
echo "Unknown command '$1'."
exit 1
;;
esac

View File

@@ -0,0 +1,33 @@
# Number of VMs per cluster
clustersize: 3
# The hostname of each node will be clusterprefix + a number
clusterprefix: oldversion
# Jinja2 template to use to generate ready-to-cut cards
cards_template: cards.html
# Use "Letter" in the US, and "A4" everywhere else
paper_size: A4
# Login and password that students will use
user_login: k8s
user_password: training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
kubernetes_version: 1.18.20
image:
steps:
- wait
- clusterize
- tools
- docker
- createuser
- webssh
- tailhist
- kube
- kubetools
- kubetest

View File

@@ -3,7 +3,7 @@ set -e
export AWS_INSTANCE_TYPE=t3a.small
INFRA=infra/aws-us-east-2
INFRA=infra/aws-eu-north-1
STUDENTS=2
@@ -33,9 +33,15 @@ TAG=$PREFIX-$SETTINGS
--settings settings/$SETTINGS.yaml \
--students $STUDENTS
#INFRA=infra/aws-us-west-1
INFRA=infra/enix
export AWS_INSTANCE_TYPE=t3a.medium
SETTINGS=admin-oldversion
TAG=$PREFIX-$SETTINGS
./workshopctl start \
--tag $TAG \
--infra $INFRA \
--settings settings/$SETTINGS.yaml \
--students $STUDENTS
SETTINGS=admin-test
TAG=$PREFIX-$SETTINGS

View File

@@ -0,0 +1,5 @@
resource "openstack_compute_keypair_v2" "ssh_deploy_key" {
name = var.prefix
public_key = file("~/.ssh/id_rsa.pub")
}

View File

@@ -0,0 +1,34 @@
resource "openstack_compute_instance_v2" "machine" {
count = var.how_many_nodes
name = format("%s-%04d", var.prefix, count.index+1)
image_name = var.image
flavor_name = var.flavor
security_groups = [openstack_networking_secgroup_v2.full_access.name]
key_pair = openstack_compute_keypair_v2.ssh_deploy_key.name
network {
name = openstack_networking_network_v2.internal.name
fixed_ip_v4 = cidrhost(openstack_networking_subnet_v2.internal.cidr, count.index+10)
}
}
resource "openstack_compute_floatingip_v2" "machine" {
count = var.how_many_nodes
# This is something provided to us by Enix when our tenant was provisioned.
pool = "Public Floating"
}
resource "openstack_compute_floatingip_associate_v2" "machine" {
count = var.how_many_nodes
floating_ip = openstack_compute_floatingip_v2.machine.*.address[count.index]
instance_id = openstack_compute_instance_v2.machine.*.id[count.index]
fixed_ip = cidrhost(openstack_networking_subnet_v2.internal.cidr, count.index+10)
}
output "ip_addresses" {
value = join("", formatlist("%s\n", openstack_compute_floatingip_v2.machine.*.address))
}
variable "flavor" {}
variable "image" {}

View File

@@ -1,23 +1,23 @@
resource "openstack_networking_network_v2" "internal" {
name = "${var.prefix}"
name = var.prefix
}
resource "openstack_networking_subnet_v2" "internal" {
name = "${var.prefix}"
network_id = "${openstack_networking_network_v2.internal.id}"
name = var.prefix
network_id = openstack_networking_network_v2.internal.id
cidr = "10.10.0.0/16"
ip_version = 4
dns_nameservers = ["1.1.1.1"]
}
resource "openstack_networking_router_v2" "router" {
name = "${var.prefix}"
name = var.prefix
external_network_id = "15f0c299-1f50-42a6-9aff-63ea5b75f3fc"
}
resource "openstack_networking_router_interface_v2" "router_internal" {
router_id = "${openstack_networking_router_v2.router.id}"
subnet_id = "${openstack_networking_subnet_v2.internal.id}"
router_id = openstack_networking_router_v2.router.id
subnet_id = openstack_networking_subnet_v2.internal.id
}

View File

@@ -0,0 +1,23 @@
terraform {
required_version = ">= 1"
required_providers {
openstack = {
source = "terraform-provider-openstack/openstack"
version = "~> 1.45.0"
}
}
}
provider "openstack" {
user_name = var.user
tenant_name = var.tenant
domain_name = var.domain
password = var.password
auth_url = var.auth_url
}
variable "user" {}
variable "tenant" {}
variable "domain" {}
variable "password" {}
variable "auth_url" {}

View File

@@ -7,6 +7,6 @@ resource "openstack_networking_secgroup_rule_v2" "full_access" {
ethertype = "IPv4"
protocol = ""
remote_ip_prefix = "0.0.0.0/0"
security_group_id = "${openstack_networking_secgroup_v2.full_access.id}"
security_group_id = openstack_networking_secgroup_v2.full_access.id
}

View File

@@ -0,0 +1,7 @@
variable "prefix" {
type = string
}
variable "how_many_nodes" {
type = number
}

View File

@@ -1,5 +0,0 @@
resource "openstack_compute_keypair_v2" "ssh_deploy_key" {
name = "${var.prefix}"
public_key = "${file("~/.ssh/id_rsa.pub")}"
}

View File

@@ -1,34 +0,0 @@
resource "openstack_compute_instance_v2" "machine" {
count = "${var.count}"
name = "${format("%s-%04d", "${var.prefix}", count.index+1)}"
image_name = "${var.image}"
flavor_name = "${var.flavor}"
security_groups = ["${openstack_networking_secgroup_v2.full_access.name}"]
key_pair = "${openstack_compute_keypair_v2.ssh_deploy_key.name}"
network {
name = "${openstack_networking_network_v2.internal.name}"
fixed_ip_v4 = "${cidrhost("${openstack_networking_subnet_v2.internal.cidr}", count.index+10)}"
}
}
resource "openstack_compute_floatingip_v2" "machine" {
count = "${var.count}"
# This is something provided to us by Enix when our tenant was provisioned.
pool = "Public Floating"
}
resource "openstack_compute_floatingip_associate_v2" "machine" {
count = "${var.count}"
floating_ip = "${openstack_compute_floatingip_v2.machine.*.address[count.index]}"
instance_id = "${openstack_compute_instance_v2.machine.*.id[count.index]}"
fixed_ip = "${cidrhost("${openstack_networking_subnet_v2.internal.cidr}", count.index+10)}"
}
output "ip_addresses" {
value = "${join("\n", openstack_compute_floatingip_v2.machine.*.address)}"
}
variable "flavor" {}
variable "image" {}

View File

@@ -1,13 +0,0 @@
provider "openstack" {
user_name = "${var.user}"
tenant_name = "${var.tenant}"
domain_name = "${var.domain}"
password = "${var.password}"
auth_url = "${var.auth_url}"
}
variable "user" {}
variable "tenant" {}
variable "domain" {}
variable "password" {}
variable "auth_url" {}

View File

@@ -1,7 +0,0 @@
variable "prefix" {
type = "string"
}
variable "count" {
type = "string"
}

View File

@@ -2,7 +2,6 @@
#/ /kube-halfday.yml.html 200!
#/ /kube-fullday.yml.html 200!
#/ /kube-twodays.yml.html 200!
/ /week2.yml.html 200!
# And this allows to do "git clone https://container.training".
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack

View File

@@ -96,7 +96,7 @@ Compose will be smart, and only recreate the containers that have changed.
When working with interpreted languages:
- dont' rebuild each time
- don't rebuild each time
- leverage a `volumes` section instead
@@ -250,6 +250,24 @@ For the full list, check: https://docs.docker.com/compose/compose-file/
---
## Configuring a Compose stack
- Follow [12-factor app configuration principles][12factorconfig]
(configure the app through environment variables)
- Provide (in the repo) a default environment file suitable for development
(no secret or sensitive value)
- Copy the default environment file to `.env` and tweak it
(or: provide a script to generate `.env` from a template)
[12factorconfig]: https://12factor.net/config
---
## Running multiple copies of a stack
- Copy the stack in two different directories, e.g. `front` and `frontcopy`
@@ -331,7 +349,7 @@ Use `docker-compose down -v` to remove everything including volumes.
- The data in the old container is lost...
- ... Except if the container is using a *volume*
- ...Except if the container is using a *volume*
- Compose will then re-attach that volume to the new container
@@ -343,6 +361,102 @@ Use `docker-compose down -v` to remove everything including volumes.
---
## Gotchas with volumes
- Unfortunately, Docker volumes don't have labels or metadata
- Compose tracks volumes thanks to their associated container
- If the container is deleted, the volume gets orphaned
- Example: `docker-compose down && docker-compose up`
- the old volume still exists, detached from its container
- a new volume gets created
- `docker-compose down -v`/`--volumes` deletes volumes
(but **not** `docker-compose down && docker-compose down -v`!)
---
## Managing volumes explicitly
Option 1: *named volumes*
```yaml
services:
app:
volumes:
- data:/some/path
volumes:
data:
```
- Volume will be named `<project>_data`
- It won't be orphaned with `docker-compose down`
- It will correctly be removed with `docker-compose down -v`
---
## Managing volumes explicitly
Option 2: *relative paths*
```yaml
services:
app:
volumes:
- ./data:/some/path
```
- Makes it easy to colocate the app and its data
(for migration, backups, disk usage accounting...)
- Won't be removed by `docker-compose down -v`
---
## Managing complex stacks
- Compose provides multiple features to manage complex stacks
(with many containers)
- `-f`/`--file`/`$COMPOSE_FILE` can be a list of Compose files
(separated by `:` and merged together)
- Services can be assigned to one or more *profiles*
- `--profile`/`$COMPOSE_PROFILES` can be a list of comma-separated profiles
(see [Using service profiles][profiles] in the Compose documentation)
- These variables can be set in `.env`
[profiles]: https://docs.docker.com/compose/profiles/
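A minimal sketch of a profile assignment (image, service, and profile names below are made up):
```yaml
services:
  app:
    image: myapp:latest          # always started
  debugger:
    image: busybox
    command: sleep infinity
    profiles: ["debug"]          # only started when the "debug" profile is active
```
Running `docker-compose --profile debug up` (or setting `COMPOSE_PROFILES=debug` in `.env`) starts the `debugger` service as well.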
---
## Dependencies
- A service can have a `depends_on` section
(listing one or more other services)
- This is used when bringing up individual services
(e.g. `docker-compose up blah` or `docker-compose run foo`)
⚠️ It doesn't make a service "wait" for another one to be up!
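A short sketch of the syntax (image names are placeholders):
```yaml
services:
  web:
    image: myapp:latest   # placeholder for the actual application image
    depends_on:
      - db                # `docker-compose up web` will also start `db`...
  db:
    image: postgres       # ...but won't wait for it to be ready to accept connections
```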
---
class: extra-details
## A bit of history and trivia

View File

@@ -109,7 +109,7 @@ class: extra-details
- Example: [ctr.run](https://ctr.run/)
.exercise[
.lab[
- Use ctr.run to automatically build a container image and run it:
```bash

View File

@@ -28,7 +28,7 @@ class: self-paced
- Likewise, it will take more than merely *reading* these slides
to make you an expert
- These slides include *tons* of exercises and examples
- These slides include *tons* of demos, exercises, and examples
- They assume that you have access to a machine running Docker

View File

@@ -0,0 +1,5 @@
## Exercise — Application Configuration
- Configure an application with a ConfigMap
- Generate a configuration file from the downward API

View File

@@ -0,0 +1,87 @@
# Exercise — Application Configuration
- We want to configure an application with a ConfigMap
- We will use the "rainbow" example shown previously
(HAProxy load balancing traffic to services in multiple namespaces)
- We won't provide the HAProxy configuration file
- Instead, we will provide a list of namespaces
(e.g. as a space-delimited list in a ConfigMap)
- Our Pod should generate the HAProxy configuration using the ConfigMap
---
## Setup
- Let's say that we have the "rainbow" app deployed:
```bash
kubectl apply -f ~/container.training/k8s/rainbow.yaml
```
- And a ConfigMap like the following one:
```bash
kubectl create configmap rainbow --from-literal=namespaces="blue green"
```
---
## Goal 1
- We want a Deployment and a Service called `rainbow`
- The `rainbow` Service should load balance across Namespaces `blue` and `green`
(i.e. to the Services called `color` in both these Namespaces)
- We want to be able to update the configuration:
- update the ConfigMap to put `blue green red`
- what should we do so that HAProxy picks up the change?
---
## Goal 2
- Check what happens if we specify a backend that doesn't exist
(e.g. add `purple` to the list of namespaces)
- If we specify invalid backends to HAProxy, it won't start!
- Implement a workaround among these two:
- remove invalid backends from the list before starting HAProxy
- wait until all backends are valid before starting HAProxy
---
## Goal 3
- We'd like HAProxy to pick up ConfigMap updates automatically
- How can we do that?
---
## Hints
- Check the following slides if you need help!
--
- We want to generate the HAProxy configuration in an `initContainer`
--
- The `namespaces` entry of the `rainbow` ConfigMap should be exposed to the `initContainer`
--
- The HAProxy configuration should be in a volume shared with HAProxy
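If you want to compare your approach with a possible solution, here is a rough, untested sketch (images, paths, and the generated HAProxy configuration are simplified assumptions; it uses the `rainbow` ConfigMap created above and the `color` Services; wrap it in a Deployment and add a Service for the actual exercise):
```yaml
apiVersion: v1
kind: Pod
metadata:
  name: rainbow-haproxy
spec:
  volumes:
  - name: config
    emptyDir: {}
  initContainers:
  - name: gen-config
    image: alpine
    env:
    - name: NAMESPACES
      valueFrom:
        configMapKeyRef:
          name: rainbow
          key: namespaces
    command:
    - sh
    - -c
    - |
      {
        echo "defaults"
        echo "  mode http"
        echo "  timeout connect 5s"
        echo "  timeout client 50s"
        echo "  timeout server 50s"
        echo "frontend fe"
        echo "  bind :80"
        echo "  default_backend be"
        echo "backend be"
        for NS in $NAMESPACES; do
          echo "  server $NS color.$NS.svc.cluster.local:80"
        done
      } > /config/haproxy.cfg
    volumeMounts:
    - name: config
      mountPath: /config
  containers:
  - name: haproxy
    image: haproxy:2.5
    volumeMounts:
    - name: config
      mountPath: /usr/local/etc/haproxy
```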

View File

@@ -0,0 +1,7 @@
## Exercise — Build a Cluster
- Deploy a cluster by configuring and running each component manually
- Add CNI networking
- Generate and validate ServiceAccount tokens

View File

@@ -0,0 +1,33 @@
# Exercise — Build a Cluster
- Step 1: deploy a cluster
- follow the steps in the "Dessine-moi un cluster" section
- Step 2: add CNI networking
- use kube-router
- interconnect with the route-reflector
- check that you receive the routes of other clusters
- Step 3: generate and validate ServiceAccount tokens
- see next slide for help!
---
## ServiceAccount tokens
- We need to generate a TLS key pair and certificate
- A self-signed key will work
- We don't need anything particular in the certificate
(no particular CN, key use flags, etc.)
- The key needs to be passed to both API server and controller manager
- Check that ServiceAccount tokens are generated correctly

View File

@@ -4,8 +4,6 @@
(we will use the `rng` service in the dockercoins app)
- Observe the correct behavior of the readiness probe
- See what happens when the load increases
(when deploying e.g. an invalid image)
- Observe the behavior of the liveness probe
(spoiler alert: it involves timeouts!)

View File

@@ -2,36 +2,85 @@
- We want to add healthchecks to the `rng` service in dockercoins
- First, deploy a new copy of dockercoins
- The `rng` service exhibits an interesting behavior under load:
- Then, add a readiness probe on the `rng` service
*its latency increases (which will cause probes to time out!)*
(using a simple HTTP check on the `/` route of the service)
- We want to see:
- Check what happens when deploying an invalid image for `rng` (e.g. `alpine`)
- what happens when the readiness probe fails
- Then roll back `rng` to the original image and add a liveness probe
- what happens when the liveness probe fails
(with the same parameters)
- Scale up the `worker` service (to 15+ workers) and observe
- What happens, and how can we improve the situation?
- how to set "appropriate" probes and probe parameters
---
## Goal
## Setup
- *Before* adding the readiness probe:
- First, deploy a new copy of dockercoins
updating the image of the `rng` service with `alpine` should break it
(for instance, in a brand new namespace)
- *After* adding the readiness probe:
- Pro tip #1: ping (e.g. with `httping`) the `rng` service at all times
updating the image of the `rng` service with `alpine` shouldn't break it
- it should initially show a few milliseconds latency
- When adding the liveness probe, nothing special should happen
- that will increase when we scale up
- Scaling the `worker` service will then cause disruptions
- it will also let us detect when the service goes "boom"
- The final goal is to understand why, and how to fix it
- Pro tip #2: also keep an eye on the web UI
---
## Readiness
- Add a readiness probe to `rng`
- this requires editing the pod template in the Deployment manifest
- use a simple HTTP check on the `/` route of the service
- keep all other parameters (timeouts, thresholds...) at their default values
- Check what happens when deploying an invalid image for `rng` (e.g. `alpine`)
*(If the probe was set up correctly, the app will continue to work,
because Kubernetes won't switch over the traffic to the `alpine` containers,
because they don't pass the readiness probe.)*
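For reference, the probe definition could look like this (assuming the `rng` container listens on port 80):
```yaml
# Fragment of the pod template in the rng Deployment
containers:
- name: rng
  image: dockercoins/rng:v0.1
  readinessProbe:
    httpGet:
      path: /
      port: 80
    # periodSeconds, timeoutSeconds, thresholds... left at their defaults
```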
---
## Readiness under load
- Then roll back `rng` to the original image
- Check what happens when we scale up the `worker` Deployment to 15+ workers
(get the latency above 1 second)
*(We should now observe intermittent unavailability of the service, i.e. every
30 seconds it will be unreachable for a bit, then come back, then go away again, etc.)*
---
## Liveness
- Now replace the readiness probe with a liveness probe
- What happens now?
*(At first the behavior looks the same as with the readiness probe:
service becomes unreachable, then reachable again, etc.; but there is
a significant difference behind the scenes. What is it?)*
---
## Readiness and liveness
- Bonus questions!
- What happens if we enable both probes at the same time?
- What strategies can we use so that both probes are useful?

View File

@@ -6,7 +6,7 @@
- the web app itself (dockercoins, NGINX, whatever we want)
- an ingress controller (we suggest Traefik)
- an ingress controller
- a domain name (use `\*.nip.io` or `\*.localdev.me`)
@@ -16,7 +16,7 @@
## Goal
- We want to be able to access the web app using an URL like:
- We want to be able to access the web app using a URL like:
http://webapp.localdev.me
@@ -30,11 +30,13 @@
## Hints
- Traefik can be installed with Helm
- For the ingress controller, we can use:
(it can be found on the Artifact Hub)
- [ingress-nginx](https://github.com/kubernetes/ingress-nginx/blob/main/docs/deploy/index.md)
- If using Kubernetes 1.22+, make sure to use Traefik 2.5+
- the [Traefik Helm chart](https://doc.traefik.io/traefik/getting-started/install-traefik/#use-the-helm-chart)
- the container.training [Traefik DaemonSet](https://raw.githubusercontent.com/jpetazzo/container.training/main/k8s/traefik-v2.yaml)
- If our cluster supports LoadBalancer Services: easy
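Once a controller is running, the Ingress itself could look roughly like this (the Service name and ingress class are assumptions; adjust them to your setup):
```yaml
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: webapp
spec:
  ingressClassName: traefik       # or e.g. "nginx", depending on the controller installed
  rules:
  - host: webapp.localdev.me
    http:
      paths:
      - path: /
        pathType: Prefix
        backend:
          service:
            name: webapp          # hypothetical Service exposing the web app
            port:
              number: 80
```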

View File

@@ -1,3 +1,5 @@
⚠️ BROKEN EXERCISE - DO NOT USE
## Exercise — Ingress Secret Policy
*Implement policy to limit impact of ingress controller vulnerabilities.*

View File

@@ -1,3 +1,5 @@
⚠️ BROKEN EXERCISE - DO NOT USE
# Exercise — Ingress Secret Policy
- Most ingress controllers have access to all Secrets
@@ -8,56 +10,86 @@
(by allowing an attacker to access all secrets, including API tokens)
- See for instance [CVE-2021-25742](https://github.com/kubernetes/ingress-nginx/issues/7837)
- How can we prevent that?
---
## Preparation
## Step 1: Ingress Controller
- Deploy an ingress controller
- Deploy an Ingress Controller
- Deploy cert-manager
- Create a ClusterIssuer using Let's Encrypt
(suggestion: also create a ClusterIssuer using LE's staging env)
(e.g. Traefik or NGINX; you can use @@LINK[k8s/traefik-v2.yaml])
- Create a trivial web app (e.g. NGINX, `jpetazzo/color`...)
- Expose it with an Ingress
(e.g. use `app.<ip-address>.nip.io`)
- Check that you can access it through `http://app.<ip-address>.nip.io`
---
## Step 2: cert-manager
- Deploy cert-manager
- Create a ClusterIssuer using Let's Encrypt staging environment
(e.g. with @@LINK[k8s/cm-clusterissuer.yaml])
- Create an Ingress for the app, with TLS enabled
(e.g. use `appsecure.<ip-address>.nip.io`)
- Tell cert-manager to obtain a certificate for that Ingress
(suggestion: use the `cert-manager.io/cluster-issuer` annotation)
- option 1: manually create a Certificate (e.g. with @@LINK[k8s/cm-certificate.yaml])
- option 2: use the `cert-manager.io/cluster-issuer` annotation
- Check that the Let's Encrypt certificate was issued
---
## Strategy
## Step 3: RBAC
- Remove the ingress controller's permission to read all Secrets
- Remove the Ingress Controller's permission to read all Secrets
- Grant selective access to Secrets
- Restart the Ingress Controller
(only give access to secrets that hold ingress TLS keys and certs)
- Check that https://appsecure doesn't serve the Let's Encrypt cert
- Automatically grant access by using Kyverno's "generate" mechanism
- Grant permission to read the certificate's Secret
(automatically create Role + RoleBinding when Certificate is created)
- Bonus: think about threat model for an insider attacker
(and how to mitigate it)
- Check that https://appsecure serves the Let's Encrypt cert again
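One way to grant selective access is a Role restricted with `resourceNames` (the namespace and Secret name below are hypothetical):
```yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: read-tls-secret
  namespace: default                  # namespace where the TLS Secret lives
rules:
- apiGroups: [""]
  resources: ["secrets"]
  resourceNames: ["appsecure-tls"]    # hypothetical name of the certificate's Secret
  verbs: ["get", "watch"]
```
A matching RoleBinding then grants that Role to the Ingress Controller's ServiceAccount.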
---
## Goal
## Step 4: Kyverno
- When a Certificate (cert-manager CRD) is created, automatically create:
- Install Kyverno
- A Role granting read access to the Certificate's Secret
- Write a Kyverno policy to automatically grant permission to read Secrets
- A RoleBinding granting that Role to our Ingress controller
(e.g. when a cert-manager Certificate is created)
- Check that the Ingress controller TLS still works
- Check @@LINK[k8s/kyverno-namespace-setup.yaml] for inspiration
- ...But that the Ingress controller can't read other secrets
- Hint: you need to automatically create a Role and RoleBinding
- Create another app + another Ingress with TLS
- Check that the Certificate, Secret, Role, RoleBinding are created
- Check that the new app correctly serves the Let's Encrypt cert
---
## Step 5: double-check
- Check that the Ingress Controller can't access other secrets
(e.g. by manually creating a Secret and checking with `kubectl exec`?)

View File

@@ -8,25 +8,37 @@
- We'll use one Deployment for each component
(see next slide for the images to use)
(created with `kubectl create deployment`)
- We'll connect them with Services
- We'll check that we can access the web UI in a browser
(created with `kubectl expose`)
---
## Images
- hasher → `dockercoins/hasher:v0.1`
- We'll use the following images:
- redis → `redis`
- hasher → `dockercoins/hasher:v0.1`
- rng → `dockercoins/rng:v0.1`
- redis → `redis`
- webui → `dockercoins/webui:v0.1`
- rng → `dockercoins/rng:v0.1`
- worker → `dockercoins/worker:v0.1`
- webui → `dockercoins/webui:v0.1`
- worker → `dockercoins/worker:v0.1`
- All services should be internal services, except the web UI
(since we want to be able to connect to the web UI from outside)
---
class: pic
![Dockercoins architecture diagram](images/dockercoins-diagram.png)
---
@@ -34,7 +46,7 @@
- We should be able to see the web UI in our browser
(with the graph showing approximatiely 3-4 hashes/second)
(with the graph showing approximately 3-4 hashes/second)
---
@@ -44,4 +56,4 @@
(check the logs of the worker; they indicate the port numbers)
- The web UI can be exposed with a NodePort Service
- The web UI can be exposed with a NodePort or LoadBalancer Service
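For reference, a NodePort Service for the web UI could look like this (assuming `kubectl create deployment webui ...` labeled the Pods with `app=webui` and that the web UI listens on port 80):
```yaml
apiVersion: v1
kind: Service
metadata:
  name: webui
spec:
  type: NodePort
  selector:
    app: webui
  ports:
  - port: 80
    targetPort: 80
```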

View File

@@ -0,0 +1,9 @@
## Exercise — Generating Ingress With Kyverno
- When a Service gets created, automatically generate an Ingress
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap

View File

@@ -0,0 +1,33 @@
# Exercise — Generating Ingress With Kyverno
When a Service gets created...
*(for instance, Service `blue` in Namespace `rainbow`)*
...Automatically generate an Ingress.
*(for instance, with host name `blue.rainbow.MYDOMAIN.COM`)*
---
## Goals
- Step 1: expose all services with a hard-coded domain name
- Step 2: only expose services that have a port named `http`
- Step 3: configure the domain name with a per-namespace ConfigMap
(e.g. `kubectl create configmap ingress-domain-name --from-literal=domain=1.2.3.4.nip.io`)
---
## Hints
- We want to use a Kyverno `generate` ClusterPolicy
- For step 1, check [Generate Resources](https://kyverno.io/docs/writing-policies/generate/) documentation
- For step 2, check [Preconditions](https://kyverno.io/docs/writing-policies/preconditions/) documentation
- For step 3, check [External Data Sources](https://kyverno.io/docs/writing-policies/external-data-sources/) documentation
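To give an idea of the overall shape of such a policy, here is a rough, untested sketch for step 1 (hard-coded domain; names and expressions are placeholders, so double-check them against the Kyverno documentation linked above):
```yaml
apiVersion: kyverno.io/v1
kind: ClusterPolicy
metadata:
  name: generate-ingress
spec:
  rules:
  - name: ingress-for-service
    match:
      any:
      - resources:
          kinds: ["Service"]
    generate:
      apiVersion: networking.k8s.io/v1
      kind: Ingress
      name: "{{request.object.metadata.name}}"
      namespace: "{{request.object.metadata.namespace}}"
      synchronize: true
      data:
        spec:
          rules:
          - host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.MYDOMAIN.COM"
            http:
              paths:
              - path: /
                pathType: Prefix
                backend:
                  service:
                    name: "{{request.object.metadata.name}}"
                    port:
                      name: http
```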

View File

@@ -0,0 +1,9 @@
## Exercise — Remote Cluster
- Install kubectl locally
- Retrieve the kubeconfig file of our remote cluster
- Deploy dockercoins on that cluster
- Access an internal service without exposing it

View File

@@ -0,0 +1,62 @@
# Exercise — Remote Cluster
- We want to control a remote cluster
- Then we want to run a copy of dockercoins on that cluster
- We want to be able to connect to an internal service
---
## Goal
- Be able to access e.g. hasher, rng, or webui
(without exposing them with a NodePort or LoadBalancer service)
---
## Getting access to the cluster
- If you don't have `kubectl` on your machine, install it
- Download the kubeconfig file from the remote cluster
(you can use `scp` or even copy-paste it)
- If you already have a kubeconfig file on your machine:
- save the remote kubeconfig with another name (e.g. `~/.kube/config.remote`)
- set the `KUBECONFIG` environment variable to point to that file name
- ...or use the `--kubeconfig=...` option with `kubectl`
- Check that you can access the cluster (e.g. `kubectl get nodes`)
---
## If you get an error...
⚠️ The following applies to clusters deployed with `kubeadm`
- If you have a cluster where the nodes are named `node1`, `node2`, etc.
- `kubectl` commands might show connection errors with internal IP addresses
(e.g. 10.10... or 172.17...)
- In that case, you might need to edit the `kubeconfig` file:
- find the server address
- update it to put the *external* address of the first node of the cluster
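For reference, the field to edit is `clusters[].cluster.server`; a trimmed-down, hypothetical kubeconfig looks like this:
```yaml
apiVersion: v1
kind: Config
clusters:
- name: kubernetes
  cluster:
    certificate-authority-data: LS0tLS1CRUdJTi...   # leave this as-is
    server: https://A.B.C.D:6443                    # put the node's *external* address here
contexts:
- name: kubernetes-admin@kubernetes
  context: { cluster: kubernetes, user: kubernetes-admin }
current-context: kubernetes-admin@kubernetes
users:
- name: kubernetes-admin
  user: {}                                          # credentials omitted in this sketch
```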
---
## Deploying an app
- Deploy another copy of dockercoins from your local machine
- Access internal services (e.g. with `kubectl port-forward`)

View File

@@ -2,7 +2,7 @@
This is a "combo exercise" to practice the following concepts:
- Secrets (mounting them in containers)
- Secrets (exposing them in containers)
- RBAC (granting specific permissions to specific users)
@@ -14,17 +14,35 @@ For this exercise, you will need two clusters.
(It can be two local clusters.)
We will call them "source cluster" and "target cluster".
We will call them "dev cluster" and "prod cluster".
---
## Overview
- For simplicity, our application will be NGINX (or `jpetazzo/color`)
- Our application needs two secrets:
- a *logging API token* (not too sensitive; same in dev and prod)
- a *database password* (sensitive; different in dev and prod)
- Secrets can be exposed as env vars, or mounted in volumes
(it doesn't matter for this exercise)
- We want to prepare and deploy the application in the dev cluster
- ...Then deploy it to the prod cluster
---
## Step 1 (easy)
- Install the sealed secrets operator on both clusters
- On the dev cluster, create a Namespace called `dev`
- On source cluster, create a Namespace called `dev`
- Create two sealed secrets, `verysecure` and `veryverysecure`
- Create the two secrets, `logging-api-token` and `database-password`
(the content doesn't matter; put a random string of your choice)
@@ -34,66 +52,66 @@ We will call them "source cluster" and "target cluster".
- Verify that the secrets are available to the Deployment
(e.g. with `kubectl exec`)
- Generate YAML manifests for the application (Deployment+Secrets)
---
## Step 2 (medium)
- Create another Namespace called `prod`
- Deploy the sealed secrets operator on the dev cluster
(on the source cluster)
- In the YAML, replace the Secrets with SealedSecrets
- Create the same Deployment `app` using both secrets
- Delete the `dev` Namespace, recreate it, redeploy the app
- Verify that the secrets are available to the Deployment
(to make sure everything works fine)
- Create a `staging` Namespace and try to deploy the app
- If something doesn't work, fix it
--
- Hint: set the *scope* of the sealed secrets
---
## Step 3 (hard)
- On the target cluster, create a Namespace called `prod`
- On the prod cluster, create a Namespace called `prod`
- Create the `app` Deployment and both sealed secrets
- Try to deploy the application using the YAML manifests
(do not copy the Secrets; only the sealed secrets)
- It won't work (the cluster needs the sealing key)
- Check the next slide if you need a hint!
- Fix it!
(check the next slides if you need hints)
--
- You will have to copy the Sealed Secret private key
--
- And restart the operator so that it picks up the key
---
## Step 4 (medium)
On the target cluster, create the Namespace `dev`.
Let's say that user `alice` has access to the target cluster.
Let's say that we have a user called `alice` on the prod cluster.
(You can use `kubectl --as=alice` to impersonate her.)
We want Alice to be able to:
- deploy the whole application
- deploy the whole application in the `prod` namespace
- access the `verysecure` secret
- access the *logging API token* secret
- but *not* the `veryverysecure` secret
- but *not* the *database password* secret
---
## Step 5 (hard)
- Make sure that Alice can view the logs of the Deployment
- Can you think of a way for Alice to access the `veryverysecure` Secret?
(check next slide for a hint)
--
- `kubectl exec`, maybe?
--
- Can you think of a way to prevent that?
- view the logs of the app

View File

@@ -0,0 +1,9 @@
## Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools

View File

@@ -0,0 +1,69 @@
# Exercise — Terraform Node Pools
- Write a Terraform configuration to deploy a cluster
- The cluster should have two node pools with autoscaling
- Deploy two apps, each using exclusively one node pool
- Bonus: deploy an app balanced across both node pools
---
## Cluster deployment
- Write a Terraform configuration to deploy a cluster
- We want to have two node pools with autoscaling
- Example for sizing:
- 4 GB / 1 CPU per node
- pools of 1 to 4 nodes
---
## Cluster autoscaling
- Deploy an app on the cluster
(you can use `nginx`, `jpetazzo/color`...)
- Set a resource request (e.g. 1 GB RAM)
- Scale up and verify that the autoscaler kicks in
---
## Pool isolation
- We want to deploy two apps
- The first app should be deployed exclusively on the first pool
- The second app should be deployed exclusively on the second pool
- Check the next slide for hints!
---
## Hints
- One solution involves adding a `nodeSelector` to the pod templates
- Another solution involves adding:
- `taints` to the node pools
- matching `tolerations` to the pod templates
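As a rough illustration (the label and taint keys below are assumptions; for instance, GKE labels each node with `cloud.google.com/gke-nodepool=<pool-name>`):
```yaml
# Fragment of a Deployment's pod template
spec:
  nodeSelector:
    cloud.google.com/gke-nodepool: pool-1   # option 1: only schedule on nodes of pool-1
  tolerations:                              # option 2: if pool-2 carries the taint
  - key: dedicated                          #   dedicated=pool-2:NoSchedule, only pods
    operator: Equal                         #   with this toleration can land there
    value: pool-2
    effect: NoSchedule
```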
---
## Balancing
- Step 1: make sure that the pools are not balanced
- Step 2: deploy a new app, check that it goes to the emptiest pool
- Step 3: update the app so that it balances (as much as possible) between pools

60
slides/find-unmerged-changes.sh Executable file
View File

@@ -0,0 +1,60 @@
#!/bin/sh
# The materials for a given training live in their own branch.
# Sometimes, we write custom content (or simply new content) for a training,
# and that content doesn't get merged back to main. This script tries to
# detect that with the following heuristics:
# - list all remote branches
# - for each remote branch, list the changes that weren't merged into main
# (using "diff main...$BRANCH", three dots)
# - ignore a bunch of training-specific files that change all the time anyway
# - for the remaining files, compute the diff between main and the branch
# (using "diff main..$BRANCH", two dots)
# - ignore changes of less than 10 lines
# - also ignore a few red herrings
# - display whatever is left
# For "git diff" (in the filter function) to work correctly, we must be
# at the root of the repo.
cd $(git rev-parse --show-toplevel)
BRANCHES=$(git branch -r | grep -v origin/HEAD | grep origin/2)
filter() {
threshold=10
while read filename; do
case $filename in
# Generic training-specific files
slides/*.html) continue;;
slides/*.yml) continue;;
slides/logistics*.md) continue;;
# Specific content that can be ignored
#slides/containers/Local_Environment.md) threshold=100;;
# Content that was moved/refactored enough to confuse us
slides/containers/Local_Environment.md) threshold=100;;
slides/exercises.md) continue;;
slides/k8s/batch-jobs) threshold=20;;
# Renames
*/{*}*) continue;;
esac
git diff --find-renames --numstat main..$BRANCH -- "$filename" | {
# If the files are identical, the diff will be empty, and "read" will fail.
read plus minus filename || return
# Ignore binary files (FIXME though?)
if [ $plus = - ]; then
return
fi
diff=$((plus-minus))
if [ $diff -gt $threshold ]; then
echo git diff main..$BRANCH -- $filename
fi
}
done
}
for BRANCH in $BRANCHES; do
if FILES=$(git diff --find-renames --name-only main...$BRANCH | filter | grep .); then
echo "🌳 $BRANCH:"
echo "$FILES"
fi
done

70
slides/intro-fullday.yml Normal file
View File

@@ -0,0 +1,70 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
-
#- containers/Docker_Overview.md
#- containers/Docker_History.md
- containers/Training_Environment.md
#- containers/Installing_Docker.md
- containers/First_Containers.md
- containers/Background_Containers.md
#- containers/Start_And_Attach.md
- containers/Naming_And_Inspecting.md
#- containers/Labels.md
- containers/Getting_Inside.md
- containers/Initial_Images.md
-
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
-
- containers/Container_Networking_Basics.md
#- containers/Network_Drivers.md
- containers/Local_Development_Workflow.md
- containers/Container_Network_Model.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
-
- containers/Multi_Stage_Builds.md
#- containers/Publishing_To_Docker_Hub.md
- containers/Dockerfile_Tips.md
- containers/Exercise_Dockerfile_Advanced.md
#- containers/Docker_Machine.md
#- containers/Advanced_Dockerfiles.md
#- containers/Init_Systems.md
#- containers/Application_Configuration.md
#- containers/Logging.md
#- containers/Namespaces_Cgroups.md
#- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
#- containers/Container_Engines.md
#- containers/Pods_Anatomy.md
#- containers/Ecosystem.md
#- containers/Orchestration_Overview.md
- shared/thankyou.md
- containers/links.md

View File

@@ -0,0 +1,71 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- in-person
content:
- shared/title.md
# - shared/logistics.md
- containers/intro.md
- shared/about-slides.md
#- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
- - containers/Docker_Overview.md
- containers/Docker_History.md
- containers/Training_Environment.md
- containers/Installing_Docker.md
- containers/First_Containers.md
- containers/Background_Containers.md
- containers/Start_And_Attach.md
- - containers/Initial_Images.md
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
- - containers/Multi_Stage_Builds.md
- containers/Publishing_To_Docker_Hub.md
- containers/Dockerfile_Tips.md
- containers/Exercise_Dockerfile_Advanced.md
- - containers/Naming_And_Inspecting.md
- containers/Labels.md
- containers/Getting_Inside.md
- - containers/Container_Networking_Basics.md
- containers/Network_Drivers.md
- containers/Container_Network_Model.md
#- containers/Connecting_Containers_With_Links.md
- containers/Ambassadors.md
- - containers/Local_Development_Workflow.md
- containers/Windows_Containers.md
- containers/Working_With_Volumes.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
- containers/Docker_Machine.md
- - containers/Advanced_Dockerfiles.md
- containers/Init_Systems.md
- containers/Application_Configuration.md
- containers/Logging.md
- containers/Resource_Limits.md
- - containers/Namespaces_Cgroups.md
- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
- - containers/Container_Engines.md
- containers/Pods_Anatomy.md
- containers/Ecosystem.md
- containers/Orchestration_Overview.md
- shared/thankyou.md
- containers/links.md

79
slides/intro-twodays.yml Normal file
View File

@@ -0,0 +1,79 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
- # DAY 1
- containers/Docker_Overview.md
#- containers/Docker_History.md
- containers/Training_Environment.md
- containers/First_Containers.md
- containers/Background_Containers.md
- containers/Initial_Images.md
-
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
-
- containers/Dockerfile_Tips.md
- containers/Multi_Stage_Builds.md
- containers/Publishing_To_Docker_Hub.md
- containers/Exercise_Dockerfile_Advanced.md
-
- containers/Naming_And_Inspecting.md
- containers/Labels.md
- containers/Start_And_Attach.md
- containers/Getting_Inside.md
- containers/Resource_Limits.md
- # DAY 2
- containers/Container_Networking_Basics.md
- containers/Network_Drivers.md
- containers/Container_Network_Model.md
-
- containers/Local_Development_Workflow.md
- containers/Working_With_Volumes.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
-
- containers/Installing_Docker.md
- containers/Container_Engines.md
- containers/Init_Systems.md
- containers/Advanced_Dockerfiles.md
-
- containers/Application_Configuration.md
- containers/Logging.md
- containers/Orchestration_Overview.md
-
- shared/thankyou.md
- containers/links.md
#-
#- containers/Docker_Machine.md
#- containers/Ambassadors.md
#- containers/Namespaces_Cgroups.md
#- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
#- containers/Pods_Anatomy.md
#- containers/Ecosystem.md

View File

@@ -32,7 +32,7 @@
- You're welcome to use whatever you like (e.g. AWS profiles)
.exercise[
.lab[
- Set the AWS region, API access key, and secret key:
```bash
@@ -58,7 +58,7 @@
- register it in our kubeconfig file
.exercise[
.lab[
- Update our kubeconfig file:
```bash

View File

@@ -20,13 +20,13 @@
## Suspension of disbelief
The exercises in this section assume that we have set up `kubectl` on our
The labs and demos in this section assume that we have set up `kubectl` on our
local machine in order to access a remote cluster.
We will therefore show how to access services and pods of the remote cluster,
from our local machine.
You can also run these exercises directly on the cluster (if you haven't
You can also run these commands directly on the cluster (if you haven't
installed and set up `kubectl` locally).
Running commands locally will be less useful
@@ -58,7 +58,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access the `webui` service through `kubectl proxy`
.exercise[
.lab[
- Run an API proxy in the background:
```bash
@@ -101,7 +101,7 @@ installed and set up `kubectl` to communicate with your cluster.
- Let's access our remote Redis server
.exercise[
.lab[
- Forward connections from local port 10000 to remote port 6379:
```bash

View File

@@ -198,7 +198,7 @@ Some examples ...
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
.exercise[
.lab[
- Go to the webhook directory:
```bash
@@ -244,7 +244,7 @@ class: extra-details
- We need to update the configuration with the correct `url`
.exercise[
.lab[
- Edit the webhook configuration manifest:
```bash
@@ -271,7 +271,7 @@ class: extra-details
(so if the webhook server is down, we can still create pods)
.exercise[
.lab[
- Register the webhook:
```bash
@@ -288,7 +288,7 @@ It is strongly recommended to tail the logs of the API server while doing that.
- Let's create a pod and try to set a `color` label
.exercise[
.lab[
- Create a pod named `chroma`:
```bash
@@ -328,7 +328,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Update the webhook configuration
.exercise[
.lab[
- First, check the ngrok URL of the tunnel for the Flask app:
```bash
@@ -395,7 +395,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
## Let's get to work!
.exercise[
.lab[
- Make sure we're in the right directory:
```bash
@@ -424,7 +424,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
... we'll store it in a ConfigMap, and install dependencies on the fly
.exercise[
.lab[
- Load the webhook source in a ConfigMap:
```bash
@@ -446,7 +446,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
(of course, there are plenty others options; e.g. `cfssl`)
.exercise[
.lab[
- Generate a self-signed certificate:
```bash
@@ -470,7 +470,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
- Let's reconfigure the webhook to use our Service instead of ngrok
.exercise[
.lab[
- Edit the webhook configuration manifest:
```bash
@@ -504,7 +504,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
Shell to the rescue!
.exercise[
.lab[
- Load up our cert and encode it in base64:
```bash

View File

@@ -66,7 +66,7 @@
- We'll ask `kubectl` to show us the exact requests that it's making
.exercise[
.lab[
- Check the URI for a cluster-scope, "core" resource, e.g. a Node:
```bash
@@ -122,7 +122,7 @@ class: extra-details
- What about namespaced resources?
.exercise[
.lab[
- Check the URI for a namespaced, "core" resource, e.g. a Service:
```bash
@@ -169,7 +169,7 @@ class: extra-details
## Accessing a subresource
.exercise[
.lab[
- List `kube-proxy` pods:
```bash
@@ -200,7 +200,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
- There are at least three useful commands to introspect the API server
.exercise[
.lab[
- List resources types, their group, kind, short names, and scope:
```bash
@@ -249,7 +249,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
The following assumes that `metrics-server` is deployed on your cluster.
.exercise[
.lab[
- Check that the metrics.k8s.io is registered with `metrics-server`:
```bash
@@ -271,7 +271,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- We can have multiple resources with the same name
.exercise[
.lab[
- Look for resources named `node`:
```bash
@@ -298,7 +298,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- But we can look at the raw data (with `-o json` or `-o yaml`)
.exercise[
.lab[
- Look at NodeMetrics objects with one of these commands:
```bash
@@ -320,7 +320,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
--
.exercise[
.lab[
- Display node metrics:
```bash
@@ -342,7 +342,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
- Then we can register that server by creating an APIService resource
.exercise[
.lab[
- Check the definition used for the `metrics-server`:
```bash

View File

@@ -103,7 +103,7 @@ class: extra-details
---
## `WithWaitGroup`,
## `WithWaitGroup`
- When we shutdown, tells clients (with in-flight requests) to retry

View File

@@ -20,25 +20,67 @@ The control plane can run:
- in containers, on the same nodes that run other application workloads
(example: [Minikube](https://github.com/kubernetes/minikube); 1 node runs everything, [kind](https://kind.sigs.k8s.io/))
(default behavior for local clusters like [Minikube](https://github.com/kubernetes/minikube), [kind](https://kind.sigs.k8s.io/)...)
- on a dedicated node
(example: a cluster installed with kubeadm)
(default behavior when deploying with kubeadm)
- on a dedicated set of nodes
(example: [Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way); [kops](https://github.com/kubernetes/kops))
([Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way); [kops](https://github.com/kubernetes/kops); also kubeadm)
- outside of the cluster
(example: most managed clusters like AKS, EKS, GKE)
(most managed clusters like AKS, DOK, EKS, GKE, Kapsule, LKE, OKE...)
---
class: pic
![Kubernetes architecture diagram: control plane and nodes](images/k8s-arch2.png)
![](images/control-planes/single-node-dev.svg)
---
class: pic
![](images/control-planes/managed-kubernetes.svg)
---
class: pic
![](images/control-planes/single-control-and-workers.svg)
---
class: pic
![](images/control-planes/stacked-control-plane.svg)
---
class: pic
![](images/control-planes/non-dedicated-stacked-nodes.svg)
---
class: pic
![](images/control-planes/advanced-control-plane.svg)
---
class: pic
![](images/control-planes/advanced-control-plane-split-events.svg)
---
class: pic
![Kubernetes architecture diagram: communication between components](images/k8s-arch4-thanks-luxas.png)
---
@@ -115,12 +157,6 @@ The kubelet agent uses a number of special-purpose protocols and interfaces, inc
---
class: pic
![Kubernetes architecture diagram: communication between components](images/k8s-arch4-thanks-luxas.png)
---
# The Kubernetes API
[
@@ -167,9 +203,9 @@ What does that mean?
## Let's experiment a bit!
- For the exercises in this section, connect to the first node of the `test` cluster
- For this section, connect to the first node of the `test` cluster
.exercise[
.lab[
- SSH to the first node of the test cluster
@@ -188,7 +224,7 @@ What does that mean?
- Let's create a simple object
.exercise[
.lab[
- Create a namespace with the following command:
```bash
@@ -210,7 +246,7 @@ This is equivalent to `kubectl create namespace hello`.
- Let's retrieve the object we just created
.exercise[
.lab[
- Read back our object:
```bash
@@ -318,7 +354,7 @@ class: extra-details
- The easiest way is to use `kubectl label`
.exercise[
.lab[
- In one terminal, watch namespaces:
```bash
@@ -366,7 +402,7 @@ class: extra-details
- DELETED resources
.exercise[
.lab[
- In one terminal, watch pods, displaying full events:
```bash

View File

@@ -110,7 +110,7 @@
- TLS client certificates
(that's what we've been doing with `kubectl` so far)
(that's the default for clusters provisioned with `kubeadm`)
- Bearer tokens
@@ -146,17 +146,15 @@
## Authentication with TLS certificates
- This is enabled in most Kubernetes deployments
- Enabled in almost all Kubernetes deployments
- The user name is derived from the `CN` in the client certificates
- The user name is indicated by the `CN` in the client certificate
- The groups are derived from the `O` fields in the client certificate
- The groups are indicated by the `O` fields in the client certificate
- From the point of view of the Kubernetes API, users do not exist
(i.e. they are not stored in etcd or anywhere else)
- Users can be created (and added to groups) independently of the API
(i.e. there is no resource with `kind: User`)
- The Kubernetes API can be set up to use your custom CA to validate client certs
@@ -164,44 +162,21 @@
class: extra-details
## Viewing our admin certificate
## Authentication for kubelet
- Let's inspect the certificate we've been using all this time!
- In most clusters, kubelets authenticate using certificates
.exercise[
(`O=system:nodes`, `CN=system:node:name-of-the-node`)
- This command will show the `CN` and `O` fields for our certificate:
```bash
kubectl config view \
--raw \
-o json \
| jq -r .users[0].user[\"client-certificate-data\"] \
| openssl base64 -d -A \
| openssl x509 -text \
| grep Subject:
```
- The Kubernetes API can act as a CA
]
(by wrapping an X509 CSR into a CertificateSigningRequest resource)
Let's break down that command together! 😅
- This enables kubelets to renew their own certificates
---
- It can also be used to issue user certificates
class: extra-details
## Breaking down the command
- `kubectl config view` shows the Kubernetes user configuration
- `--raw` includes certificate information (which shows as REDACTED otherwise)
- `-o json` outputs the information in JSON format
- `| jq ...` extracts the field with the user certificate (in base64)
- `| openssl base64 -d -A` decodes the base64 format (now we have a PEM file)
- `| openssl x509 -text` parses the certificate and outputs it as plain text
- `| grep Subject:` shows us the line that interests us
→ We are user `kubernetes-admin`, in group `system:masters`.
(We will see later how and why this gives us the permissions that we have.)
(but it lacks flexibility; e.g. validity can't be customized)
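For illustration, a CertificateSigningRequest wrapping a user CSR might look like this (the `request` field is a placeholder for a base64-encoded PEM CSR):
```yaml
apiVersion: certificates.k8s.io/v1
kind: CertificateSigningRequest
metadata:
  name: alice
spec:
  request: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS4uLg==   # base64-encoded CSR (placeholder)
  signerName: kubernetes.io/kube-apiserver-client
  usages: ["client auth"]
```
The certificate is issued once the CSR is approved (e.g. with `kubectl certificate approve alice`).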
---
@@ -215,17 +190,31 @@ class: extra-details
(if their key is compromised, or they leave the organization)
- Option 1: re-create a new CA and re-issue everyone's certificates
<br/>
→ Maybe OK if we only have a few users; no way otherwise
- Issue short-lived certificates if you use them to authenticate users!
- Option 2: don't use groups; grant permissions to individual users
<br/>
→ Inconvenient if we have many users and teams; error-prone
(short-lived = a few hours)
- Option 3: issue short-lived certificates (e.g. 24 hours) and renew them often
<br/>
→ This can be facilitated by e.g. Vault or by the Kubernetes CSR API
- This can be facilitated by e.g. Vault, cert-manager...
---
## What if a certificate is compromised?
- Option 1: wait for the certificate to expire
(which is why short-lived certs are convenient!)
- Option 2: remove access from that certificate's user and groups
- if that user was `bob.smith`, create a new user `bob.smith.2`
- if Bob was in groups `dev`, create a new group `dev.2`
- let's agree that this is not a great solution!
- Option 3: re-create a new CA and re-issue all certificates
- let's agree that this is an even worse solution!
---
@@ -269,6 +258,95 @@ class: extra-details
class: extra-details
## Checking our authentication method
- Let's check our kubeconfig file
- Do we have a certificate, a token, or something else?
---
class: extra-details
## Inspecting a certificate
If we have a certificate, let's use the following command:
```bash
kubectl config view \
--raw \
-o json \
| jq -r .users[0].user[\"client-certificate-data\"] \
| openssl base64 -d -A \
| openssl x509 -text \
| grep Subject:
```
This command will show the `CN` and `O` fields for our certificate.
---
class: extra-details
## Breaking down the command
- `kubectl config view` shows the Kubernetes user configuration
- `--raw` includes certificate information (which shows as REDACTED otherwise)
- `-o json` outputs the information in JSON format
- `| jq ...` extracts the field with the user certificate (in base64)
- `| openssl base64 -d -A` decodes the base64 format (now we have a PEM file)
- `| openssl x509 -text` parses the certificate and outputs it as plain text
- `| grep Subject:` shows us the line that interests us
→ We are user `kubernetes-admin`, in group `system:masters`.
(We will see later how and why this gives us the permissions that we have.)
---
class: extra-details
## Inspecting a token
If we have a token, let's use the following command:
```bash
kubectl config view \
--raw \
-o json \
| jq -r .users[0].user.token \
| base64 -d \
| cut -d. -f2 \
| base64 -d \
| jq .
```
If our token is a JWT / OIDC token, this command will show its content.
---
class: extra-details
## Other authentication methods
- Other types of tokens
- these tokens are typically shorter than JWT or OIDC tokens
- it is generally not possible to extract information from them
- Plugins
- some clusters use external `exec` plugins
- these plugins typically use API keys to generate or obtain tokens
- example: the AWS EKS authenticator works this way
---
class: extra-details
## Token authentication in practice
- We are going to list existing service accounts
@@ -283,7 +361,7 @@ class: extra-details
## Listing service accounts
.exercise[
.lab[
- The resource name is `serviceaccount` or `sa` for short:
```bash
@@ -300,7 +378,7 @@ class: extra-details
## Finding the secret
.exercise[
.lab[
- List the secrets for the `default` service account:
```bash
@@ -320,7 +398,7 @@ class: extra-details
- The token is stored in the secret, wrapped with base64 encoding
.exercise[
.lab[
- View the secret:
```bash
@@ -343,7 +421,7 @@ class: extra-details
- Let's send a request to the API, without and with the token
.exercise[
.lab[
- Find the ClusterIP for the `kubernetes` service:
```bash
@@ -417,6 +495,49 @@ class: extra-details
---
class: extra-details
## Listing all possible verbs
- The Kubernetes API is self-documented
- We can ask it which resources, subresources, and verb exist
- One way to do this is to use:
- `kubectl get --raw /api/v1` (for core resources with `apiVersion: v1`)
- `kubectl get --raw /apis/<group>/<version>` (for other resources)
- The JSON response can be formatted with e.g. `jq` for readability
---
class: extra-details
## Examples
- List all verbs across all `v1` resources
```bash
kubectl get --raw /api/v1 | jq -r .resources[].verbs[] | sort -u
```
- List all resources and subresources in `apps/v1`
```bash
kubectl get --raw /apis/apps/v1 | jq -r .resources[].name
```
- List which verbs are available on which resources in `networking.k8s.io`
```bash
kubectl get --raw /apis/networking.k8s.io/v1 | \
jq -r '.resources[] | .name + ": " + (.verbs | join(", "))'
```
---
## From rules to roles to rolebindings
- A *role* is an API object containing a list of *rules*
@@ -491,23 +612,16 @@ class: extra-details
## Running a pod
- We will run an `alpine` pod and install `kubectl` there
- We'll use [Nixery](https://nixery.dev/) to run a pod with `curl` and `kubectl`
.exercise[
- Nixery automatically generates images with the requested packages
- Run a one-time pod:
.lab[
- Run our pod:
```bash
kubectl run eyepod --rm -ti --restart=Never \
--image alpine
```
- Install `curl`, then use it to install `kubectl`:
```bash
apk add --no-cache curl
URLBASE=https://storage.googleapis.com/kubernetes-release/release
KUBEVER=$(curl -s $URLBASE/stable.txt)
curl -LO $URLBASE/$KUBEVER/bin/linux/amd64/kubectl
chmod +x kubectl
--image nixery.dev/shell/curl/kubectl -- bash
```
]
@@ -518,7 +632,7 @@ class: extra-details
- Normally, at this point, we don't have any API permission
.exercise[
.lab[
- Check our permissions with `kubectl`:
```bash
@@ -544,7 +658,7 @@ class: extra-details
(but again, we could call it `view` or whatever we like)
.exercise[
.lab[
- Create the new role binding:
```bash
@@ -602,7 +716,7 @@ It's important to note a couple of details in these flags...
- We should be able to *view* things, but not to *edit* them
.exercise[
.lab[
- Check our permissions with `kubectl`:
```bash
@@ -703,7 +817,7 @@ class: extra-details
- We can list the actions that are available to us:
````bash
```bash
kubectl auth can-i --list
```
@@ -857,6 +971,18 @@ class: extra-details
kubectl describe clusterrole cluster-admin
```
---
## `list` vs. `get`
⚠️ `list` grants read permissions to resources!
- It's not possible to give permission to list resources without also reading them
- This has implications for e.g. Secrets
(if a controller needs to be able to enumerate Secrets, it will be able to read them)
???
:EN:- Authentication and authorization in Kubernetes

View File

@@ -44,7 +44,7 @@
- Ask `kubectl` to generate the YAML
(with a `kubectl create --dry-run -o yaml`)
(with a `kubectl create --dry-run=client -o yaml`)
- Use The Docs, Luke
@@ -91,23 +91,19 @@
## Generating YAML without creating resources
- We can use the `--dry-run` option
- We can use the `--dry-run=client` option
.exercise[
.lab[
- Generate the YAML for a Deployment without creating it:
```bash
kubectl create deployment web --image nginx --dry-run
kubectl create deployment web --image nginx --dry-run=client
```
- Optionally clean it up with `kubectl neat`, too
]
Note: in recent versions of Kubernetes, we should use `--dry-run=client`
(Or `--dry-run=server`; more on that later!)
---
class: extra-details
@@ -130,9 +126,9 @@ class: extra-details
class: extra-details
## The limits of `kubectl apply --dry-run`
## The limits of `kubectl apply --dry-run=client`
.exercise[
.lab[
- Generate the YAML for a deployment:
```bash
@@ -146,7 +142,7 @@ class: extra-details
- Ask `kubectl` what would be applied:
```bash
kubectl apply -f web.yaml --dry-run --validate=false -o yaml
kubectl apply -f web.yaml --dry-run=client --validate=false -o yaml
```
]
@@ -165,7 +161,7 @@ class: extra-details
(all validation and mutation hooks will be executed)
.exercise[
.lab[
- Try the same YAML file as earlier, with server-side dry run:
```bash
@@ -204,7 +200,7 @@ class: extra-details
- `kubectl diff` does a server-side dry run, *and* shows differences
.exercise[
.lab[
- Try `kubectl diff` on the YAML that we tweaked earlier:
```bash
@@ -245,19 +241,37 @@ Note: we don't need to specify `--validate=false` here.
- Get started with `kubectl create deployment` and `kubectl expose`
- Dump the YAML with `kubectl get -o yaml`
(until you have something that works)
- Tweak that YAML and `kubectl apply` it back
- Then, run these commands again, but with `-o yaml --dry-run=client`
- Store that YAML for reference (for further deployments)
(to generate and save YAML manifests)
- Feel free to clean up the YAML:
- Try to apply these manifests in a clean environment
- remove fields you don't know
(e.g. a new Namespace)
- check that it still works!
- Check that everything works; tweak and iterate if needed
- That YAML will be useful later when using e.g. Kustomize or Helm
- Commit the YAML to a repo 💯🏆️
---
## "Day 2" YAML
- Don't hesitate to remove unused fields
(e.g. `creationTimestamp: null`, most `{}` values...)
- Check your YAML with:
[kube-score](https://github.com/zegl/kube-score) (installable with krew)
[kube-linter](https://github.com/stackrox/kube-linter)
- Check live resources with tools like [popeye](https://popeyecli.io/)
- Remember that like all linters, they need to be configured for your needs!
???

693
slides/k8s/aws-eks.md Normal file
View File

@@ -0,0 +1,693 @@
# Amazon EKS
- Elastic Kubernetes Service
- AWS runs the Kubernetes control plane
(all we see is an API server endpoint)
- Pods can run on any combination of:
- EKS-managed nodes
- self-managed nodes
- Fargate
- Leverages and integrates with AWS services and APIs
---
## Some integrations
- Authenticate with IAM users and roles
- Associate IAM roles to Kubernetes ServiceAccounts
- Load balance traffic with ALB/ELB/NLB
- Persist data with EBS/EFS
- Label nodes with instance ID, instance type, region, AZ ...
- Pods can be "first class citizens" of VPC
---
## Pros/cons
- Fully managed control plane
- Handles deployment, upgrade, scaling of the control plane
- Available versions and features tend to lag a bit
- Doesn't fit the most demanding users
("demanding" starts somewhere between 100 and 1000 nodes)
---
## Good to know ...
- Some integrations are specific to EKS
(some authentication models)
- Many integrations are *not* specific to EKS
- The Cloud Controller Manager can run outside of EKS
(and provide LoadBalancer services, EBS volumes, and more)
---
# Provisioning clusters
- AWS console, API, CLI
- `eksctl`
- Infrastructure-as-Code
---
## AWS "native" provisioning
- AWS web console
- click-click-click!
- difficulty: low
- AWS API or CLI
- must provide subnets, ARNs
- difficulty: medium
---
## `eksctl`
- Originally developed by Weave
(back when AWS "native" provisioning wasn't very good)
- `eksctl create cluster` just works™
- Has been "adopted" by AWS
(is listed in official documentations)
---
## Infrastructure-as-Code
- Cloud Formation
- Terraform
[terraform-aws-eks](https://github.com/terraform-aws-modules/terraform-aws-eks)
by the community
([example](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/basic))
[terraform-provider-aws](https://github.com/hashicorp/terraform-provider-aws)
by Hashicorp
([example](https://github.com/hashicorp/terraform-provider-aws/tree/main/examples/eks-getting-started))
[Kubestack](https://www.kubestack.com/)
---
## Node groups
- Virtually all provisioning models have a concept of "node group"
- Node group = group of similar nodes in an ASG
- can span multiple AZ
- can have instances of different types¹
- A cluster will need at least one node group
.footnote[¹As I understand it, to specify fallbacks if one instance type is unavailable or out of capacity.]
---
# IAM → EKS authentication
- Access EKS clusters using IAM users and roles
- No special role, permission, or policy is needed in IAM
(but the `eks:DescribeCluster` permission can be useful, see later)
- Users and roles need to be explicitly listed in the cluster
- Configuration is done through a ConfigMap in the cluster
---
## Setting it up
- Nothing to do when creating the cluster
(feature is always enabled)
- Users and roles are *mapped* to Kubernetes users and groups
(through the `aws-auth` ConfigMap in `kube-system`)
- That's it!
---
## Mapping
- The `aws-auth` ConfigMap can contain two entries:
- `mapRoles` (map IAM roles)
- `mapUsers` (map IAM users)
- The value of each entry is a string containing a YAML list
- Each item in that list includes:
- the `rolearn` or `userarn` to map
- a `username` (as a string)
- `groups` (as a list; can be empty)
---
## Example
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: kube-system
  name: aws-auth
data:
  mapRoles: |
    - rolearn: arn:aws:iam::111122223333:role/blah
      username: blah
      groups: [ devs, ops ]
  mapUsers: |
    - userarn: arn:aws:iam::111122223333:user/alice
      username: alice
      groups: [ system:masters ]
    - userarn: arn:aws:iam::111122223333:user/bob
      username: bob
      groups: [ system:masters ]
```
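The ConfigMap can be edited by hand, but `eksctl` can also manage these mappings (cluster name, ARN, and username below are examples):
```bash
eksctl create iamidentitymapping --cluster my-cluster --region eu-west-1 \
    --arn arn:aws:iam::111122223333:user/alice \
    --username alice --group system:masters

# Check the result
kubectl get configmap aws-auth --namespace kube-system -o yaml
```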
---
## Client setup
- We need either the `aws` CLI or the `aws-iam-authenticator`
- We use them as `exec` plugins in `~/.kube/config`
- Done automatically by `eksctl`
- Or manually with `aws eks update-kubeconfig`
- Discovering the address of the API server requires one IAM permission
```json
"Action": [
"eks:DescribeCluster"
],
"Resource": "arn:aws:eks:<region>:<account>:cluster/<cluster-name>"
```
(wildcards can be used when specifying the resource)
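For example, setting up `kubectl` manually (cluster name and region are examples):
```bash
# Add or update a kubeconfig entry that uses the aws CLI as an exec plugin
aws eks update-kubeconfig --name my-cluster --region eu-west-1

# From now on, kubectl obtains tokens through the aws CLI
kubectl get nodes
```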
---
class: extra-details
## How it works
- The helper generates a token
(with `aws eks get-token` or `aws-iam-authenticator token`)
- Note: these calls will always succeed!
(even if AWS API keys are invalid)
- The token is used to authenticate with the Kubernetes API
- AWS' Kubernetes API server will decode and validate the token
(and map the underlying user or role accordingly)
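We can invoke the helper directly to see the token it produces (cluster name is an example):
```bash
# Both commands print a token; neither validates it locally
aws eks get-token --cluster-name my-cluster
aws-iam-authenticator token -i my-cluster
```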
---
## Read The Fine Manual
https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
---
# EKS → IAM authentication
- Access AWS services from workloads running on EKS
(e.g.: access S3 bucket from code running in a Pod)
- This works by associating an IAM role to a K8S ServiceAccount
- There are also a few specific roles used internally by EKS
(e.g. to let the nodes establish network configurations)
- ... We won't talk about these
---
## The big picture
- One-time setup task
([create an OIDC provider associated to our EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html))
- Create (or update) a role with an appropriate *trust policy*
(more on that later)
- Annotate service accounts to map them to that role
`eks.amazonaws.com/role-arn=arn:aws:iam::111122223333:role/some-iam-role`
- Create (or re-create) pods using that ServiceAccount
- The pods can now use that role!
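A sketch of the annotation step (role ARN, ServiceAccount, and cluster names are examples); `eksctl` can also create the role, the ServiceAccount, and the annotation in one command:
```bash
# Annotate an existing ServiceAccount with the role to assume
kubectl annotate serviceaccount my-app \
    eks.amazonaws.com/role-arn=arn:aws:iam::111122223333:role/some-iam-role

# Or let eksctl create the role + ServiceAccount + annotation in one go
eksctl create iamserviceaccount --cluster my-cluster --region eu-west-1 \
    --namespace default --name my-app \
    --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
    --approve
```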
---
## Trust policies
- IAM roles have a *trust policy* (aka *assume role policy*)
(cf `aws iam create-role ... --assume-role-policy-document ...`)
- That policy contains a *statement* list
- This list indicates who/what is allowed to assume (use) the role
- In the current scenario, that policy will contain something saying:
*ServiceAccount S on EKS cluster C is allowed to use this role*
---
## Trust policy for a single ServiceAccount
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
      },
      "Action": "sts:AssumeRoleWithWebIdentity",
      "Condition": {
        "StringEquals": {
          "${OIDC_PROVIDER}:sub":
            "system:serviceaccount:<namespace>:<service-account>"
        }
      }
    }
  ]
}
```
---
## Trust policy for multiple ServiceAccounts
```json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
      },
      "Action": "sts:AssumeRoleWithWebIdentity",
      "Condition": {
        "StringLike": {
          "${OIDC_PROVIDER}:sub":
            ["system:serviceaccount:container-training:*"]
        }
      }
    }
  ]
}
```
---
## The little details
- When pods are created, they are processed by a mutating webhook
(typically named `pod-identity-webhook`)
- Pods using a ServiceAccount with the right annotation get:
- an extra token
<br/>
(mounted in `/var/run/secrets/eks.amazonaws.com/serviceaccount/token`)
- a few env vars
<br/>
(including `AWS_WEB_IDENTITY_TOKEN_FILE` and `AWS_ROLE_ARN`)
- AWS client libraries and tooling will pick these up automatically
(see [this list](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-minimum-sdk.html) for supported versions)
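To verify what the webhook injected, we can peek inside a running Pod (the Deployment name is an example):
```bash
kubectl exec deploy/my-app -- env | grep -E 'AWS_ROLE_ARN|AWS_WEB_IDENTITY_TOKEN_FILE'
kubectl exec deploy/my-app -- \
    cat /var/run/secrets/eks.amazonaws.com/serviceaccount/token
```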
---
# CNI
- EKS is a compliant Kubernetes implementation
(which means we can use a wide range of CNI plugins)
- However, the recommended CNI plugin is the "AWS VPC CNI"
(https://github.com/aws/amazon-vpc-cni-k8s)
- Pods are then "first class citizens" of AWS VPC
---
## AWS VPC CNI
- Each Pod gets an address in a VPC subnet
- No overlay network, no encapsulation, no overhead
(other than AWS network fabric, obviously)
- Probably the fastest network option when running on AWS
- Allows "direct" load balancing (more on that later)
- Can use security groups with Pod traffic
- But: limits the number of Pods per Node
- But: more complex configuration (more on that later)
---
## Number of Pods per Node
- Each Pod gets an IP address on an ENI
(Elastic Network Interface)
- EC2 instances can only have a limited number of ENIs
(the exact limit depends on the instance type)
- ENIs can only have a limited number of IP addresses
(with variations here as well)
- This gives limits of e.g. 35 pods on `t3.large`, 29 on `c5.large` ...
(see the [full list of limits per instance type](https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt)
and the [ENI/IP details](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/pkg/awsutils/vpc_ip_resource_limit.go))
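Those numbers follow the formula used for the AWS VPC CNI: max pods = number of ENIs × (IPv4 addresses per ENI - 1) + 2. A quick sanity check:
```bash
# t3.large: 3 ENIs, 12 IPv4 addresses per ENI
echo $(( 3 * (12 - 1) + 2 ))   # 35
# c5.large: 3 ENIs, 10 IPv4 addresses per ENI
echo $(( 3 * (10 - 1) + 2 ))   # 29
```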
---
## Limits?
- These limits might seem low
- They're not *that* low if you compute e.g. the RAM/Pod ratio
- Except if you're running lots of tiny pods
- Bottom line: do the math!
---
class: extra-details
## Pre-loading
- It can take a little while to allocate/attach an ENI
- The AWS VPC CNI can keep a few extra addresses on each Node
(by default, one ENI worth of IP addresses)
- This is tunable if needed
(see [the docs](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/docs/eni-and-ip-target.md) for details)
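For instance, the warm pool can be tuned by setting environment variables on the `aws-node` DaemonSet (the values below are arbitrary examples; see the docs above for guidance):
```bash
kubectl set env daemonset aws-node --namespace kube-system \
    WARM_IP_TARGET=5 MINIMUM_IP_TARGET=10
```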
---
## Better load balancing
- The default path for inbound traffic is:
Load balancer → NodePort → Pod
- With the AWS VPC CNI, it becomes possible to do:
Load balancer → Pod
- More on that in the load balancing section!
---
## Configuration complexity
- The AWS VPC CNI is a very good solution when running EKS
- It brings optimized solutions to various use-cases:
- direct load balancing
- user authentication
- interconnection with other infrastructure
- etc.
- Keep in mind that all these solutions are AWS-specific
- They can require a non-trivial amount of specific configuration
- Especially when moving from a simple POC to an IAC deployment!
---
# Load Balancers
- Here be dragons!
- Multiple options, each with different pros/cons
- It's necessary to know both AWS products and K8S concepts
---
## AWS load balancers
- CLB / Classic Load Balancer (formerly known as ELB)
- can work in L4 (TCP) or L7 (HTTP) mode
- can do TLS termination
- can't do websockets, HTTP/2, content-based routing ...
- NLB / Network Load Balancer
- high-performance L4 load balancer with TLS support
- ALB / Application Load Balancer
- HTTP load balancer
- can do TLS termination
- can do websockets, HTTP/2, content-based routing ...
---
## Load balancing modes
- "IP targets"
- send traffic directly from LB to Pods
- Pods must use the AWS VPC CNI
- compatible with Fargate Pods
- "Instance targets"
- send traffic to a NodePort (generally incurs an extra hop)
- Pods can use any CNI
- not compatible with Fargate Pods
- Each LB (Service) can use a different mode, if necessary
---
## Kubernetes load balancers
- Service (L4)
- ClusterIP: internal load balancing
- NodePort: external load balancing on high ports (30000-32767 by default)
- LoadBalancer: external load balancing on the port you want
- ExternalIP: external load balancing directly on nodes
- Ingress (L7 HTTP)
- partial content-based routing (`Host` header, request path)
- requires an Ingress Controller (in front)
- works with Services (in back)
---
## Two controllers are available
- Kubernetes "in-tree" load balancer controller
- always available
- used by default for LoadBalancer Services
- creates CLB by default; can also do NLB
- can only do "instance targets"
- can use extra CLB features (TLS, HTTP)
- AWS Load Balancer Controller (fka AWS ALB Ingress Controller)
- optional add-on (requires additional config)
- primarily meant to be an Ingress Controller
- creates NLB and ALB
- can do "instance targets" and "IP targets"
- can also be used for LoadBalancer Services with type `nlb-ip`
- They can run side by side
---
## Which one should we use?
- AWS Load Balancer Controller supports "IP targets"
(which means direct routing of traffic to Pods)
- It can be used as an Ingress controller
- It *seems* to be the perfect solution for EKS!
- However ...
---
## Caveats
- AWS Load Balancer Controller requires extensive configuration
- a few hours to a few days to get it to work in a POC ...
- a few days to a few weeks to industrialize that process?
- It's AWS-specific
- It still introduces an extra hop, even if that hop is invisible
- Other ingress controllers can have interesting features
(canary deployment, A/B testing ...)
---
## Noteworthy annotations and docs
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb-ip`
- LoadBalancer Service with "IP targets" ([docs](https://kubernetes-sigs.github.io/aws-load-balancer-controller/latest/guide/service/nlb_ip_mode/))
- requires AWS Load Balancer Controller
- `service.beta.kubernetes.io/aws-load-balancer-internal: "true"`
- internal load balancer (for private VPC)
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb`
- opt for NLB instead of CLB with in-tree controller
- `service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: "*"`
- use HAProxy [PROXY protocol](https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt)
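For instance, a LoadBalancer Service requesting an NLB with "IP targets" might look like this (name, selector, and ports are examples; this requires the AWS Load Balancer Controller):
```bash
kubectl apply -f- <<'EOF'
apiVersion: v1
kind: Service
metadata:
  name: web
  annotations:
    service.beta.kubernetes.io/aws-load-balancer-type: nlb-ip
spec:
  type: LoadBalancer
  selector:
    app: web
  ports:
  - port: 80
    targetPort: 80
EOF
```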
---
## TLS-related annotations
- `service.beta.kubernetes.io/aws-load-balancer-ssl-cert`
- enable TLS and use that certificate
- example value: `arn:aws:acm:<region>:<account>:certificate/<cert-id>`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-ports`
- enable TLS *only* on the specified ports (when multiple ports are exposed)
- example value: `"443,8443"`
- `service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy`
- specify ciphers and other TLS parameters to use (see [that list](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/elb-security-policy-table.html))
- example value: `"ELBSecurityPolicy-TLS-1-2-2017-01"`
---
## To HTTP(S) or not to HTTP(S)
- `service.beta.kubernetes.io/aws-load-balancer-backend-protocol`
- can be either `http`, `https`, `ssl`, or `tcp`
- if `https` or `ssl`: enable TLS to the backend
- if `http` or `https`: inject `X-Forwarded-For` headers into requests
???
## Cluster autoscaling
## Logging
https://docs.aws.amazon.com/eks/latest/userguide/logging-using-cloudtrail.html
:EN:- Working with EKS
:EN:- Cluster and user provisioning
:EN:- Networking and load balancing
:FR:- Travailler avec EKS
:FR:- Outils de déploiement
:FR:- Intégration avec IAM
:FR:- Fonctionalités réseau

Some files were not shown because too many files have changed in this diff