Compare commits

..

1 Commits

Author SHA1 Message Date
Jérôme Petazzoni
99285050d2 🚧 WIP NR adv k8s Fall 2022 2022-08-31 13:33:02 +02:00
15 changed files with 561 additions and 827 deletions

View File

@@ -17,8 +17,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
@@ -30,8 +30,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque
@@ -43,8 +43,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
@@ -56,8 +56,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque
@@ -71,8 +71,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard
---
@@ -84,8 +84,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
rules:
- apiGroups:
@@ -106,8 +106,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
@@ -182,8 +182,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
@@ -204,8 +204,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
kubernetes.io/cluster-service: "true"
name: kubernetes-dashboard
namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
@@ -253,8 +253,8 @@ spec:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
spec:
containers:
- args:
@@ -262,7 +262,7 @@ spec:
- --sidecar-host=http://127.0.0.1:8000
- --enable-skip-login
- --enable-insecure-login
image: kubernetesui/dashboard:v2.6.1
image: kubernetesui/dashboard:v2.5.0
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
@@ -293,7 +293,7 @@ spec:
name: kubernetes-dashboard-certs
- mountPath: /tmp
name: tmp-volume
- image: kubernetesui/metrics-scraper:v1.0.8
- image: kubernetesui/metrics-scraper:v1.0.7
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:

View File

@@ -17,8 +17,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
@@ -30,8 +30,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque
@@ -43,8 +43,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
@@ -56,8 +56,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque
@@ -71,8 +71,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard
---
@@ -84,8 +84,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
rules:
- apiGroups:
@@ -106,8 +106,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
@@ -182,8 +182,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
@@ -204,8 +204,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
kubernetes.io/cluster-service: "true"
name: kubernetes-dashboard
namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
@@ -253,15 +253,15 @@ spec:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
spec:
containers:
- args:
- --namespace=kubernetes-dashboard
- --auto-generate-certificates
- --sidecar-host=http://127.0.0.1:8000
image: kubernetesui/dashboard:v2.6.1
image: kubernetesui/dashboard:v2.5.0
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
@@ -292,7 +292,7 @@ spec:
name: kubernetes-dashboard-certs
- mountPath: /tmp
name: tmp-volume
- image: kubernetesui/metrics-scraper:v1.0.8
- image: kubernetesui/metrics-scraper:v1.0.7
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:

View File

@@ -17,8 +17,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
---
@@ -30,8 +30,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-certs
namespace: kubernetes-dashboard
type: Opaque
@@ -43,8 +43,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-csrf
namespace: kubernetes-dashboard
type: Opaque
@@ -56,8 +56,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-key-holder
namespace: kubernetes-dashboard
type: Opaque
@@ -71,8 +71,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-settings
namespace: kubernetes-dashboard
---
@@ -84,8 +84,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
rules:
- apiGroups:
@@ -106,8 +106,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard-metrics
roleRef:
apiGroup: rbac.authorization.k8s.io
@@ -126,8 +126,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
rules:
@@ -182,8 +182,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
roleRef:
@@ -204,8 +204,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
kubernetes.io/cluster-service: "true"
name: kubernetes-dashboard
namespace: kubernetes-dashboard
@@ -229,8 +229,8 @@ metadata:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
name: kubernetes-dashboard
namespace: kubernetes-dashboard
spec:
@@ -253,15 +253,15 @@ spec:
app.kubernetes.io/instance: kubernetes-dashboard
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/name: kubernetes-dashboard
app.kubernetes.io/version: 2.6.1
helm.sh/chart: kubernetes-dashboard-5.10.0
app.kubernetes.io/version: 2.5.0
helm.sh/chart: kubernetes-dashboard-5.2.0
spec:
containers:
- args:
- --namespace=kubernetes-dashboard
- --auto-generate-certificates
- --sidecar-host=http://127.0.0.1:8000
image: kubernetesui/dashboard:v2.6.1
image: kubernetesui/dashboard:v2.5.0
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:
@@ -292,7 +292,7 @@ spec:
name: kubernetes-dashboard-certs
- mountPath: /tmp
name: tmp-volume
- image: kubernetesui/metrics-scraper:v1.0.8
- image: kubernetesui/metrics-scraper:v1.0.7
imagePullPolicy: IfNotPresent
livenessProbe:
httpGet:

View File

@@ -276,14 +276,13 @@ EOF
"
##VERSION## https://github.com/docker/compose/releases
COMPOSE_VERSION=v2.11.1
COMPOSE_PLATFORM='linux-$(uname -m)'
# Just in case you need Compose 1.X, you can use the following lines.
# (But it will probably only work for x86_64 machines.)
#COMPOSE_VERSION=1.29.2
#COMPOSE_PLATFORM='Linux-$(uname -m)'
if [ "$ARCHITECTURE" ]; then
COMPOSE_VERSION=v2.2.3
COMPOSE_PLATFORM='linux-$(uname -m)'
else
COMPOSE_VERSION=1.29.2
COMPOSE_PLATFORM='Linux-$(uname -m)'
fi
pssh "
set -e
### Install docker-compose.
@@ -493,7 +492,7 @@ _cmd_kubetools() {
# Install kube-ps1
pssh "
set -e
if ! [ -d /opt/kube-ps1 ]; then
if ! [ -f /opt/kube-ps1 ]; then
cd /tmp
git clone https://github.com/jonmosco/kube-ps1
sudo mv kube-ps1 /opt/kube-ps1

View File

@@ -4,6 +4,6 @@
(we will use the `rng` service in the dockercoins app)
- See what happens when the load increases
- See what happens when the load increses
(spoiler alert: it involves timeouts!)

View File

@@ -1,17 +0,0 @@
# Interlude
- As mentioned earlier:
*the content of this course will be adapted to suit your needs!*
- Please take a look at the form that we just shared in Slack
(you don't need to fill it *right now*)
- If there are parts that you are curious about, ask us now!
- We'll ask you to fill the form after today's session
(before the end of the day, basically, so we can process the results by tomorrow)
- Thank you!

View File

@@ -203,12 +203,12 @@ What does that mean?
## Let's experiment a bit!
- The examples in this section require a Kubernetes cluster
(any local development cluster will suffice)
- For this section, connect to the first node of the `test` cluster
.lab[
- SSH to the first node of the test cluster
- Check that the cluster is operational:
```bash
kubectl get nodes

View File

@@ -1,62 +1,46 @@
# Healthchecks
- Containers can have *healthchecks* (also called "probes")
- Containers can have *healthchecks*
- There are three kinds of healthchecks, corresponding to different use-cases:
- There are three kinds of healthchecks, corresponding to very different use-cases:
`startupProbe`, `readinessProbe`, `livenessProbe`
- liveness = detect when a container is "dead" and needs to be restarted
- readiness = detect when a container is ready to serve traffic
- startup = detect if a container has finished to boot
- These healthchecks are optional (we can use none, all, or some of them)
- Different probes are available:
- Different probes are available (HTTP request, TCP connection, program execution)
HTTP GET, TCP connection, arbitrary program execution, GRPC
- All these probes have a binary result (success/failure)
- Probes that aren't defined will default to a "success" result
- Let's see the difference and how to use them!
---
## Use-cases in brief
*My container takes a long time to boot before being able to serve traffic.*
→ use a `startupProbe` (but often a `readinessProbe` can also do the job)
*Sometimes, my container is unavailable or overloaded, and needs to e.g. be taken temporarily out of load balancer rotation.*
→ use a `readinessProbe`
*Sometimes, my container enters a broken state which can only be fixed by a restart.*
→ use a `livenessProbe`
---
## Liveness probes
## Liveness probe
*This container is dead, we don't know how to fix it, other than restarting it.*
- Check if the container is dead or alive
- Indicates if the container is dead or alive
- If Kubernetes determines that the container is dead:
- A dead container cannot come back to life
- it terminates the container gracefully
- If the liveness probe fails, the container is killed (destroyed)
- it restarts the container (unless the Pod's `restartPolicy` is `Never`)
(to make really sure that it's really dead; no zombies or undeads!)
- With the default parameters, it takes:
- What happens next depends on the pod's `restartPolicy`:
- up to 30 seconds to determine that the container is dead
- `Never`: the container is not restarted
- up to 30 seconds to terminate it
- `OnFailure` or `Always`: the container is restarted
---
## When to use a liveness probe
- To detect failures that can't be recovered
- To indicate failures that can't be recovered
- deadlocks (causing all requests to time out)
@@ -64,45 +48,47 @@
- Anything where our incident response would be "just restart/reboot it"
---
## Liveness probes gotchas
.warning[**Do not** use liveness probes for problems that can't be fixed by a restart]
- Otherwise we just restart our pods for no reason, creating useless load
.warning[**Do not** depend on other services within a liveness probe]
---
- Otherwise we can experience cascading failures
## Readiness probe (1)
(example: web server liveness probe that makes a requests to a database)
*Make sure that a container is ready before continuing a rolling update.*
.warning[**Make sure** that liveness probes respond quickly]
- Indicates if the container is ready to handle traffic
- The default probe timeout is 1 second (this can be tuned!)
- When doing a rolling update, the Deployment controller waits for Pods to be ready
- If the probe takes longer than that, it will eventually cause a restart
(a Pod is ready when all the containers in the Pod are ready)
- Improves reliability and safety of rolling updates:
- don't roll out a broken version (that doesn't pass readiness checks)
- don't lose processing capacity during a rolling update
---
## Readiness probes
## Readiness probe (2)
*Sometimes, my container "needs a break".*
*Temporarily remove a container (overloaded or otherwise) from a Service load balancer.*
- Check if the container is ready or not
- A container can mark itself "not ready" temporarily
- If the container is not ready, its Pod is not ready
(e.g. if it's overloaded or needs to reload/restart/garbage collect...)
- If the Pod belongs to a Service, it is removed from its Endpoints
- If a container becomes "unready" it might be ready again soon
(it stops receiving new connections but existing ones are not affected)
- If the readiness probe fails:
- If there is a rolling update in progress, it might pause
- the container is *not* killed
(Kubernetes will try to respect the MaxUnavailable parameter)
- if the pod is a member of a service, it is temporarily removed
- As soon as the readiness probe suceeds again, everything goes back to normal
- it is re-added as soon as the readiness probe passes again
---
@@ -116,31 +102,67 @@
- To indicate temporary failure or unavailability
- runtime is busy doing garbage collection or (re)loading data
- application can only service *N* parallel connections
- new connections will be directed to other Pods
- runtime is busy doing garbage collection or initial data load
- To redirect new connections to other Pods
(e.g. fail the readiness probe when the Pod's load is too high)
---
## Startup probes
## Dependencies
*My container takes a long time to boot before being able to serve traffic.*
- If a web server depends on a database to function, and the database is down:
- After creating a container, Kubernetes runs its startup probe
- the web server's liveness probe should succeed
- The container will be considered "unhealthy" until the probe succeeds
- the web server's readiness probe should fail
- As long as the container is "unhealthy", its Pod...:
- Same thing for any hard dependency (without which the container can't work)
- is not added to Services' endpoints
.warning[**Do not** fail liveness probes for problems that are external to the container]
- is not considered as "available" for rolling update purposes
---
- Readiness and liveness probes are enabled *after* startup probe reports success
## Timing and thresholds
(if there is no startup probe, readiness and liveness probes are enabled right away)
- Probes are executed at intervals of `periodSeconds` (default: 10)
- The timeout for a probe is set with `timeoutSeconds` (default: 1)
.warning[If a probe takes longer than that, it is considered as a FAIL]
- A probe is considered successful after `successThreshold` successes (default: 1)
- A probe is considered failing after `failureThreshold` failures (default: 3)
- A probe can have an `initialDelaySeconds` parameter (default: 0)
- Kubernetes will wait that amount of time before running the probe for the first time
(this is important to avoid killing services that take a long time to start)
---
## Startup probe
*The container takes too long to start, and is killed by the liveness probe!*
- By default, probes (including liveness) start immediately
- With the default probe interval and failure threshold:
*a container must respond in less than 30 seconds, or it will be killed!*
- There are two ways to avoid that:
- set `initialDelaySeconds` (a fixed, rigid delay)
- use a `startupProbe`
- Kubernetes will run only the startup probe, and when it succeeds, run the other probes
---
@@ -156,296 +178,121 @@
---
## Startup probes gotchas
- When defining a `startupProbe`, we almost always want to adjust its parameters
(specifically, its `failureThreshold` - this is explained in next slide)
- Otherwise, if the container fails to start within 30 seconds...
*Kubernetes terminates the container and restarts it!*
- Sometimes, it's easier/simpler to use a `readinessProbe` instead
(except when also using a `livenessProbe`)
---
## Timing and thresholds
- Probes are executed at intervals of `periodSeconds` (default: 10)
- The timeout for a probe is set with `timeoutSeconds` (default: 1)
.warning[If a probe takes longer than that, it is considered as a FAIL]
.warning[For liveness probes **and startup probes** this terminates and restarts the container]
- A probe is considered successful after `successThreshold` successes (default: 1)
- A probe is considered failing after `failureThreshold` failures (default: 3)
- All these parameters can be set independently for each probe
---
class: extra-details
## `initialDelaySeconds`
- A probe can have an `initialDelaySeconds` parameter (default: 0)
- Kubernetes will wait that amount of time before running the probe for the first time
- It is generally better to use a `startupProbe` instead
(but this parameter did exist before startup probes were implemented)
---
class: extra-details
## `readinessProbe` vs `startupProbe`
- A lot of blog posts / documentations / tutorials recommend readiness probes...
- ...even in scenarios where a startup probe would seem more appropriate!
- This is because startup probes are relatively recent
(they reached GA status in Kubernetes 1.20)
- When there is no `livenessProbe`, using a `readinessProbe` is simpler:
- a `startupProbe` generally requires to change the `failureThreshold`
- a `startupProbe` generally also requires a `readinessProbe`
- a single `readinessProbe` can fulfill both roles
---
## Different types of probes
- Kubernetes supports the following mechanisms:
- HTTP request
- `exec` (arbitrary program execution)
- specify URL of the request (and optional headers)
- `httpGet` (HTTP GET request)
- any status code between 200 and 399 indicates success
- `tcpSocket` (check if a TCP port is accepting connections)
- TCP connection
- `grpc` (standard [GRPC Health Checking Protocol][grpc])
- the probe succeeds if the TCP port is open
- All probes give binary results ("it works" or "it doesn't")
- arbitrary exec
- Let's see the specific details for each of them!
- a command is executed in the container
[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html
- exit status of zero indicates success
---
## `exec`
## Benefits of using probes
- Runs an arbitrary program *inside* the container
- Rolling updates proceed when containers are *actually ready*
(like with `kubectl exec` or `docker exec`)
(as opposed to merely started)
- The program must be available in the container image
- Containers in a broken state get killed and restarted
- Kubernetes uses the exit status of the program
(instead of serving errors or timeouts)
(standard UNIX convention: 0 = success, anything else = failure)
- Unavailable backends get removed from load balancer rotation
(thus improving response times across the board)
- If a probe is not defined, it's as if there was an "always successful" probe
---
## `exec` example
## Example: HTTP probe
When the worker is ready, it should create `/tmp/ready`.
<br/>
The following probe will give it 5 minutes to do so.
Here is a pod template for the `rng` web service of the DockerCoins app:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: queueworker
name: healthy-app
spec:
containers:
- name: worker
image: myregistry.../worker:v1.0
startupProbe:
exec:
command:
- test
- -f
- /tmp/ready
failureThreshold: 30
```
---
## Using shell constructs
- If we want to use pipes, conditionals, etc. we should invoke a shell
- Example:
```yaml
exec:
command:
- sh
- -c
- "curl http://localhost:5000/status | jq .ready | grep true"
```
---
## `httpGet`
- Make an HTTP GET request to the container
- The request will be made by Kubelet
(doesn't require extra binaries in the container image)
- `port` must be specified
- `path` and extra `httpHeaders` can be specified optionally
- Kubernetes uses HTTP status code of the response:
- 200-399 = success
- anything else = failure
---
## `httpGet` example
The following liveness probe restarts the container if it stops responding on `/healthz`:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: frontend
spec:
containers:
- name: frontend
image: myregistry.../frontend:v1.0
- name: myapp
image: myregistry.io/myapp:v1.0
livenessProbe:
httpGet:
path: /health
port: 80
path: /healthz
periodSeconds: 5
```
---
## `tcpSocket`
- Kubernetes checks if the indicated TCP port accepts connections
- There is no additional check
.warning[It's quite possible for a process to be broken, but still accept TCP connections!]
If the backend serves an error, or takes longer than 1s, 3 times in a row, it gets killed.
---
## `grpc`
## Example: exec probe
<!-- ##VERSION## -->
Here is a pod template for a Redis server:
- Available in beta since Kubernetes 1.24
```yaml
apiVersion: v1
kind: Pod
metadata:
name: redis-with-liveness
spec:
containers:
- name: redis
image: redis
livenessProbe:
exec:
command: ["redis-cli", "ping"]
```
- Leverages standard [GRPC Health Checking Protocol][grpc]
[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html
If the Redis process becomes unresponsive, it will be killed.
---
## Best practices for healthchecks
## Questions to ask before adding healthchecks
- Readiness probes are almost always beneficial
- Do we want liveness, readiness, both?
- don't hesitate to add them early!
(sometimes, we can use the same check, but with different failure thresholds)
- we can even make them *mandatory*
- Do we have existing HTTP endpoints that we can use?
- Be more careful with liveness and startup probes
- Do we need to add new endpoints, or perhaps use something else?
- they aren't always necessary
- Are our healthchecks likely to use resources and/or slow down the app?
- they can even cause harm
- Do they depend on additional services?
(this can be particularly tricky, see next slide)
---
## Readiness probes
## Healthchecks and dependencies
- Almost always beneficial
- Liveness checks should not be influenced by the state of external services
- Exceptions:
- All checks should reply quickly (by default, less than 1 second)
- web service that doesn't have a dedicated "health" or "ping" route
- Otherwise, they are considered to fail
- ...and all requests are "expensive" (e.g. lots of external calls)
- This might require to check the health of dependencies asynchronously
---
## Liveness probes
- If we're not careful, we end up restarting containers for no reason
(which can cause additional load on the cluster, cascading failures, data loss, etc.)
- Suggestion:
- don't add liveness probes immediately
- wait until you have a bit of production experience with that code
- then add narrow-scoped healthchecks to detect specific failure modes
- Readiness and liveness probes should be different
(different check *or* different timeouts *or* different thresholds)
---
## Startup probes
- Only beneficial for containers that need a long time to start
(more than 30 seconds)
- If there is no liveness probe, it's simpler to just use a readiness probe
(since we probably want to have a readiness probe anyway)
- In other words, startup probes are useful in one situation:
*we have a liveness probe, AND the container needs a lot of time to start*
- Don't forget to change the `failureThreshold`
(otherwise the container will fail to start and be killed)
---
## Recap of the gotchas
- The default timeout is 1 second
- if a probe takes longer than 1 second to reply, Kubernetes considers that it fails
- this can be changed by setting the `timeoutSeconds` parameter
<br/>(or refactoring the probe)
- Liveness probes should not be influenced by the state of external services
- Liveness probes and readiness probes should have different paramters
- For startup probes, remember to increase the `failureThreshold`
(e.g. if a database or API might be healthy but still take more than
1 second to reply, we should check the status asynchronously and report
a cached status)
---
@@ -453,21 +300,21 @@ spec:
(In that context, worker = process that doesn't accept connections)
- A relatively easy solution is to use files
- Readiness is useful mostly for rolling updates
- For a startup or readiness probe:
(because workers aren't backends for a service)
- worker creates `/tmp/ready` when it's ready
- probe checks the existence of `/tmp/ready`
- Liveness may help us restart a broken worker, but how can we check it?
- For a liveness probe:
- Embedding an HTTP server is a (potentially expensive) option
- worker touches `/tmp/alive` regularly
<br/>(e.g. just before starting to work on a job)
- probe checks that the timestamp on `/tmp/alive` is recent
- if the timestamp is old, it means that the worker is stuck
- Using a "lease" file can be relatively easy:
- Sometimes it can also make sense to embed a web server in the worker
- touch a file during each iteration of the main loop
- check the timestamp of that file from an exec probe
- Writing logs (and checking them from the probe) also works
???

View File

@@ -1,148 +0,0 @@
## Ingress and canary releases
- Let's see how to implement *canary releases*
- The example here will use Traefik v1
(which is obsolete)
- It won't work on your Kubernetes cluster!
(unless you're running an oooooold version of Kubernetes)
(and an equally oooooooold version of Traefik)
- We've left it here just as an example!
---
## Canary releases
- A *canary release* (or canary launch or canary deployment) is a release that will process only a small fraction of the workload
- After deploying the canary, we compare its metrics to the normal release
- If the metrics look good, the canary will progressively receive more traffic
(until it gets 100% and becomes the new normal release)
- If the metrics aren't good, the canary is automatically removed
- When we deploy a bad release, only a tiny fraction of traffic is affected
---
## Various ways to implement canary
- Example 1: canary for a microservice
- 1% of all requests (sampled randomly) are sent to the canary
- the remaining 99% are sent to the normal release
- Example 2: canary for a web app
- 1% of users are sent to the canary web site
- the remaining 99% are sent to the normal release
- Example 3: canary for shipping physical goods
- 1% of orders are shipped with the canary process
- the remaining 99% are shipped with the normal process
- We're going to implement example 1 (per-request routing)
---
## Canary releases with Traefik v1
- We need to deploy the canary and expose it with a separate service
- Then, in the Ingress resource, we need:
- multiple `paths` entries (one for each service, canary and normal)
- an extra annotation indicating the weight of each service
- If we want, we can send requests to more than 2 services
---
## The Ingress resource
.small[
```yaml
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: rgb
annotations:
traefik.ingress.kubernetes.io/service-weights: |
red: 50%
green: 25%
blue: 25%
spec:
rules:
- host: rgb.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
servicePort: 80
- path: /
backend:
serviceName: green
servicePort: 80
- path: /
backend:
serviceName: blue
servicePort: 80
```
]
---
class: extra-details
## Other ingress controllers
*Just to illustrate how different things are ...*
- With the NGINX ingress controller:
- define two ingress ressources
<br/>
(specifying rules with the same host+path)
- add `nginx.ingress.kubernetes.io/canary` annotations on each
- With Linkerd2:
- define two services
- define an extra service for the weighted aggregate of the two
- define a TrafficSplit (this is a CRD introduced by the SMI spec)
---
class: extra-details
## We need more than that
What we saw is just one of the multiple building blocks that we need to achieve a canary release.
We also need:
- metrics (latency, performance ...) for our releases
- automation to alter canary weights
(increase canary weight if metrics look good; decrease otherwise)
- a mechanism to manage the lifecycle of the canary releases
(create them, promote them, delete them ...)
For inspiration, check [flagger by Weave](https://github.com/weaveworks/flagger).

View File

@@ -1,36 +1,34 @@
# Exposing HTTP services with Ingress resources
- Service = layer 4 (TCP, UDP, SCTP)
- HTTP services are typically exposed on port 80
- works with every TCP/UDP/SCTP protocol
(and 443 for HTTPS)
- doesn't "see" or interpret HTTP
- `NodePort` services are great, but they are *not* on port 80
- Ingress = layer 7 (HTTP)
(by default, they use port range 30000-32767)
- only for HTTP
- can route requests depending on URI or host header
- can handle TLS
- How can we get *many* HTTP services on port 80? 🤔
---
## Why should we use Ingress resources?
## Various ways to expose something on port 80
A few use-cases:
- Service with `type: LoadBalancer`
- URI routing (e.g. for single page apps)
*costs a little bit of money; not always available*
`/api` → service `api:5000`
- Service with one (or multiple) `ExternalIP`
everything else → service `static:80`
*requires public nodes; limited by number of nodes*
- Cost optimization
- Service with `hostPort` or `hostNetwork`
(because individual `LoadBalancer` services typically cost money)
*same limitations as `ExternalIP`; even harder to manage*
- Automatic handling of TLS certificates
- Ingress resources
*addresses all these limitations, yay!*
---
@@ -183,70 +181,20 @@ class: extra-details
---
## Accepting connections on port 80 (and 443)
- Web site users don't want to specify port numbers
(e.g. "connect to https://blahblah.whatever:31550")
- Our ingress controller needs to actually be exposed on port 80
(and 443 if we want to handle HTTPS)
- Let's see how we can achieve that!
---
## Various ways to expose something on port 80
- Service with `type: LoadBalancer`
*costs a little bit of money; not always available*
- Service with one (or multiple) `ExternalIP`
*requires public nodes; limited by number of nodes*
- Service with `hostPort` or `hostNetwork`
*same limitations as `ExternalIP`; even harder to manage*
---
## Deploying pods listening on port 80
- We are going to run Traefik in Pods with `hostNetwork: true`
- We want our ingress load balancer to be available on port 80
(so that our load balancer can use the "real" port 80 of our nodes)
- The best way to do that would be with a `LoadBalancer` service
- Traefik Pods will be created by a DaemonSet
... but it requires support from the underlying infrastructure
(so that we get one instance of Traefik on every node of the cluster)
- Instead, we are going to use the `hostNetwork` mode on the Traefik pods
- This means that we will be able to connect to any node of the cluster on port 80
.warning[This is not typical of a production setup!]
- Let's see what this `hostNetwork` mode is about ...
---
## Doing it in production
- When running "on cloud", the easiest option is a `LoadBalancer` service
- When running "on prem", it depends:
- [MetalLB] is a good option if a pool of public IP addresses is available
- otherwise, using `externalIPs` on a few nodes (2-3 for redundancy)
- Many variations/optimizations are possible depending on our exact scenario!
[MetalLB]: https://metallb.org/
---
class: extra-details
## Without `hostNetwork`
- Normally, each pod gets its own *network namespace*
@@ -263,8 +211,6 @@ class: extra-details
---
class: extra-details
## With `hostNetwork: true`
- No network namespace gets created
@@ -283,6 +229,26 @@ class: extra-details
---
class: extra-details
## Other techniques to expose port 80
- We could use pods specifying `hostPort: 80`
... but with most CNI plugins, this [doesn't work or requires additional setup](https://github.com/kubernetes/kubernetes/issues/23920)
- We could use a `NodePort` service
... but that requires [changing the `--service-node-port-range` flag in the API server](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/)
- We could create a service with an external IP
... this would work, but would require a few extra steps
(figuring out the IP address and adding it to the service)
---
## Running Traefik
- The [Traefik documentation][traefikdoc] recommends to use a Helm chart
@@ -304,8 +270,6 @@ class: extra-details
---
class: extra-details
## Taints and tolerations
- A *taint* is an attribute added to a node
@@ -532,6 +496,10 @@ This is normal: we haven't provided any ingress rule yet.
## Creating ingress resources
- Before Kubernetes 1.19, we must use YAML manifests
(see example on next slide)
- Since Kubernetes 1.19, we can use `kubectl create ingress`
```bash
@@ -566,21 +534,7 @@ This is normal: we haven't provided any ingress rule yet.
---
## Before Kubernetes 1.19
- Before Kubernetes 1.19:
- `kubectl create ingress` wasn't available
- `apiVersion: networking.k8s.io/v1` wasn't supported
- It was necessary to use YAML, and `apiVersion: networking.k8s.io/v1beta1`
(see example on next slide)
---
## YAML for old ingress resources
## Ingress resources in YAML
Here is a minimal host-based ingress resource:
@@ -601,15 +555,23 @@ spec:
```
(It is in `k8s/ingress.yaml`.)
---
## YAML for new ingress resources
class: extra-details
## Ingress API version
- The YAML on the previous slide uses `apiVersion: networking.k8s.io/v1beta1`
- Starting with Kubernetes 1.19, `networking.k8s.io/v1` is available
- And we can use `kubectl create ingress` 🎉
- However, with Kubernetes 1.19 (and later), we can use `kubectl create ingress`
- We can see "modern" YAML with `-o yaml --dry-run=client`:
- We chose to keep an "old" (deprecated!) YAML example for folks still using older versions of Kubernetes
- If we want to see "modern" YAML, we can use `-o yaml --dry-run=client`:
```bash
kubectl create ingress red -o yaml --dry-run=client \
@@ -679,6 +641,157 @@ class: extra-details
- It is still in alpha stage
---
## Vendor-specific example
- Let's see how to implement *canary releases*
- The example here will use Traefik v1
(which is obsolete)
- It won't work on your Kubernetes cluster!
(unless you're running an oooooold version of Kubernetes)
(and an equally oooooooold version of Traefik)
- We've left it here just as an example!
---
## Canary releases
- A *canary release* (or canary launch or canary deployment) is a release that will process only a small fraction of the workload
- After deploying the canary, we compare its metrics to the normal release
- If the metrics look good, the canary will progressively receive more traffic
(until it gets 100% and becomes the new normal release)
- If the metrics aren't good, the canary is automatically removed
- When we deploy a bad release, only a tiny fraction of traffic is affected
---
## Various ways to implement canary
- Example 1: canary for a microservice
- 1% of all requests (sampled randomly) are sent to the canary
- the remaining 99% are sent to the normal release
- Example 2: canary for a web app
- 1% of users are sent to the canary web site
- the remaining 99% are sent to the normal release
- Example 3: canary for shipping physical goods
- 1% of orders are shipped with the canary process
- the remaining 99% are shipped with the normal process
- We're going to implement example 1 (per-request routing)
---
## Canary releases with Traefik v1
- We need to deploy the canary and expose it with a separate service
- Then, in the Ingress resource, we need:
- multiple `paths` entries (one for each service, canary and normal)
- an extra annotation indicating the weight of each service
- If we want, we can send requests to more than 2 services
---
## The Ingress resource
.small[
```yaml
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: rgb
annotations:
traefik.ingress.kubernetes.io/service-weights: |
red: 50%
green: 25%
blue: 25%
spec:
rules:
- host: rgb.`A.B.C.D`.nip.io
http:
paths:
- path: /
backend:
serviceName: red
servicePort: 80
- path: /
backend:
serviceName: green
servicePort: 80
- path: /
backend:
serviceName: blue
servicePort: 80
```
]
---
class: extra-details
## Other ingress controllers
*Just to illustrate how different things are ...*
- With the NGINX ingress controller:
- define two ingress ressources
<br/>
(specifying rules with the same host+path)
- add `nginx.ingress.kubernetes.io/canary` annotations on each
- With Linkerd2:
- define two services
- define an extra service for the weighted aggregate of the two
- define a TrafficSplit (this is a CRD introduced by the SMI spec)
---
class: extra-details
## We need more than that
What we saw is just one of the multiple building blocks that we need to achieve a canary release.
We also need:
- metrics (latency, performance ...) for our releases
- automation to alter canary weights
(increase canary weight if metrics look good; decrease otherwise)
- a mechanism to manage the lifecycle of the canary releases
(create them, promote them, delete them ...)
For inspiration, check [flagger by Weave](https://github.com/weaveworks/flagger).
???
:EN:- The Ingress resource

View File

@@ -40,9 +40,7 @@
- Each person gets their own private set of VMs
- The connection information is on a shared spreadsheet
(URL to be shared through portal and chat)
- Each person should have a printed card with connection information
- We will connect to these VMs with SSH

View File

@@ -14,20 +14,32 @@
- CPU is a *compressible resource*
- it can be preempted immediately without adverse effect
- if we have N CPU and need 2N, we run at 50% speed
(it can be preempted immediately without adverse effect)
- Memory is an *incompressible resource*
- it needs to be swapped out to be reclaimed; and this is costly
- if we have N GB RAM and need 2N, we might run at... 0.1% speed!
(it needs to be swapped out to be reclaimed; and this is costly)
- As a result, exceeding limits will have different consequences for CPU and memory
---
## Exceeding CPU limits
- CPU can be reclaimed instantaneously
(in fact, it is preempted hundreds of times per second, at each context switch)
- If a container uses too much CPU, it can be throttled
(it will be scheduled less often)
- The processes in that container will run slower
(or rather: they will not run faster)
---
class: extra-details
## CPU limits implementation details
@@ -134,59 +146,39 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
---
## Running low on memory
## Exceeding memory limits
- When the system runs low on memory, it starts to reclaim used memory
- Memory needs to be swapped out before being reclaimed
(we talk about "memory pressure")
- "Swapping" means writing memory pages to disk, which is very slow
- Option 1: free up some buffers and caches
- On a classic system, a process that swaps can get 1000x slower
(fastest option; might affect performance if cache memory runs very low)
(because disk I/O is 1000x slower than memory I/O)
- Option 2: swap, i.e. write to disk some memory of one process to give it to another
- Exceeding the memory limit (even by a small amount) can reduce performance *a lot*
(can have a huge negative impact on performance because disks are slow)
- Kubernetes *does not support swap* (more on that later!)
- Option 3: terminate a process and reclaim all its memory
(OOM or Out Of Memory Killer on Linux)
- Exceeding the memory limit will cause the container to be killed
---
## Memory limits on Kubernetes
## Limits vs requests
- Kubernetes *does not support swap*
(but it may support it in the future, thanks to [KEP 2400])
- If a container exceeds its memory *limit*, it gets killed immediately
- If a node is overcommitted and under memory pressure, it will terminate some pods
(see next slide for some details about what "overcommit" means here!)
[KEP 2400]: https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2400-node-swap/README.md#implementation-history
---
## Overcommitting resources
- *Limits* are "hard limits" (a container *cannot* exceed its limits)
- Limits are "hard limits" (they can't be exceeded)
- a container exceeding its memory limit is killed
- a container exceeding its CPU limit is throttled
- On a given node, the sum of pod *limits* can be higher than the node size
- Requests are used for scheduling purposes
- *Requests* are used for scheduling purposes
- a container using *less* than what it requested will never be killed or throttled
- a container can use more than its requested CPU or RAM amounts
- the scheduler uses the requested sizes to determine placement
- a container using *less* than what it requested should never be killed or throttled
- On a given node, the sum of pod *requests* cannot be higher than the node size
- the resources requested by all pods on a node will never exceed the node size
---
@@ -230,31 +222,9 @@ Each pod is assigned a QoS class (visible in `status.qosClass`).
---
class: extra-details
## Where is my swap?
## CPU and RAM reservation
- Kubernetes passes resources requests and limits to the container engine
- The container engine applies these requests and limits with specific mechanisms
- Example: on Linux, this is typically done with control groups aka cgroups
- Most systems use cgroups v1, but cgroups v2 are slowly being rolled out
(e.g. available in Ubuntu 22.04 LTS)
- Cgroups v2 have new, interesting features for memory control:
- ability to set "minimum" memory amounts (to effectively reserve memory)
- better control on the amount of swap used by a container
---
class: extra-details
## What's the deal with swap?
- The semantics of memory and swap limits on Linux cgroups are complex
- With cgroups v1, it's not possible to disable swap for a cgroup
@@ -268,8 +238,6 @@ class: extra-details
- The simplest solution was to disable swap entirely
- Kubelet will refuse to start if it detects that swap is enabled!
---
## Alternative point of view
@@ -300,7 +268,7 @@ class: extra-details
- You will need to add the flag `--fail-swap-on=false` to kubelet
(remember: it won't otherwise start if it detects that swap is enabled)
(otherwise, it won't start!)
---
@@ -698,18 +666,6 @@ class: extra-details
---
## Underutilization
- Remember: when assigning a pod to a node, the scheduler looks at *requests*
(not at current utilization on the node)
- If pods request resources but don't use them, this can lead to underutilization
(because the scheduler will consider that the node is full and can't fit new pods)
---
## Viewing a namespace limits and quotas
- `kubectl describe namespace` will display resource limits and quotas

View File

@@ -20,17 +20,13 @@
## Docker Desktop
- Available on Linux, Mac, and Windows
- Free for personal use and small businesses
(less than 250 employees and less than $10 millions in annual revenue)
- Available on Mac and Windows
- Gives you one cluster with one node
- Streamlined installation and user experience
- Very easy to use if you are already using Docker Desktop:
- Great integration with various network stacks and e.g. corporate VPNs
go to Docker Desktop preferences and enable Kubernetes
- Ideal for Docker users who need good integration between both platforms
@@ -44,11 +40,13 @@
- Runs Kubernetes nodes in Docker containers
- Can deploy multiple clusters, with multiple nodes
- Can deploy multiple clusters, with multiple nodes, and multiple master nodes
- Runs the control plane on Kubernetes nodes
- As of June 2020, two versions co-exist: stable (1.7) and beta (3.0)
- Control plane can also run on multiple nodes
- They have different syntax and options, this can be confusing
(but don't let that stop you!)
---
@@ -86,7 +84,7 @@
- More advanced scenarios require writing a short [config file](https://kind.sigs.k8s.io/docs/user/quick-start#configuring-your-kind-cluster)
(to define multiple nodes, multiple control plane nodes, set Kubernetes versions ...)
(to define multiple nodes, multiple master nodes, set Kubernetes versions ...)
- Can deploy multiple clusters
@@ -126,9 +124,7 @@
## [Rancher Desktop](https://rancherdesktop.io/)
- Available on Linux, Mac, and Windows
- Free and open-source
- Available on Mac and Windows
- Runs a single cluster with a single node
@@ -138,7 +134,7 @@
- Emphasis on ease of use (like Docker Desktop)
- Relatively young product (first release in May 2021)
- Very young product (first release in May 2021)
- Based on k3s and other proven components

View File

@@ -2,11 +2,11 @@ title: |
Advanced
Kubernetes
chat: "[Slack](https://newrelic.slack.com/archives/C0438EFM97F)"
chat: "[Slack](#FIXME)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2022-09-nr2.container.training/
slides: https://2022-09-nr.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -26,84 +26,59 @@ content:
- #1
- k8s/prereqs-admin.md
- k8s/architecture.md
- k8s/internal-apis.md
- k8s/deploymentslideshow.md
- k8s/dmuc.md
- interlude-form.md
- k8s/multinode.md
- k8s/cni.md
- k8s/interco.md
- k8s/cni-internals.md
- k8s/apilb.md
- #2
- k8s/demo-apps.md
- k8s/netpol.md
- k8s/authn-authz.md
- k8s/apilb.md
- k8s/internal-apis.md
- k8s/staticpods.md
#- k8s/cluster-upgrade.md
- k8s/control-plane-auth.md
- k8s/user-cert.md
- k8s/csr-api.md
- k8s/openid-connect.md
- k8s/pod-security-intro.md
- k8s/pod-security-policies.md
- k8s/pod-security-admission.md
- exercises/netpol-details.md
- exercises/rbac-details.md
- #3
- k8s/helm-intro.md
- k8s/ingress.md
- k8s/extending-api.md
- k8s/crd.md
- k8s/operators.md
- k8s/sealed-secrets.md
- k8s/cert-manager.md
- k8s/ingress-tls.md
- k8s/ingress-advanced.md
#- k8s/eck.md
- #4
- k8s/admission.md
- k8s/cainjector.md
- k8s/kyverno.md
- k8s/aggregation-layer.md
- k8s/metrics-server.md
- k8s/hpa-v2.md
- #5
- k8s/operators-design.md
- k8s/operators-example.md
- k8s/owners-and-dependents.md
#- k8s/kubebuilder.md
- k8s/events.md
- k8s/finalizers.md
- shared/thankyou.md
- #6
- |
# (Extra content)
- k8s/kustomize.md
- k8s/helm-intro.md
- k8s/helm-chart-format.md
- k8s/helm-create-basic-chart.md
- k8s/helm-create-better-chart.md
- k8s/helm-dependencies.md
- k8s/helm-values-schema-validation.md
- k8s/helm-secrets.md
- exercises/helm-generic-chart-details.md
- exercises/helm-umbrella-chart-details.md
- #4
- k8s/extending-api.md
- k8s/admission.md
- k8s/cainjector.md
- k8s/kyverno.md
- k8s/crd.md
- k8s/operators.md
- k8s/sealed-secrets.md
- k8s/operators-design.md
- k8s/operators-example.md
- k8s/owners-and-dependents.md
- k8s/events.md
- k8s/finalizers.md
- exercises/sealed-secrets-details.md
- #5
- k8s/resource-limits.md
- k8s/cluster-sizing.md
- k8s/cluster-autoscaler.md
- k8s/horizontal-pod-autoscaler.md
- k8s/aggregation-layer.md
- k8s/metrics-server.md
- k8s/hpa-v2.md
- k8s/batch-jobs.md
- k8s/statefulsets.md
- k8s/consul.md
- k8s/pv-pvc-sc.md
- k8s/volume-claim-templates.md
- k8s/stateful-failover.md
- shared/thankyou.md
-
- |
# (Extra content I)
- k8s/setup-devel.md
- k8s/accessinternal.md
- k8s/kubectlproxy.md
- k8s/k9s.md
- k8s/tilt.md
- k8s/ytt.md
-
- |
# (Extra content II)
- k8s/internal-apis.md
- k8s/staticpods.md
- k8s/cluster-upgrade.md
- k8s/control-plane-auth.md
- k8s/kubebuilder.md
- k8s/apiserver-deepdive.md

View File

@@ -1,27 +1,38 @@
## Intros
## Intros & disclaimers
- Hello! We are:
- Hello! I'm Jérôme Petazzoni ([@jpetazzo])
- Jérôme Petazzoni ([@jpetazzo])
- I have ...
- Dana Engebretson ([@bigdana])
- extensive experience running *containers* in production
- limited experience running *Kubernetes* in production
- The training will from 8am to noon, Monday to Friday
- taught Docker and Kubernetes many times, to large audiences
- less frequently taught operators and API internals
- There will be breaks every hour
- written a lot of Python code during my career; but much less Go
- learned way more than I expected just by writing some chapters of this course (!)
---
## Logistics
- The training will from ... to ..., Monday to Friday
- There will be short breaks every hour, and a longer break in the middle
- Feel free to interrupt for questions at any time
- *Especially when you see full screen container pictures!*
(I will watch them in awkward silence while I wait for your questions)
(I will watch them in silence while I wait for your questions)
- Live feedback, questions, help: @@CHAT@@
<!-- -->
[@alexbuisine]: https://twitter.com/alexbuisine
[@bigdana]: https://twitter.com/bigdana
[EphemeraSearch]: https://ephemerasearch.com/
[@jpetazzo]: https://twitter.com/jpetazzo
[@s0ulshake]: https://twitter.com/s0ulshake
@@ -29,12 +40,16 @@
---
## Dynamic content
## Exercises
- The content of this course will be adapted to suit your needs!
- At the end of each day, there is a series of exercises
- We'll share a link to a form so you can tell us what we should work on
- To make the most out of the training, please try the exercises!
- Expect the content of the deck to change between Monday and Tuesday
(it will help to practice and memorize the content of the day)
(we will reorder/reorganize the content accordingly)
- We recommend to take at least one hour to work on the exercises
(if you understood the content of the day, it will be much faster)
- Each day will start with a quick review of the exercises of the previous day