mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-02-28 00:13:51 +00:00
Compare commits
1 Commits
2024-04-su
...
2023-12-de
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
77606044f6 |
@@ -1,6 +1,6 @@
|
||||
FROM ruby:alpine
|
||||
RUN apk add --update build-base curl
|
||||
RUN gem install sinatra --version '~> 3'
|
||||
RUN gem install sinatra
|
||||
RUN gem install thin
|
||||
ADD hasher.rb /
|
||||
CMD ["ruby", "hasher.rb"]
|
||||
|
||||
@@ -13,8 +13,6 @@ TF_VAR_location=fr-par-2 \
|
||||
# set kubeconfig file
|
||||
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
|
||||
|
||||
export KUBECONFIG=~/kubeconfig
|
||||
|
||||
# set external_ip labels
|
||||
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
|
||||
while read node address; do
|
||||
|
||||
@@ -741,7 +741,7 @@ EOF
|
||||
# Install popeye
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/popeye ]; then
|
||||
FILENAME=popeye_Linux_$ARCH.tar.gz &&
|
||||
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
|
||||
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
|
||||
sudo tar -zxvf- -C /usr/local/bin popeye
|
||||
popeye version
|
||||
@@ -829,14 +829,6 @@ EOF
|
||||
sudo tar -zxvf- -C /usr/local/bin kubent
|
||||
kubent --version
|
||||
fi"
|
||||
|
||||
# Ngrok. Note that unfortunately, this is the x86_64 binary.
|
||||
# We might have to rethink how to handle this for multi-arch environments.
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/ngrok ]; then
|
||||
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
|
||||
sudo tar -zxvf- -C /usr/local/bin ngrok
|
||||
fi"
|
||||
}
|
||||
|
||||
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
|
||||
@@ -970,19 +962,12 @@ _cmd_standardize() {
|
||||
# Disable unattended upgrades so that they don't mess up with the subsequent steps
|
||||
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
|
||||
# Some cloud providers think that it's smart to disable password authentication.
|
||||
# We need to re-enable it, though.
|
||||
# Digital Ocean
|
||||
# Digital Ocean's cloud init disables password authentication; re-enable it.
|
||||
pssh "
|
||||
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
# AWS
|
||||
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
|
||||
# Special case for oracle since their iptables blocks everything but SSH
|
||||
pssh "
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
DOMAINS=~/Dropbox/domains.txt
|
||||
IPS=ips.txt
|
||||
|
||||
. ./dns-cloudflare.sh
|
||||
|
||||
paste "$DOMAINS" "$IPS" | while read domain ips; do
|
||||
if ! [ "$domain" ]; then
|
||||
echo "⚠️ No more domains!"
|
||||
exit 1
|
||||
fi
|
||||
_clear_zone "$domain"
|
||||
_populate_zone "$domain" $ips
|
||||
done
|
||||
echo "✅ All done."
|
||||
@@ -1,7 +1,5 @@
|
||||
#export TF_VAR_node_size=GP2.4
|
||||
#export TF_VAR_node_size=g6-standard-6
|
||||
#export TF_VAR_node_size=m7i.xlarge
|
||||
|
||||
|
||||
CLUSTERSIZE=1
|
||||
|
||||
|
||||
@@ -1,23 +1,10 @@
|
||||
resource "scaleway_vpc_private_network" "_" {
|
||||
}
|
||||
|
||||
# This is a kind of hack to use a custom security group with Kapsule.
|
||||
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
|
||||
|
||||
resource "scaleway_instance_security_group" "_" {
|
||||
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
|
||||
inbound_default_policy = "accept"
|
||||
outbound_default_policy = "accept"
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
name = var.cluster_name
|
||||
#region = var.location
|
||||
tags = var.common_tags
|
||||
version = local.k8s_version
|
||||
type = "kapsule"
|
||||
cni = "cilium"
|
||||
delete_additional_resources = true
|
||||
private_network_id = scaleway_vpc_private_network._.id
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_pool" "_" {
|
||||
@@ -30,7 +17,6 @@ resource "scaleway_k8s_pool" "_" {
|
||||
max_size = var.max_nodes_per_pool
|
||||
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
|
||||
autohealing = true
|
||||
depends_on = [ scaleway_instance_security_group._ ]
|
||||
}
|
||||
|
||||
data "scaleway_k8s_version" "_" {
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#/ /kube-halfday.yml.html 200!
|
||||
#/ /kube-fullday.yml.html 200!
|
||||
#/ /kube-twodays.yml.html 200!
|
||||
/ /all.yml.html 200!
|
||||
/ /kube.yml.html 200!
|
||||
|
||||
# And this allows us to do "git clone https://container.training".
|
||||
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack
|
||||
|
||||
110
slides/all.yml
110
slides/all.yml
@@ -1,110 +0,0 @@
|
||||
title: |
|
||||
Docker & Kubernetes
|
||||
|
||||
chat: "[FIXME](https://FIXME.container.training/mattermost/)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2024-04-suadeo.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- containers/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
- # DAY 1
|
||||
#- containers/Docker_Overview.md
|
||||
#- containers/Docker_History.md
|
||||
- containers/Training_Environment.md
|
||||
#- containers/Installing_Docker.md
|
||||
- containers/First_Containers.md
|
||||
- containers/Background_Containers.md
|
||||
- containers/Initial_Images.md
|
||||
- containers/Building_Images_Interactively.md
|
||||
- containers/Building_Images_With_Dockerfiles.md
|
||||
- containers/Cmd_And_Entrypoint.md
|
||||
- containers/Copying_Files_During_Build.md
|
||||
- containers/Exercise_Dockerfile_Basic.md
|
||||
- containers/Dockerfile_Tips.md
|
||||
- containers/Multi_Stage_Builds.md
|
||||
- containers/Container_Networking_Basics.md
|
||||
- # DAY 2
|
||||
- containers/Local_Development_Workflow.md
|
||||
- containers/Getting_Inside.md
|
||||
- containers/Container_Network_Model.md
|
||||
- containers/Compose_For_Dev_Stacks.md
|
||||
- containers/Exercise_Composefile.md
|
||||
- containers/Exercise_Dockerfile_Advanced.md
|
||||
- |
|
||||
# Kubernetes
|
||||
- shared/connecting.md
|
||||
#- k8s/versions-k8s.md
|
||||
- shared/sampleapp.md
|
||||
#- shared/composescale.md
|
||||
#- shared/hastyconclusions.md
|
||||
- shared/composedown.md
|
||||
- k8s/concepts-k8s.md
|
||||
- # DAY 3
|
||||
- k8s/kubectlget.md
|
||||
- k8s/kubectl-run.md
|
||||
- k8s/kubectlexpose.md
|
||||
- k8s/service-types.md
|
||||
- k8s/kubenet.md
|
||||
- k8s/shippingimages.md
|
||||
#- k8s/buildshiprun-selfhosted.md
|
||||
- k8s/buildshiprun-dockerhub.md
|
||||
- k8s/labels-annotations.md
|
||||
- k8s/kubectl-logs.md
|
||||
- k8s/logs-cli.md
|
||||
- exercises/k8sfundamentals-details.md
|
||||
#- k8s/exercise-wordsmith.md
|
||||
- k8s/ourapponkube.md
|
||||
#- k8s/setup-overview.md
|
||||
- k8s/setup-devel.md
|
||||
#- k8s/setup-managed.md
|
||||
#- k8s/setup-selfhosted.md
|
||||
- k8s/localkubeconfig.md
|
||||
- k8s/accessinternal.md
|
||||
#- k8s/kubectlproxy.md
|
||||
- shared/declarative.md
|
||||
- k8s/declarative.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- exercises/localcluster-details.md
|
||||
- # DAY 4
|
||||
#- k8s/kubectlscale.md
|
||||
- shared/yaml.md
|
||||
- k8s/yamldeploy.md
|
||||
- k8s/namespaces.md
|
||||
- k8s/scalingdockercoins.md
|
||||
- shared/hastyconclusions.md
|
||||
- k8s/daemonset.md
|
||||
- k8s/rollout.md
|
||||
- k8s/healthchecks.md
|
||||
#- k8s/healthchecks-more.md
|
||||
- k8s/volumes.md
|
||||
- k8s/configuration.md
|
||||
- k8s/secrets.md
|
||||
- exercises/yaml-details.md
|
||||
- shared/thankyou.md
|
||||
-
|
||||
- |
|
||||
# (Docker extras)
|
||||
- containers/Start_And_Attach.md
|
||||
- containers/Naming_And_Inspecting.md
|
||||
- containers/Labels.md
|
||||
- containers/Advanced_Dockerfiles.md
|
||||
- containers/Network_Drivers.md
|
||||
-
|
||||
- |
|
||||
# (Kubernetes extras)
|
||||
- k8s/k9s.md
|
||||
- k8s/ingress.md
|
||||
@@ -1,4 +1,4 @@
|
||||
## Exercise — Enable RBAC
|
||||
## Exercise — Enable RBAC on our custom cluster
|
||||
|
||||
- Enable RBAC on a manually-deployed control plane
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Exercise — Enable RBAC
|
||||
# Exercise — Enable RBAC on our custom cluster
|
||||
|
||||
- We want to enable RBAC on the "polykube" cluster
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Exercise — Sealed Secrets
|
||||
# Exercise — Sealed Secrets (and more RBAC!)
|
||||
|
||||
This is a "combo exercise" to practice the following concepts:
|
||||
|
||||
|
||||
@@ -198,64 +198,6 @@ Some examples ...
|
||||
|
||||
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
|
||||
|
||||
- We will need an ngrok account for the tunnels
|
||||
|
||||
(a free account is fine)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Ngrok in production
|
||||
|
||||
- Ngrok was initially known for its local webhook development features
|
||||
|
||||
- It now supports production scenarios as well
|
||||
|
||||
(load balancing, WAF, authentication, circuit-breaking...)
|
||||
|
||||
- Including some that are very relevant to Kubernetes
|
||||
|
||||
(e.g. [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
|
||||
|
||||
---
|
||||
|
||||
## Ngrok tokens
|
||||
|
||||
- If you're attending a live training, you might have an ngrok token
|
||||
|
||||
- Look in `~/ngrok.env` and if that file exists, copy it to the stack:
|
||||
|
||||
.lab[
|
||||
|
||||
```bash
|
||||
cp ~/ngrok.env ~/container.training/webhooks/admission/.env
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Starting the whole stack
|
||||
|
||||
.lab[
|
||||
|
||||
- Go to the webhook directory:
|
||||
@@ -274,6 +216,28 @@ cp ~/ngrok.env ~/container.training/webhooks/admission/.env
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
- Probably not for production, though
|
||||
|
||||
(webhook requests and responses now pass through the ngrok platform)
|
||||
|
||||
---
|
||||
|
||||
## Update the webhook configuration
|
||||
|
||||
- We have a webhook configuration in `k8s/webhook-configuration.yaml`
|
||||
|
||||
@@ -46,11 +46,11 @@ In the real world...
|
||||
|
||||
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
|
||||
|
||||
- There are **voluntary** and **involuntary** disruptions
|
||||
- There are **voluntary** and **unvoluntary** disruptions
|
||||
|
||||
- voluntary = directly initiated by humans (including by mistake!)
|
||||
|
||||
- involuntary = everything else
|
||||
- unvoluntary = everything else
|
||||
|
||||
- In this section, we're going to see what they are and how to prevent them
|
||||
|
||||
@@ -64,7 +64,7 @@ In the real world...
|
||||
|
||||
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
|
||||
|
||||
- **Involuntary** disruption (even if it results from human error!)
|
||||
- **Unvoluntary** disruption (even if it results from human error!)
|
||||
|
||||
- Consequence: all workloads on that node become unresponsive
|
||||
|
||||
@@ -116,7 +116,7 @@ In the real world...
|
||||
|
||||
(because a pod is using too much memory and no limit was set)
|
||||
|
||||
- **Involuntary** disruption
|
||||
- **Unvoluntary** disruption
|
||||
|
||||
- Consequence: kubelet starts to *evict* some pods
|
||||
|
||||
@@ -507,7 +507,7 @@ spec:
|
||||
|
||||
???
|
||||
|
||||
:EN:- Voluntary and involuntary disruptions
|
||||
:EN:- Voluntary and unvoluntary disruptions
|
||||
:EN:- Pod Disruption Budgets
|
||||
:FR:- "Disruptions" volontaires et involontaires
|
||||
:FR:- Pod Disruption Budgets
|
||||
|
||||
314
slides/k8s/hpa-v2-keda.md
Normal file
314
slides/k8s/hpa-v2-keda.md
Normal file
@@ -0,0 +1,314 @@
|
||||
# Scaling with custom metrics
|
||||
|
||||
- The HorizontalPodAutoscaler v1 can only scale on Pod CPU usage
|
||||
|
||||
- Sometimes, we need to scale using other metrics:
|
||||
|
||||
- memory
|
||||
|
||||
- requests per second
|
||||
|
||||
- latency
|
||||
|
||||
- active sessions
|
||||
|
||||
- items in a work queue
|
||||
|
||||
- ...
|
||||
|
||||
- The HorizontalPodAutoscaler v2 can do it!
|
||||
|
||||
---
|
||||
|
||||
## Requirements
|
||||
|
||||
⚠️ Autoscaling on custom metrics is fairly complex!
|
||||
|
||||
- We need some metrics system
|
||||
|
||||
(Prometheus is a popular option, but others are possible too)
|
||||
|
||||
- We need our metrics (latency, traffic...) to be fed into the system
|
||||
|
||||
(with Prometheus, this might require a custom exporter)
|
||||
|
||||
- We need to expose these metrics to Kubernetes
|
||||
|
||||
(Kubernetes doesn't "speak" the Prometheus API)
|
||||
|
||||
- Then we can set up autoscaling!
|
||||
|
||||
---
|
||||
|
||||
## The plan
|
||||
|
||||
- We will deploy the DockerCoins demo app
|
||||
|
||||
(one of its components has a bottleneck; its latency will increase under load)
|
||||
|
||||
- We will use Prometheus to collect and store metrics
|
||||
|
||||
- We will deploy a tiny HTTP latency monitor (a Prometheus *exporter*)
|
||||
|
||||
- We will then use KEDA with a "Prometheus Scaler"
|
||||
|
||||
---
|
||||
|
||||
## Deploying DockerCoins
|
||||
|
||||
- That's the easy part!
|
||||
|
||||
.lab[
|
||||
|
||||
- Create a new namespace and switch to it:
|
||||
```bash
|
||||
kubectl create namespace customscaling
|
||||
kns customscaling
|
||||
```
|
||||
|
||||
- Deploy DockerCoins, and scale up the `worker` Deployment:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/dockercoins.yaml
|
||||
kubectl scale deployment worker --replicas=10
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Current state of affairs
|
||||
|
||||
- The `rng` service is a bottleneck
|
||||
|
||||
(it cannot handle more than 10 requests/second)
|
||||
|
||||
- With enough traffic, its latency increases
|
||||
|
||||
(by about 100ms per `worker` Pod after the 3rd worker)
|
||||
|
||||
.lab[
|
||||
|
||||
- Check the `webui` port and open it in your browser:
|
||||
```bash
|
||||
kubectl get service webui
|
||||
```
|
||||
|
||||
- Check the `rng` ClusterIP and test it with e.g. `httping`:
|
||||
```bash
|
||||
kubectl get service rng
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Measuring latency
|
||||
|
||||
- We will use a tiny custom Prometheus exporter, [httplat](https://github.com/jpetazzo/httplat)
|
||||
|
||||
- `httplat` exposes Prometheus metrics on port 9080 (by default)
|
||||
|
||||
- It monitors exactly one URL, that must be passed as a command-line argument
|
||||
|
||||
.lab[
|
||||
|
||||
- Deploy `httplat`:
|
||||
```bash
|
||||
kubectl create deployment httplat --image=jpetazzo/httplat -- httplat http://rng/
|
||||
```
|
||||
|
||||
- Expose it:
|
||||
```bash
|
||||
kubectl expose deployment httplat --port=9080
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Measuring latency in the real world
|
||||
|
||||
- We are using this tiny custom exporter for simplicity
|
||||
|
||||
- A more common method to collect latency is to use a service mesh
|
||||
|
||||
- A service mesh can usually collect latency for *all* services automatically
|
||||
|
||||
---
|
||||
|
||||
## Install Prometheus
|
||||
|
||||
- We will use the Prometheus community Helm chart
|
||||
|
||||
(because we can configure it dynamically with annotations)
|
||||
|
||||
.lab[
|
||||
|
||||
- If it's not installed yet on the cluster, install Prometheus:
|
||||
```bash
|
||||
helm upgrade --install prometheus prometheus \
|
||||
--repo https://prometheus-community.github.io/helm-charts \
|
||||
--namespace prometheus --create-namespace \
|
||||
--set server.service.type=NodePort \
|
||||
--set server.service.nodePort=30090 \
|
||||
--set server.persistentVolume.enabled=false \
|
||||
--set alertmanager.enabled=false
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Configure Prometheus
|
||||
|
||||
- We can use annotations to tell Prometheus to collect the metrics
|
||||
|
||||
.lab[
|
||||
|
||||
- Tell Prometheus to "scrape" our latency exporter:
|
||||
```bash
|
||||
kubectl annotate service httplat \
|
||||
prometheus.io/scrape=true \
|
||||
prometheus.io/port=9080 \
|
||||
prometheus.io/path=/metrics
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
If you deployed Prometheus differently, you might have to configure it manually.
|
||||
|
||||
You'll need to instruct it to scrape http://httplat.customscaling.svc:9080/metrics.
|
||||
|
||||
---
|
||||
|
||||
## Make sure that metrics get collected
|
||||
|
||||
- Before moving on, confirm that Prometheus has our metrics
|
||||
|
||||
.lab[
|
||||
|
||||
- Connect to Prometheus
|
||||
|
||||
(if you installed it as instructed above, it is exposed as a NodePort on port 30090)
|
||||
|
||||
- Check that `httplat` metrics are available
|
||||
|
||||
- You can try to graph the following PromQL expression:
|
||||
```
|
||||
rate(httplat_latency_seconds_sum[2m])/rate(httplat_latency_seconds_count[2m])
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- Make sure that the exporter works:
|
||||
|
||||
- get the ClusterIP of the exporter with `kubectl get svc httplat`
|
||||
|
||||
- `curl http://<ClusterIP>:9080/metrics`
|
||||
|
||||
- check that the result includes the `httplat` histogram
|
||||
|
||||
- Make sure that Prometheus is scraping the exporter:
|
||||
|
||||
- go to `Status` / `Targets` in Prometheus
|
||||
|
||||
- make sure that `httplat` shows up in there
|
||||
|
||||
---
|
||||
|
||||
## Installing KEDA
|
||||
|
||||
- Multiple possibilities, as explained in the [documentation](https://keda.sh/docs/2.12/deploy/)
|
||||
|
||||
- For simplicity we can use the YAML version with admission webhooks
|
||||
|
||||
---
|
||||
|
||||
## Creating a "Scaler"
|
||||
|
||||
- With KEDA, instead of creating an HPA policy directly, we create a "Scaled Object"
|
||||
|
||||
- The "Scaled Object" will take care of:
|
||||
|
||||
- registering and exposing our custom metric in KEDA's aggregation layer
|
||||
|
||||
- creating the HPA policy that consumes that metric
|
||||
|
||||
- See the [Prometheus Scaler documentation](https://keda.sh/docs/2.12/scalers/prometheus/)
|
||||
|
||||
---
|
||||
|
||||
## Witness the marvel of custom autoscaling
|
||||
|
||||
(Sort of)
|
||||
|
||||
- After a short while, the `rng` Deployment will scale up
|
||||
|
||||
- It should scale up until the latency drops below 100ms
|
||||
|
||||
(and continue to scale up a little bit more after that)
|
||||
|
||||
- Then, since the latency will be well below 100ms, it will scale down
|
||||
|
||||
- ... and back up again, etc.
|
||||
|
||||
(See pictures on next slides!)
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## What's going on?
|
||||
|
||||
- The autoscaler's information is slightly out of date
|
||||
|
||||
(not by much; probably between 1 and 2 minutes)
|
||||
|
||||
- It's enough to cause the oscillations to happen
|
||||
|
||||
- One possible fix is to tell the autoscaler to wait a bit after each action
|
||||
|
||||
- It will reduce oscillations, but will also slow down its reaction time
|
||||
|
||||
(and therefore, how fast it reacts to a peak of traffic)
|
||||
|
||||
---
|
||||
|
||||
## What's going on? Take 2
|
||||
|
||||
- As soon as the measured latency is *significantly* below our target (100ms) ...
|
||||
|
||||
the autoscaler tries to scale down
|
||||
|
||||
- If the latency is measured at 20ms ...
|
||||
|
||||
the autoscaler will try to *divide the number of pods by five!*
|
||||
|
||||
- One possible solution: apply a formula to the measured latency,
|
||||
so that values between e.g. 10 and 100ms get very close to 100ms.
|
||||
|
||||
- Another solution: instead of targeting a specific latency,
|
||||
target a 95th percentile latency or something similar, using
|
||||
a more advanced PromQL expression (and leveraging the fact that
|
||||
we have histograms instead of raw values).
|
||||
|
||||
???
|
||||
|
||||
:EN:- Autoscaling with custom metrics
|
||||
:FR:- Suivi de charge avancé (HPAv2)
|
||||
@@ -337,7 +337,7 @@ kustomize edit add label app.kubernetes.io/name:dockercoins
|
||||
|
||||
- Assuming that `commonLabels` have been set as shown on the previous slide:
|
||||
```bash
|
||||
kubectl apply -k . --prune --selector app.kubernetes.io/name=dockercoins
|
||||
kubectl apply -k . --prune --selector app.kubernetes.io.name=dockercoins
|
||||
```
|
||||
|
||||
- ... This command removes resources that have been removed from the kustomization
|
||||
|
||||
@@ -536,12 +536,12 @@ Note: the `apiVersion` field appears to be optional.
|
||||
- Excerpt:
|
||||
```yaml
|
||||
generate:
|
||||
kind: LimitRange
|
||||
name: default-limitrange
|
||||
namespace: "{{request.object.metadata.name}}"
|
||||
data:
|
||||
spec:
|
||||
limits:
|
||||
kind: LimitRange
|
||||
name: default-limitrange
|
||||
namespace: "{{request.object.metadata.name}}"
|
||||
data:
|
||||
spec:
|
||||
limits:
|
||||
```
|
||||
|
||||
- Note that we have to specify the `namespace`
|
||||
|
||||
@@ -533,193 +533,6 @@ This set of resources makes sure that this service won't be killed (as long as i
|
||||
|
||||
---
|
||||
|
||||
# Defining min, max, and default resources
|
||||
|
||||
- We can create LimitRange objects to indicate any combination of:
|
||||
|
||||
- min and/or max resources allowed per pod
|
||||
|
||||
- default resource *limits*
|
||||
|
||||
- default resource *requests*
|
||||
|
||||
- maximal burst ratio (*limit/request*)
|
||||
|
||||
- LimitRange objects are namespaced
|
||||
|
||||
- They apply to their namespace only
|
||||
|
||||
---
|
||||
|
||||
## LimitRange example
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: LimitRange
|
||||
metadata:
|
||||
name: my-very-detailed-limitrange
|
||||
spec:
|
||||
limits:
|
||||
- type: Container
|
||||
min:
|
||||
cpu: "100m"
|
||||
max:
|
||||
cpu: "2000m"
|
||||
memory: "1Gi"
|
||||
default:
|
||||
cpu: "500m"
|
||||
memory: "250Mi"
|
||||
defaultRequest:
|
||||
cpu: "500m"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Example explanation
|
||||
|
||||
The YAML on the previous slide shows an example LimitRange object specifying very detailed limits on CPU usage,
|
||||
and providing defaults on RAM usage.
|
||||
|
||||
Note the `type: Container` line: in the future,
|
||||
it might also be possible to specify limits
|
||||
per Pod, but it's not [officially documented yet](https://github.com/kubernetes/website/issues/9585).
|
||||
|
||||
---
|
||||
|
||||
## LimitRange details
|
||||
|
||||
- LimitRange restrictions are enforced only when a Pod is created
|
||||
|
||||
(they don't apply retroactively)
|
||||
|
||||
- They don't prevent creation of e.g. an invalid Deployment or DaemonSet
|
||||
|
||||
(but the pods will not be created as long as the LimitRange is in effect)
|
||||
|
||||
- If there are multiple LimitRange restrictions, they all apply together
|
||||
|
||||
(which means that it's possible to specify conflicting LimitRanges,
|
||||
<br/>preventing any Pod from being created)
|
||||
|
||||
- If a LimitRange specifies a `max` for a resource but no `default`,
|
||||
<br/>that `max` value becomes the `default` limit too
|
||||
|
||||
---
|
||||
|
||||
# Namespace quotas
|
||||
|
||||
- We can also set quotas per namespace
|
||||
|
||||
- Quotas apply to the total usage in a namespace
|
||||
|
||||
(e.g. total CPU limits of all pods in a given namespace)
|
||||
|
||||
- Quotas can apply to resource limits and/or requests
|
||||
|
||||
(like the CPU and memory limits that we saw earlier)
|
||||
|
||||
- Quotas can also apply to other resources:
|
||||
|
||||
- "extended" resources (like GPUs)
|
||||
|
||||
- storage size
|
||||
|
||||
- number of objects (number of pods, services...)
|
||||
|
||||
---
|
||||
|
||||
## Creating a quota for a namespace
|
||||
|
||||
- Quotas are enforced by creating a ResourceQuota object
|
||||
|
||||
- ResourceQuota objects are namespaced, and apply to their namespace only
|
||||
|
||||
- We can have multiple ResourceQuota objects in the same namespace
|
||||
|
||||
- The most restrictive values are used
|
||||
|
||||
---
|
||||
|
||||
## Limiting total CPU/memory usage
|
||||
|
||||
- The following YAML specifies an upper bound for *limits* and *requests*:
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ResourceQuota
|
||||
metadata:
|
||||
name: a-little-bit-of-compute
|
||||
spec:
|
||||
hard:
|
||||
requests.cpu: "10"
|
||||
requests.memory: 10Gi
|
||||
limits.cpu: "20"
|
||||
limits.memory: 20Gi
|
||||
```
|
||||
|
||||
These quotas will apply to the namespace where the ResourceQuota is created.
|
||||
|
||||
---
|
||||
|
||||
## Limiting number of objects
|
||||
|
||||
- The following YAML specifies how many objects of specific types can be created:
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ResourceQuota
|
||||
metadata:
|
||||
name: quota-for-objects
|
||||
spec:
|
||||
hard:
|
||||
pods: 100
|
||||
services: 10
|
||||
secrets: 10
|
||||
configmaps: 10
|
||||
persistentvolumeclaims: 20
|
||||
services.nodeports: 0
|
||||
services.loadbalancers: 0
|
||||
count/roles.rbac.authorization.k8s.io: 10
|
||||
```
|
||||
|
||||
(The `count/` syntax allows limiting arbitrary objects, including CRDs.)
|
||||
|
||||
---
|
||||
|
||||
## YAML vs CLI
|
||||
|
||||
- Quotas can be created with a YAML definition
|
||||
|
||||
- ...Or with the `kubectl create quota` command
|
||||
|
||||
- Example:
|
||||
```bash
|
||||
kubectl create quota my-resource-quota --hard=pods=300,limits.memory=300Gi
|
||||
```
|
||||
|
||||
- With both YAML and CLI form, the values are always under the `hard` section
|
||||
|
||||
(there is no `soft` quota)
|
||||
|
||||
---
|
||||
|
||||
## Viewing current usage
|
||||
|
||||
When a ResourceQuota is created, we can see how much of it is used:
|
||||
|
||||
```
|
||||
kubectl describe resourcequota my-resource-quota
|
||||
|
||||
Name: my-resource-quota
|
||||
Namespace: default
|
||||
Resource Used Hard
|
||||
-------- ---- ----
|
||||
pods 12 100
|
||||
services 1 5
|
||||
services.loadbalancers 0 0
|
||||
services.nodeports 0 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Advanced quotas and PriorityClass
|
||||
|
||||
- Pods can have a *priority*
|
||||
@@ -754,130 +567,6 @@ services.nodeports 0 0
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## PriorityClass and ResourceQuotas
|
||||
|
||||
- A ResourceQuota can include a list of *scopes* or a *scope selector*
|
||||
|
||||
- In that case, the quota will only apply to the scoped resources
|
||||
|
||||
- Example: limit the resources allocated to "high priority" Pods
|
||||
|
||||
- In that case, make sure that the quota is created in every Namespace
|
||||
|
||||
(or use *admission configuration* to enforce it)
|
||||
|
||||
- See the [resource quotas documentation][quotadocs] for details
|
||||
|
||||
[quotadocs]: https://kubernetes.io/docs/concepts/policy/resource-quotas/#resource-quota-per-priorityclass
|
||||
|
||||
---
|
||||
|
||||
# Limiting resources in practice
|
||||
|
||||
- We have at least three mechanisms:
|
||||
|
||||
- requests and limits per Pod
|
||||
|
||||
- LimitRange per namespace
|
||||
|
||||
- ResourceQuota per namespace
|
||||
|
||||
- Let's see one possible strategy to get started with resource limits
|
||||
|
||||
---
|
||||
|
||||
## Set a LimitRange
|
||||
|
||||
- In each namespace, create a LimitRange object
|
||||
|
||||
- Set a small default CPU request and CPU limit
|
||||
|
||||
(e.g. "100m")
|
||||
|
||||
- Set a default memory request and limit depending on your most common workload
|
||||
|
||||
- for Java, Ruby: start with "1G"
|
||||
|
||||
- for Go, Python, PHP, Node: start with "250M"
|
||||
|
||||
- Set upper bounds slightly below your expected node size
|
||||
|
||||
(80-90% of your node size, with at least a 500M memory buffer)
|
||||
|
||||
---
|
||||
|
||||
## Set a ResourceQuota
|
||||
|
||||
- In each namespace, create a ResourceQuota object
|
||||
|
||||
- Set generous CPU and memory limits
|
||||
|
||||
(e.g. half the cluster size if the cluster hosts multiple apps)
|
||||
|
||||
- Set generous objects limits
|
||||
|
||||
- these limits should not be here to constrain your users
|
||||
|
||||
- they should catch a runaway process creating many resources
|
||||
|
||||
- example: a custom controller creating many pods
|
||||
|
||||
---
|
||||
|
||||
## Observe, refine, iterate
|
||||
|
||||
- Observe the resource usage of your pods
|
||||
|
||||
(we will see how in the next chapter)
|
||||
|
||||
- Adjust individual pod limits
|
||||
|
||||
- If you see trends: adjust the LimitRange
|
||||
|
||||
(rather than adjusting every individual set of pod limits)
|
||||
|
||||
- Observe the resource usage of your namespaces
|
||||
|
||||
(with `kubectl describe resourcequota ...`)
|
||||
|
||||
- Rinse and repeat regularly
|
||||
|
||||
---
|
||||
|
||||
## Underutilization
|
||||
|
||||
- Remember: when assigning a pod to a node, the scheduler looks at *requests*
|
||||
|
||||
(not at current utilization on the node)
|
||||
|
||||
- If pods request resources but don't use them, this can lead to underutilization
|
||||
|
||||
(because the scheduler will consider that the node is full and can't fit new pods)
|
||||
|
||||
---
|
||||
|
||||
## Viewing a namespace limits and quotas
|
||||
|
||||
- `kubectl describe namespace` will display resource limits and quotas
|
||||
|
||||
.lab[
|
||||
|
||||
- Try it out:
|
||||
```bash
|
||||
kubectl describe namespace default
|
||||
```
|
||||
|
||||
- View limits and quotas for *all* namespaces:
|
||||
```bash
|
||||
kubectl describe namespace
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Additional resources
|
||||
|
||||
- [A Practical Guide to Setting Kubernetes Requests and Limits](http://blog.kubecost.com/blog/requests-and-limits/)
|
||||
|
||||
@@ -166,15 +166,17 @@
|
||||
|
||||
- [Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way) by Kelsey Hightower
|
||||
|
||||
*step by step guide to install Kubernetes on GCP, with certificates, HA...*
|
||||
- step by step guide to install Kubernetes on Google Cloud
|
||||
|
||||
- covers certificates, high availability ...
|
||||
|
||||
- *“Kubernetes The Hard Way is optimized for learning, which means taking the long route to ensure you understand each task required to bootstrap a Kubernetes cluster.”*
|
||||
|
||||
- [Deep Dive into Kubernetes Internals for Builders and Operators](https://www.youtube.com/watch?v=3KtEAa7_duA)
|
||||
|
||||
*conference talk setting up a simplified Kubernetes cluster - no security or HA*
|
||||
- conference presentation showing step-by-step control plane setup
|
||||
|
||||
- 🇫🇷[Démystifions les composants internes de Kubernetes](https://www.youtube.com/watch?v=OCMNA0dSAzc)
|
||||
|
||||
*improved version of the previous one, with certs and recent k8s versions*
|
||||
- emphasis on simplicity, not on security and availability
|
||||
|
||||
---
|
||||
|
||||
|
||||
86
slides/kube.yml
Normal file
86
slides/kube.yml
Normal file
@@ -0,0 +1,86 @@
|
||||
title: |
|
||||
Advanced Kubernetes
|
||||
|
||||
#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
|
||||
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
|
||||
chat: "[Slack](https://ap-guest.slack.com/archives/C88FPJY23)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-12-demonware.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
#- shared/chat-room-im.md
|
||||
#- shared/chat-room-slack.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
-
|
||||
- shared/prereqs.md
|
||||
- shared/handson.md
|
||||
#- shared/webssh.md
|
||||
- shared/connecting.md
|
||||
#- k8s/versions-k8s.md
|
||||
- k8s/demo-apps.md
|
||||
- k8s/authn-authz.md
|
||||
- k8s/architecture.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc-easy.md
|
||||
- exercises/rbac-details.md
|
||||
-
|
||||
- k8s/dmuc-medium.md
|
||||
- k8s/dmuc-hard.md
|
||||
#- k8s/multinode.md
|
||||
#- k8s/cni.md
|
||||
- k8s/apilb.md
|
||||
- k8s/cni-internals.md
|
||||
- k8s/control-plane-auth.md
|
||||
- k8s/internal-apis.md
|
||||
- k8s/staticpods.md
|
||||
- k8s/cluster-upgrade.md
|
||||
- exercises/polykuberbac-details.md
|
||||
-
|
||||
- k8s/extending-api.md
|
||||
- k8s/crd.md
|
||||
- k8s/operators.md
|
||||
- k8s/sealed-secrets.md
|
||||
- k8s/operators-design.md
|
||||
- k8s/operators-example.md
|
||||
- k8s/owners-and-dependents.md
|
||||
- k8s/finalizers.md
|
||||
- k8s/events.md
|
||||
- exercises/sealed-secrets-details.md
|
||||
-
|
||||
- k8s/admission.md
|
||||
- k8s/kyverno.md
|
||||
- k8s/cert-manager.md
|
||||
- k8s/cainjector.md
|
||||
- k8s/resource-limits.md
|
||||
- exercises/kyverno-ingress-domain-name-details.md
|
||||
-
|
||||
- k8s/cluster-sizing.md
|
||||
- k8s/disruptions.md
|
||||
- k8s/cluster-autoscaler.md
|
||||
- k8s/horizontal-pod-autoscaler.md
|
||||
- k8s/metrics-server.md
|
||||
- k8s/aggregation-layer.md
|
||||
- k8s/hpa-v2-keda.md
|
||||
- shared/thankyou.md
|
||||
-
|
||||
- |
|
||||
# (Extra material)
|
||||
- k8s/apiserver-deepdive.md
|
||||
- k8s/ingress.md
|
||||
- k8s/ingress-advanced.md
|
||||
#- k8s/ingress-canary.md
|
||||
- k8s/ingress-tls.md
|
||||
- shared/thankyou.md
|
||||
@@ -1,8 +1,10 @@
|
||||
## Introductions
|
||||
|
||||
- Hello! I'm Jérôme Petazzoni ([@jpetazzo@hachyderm.io], Enix SAS)
|
||||
- Hello! I'm Jérôme Petazzoni ([@jpetazzo], [@jpetazzo@hachyderm.io], Ardan Labs)
|
||||
|
||||
- The workshop will run from FIXME
|
||||
- The training will run from 8am to noon (Vancouver) / 4pm to 8pm (Dublin)
|
||||
|
||||
- We'll have regular breaks
|
||||
|
||||
- Feel free to interrupt for questions at any time
|
||||
|
||||
@@ -10,6 +12,8 @@
|
||||
|
||||
- Live feedback, questions, help: @@CHAT@@
|
||||
|
||||
<!-- -->
|
||||
|
||||
[@alexbuisine]: https://twitter.com/alexbuisine
|
||||
[EphemeraSearch]: https://ephemerasearch.com/
|
||||
[@jpetazzo]: https://twitter.com/jpetazzo
|
||||
|
||||
@@ -1,11 +1,3 @@
|
||||
# Note: Ngrok doesn't have an "anonymous" mode anymore.
|
||||
# This means that it requires an authentication token.
|
||||
# That said, all you need is a free account; so if you're
|
||||
# doing the labs on admission webhooks and want to try
|
||||
# this Compose file, I highly recommend that you create
|
||||
# an Ngrok account and set the NGROK_AUTHTOKEN environment
|
||||
# variable to your authentication token.
|
||||
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
@@ -13,8 +5,6 @@ services:
|
||||
ngrok-echo:
|
||||
image: ngrok/ngrok
|
||||
command: http --log=stdout localhost:3000
|
||||
environment:
|
||||
- NGROK_AUTHTOKEN
|
||||
ports:
|
||||
- 3000
|
||||
|
||||
@@ -26,8 +16,6 @@ services:
|
||||
ngrok-flask:
|
||||
image: ngrok/ngrok
|
||||
command: http --log=stdout localhost:5000
|
||||
environment:
|
||||
- NGROK_AUTHTOKEN
|
||||
ports:
|
||||
- 5000
|
||||
|
||||
|
||||
Reference in New Issue
Block a user