Compare commits


1 Commit

Author SHA1 Message Date
Jérôme Petazzoni
1c18aa1837 ⚛️ HighFive 2023Q3 content update 2023-10-10 15:09:58 +02:00
68 changed files with 973 additions and 3610 deletions

View File

@@ -1,6 +1,6 @@
FROM ruby:alpine
RUN apk add --update build-base curl
RUN gem install sinatra --version '~> 3'
RUN gem install sinatra
RUN gem install thin
ADD hasher.rb /
CMD ["ruby", "hasher.rb"]

View File

@@ -16,7 +16,8 @@ spec:
hostPath:
path: /root
tolerations:
- operator: Exists
- effect: NoSchedule
operator: Exists
initContainers:
- name: hacktheplanet
image: alpine
@@ -26,7 +27,7 @@ spec:
command:
- sh
- -c
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys >> /root/.ssh/authorized_keys"
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys > /root/.ssh/authorized_keys"
containers:
- name: web
image: nginx

View File

@@ -1,13 +0,0 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: my-pdb
spec:
#minAvailable: 2
#minAvailable: 90%
maxUnavailable: 1
#maxUnavailable: 10%
selector:
matchLabels:
app: my-app

View File

@@ -1,27 +0,0 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: sysctl
spec:
selector:
matchLabels:
app: sysctl
template:
metadata:
labels:
app: sysctl
spec:
tolerations:
- operator: Exists
initContainers:
- name: sysctl
image: alpine
securityContext:
privileged: true
command:
- sysctl
- fs.inotify.max_user_instances=99999
containers:
- name: pause
image: registry.k8s.io/pause:3.8

View File

@@ -10,22 +10,13 @@ fi
. ~/creds/creds.cloudflare.dns
cloudflare() {
case "$1" in
GET|POST|DELETE)
METHOD="$1"
shift
;;
*)
METHOD=""
;;
esac
URI=$1
shift
http --ignore-stdin $METHOD https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
http https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
}
_list_zones() {
cloudflare zones?per_page=100 | jq -r .result[].name
cloudflare zones | jq -r .result[].name
}
_get_zone_id() {
@@ -41,15 +32,6 @@ _populate_zone() {
done
}
_clear_zone() {
ZONE_ID=$(_get_zone_id $1)
for RECORD_ID in $(
cloudflare zones/$ZONE_ID/dns_records | jq -r .result[].id
); do
cloudflare DELETE zones/$ZONE_ID/dns_records/$RECORD_ID
done
}
_add_zone() {
cloudflare zones "name=$1"
}
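
A quick usage sketch of the wrapper above, assuming the variant that accepts an optional HTTP method as its first argument (zone names, record values, and IDs below are made up for illustration):

```bash
# List up to 100 zones on the account (no method given, so httpie defaults to GET):
cloudflare "zones?per_page=100" | jq -r '.result[].name'

# Create an A record in a zone (httpie turns key=value pairs into JSON fields;
# ttl:=300 sends a JSON number instead of a string):
ZONE_ID=$(_get_zone_id example.com)
cloudflare POST "zones/$ZONE_ID/dns_records" \
  type=A name=www.example.com content=203.0.113.10 ttl:=300

# Delete a record by ID (RECORD_ID would come from a previous listing):
cloudflare DELETE "zones/$ZONE_ID/dns_records/$RECORD_ID"
```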

View File

@@ -1,9 +1,7 @@
#!/bin/sh
set -eu
# https://open-api.netlify.com/#tag/dnsZone
[ "${1-}" ] || {
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training".
@@ -14,13 +12,13 @@ set -eu
echo "$0 del <recordid>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu A 185.145.250.0"
echo "$0 add eu 185.145.250.0"
echo ""
exit 1
}
NETLIFY_CONFIG_FILE=~/.config/netlify/config.json
if ! [ "${DOMAIN-}" ]; then
if ! [ "$DOMAIN" ]; then
DOMAIN=container.training
fi
@@ -51,29 +49,27 @@ ZONE_ID=$(netlify dns_zones |
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A" or .type=="AAAA") | [.hostname, .type, .value, .id] | @tsv' |
sort |
column --table
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
_add() {
NAME=$1.$DOMAIN
TYPE=$2
VALUE=$3
ADDR=$2
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="'$TYPE'" and .value=="'$VALUE'")' |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=$TYPE hostname=$NAME value=$VALUE ttl=300
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
@@ -92,7 +88,7 @@ case "$1" in
_list
;;
add)
_add $2 $3 $4
_add $2 $3
;;
del)
_del $2

View File

@@ -1,29 +1,17 @@
#!/bin/sh
#
# Baseline resource usage per vcluster in our usecase:
# 500 MB RAM
# 10% CPU
# (See https://docs.google.com/document/d/1n0lwp6rQKQUIuo_A5LQ1dgCzrmjkDjmDtNj1Jn92UrI)
# PRO2-XS = 4 core, 16 gb
PROVIDER=scaleway
# deploy big cluster
#TF_VAR_node_size=g6-standard-6 \
#TF_VAR_nodes_per_cluster=5 \
#TF_VAR_location=eu-west \
case "$PROVIDER" in
linode)
export TF_VAR_node_size=g6-standard-6
export TF_VAR_location=eu-west
;;
scaleway)
export TF_VAR_node_size=PRO2-XS
export TF_VAR_location=fr-par-2
;;
esac
./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk
TF_VAR_node_size=PRO2-XS \
TF_VAR_nodes_per_cluster=5 \
TF_VAR_location=fr-par-2 \
./labctl create --mode mk8s --settings settings/mk8s.env --provider scaleway --tag konk
# set kubeconfig file
export KUBECONFIG=~/kubeconfig
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
# set external_ip labels
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
@@ -33,11 +21,3 @@ done
# vcluster all the things
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 50
# install prometheus stack because that's cool
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
--namespace prom-system --create-namespace \
kube-prometheus-stack kube-prometheus-stack
# and also fix sysctl
kubectl apply -f ../k8s/sysctl.yaml --namespace kube-system
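
A rough capacity check based on the baseline figures quoted in the comments at the top of this script (about 500 MB RAM and 10% of a CPU per vcluster, on PRO2-XS nodes with 4 cores and 16 GB). This is purely illustrative arithmetic, not part of the script:

```bash
# Illustrative capacity estimate per PRO2-XS node:
NODE_RAM_MB=16000; NODE_CORES=4
VCLUSTER_RAM_MB=500; VCLUSTER_CPU_PERCENT=10
echo "vclusters per node (RAM): $(( NODE_RAM_MB / VCLUSTER_RAM_MB ))"            # 32
echo "vclusters per node (CPU): $(( NODE_CORES * 100 / VCLUSTER_CPU_PERCENT ))"  # 40
# RAM is the limiting factor (~30 vclusters per node), so the 50 students
# targeted below fit on two nodes in theory; a 5-node pool leaves headroom.
```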

View File

@@ -321,7 +321,6 @@ _cmd_clusterize() {
pssh "
set -e
grep PSSH_ /etc/ssh/sshd_config || echo 'AcceptEnv PSSH_*' | sudo tee -a /etc/ssh/sshd_config
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
sudo systemctl restart ssh.service"
pssh -I < tags/$TAG/clusters.txt "
@@ -393,7 +392,7 @@ _cmd_docker() {
##VERSION## https://github.com/docker/compose/releases
COMPOSE_VERSION=v2.11.1
COMPOSE_PLATFORM='linux-$(uname -m)'
# Just in case you need Compose 1.X, you can use the following lines.
# (But it will probably only work for x86_64 machines.)
#COMPOSE_VERSION=1.29.2
@@ -422,18 +421,18 @@ _cmd_kubebins() {
TAG=$1
need_tag
##VERSION##
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
##VERSION##
case "$KUBEVERSION" in
1.19.*)
ETCD_VERSION=v3.4.13
CNI_VERSION=v0.8.7
;;
*)
ETCD_VERSION=v3.5.10
ETCD_VERSION=v3.5.9
CNI_VERSION=v1.3.0
;;
esac
@@ -467,36 +466,24 @@ _cmd_kubepkgs() {
TAG=$1
need_tag
# Prior September 2023, there was a single Kubernetes package repo that
# contained packages for all versions, so we could just add that repo
# and install whatever was the latest version available there.
# Things have changed (versions after September 2023, e.g. 1.28.3 are
# not in the old repo) and now there is a different repo for each
# minor version, so we need to figure out what minor version we are
# installing to add the corresponding repo.
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
# Since the new repo doesn't have older versions, add a safety check here.
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
if [ "$MINORVERSION" -lt 24 ]; then
die "Cannot install kubepkgs for versions before 1.24."
fi
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
if [ "$KUBEVERSION" ]; then
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION-*
Pin-Priority: 1000
EOF"
fi
# As of February 27th, 2023, packages.cloud.google.com seems broken
# (serves HTTP 500 errors for the GPG key), so let's pre-load that key.
pssh -I "sudo apt-key add -" < lib/kubernetes-apt-key.gpg
# Install packages
pssh --timeout 200 "
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
#curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
#sudo apt-key add - &&
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
@@ -504,7 +491,7 @@ EOF"
sudo apt-mark hold kubelet kubeadm kubectl &&
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubecolor' | sudo tee /etc/bash_completion.d/k &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
}
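
To make the repo-splitting logic described in the comments above concrete, here is a small sketch (with an illustrative version number) of how a full Kubernetes version maps to its per-minor-version package repository:

```bash
# Illustrative only: pkgs.k8s.io replaced the old single repo in September 2023,
# and there is now one repo per minor version.
KUBEVERSION=1.28.3
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"   # -> 1.28
echo "deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg]" \
     "https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /"
# A 1.29.x KUBEVERSION would instead point at .../stable:/v1.29/deb/,
# which is why the minor version has to be computed before adding the repo.
```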
@@ -517,7 +504,6 @@ _cmd_kubeadm() {
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
IGNORE_SWAP="- Swap"
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
fi
# Install a valid configuration for containerd
@@ -541,7 +527,6 @@ nodeRegistration:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -555,7 +540,6 @@ nodeRegistration:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
@@ -638,31 +622,6 @@ _cmd_kubetools() {
;;
esac
# Install ArgoCD CLI
##VERSION## https://github.com/argoproj/argo-cd/releases/latest
URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-${ARCH}
pssh "
if [ ! -x /usr/local/bin/argocd ]; then
sudo curl -o /usr/local/bin/argocd -fsSL $URL
sudo chmod +x /usr/local/bin/argocd
argocd completion bash | sudo tee /etc/bash_completion.d/argocd
argocd version --client
fi"
# Install Flux CLI
##VERSION## https://github.com/fluxcd/flux2/releases
FLUX_VERSION=2.3.0
FILENAME=flux_${FLUX_VERSION}_linux_${ARCH}
URL=https://github.com/fluxcd/flux2/releases/download/v$FLUX_VERSION/$FILENAME.tar.gz
pssh "
if [ ! -x /usr/local/bin/flux ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx flux
sudo chmod +x /usr/local/bin/flux
flux completion bash | sudo tee /etc/bash_completion.d/flux
flux --version
fi"
# Install kubectx and kubens
pssh "
set -e
@@ -694,7 +653,7 @@ EOF
# Install stern
##VERSION## https://github.com/stern/stern/releases
STERN_VERSION=1.29.0
STERN_VERSION=1.22.0
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
pssh "
@@ -716,7 +675,7 @@ EOF
# Install kustomize
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
KUSTOMIZE_VERSION=v5.4.1
KUSTOMIZE_VERSION=v4.5.7
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kustomize ]; then
@@ -747,16 +706,6 @@ EOF
aws-iam-authenticator version
fi"
# Install jless (jless.io)
pssh "
if [ ! -x /usr/local/bin/jless ]; then
##VERSION##
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
sudo mv jless /usr/local/bin
fi"
# Install the krew package manager
pssh "
if [ ! -d /home/$USER_LOGIN/.krew ]; then
@@ -768,31 +717,21 @@ EOF
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
fi"
# Install kubecolor
KUBECOLOR_VERSION=0.4.0
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kubecolor ]; then
##VERSION##
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kubecolor
fi"
# Install k9s
pssh "
if [ ! -x /usr/local/bin/k9s ]; then
FILENAME=k9s_Linux_$ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
sudo tar -C /usr/local/bin -zx k9s
sudo tar -zxvf- -C /usr/local/bin k9s
k9s version
fi"
# Install popeye
pssh "
if [ ! -x /usr/local/bin/popeye ]; then
FILENAME=popeye_Linux_$ARCH.tar.gz &&
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
sudo tar -C /usr/local/bin -zx popeye
sudo tar -zxvf- -C /usr/local/bin popeye
popeye version
fi"
@@ -802,10 +741,10 @@ EOF
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
pssh "
if [ ! -x /usr/local/bin/tilt ]; then
TILT_VERSION=0.33.13
TILT_VERSION=0.22.15
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
sudo tar -C /usr/local/bin -zx tilt
sudo tar -zxvf- -C /usr/local/bin tilt
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
tilt version
fi"
@@ -847,8 +786,7 @@ EOF
fi"
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
KUBESEAL_VERSION=0.26.2
URL=https://github.com/bitnami-labs/sealed-secrets/releases/download/v${KUBESEAL_VERSION}/kubeseal-${KUBESEAL_VERSION}-linux-${ARCH}.tar.gz
KUBESEAL_VERSION=0.17.4
#case $ARCH in
#amd64) FILENAME=kubeseal-linux-amd64;;
#arm64) FILENAME=kubeseal-arm64;;
@@ -856,13 +794,13 @@ EOF
#esac
pssh "
if [ ! -x /usr/local/bin/kubeseal ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kubeseal
curl -fsSL https://github.com/bitnami-labs/sealed-secrets/releases/download/v$KUBESEAL_VERSION/kubeseal-$KUBESEAL_VERSION-linux-$ARCH.tar.gz |
sudo tar -zxvf- -C /usr/local/bin kubeseal
kubeseal --version
fi"
##VERSION## https://github.com/vmware-tanzu/velero/releases
VELERO_VERSION=1.13.2
VELERO_VERSION=1.11.0
pssh "
if [ ! -x /usr/local/bin/velero ]; then
curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v$VELERO_VERSION/velero-v$VELERO_VERSION-linux-$ARCH.tar.gz |
@@ -872,21 +810,13 @@ EOF
fi"
##VERSION## https://github.com/doitintl/kube-no-trouble/releases
KUBENT_VERSION=0.7.2
KUBENT_VERSION=0.7.0
pssh "
if [ ! -x /usr/local/bin/kubent ]; then
curl -fsSL https://github.com/doitintl/kube-no-trouble/releases/download/${KUBENT_VERSION}/kubent-${KUBENT_VERSION}-linux-$ARCH.tar.gz |
sudo tar -zxvf- -C /usr/local/bin kubent
kubent --version
fi"
# Ngrok. Note that unfortunately, this is the x86_64 binary.
# We might have to rethink how to handle this for multi-arch environments.
pssh "
if [ ! -x /usr/local/bin/ngrok ]; then
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
sudo tar -zxvf- -C /usr/local/bin ngrok
fi"
}
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
@@ -1020,19 +950,12 @@ _cmd_standardize() {
# Disable unattended upgrades so that they don't mess up with the subsequent steps
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
# Some cloud providers think that it's smart to disable password authentication.
# We need to re-enable it, though.
# Digital Ocean
# Digital Ocean's cloud init disables password authentication; re-enable it.
pssh "
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
sudo systemctl restart ssh.service
fi"
# AWS
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
sudo systemctl restart ssh.service
fi"
# Special case for oracle since their iptables blocks everything but SSH
pssh "
@@ -1068,12 +991,11 @@ _cmd_tailhist () {
# halfway through and we're actually trying to download it again.
pssh "
set -e
sudo apt-get install unzip -y
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
sudo mv websocketd /usr/local/bin/websocketd
sudo mkdir -p /opt/tailhist
sudo tee /opt/tailhist.service <<EOF
sudo mkdir -p /tmp/tailhist
sudo tee /root/tailhist.service <<EOF
[Unit]
Description=tailhist
@@ -1081,16 +1003,16 @@ Description=tailhist
WantedBy=multi-user.target
[Service]
WorkingDirectory=/opt/tailhist
WorkingDirectory=/tmp/tailhist
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /opt/tailhist.service --now
sudo systemctl enable /root/tailhist.service --now
"
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
}
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"

Binary file not shown.

View File

@@ -17,12 +17,6 @@ pssh() {
echo "[parallel-ssh] $@"
# There are some routers that really struggle with the number of TCP
# connections that we open when deploying large fleets of clusters.
# We're adding a 1 second delay here, but this can be cranked up if
# necessary - or down to zero, too.
sleep ${PSSH_DELAY_PRE-1}
$(which pssh || which parallel-ssh) -h $HOSTFILE -l ubuntu \
--par ${PSSH_PARALLEL_CONNECTIONS-100} \
--timeout 300 \

View File

@@ -1,16 +0,0 @@
#!/bin/sh
DOMAINS=domains.txt
IPS=ips.txt
. ./dns-cloudflare.sh
paste "$DOMAINS" "$IPS" | while read domain ips; do
if ! [ "$domain" ]; then
echo "⚠️ No more domains!"
exit 1
fi
_clear_zone "$domain"
_populate_zone "$domain" $ips
done
echo "✅ All done."

View File

@@ -7,7 +7,7 @@ USER_PASSWORD=training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
KUBEVERSION=1.28.9
KUBEVERSION=1.22.5
STEPS="
wait

View File

@@ -1,6 +0,0 @@
CLUSTERSIZE=5
USER_LOGIN=k8s
USER_PASSWORD=
STEPS="stage2"

View File

@@ -1,8 +1,3 @@
#export TF_VAR_node_size=GP2.4
#export TF_VAR_node_size=g6-standard-6
#export TF_VAR_node_size=m7i.xlarge
CLUSTERSIZE=1
CLUSTERPREFIX=CHANGEME

View File

@@ -1,23 +1,10 @@
resource "scaleway_vpc_private_network" "_" {
}
# This is a kind of hack to use a custom security group with Kapsule.
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
resource "scaleway_instance_security_group" "_" {
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
inbound_default_policy = "accept"
outbound_default_policy = "accept"
}
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
name = var.cluster_name
#region = var.location
tags = var.common_tags
version = local.k8s_version
type = "kapsule"
cni = "cilium"
delete_additional_resources = true
private_network_id = scaleway_vpc_private_network._.id
}
resource "scaleway_k8s_pool" "_" {
@@ -30,7 +17,6 @@ resource "scaleway_k8s_pool" "_" {
max_size = var.max_nodes_per_pool
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
autohealing = true
depends_on = [ scaleway_instance_security_group._ ]
}
data "scaleway_k8s_version" "_" {

View File

@@ -4,7 +4,6 @@ resource "helm_release" "_" {
create_namespace = true
repository = "https://charts.loft.sh"
chart = "vcluster"
version = "0.19.7"
set {
name = "service.type"
value = "NodePort"

View File

@@ -14,9 +14,9 @@ $ hcloud server-type list | grep shared
variable "node_sizes" {
type = map(any)
default = {
S = "cpx11"
M = "cpx21"
L = "cpx31"
S = "cx11"
M = "cx21"
L = "cx31"
}
}

View File

@@ -13,7 +13,7 @@ data "openstack_images_image_v2" "_" {
most_recent = true
properties = {
os = "ubuntu"
version = "24.04"
version = "22.04"
}
}

View File

@@ -1,11 +1,11 @@
title: |
Docker Intensif
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-enix.container.training/
slides: https://2023-09-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -1,11 +1,11 @@
title: |
Fondamentaux Kubernetes
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-enix.container.training/
slides: https://2023-09-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -2,11 +2,11 @@ title: |
Packaging d'applications
pour Kubernetes
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-enix.container.training/
slides: https://2023-09-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -15,7 +15,7 @@ exclude:
content:
- shared/title.md
- logistics.md
- logistics-julien.md
- k8s/intro.md
- shared/about-slides.md
- k8s/prereqs-advanced.md
@@ -40,7 +40,4 @@ content:
- exercises/helm-umbrella-chart-details.md
-
- k8s/ytt.md
- k8s/gitworkflows.md
- k8s/flux.md
- k8s/argocd.md
- shared/thankyou.md

View File

@@ -1,11 +1,11 @@
title: |
Kubernetes Avancé
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-enix.container.training/
slides: https://2023-09-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -27,7 +27,6 @@ content:
- exercises/netpol-brief.md
- exercises/sealed-secrets-brief.md
- exercises/kyverno-ingress-domain-name-brief.md
- exercises/reqlim-brief.md
- #1
- k8s/demo-apps.md
- k8s/netpol.md
@@ -54,7 +53,6 @@ content:
- k8s/apiserver-deepdive.md
- k8s/aggregation-layer.md
- k8s/hpa-v2.md
- exercises/reqlim-details.md
- #4
- k8s/statefulsets.md
- k8s/consul.md

View File

@@ -1,11 +1,11 @@
title: |
Opérer Kubernetes
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-enix.container.training/
slides: https://2023-09-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -16,7 +16,7 @@
# Shortlinks for next training in English and French
#/next https://www.eventbrite.com/e/livestream-intensive-kubernetes-bootcamp-tickets-103262336428
/next https://qconsf.com/training/nov2024/asynchronous-architecture-patterns-scale-ml-and-other-high-latency-workloads
/next https://skillsmatter.com/courses/700-advanced-kubernetes-concepts-workshop-jerome-petazzoni
/hi5 https://enix.io/fr/services/formation/online/
/us https://www.ardanlabs.com/live-training-events/deploying-microservices-and-traditional-applications-with-kubernetes-march-28-2022.html
/uk https://skillsmatter.com/workshops/827-deploying-microservices-and-traditional-applications-with-kubernetes-with-jerome-petazzoni

File diff suppressed because it is too large.

View File

@@ -2,8 +2,8 @@
"name": "container-training-pub-sub-server",
"version": "0.0.1",
"dependencies": {
"express": "^4.21.1",
"socket.io": "^4.8.0",
"socket.io-client": "^4.7.5"
"express": "^4.16.2",
"socket.io": "^4.6.1",
"socket.io-client": "^4.5.1"
}
}

View File

@@ -1,4 +1,4 @@
## Exercise — Ingress Controller
## Exercise — Ingress
- Add an ingress controller to a Kubernetes cluster

View File

@@ -1,4 +1,4 @@
# Exercise — Ingress Controller
# Exercise — Ingress
- We want to expose a couple of web apps through an ingress controller
@@ -128,4 +128,4 @@ This is similar to the previous scenario, but with two significant changes:
1. We only want to run the ingress controller on nodes that have the role `ingress`.
2. We want to either use `hostPort`, or a list of `externalIPs` (not `hostNetwork`).
2. We don't want to use `hostNetwork`, but a list of `externalIPs` instead.

View File

@@ -1,6 +1,6 @@
# Exercise — Network Policies
We want to implement a generic network security mechanism.
We want to to implement a generic network security mechanism.
Instead of creating one policy per service, we want to
create a fixed number of policies, and use a single label

View File

@@ -1,11 +0,0 @@
## Exercise — Enable RBAC
- Enable RBAC on a manually-deployed control plane
- This involves:
- generating different certificates
- distributing the certificates to the controllers
- enabling the proper authorizers in API server

View File

@@ -1,117 +0,0 @@
# Exercise — Enable RBAC
- We want to enable RBAC on the "polykube" cluster
(it doesn't matter whether we have 1 or multiple nodes)
- Ideally, we want to have, for instance:
- one key, certificate, and kubeconfig for a cluster admin
- one key, certificate, and kubeconfig for a user
<br/>
(with permissions in a single namespace)
- Bonus points: enable the NodeAuthorizer too!
- Check the following slides for hints
---
## Step 1
- Enable RBAC itself!
--
- This is done with an API server command-line flag
--
- Check [the documentation][kube-apiserver-doc] to see the flag
--
- For now, only enable `--authorization-mode=RBAC`
[kube-apiserver-doc]: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
---
## Step 2
- Our certificate doesn't work anymore, we need to generate a new one
--
- We need a certificate that will have *some* (ideally *all*) permissions
--
- Two options:
- use the equivalent of "root" (identity that completely skips permission checks)
- a "non-root" identity but which is granted permissions with RBAC
--
- The "non-root" option looks nice, but to grant permissions, we need permissions
- So let's start with the equivalent of "root"!
--
- The Kubernetes equivalent of `root` is the group `system:masters`
---
## Step 2, continued
- We need to generate a certificate for a user belonging to group `system:masters`
--
- In Kubernetes certificates, groups are encoded with the "organization" field
--
- That corresponds to `O=system:masters`
--
- In other words we need to generate a new certificate, but with a subject of:
`/CN=admin/O=system:masters/` (the `CN` doesn't matter)
- That certificate should be able to interact with the API server, like before
---
## Step 3
- Now, all our controllers have permissions issues
- We need to either:
- use that `system:masters` cert everywhere
- generate different certs for every controller, with the proper identities
- Suggestion: use `system:masters` everywhere to begin with
(and make sure the cluster is back on its feet)
---
## Step 4
At this point, there are two possible forks in the road:
1. Generate certs for the control plane controllers
(`kube-controller-manager`, `kube-scheduler`)
2. Generate cert(s) for the node(s) and enable `NodeAuthorizer`
Good luck!
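
For step 2, a minimal sketch of generating such a certificate with openssl and wrapping it in a kubeconfig. The CA file names, server URL, and validity period are assumptions for illustration, not values given by the exercise:

```bash
# Generate a key and a CSR with the "root-equivalent" subject:
openssl genrsa -out admin.key 2048
openssl req -new -key admin.key -subj "/CN=admin/O=system:masters" -out admin.csr

# Sign it with the cluster CA (ca.crt / ca.key paths are assumptions):
openssl x509 -req -in admin.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
        -days 365 -out admin.crt

# Build a kubeconfig using that client certificate (server URL is an assumption):
kubectl config set-cluster polykube --server=https://localhost:6443 \
        --certificate-authority=ca.crt --embed-certs --kubeconfig=admin.kubeconfig
kubectl config set-credentials admin --client-certificate=admin.crt \
        --client-key=admin.key --embed-certs --kubeconfig=admin.kubeconfig
kubectl config set-context default --cluster=polykube --user=admin \
        --kubeconfig=admin.kubeconfig
kubectl config use-context default --kubeconfig=admin.kubeconfig
```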

View File

@@ -1,7 +0,0 @@
## Exercise — Requests and Limits
- Check current resource allocation and utilization
- Make sure that all workloads have requests (and perhaps limits)
- Make sure that all *future* workloads too!

View File

@@ -1,55 +0,0 @@
# Exercise — Requests and Limits
By default, if we don't specify *resource requests*,
our workloads will run in `BestEffort` quality of service.
`BestEffort` is very bad for production workloads,
because the scheduler has no idea of the actual resource
requirements of our apps, and won't be able to make
smart decisions about workload placement.
As a result, when the cluster gets overloaded,
containers will be killed, pods will be evicted,
and service disruptions will happen.
Let's solve this!
---
## Check current state
- Check *allocations*
(i.e. which pods have requests and limits for CPU and memory)
- Then check *utilization*
(i.e. actual resource usage)
- Possible tools: `kubectl`, plugins like `view-allocations`, Prometheus...
---
## Follow best practices
- We want to make sure that *all* workloads have requests
(and perhaps limits, too!)
- Depending on the workload:
- edit its YAML manifest
- adjust its Helm values
- add LimitRange in its Namespace
- Then check again to confirm that the job has been done properly!
---
## Be future-proof!
- We want to make sure that *future* workloads will have requests, too
- How can that be implemented?
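
One possible answer to that last question, as a sketch: a LimitRange applies default requests and limits to containers that don't specify any, so future workloads in that namespace no longer land in `BestEffort`. The namespace and the values below are illustrative:

```bash
# Illustrative LimitRange: containers created without explicit resources in
# this namespace get these defaults instead of running as BestEffort.
kubectl apply -f- <<EOF
apiVersion: v1
kind: LimitRange
metadata:
  name: default-requests
  namespace: my-app          # hypothetical namespace
spec:
  limits:
  - type: Container
    defaultRequest:          # applied when a container has no requests
      cpu: 100m
      memory: 128Mi
    default:                 # applied when a container has no limits
      cpu: 500m
      memory: 256Mi
EOF
```

Cluster-wide enforcement (rejecting workloads without requests, rather than defaulting them) would typically go through an admission policy instead, e.g. Kyverno or a validating webhook.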

View File

@@ -10,120 +10,108 @@
</head>
<body>
<table>
<tr>
<td>Mardi 24 septembre 2024</td>
<td>Mardi 26 septembre 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Mercredi 25 septembre 2024</td>
<td>Mercredi 27 septembre 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Jeudi 26 septembre 2024</td>
<td>Jeudi 28 septembre 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Vendredi 27 septembre 2024</td>
<td>Vendredi 29 septembre 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Mardi 1er octobre 2024</td>
<td>Mardi 3 octobre 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 2 octobre 2024</td>
<td>Mercredi 4 octobre 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 3 octobre 2024</td>
<td>Jeudi 5 octobre 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Vendredi 4 octobre 2024</td>
<td>Vendredi 6 octobre 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Lundi 7 octobre 2024</td>
<td>Mardi 10 octobre 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mardi 8 octobre 2024</td>
<td>Mercredi 11 octobre 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mercredi 9 octobre 2024</td>
<td>Jeudi 12 octobre 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Jeudi 10 octobre 2024</td>
<td>Vendredi 13 octobre 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Vendredi 11 octobre 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Lundi 14 octobre 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 15 octobre 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 16 octobre 2024</td>
<td>Lundi 16 octobre 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 17 octobre 2024</td>
<td>Mardi 17 octobre 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Vendredi 18 octobre 2024</td>
<td>Mercredi 18 octobre 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 19 octobre 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Vendredi 20 octobre 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
</table>
</body>
</html>

Binary file not shown.

Before: 103 KiB

Binary file not shown.

Before: 22 KiB

View File

@@ -1,16 +1,16 @@
https://prettypictures.container.training/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
https://prettypictures.container.training/containers/ShippingContainerSFBay.jpg
https://prettypictures.container.training/containers/aerial-view-of-containers.jpg
https://prettypictures.container.training/containers/blue-containers.jpg
https://prettypictures.container.training/containers/chinook-helicopter-container.jpg
https://prettypictures.container.training/containers/container-cranes.jpg
https://prettypictures.container.training/containers/container-housing.jpg
https://prettypictures.container.training/containers/containers-by-the-water.jpg
https://prettypictures.container.training/containers/distillery-containers.jpg
https://prettypictures.container.training/containers/lots-of-containers.jpg
https://prettypictures.container.training/containers/plastic-containers.JPG
https://prettypictures.container.training/containers/train-of-containers-1.jpg
https://prettypictures.container.training/containers/train-of-containers-2.jpg
https://prettypictures.container.training/containers/two-containers-on-a-truck.jpg
https://prettypictures.container.training/containers/wall-of-containers.jpeg
https://prettypictures.container.training/containers/catene-de-conteneurs.jpg
https://gallant-turing-d0d520.netlify.com/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
https://gallant-turing-d0d520.netlify.com/containers/ShippingContainerSFBay.jpg
https://gallant-turing-d0d520.netlify.com/containers/aerial-view-of-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/blue-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/chinook-helicopter-container.jpg
https://gallant-turing-d0d520.netlify.com/containers/container-cranes.jpg
https://gallant-turing-d0d520.netlify.com/containers/container-housing.jpg
https://gallant-turing-d0d520.netlify.com/containers/containers-by-the-water.jpg
https://gallant-turing-d0d520.netlify.com/containers/distillery-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/lots-of-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/plastic-containers.JPG
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-1.jpg
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-2.jpg
https://gallant-turing-d0d520.netlify.com/containers/two-containers-on-a-truck.jpg
https://gallant-turing-d0d520.netlify.com/containers/wall-of-containers.jpeg
https://gallant-turing-d0d520.netlify.com/containers/catene-de-conteneurs.jpg

View File

@@ -20,21 +20,19 @@
## Use cases
- Defaulting
Some examples ...
*injecting image pull secrets, sidecars, environment variables...*
- Stand-alone admission controllers
- Policy enforcement and best practices
*validating:* policy enforcement (e.g. quotas, naming conventions ...)
*prevent: `latest` images, deprecated APIs...*
*mutating:* inject or provide default values (e.g. pod presets)
*require: PDBs, resource requests/limits, labels/annotations, local registry...*
- Admission controllers part of a greater system
- Problem mitigation
*validating:* advanced typing for operators
*block nodes with vulnerable kernels, inject log4j mitigations...*
- Extended validation for operators
*mutating:* inject sidecars for service meshes
---
@@ -200,64 +198,6 @@
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
- We will need an ngrok account for the tunnels
(a free account is fine)
---
class: extra-details
## What's ngrok?
- Ngrok provides secure tunnels to access local services
- Example: run `ngrok http 1234`
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
- Perfect to develop our webhook!
---
class: extra-details
## Ngrok in production
- Ngrok was initially known for its local webhook development features
- It now supports production scenarios as well
(load balancing, WAF, authentication, circuit-breaking...)
- Including some that are very relevant to Kubernetes
(e.g. [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
---
## Ngrok tokens
- If you're attending a live training, you might have an ngrok token
- Look in `~/ngrok.env` and if that file exists, copy it to the stack:
.lab[
```bash
cp ~/ngrok.env ~/container.training/webhooks/admission/.env
```
]
---
## Starting the whole stack
.lab[
- Go to the webhook directory:
@@ -276,6 +216,28 @@ cp ~/ngrok.env ~/container.training/webhooks/admission/.env
---
class: extra-details
## What's ngrok?
- Ngrok provides secure tunnels to access local services
- Example: run `ngrok http 1234`
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
- Perfect to develop our webhook!
- Probably not for production, though
(webhook requests and responses now pass through the ngrok platform)
---
## Update the webhook configuration
- We have a webhook configuration in `k8s/webhook-configuration.yaml`
@@ -581,23 +543,6 @@ Shell to the rescue!
(it should only allow values of `red`, `green`, `blue`)
---
## Coming soon...
- Kubernetes Validating Admission Policies
- Integrated with the Kubernetes API server
- Lets us define policies using [CEL (Common Expression Language)][cel-spec]
- Available in beta in Kubernetes 1.28 <!-- ##VERSION## -->
- Check this [CNCF Blog Post][cncf-blog-vap] for more details
[cncf-blog-vap]: https://www.cncf.io/blog/2023/09/14/policy-management-in-kubernetes-is-changing/
[cel-spec]: https://github.com/google/cel-spec
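
A minimal sketch of what such a policy can look like, closely following the upstream documentation's replica-limit example; the 1.28 beta API group/version and the values below are assumptions:

```bash
# Sketch of a CEL-based policy plus its binding (illustrative values).
kubectl apply -f- <<EOF
apiVersion: admissionregistration.k8s.io/v1beta1
kind: ValidatingAdmissionPolicy
metadata:
  name: replica-limit
spec:
  failurePolicy: Fail
  matchConstraints:
    resourceRules:
    - apiGroups:   ["apps"]
      apiVersions: ["v1"]
      operations:  ["CREATE", "UPDATE"]
      resources:   ["deployments"]
  validations:
  - expression: "object.spec.replicas <= 5"
    message: "deployments are limited to 5 replicas"
---
apiVersion: admissionregistration.k8s.io/v1beta1
kind: ValidatingAdmissionPolicyBinding
metadata:
  name: replica-limit-binding
spec:
  policyName: replica-limit
  validationActions: [Deny]
  matchResources:
    namespaceSelector: {}
EOF
```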
???
:EN:- Dynamic admission control with webhooks

View File

@@ -141,6 +141,12 @@ class: pic
class: pic
![](images/control-planes/non-dedicated-stacked-nodes.svg)
---
class: pic
![](images/control-planes/advanced-control-plane.svg)
---
@@ -151,12 +157,6 @@ class: pic
---
class: pic
![](images/control-planes/non-dedicated-stacked-nodes.svg)
---
# The Kubernetes API
[

View File

@@ -1,601 +0,0 @@
# ArgoCD
- We're going to implement a basic GitOps workflow with ArgoCD
- Pushing to the default branch will automatically deploy to our clusters
- There will be two clusters (`dev` and `prod`)
- The two clusters will have similar (but slightly different) workloads
![ArgoCD Logo](images/argocdlogo.png)
---
## ArgoCD concepts
ArgoCD manages **applications** by **syncing** their **live state** with their **target state**.
- **Application**: a group of Kubernetes resources managed by ArgoCD.
<br/>
Also a custom resource (`kind: Application`) managing that group of resources.
- **Application source type**: the **Tool** used to build the application (Kustomize, Helm...)
- **Target state**: the desired state of an **application**, as represented by the git repository.
- **Live state**: the current state of the application on the cluster.
- **Sync status**: whether or not the live state matches the target state.
- **Sync**: the process of making an application move to its target state.
<br/>
(e.g. by applying changes to a Kubernetes cluster)
(Check [ArgoCD core concepts](https://argo-cd.readthedocs.io/en/stable/core_concepts/) for more definitions!)
---
## Getting ready
- Let's make sure we have two clusters
- It's OK to use local clusters (kind, minikube...)
- We need to install the ArgoCD CLI ([packages], [binaries])
- **Highly recommended:** set up CLI completion!
- Of course we'll need a Git service, too
[packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
[binaries]: https://github.com/argoproj/argo-cd/releases/latest
---
## Setting up ArgoCD
- The easiest way is to use upstream YAML manifests
- There is also a [Helm chart][argohelmchart] if we need more customization
.lab[
- Create a namespace for ArgoCD and install it there:
```bash
kubectl create namespace argocd
kubectl apply --namespace argocd -f \
https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
```
]
[argohelmchart]: https://artifacthub.io/packages/helm/argo/argocd-apps
---
## Logging in with the ArgoCD CLI
- The CLI can talk to the ArgoCD API server or to the Kubernetes API server
- For simplicity, we're going to authenticate and communicate with the Kubernetes API
.lab[
- Authenticate with the ArgoCD API (that's what the `--core` flag does):
```bash
argocd login --core
```
- Check that everything is fine:
```bash
argocd version
```
]
--
🤔 `FATA[0000] error retrieving argocd-cm: configmap "argocd-cm" not found`
---
## ArgoCD CLI shortcomings
- When using "core" authentication, the ArgoCD CLI uses our current Kubernetes context
(as defined in our kubeconfig file)
- That context needs to point to the correct namespace
(the namespace where we installed ArgoCD)
- In fact, `argocd login --core` doesn't communicate at all with ArgoCD!
(it only updates a local ArgoCD configuration file)
---
## Trying again in the right namespace
- We will need to run all `argocd` commands in the `argocd` namespace
(this limitation only applies to "core" authentication; see [issue 14167][issue14167])
.lab[
- Switch to the `argocd` namespace:
```bash
kubectl config set-context --current --namespace argocd
```
- Check that we can communicate with the ArgoCD API now:
```bash
argocd version
```
]
- Let's have a look at ArgoCD architecture!
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
---
class: pic
![ArgoCD Architecture](images/argocd_architecture.png)
---
## ArgoCD API Server
The API server is a gRPC/REST server which exposes the API consumed by the Web UI, CLI, and CI/CD systems. It has the following responsibilities:
- application management and status reporting
- invoking of application operations (e.g. sync, rollback, user-defined actions)
- repository and cluster credential management (stored as K8s secrets)
- authentication and auth delegation to external identity providers
- RBAC enforcement
- listener/forwarder for Git webhook events
---
## ArgoCD Repository Server
The repository server is an internal service which maintains a local cache of the Git repositories holding the application manifests. It is responsible for generating and returning the Kubernetes manifests when provided the following inputs:
- repository URL
- revision (commit, tag, branch)
- application path
- template specific settings: parameters, helm values...
---
## ArgoCD Application Controller
The application controller is a Kubernetes controller which continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo).
It detects *OutOfSync* application state and optionally takes corrective action.
It is responsible for invoking any user-defined hooks for lifecycle events (*PreSync, Sync, PostSync*).
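
To relate these three components to what actually runs on the cluster, a quick check; the workload names below are the ones used by the upstream install manifests and may differ slightly across versions:

```bash
# The components described above appear as workloads in the argocd namespace:
kubectl get deployments,statefulsets --namespace argocd
# Typically (give or take, depending on the ArgoCD version):
#   deployment/argocd-server                    <- API server (web UI, CLI, gRPC/REST)
#   deployment/argocd-repo-server               <- repository server (manifest generation)
#   statefulset/argocd-application-controller   <- application controller (sync loop)
#   ...plus supporting pieces (redis, dex, applicationset/notifications controllers)
```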
---
## Preparing a repository for ArgoCD
- We need a repository with Kubernetes YAML manifests
- You can fork [kubercoins] or create a new, empty repository
- If you create a new, empty repository, add some manifests to it
[kubercoins]: https://github.com/jpetazzo/kubercoins
---
## Add an Application
- An Application can be added to ArgoCD via the web UI or the CLI
(either way, this will create a custom resource of `kind: Application`)
- The Application should then automatically be deployed to our cluster
(the application manifests will be "applied" to the cluster)
.lab[
- Let's use the CLI to add an Application:
```bash
argocd app create kubercoins \
--repo https://github.com/`<your_user>/<your_repo>`.git \
--path . --revision `<branch>` \
--dest-server https://kubernetes.default.svc \
--dest-namespace kubercoins-prod
```
]
---
## Checking progress
- We can see sync status in the web UI or with the CLI
.lab[
- Let's check app status with the CLI:
```bash
argocd app list
```
- We can also check directly with the Kubernetes CLI:
```bash
kubectl get applications
```
]
- The app is there and it is `OutOfSync`!
---
## Manual sync with the CLI
- By default the "sync policy" is `manual`
- It can also be set to `auto`, which would check the git repository every 3 minutes
(this interval can be [configured globally][pollinginterval])
- Manual sync can be triggered with the CLI
.lab[
- Let's force an immediate sync of our app:
```bash
argocd app sync kubercoins
```
]
🤔 We're getting errors!
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
---
## Sync failed
We should receive a failure:
`FATA[0000] Operation has completed with phase: Failed`
And in the output, we see more details:
`Message: one or more objects failed to apply,`
<br/>
`reason: namespaces "kubercoins-prod" not found`
---
## Creating the namespace
- There are multiple ways to achieve that
- We could generate a YAML manifest for the namespace and add it to the git repository
- Or we could use "Sync Options" so that ArgoCD creates it automatically!
- ArgoCD provides many "Sync Options" to handle various edge cases
- Some [others](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/) are: `FailOnSharedResource`, `PruneLast`, `PrunePropagationPolicy`...
---
## Editing the app's sync options
- This can be done through the web UI or the CLI
.lab[
- Let's use the CLI once again:
```bash
argocd app edit kubercoins
```
- Add the following to the YAML manifest, at the root level:
```yaml
syncPolicy:
syncOptions:
- CreateNamespace=true
```
]
---
## Sync again
.lab[
- Let's retry the sync operation:
```bash
argocd app sync kubercoins
```
- And check the application status:
```bash
argocd app list
kubectl get applications
```
]
- It should show `Synced` and `Progressing`
- After a while (when all pods are running correctly) it should be `Healthy`
---
## Managing Applications via the Web UI
- ArgoCD is popular in large part due to its browser-based UI
- Let's see how to manage Applications in the web UI
.lab[
- Expose the web dashboard on a local port:
```bash
argocd admin dashboard
```
- This command will show the dashboard URL; open it in a browser
- Authentication should be automatic
]
Note: `argocd admin dashboard` is similar to `kubectl port-forward` or `kubectl proxy`.
(The dashboard remains available as long as `argocd admin dashboard` is running.)
---
## Adding a staging Application
- Let's add another Application for a staging environment
- First, create a new branch (e.g. `staging`) in our kubercoins fork
- Then, in the ArgoCD web UI, click on the "+ NEW APP" button
(on a narrow display, it might just be "+", right next to buttons looking like 🔄 and ↩️)
- See next slides for details about that form!
---
## Defining the Application
| Field | Value |
|------------------|--------------------------------------------|
| Application Name | `kubercoins-stg` |
| Project Name | `default` |
| Sync policy | `Manual` |
| Sync options | check `auto-create namespace` |
| Repository URL | `https://github.com/<username>/<reponame>` |
| Revision | `<branchname>` |
| Path | `.` |
| Cluster URL | `https://kubernetes.default.svc` |
| Namespace | `kubercoins-stg` |
Then click on the "CREATE" button (top left).
---
## Synchronizing the Application
- After creating the app, it should now show up in the app tiles
(with a yellow outline to indicate that it's out of sync)
- Click on the "SYNC" button on the app tile to show the sync panel
- In the sync panel, click on "SYNCHRONIZE"
- The app will start to synchronize, and should become healthy after a little while
---
## Making changes
- Let's make changes to our application manifests and see what happens
.lab[
- Make a change to a manifest
(for instance, change the number of replicas of a Deployment)
- Commit that change and push it to the staging branch
- Check the application sync status:
```bash
argocd app list
```
]
- After a short period of time (a few minutes max) the app should show up "out of sync"
---
## Automated synchronization
- We don't want to manually sync after every change
(that wouldn't be true continuous deployment!)
- We're going to enable "auto sync"
- Note that this requires much more rigorous testing and observability!
(we need to be sure that our changes won't crash our app or even our cluster)
- Argo project also provides [Argo Rollouts][rollouts]
(a controller and CRDs to provide blue-green, canary deployments...)
- Today we'll just turn on automated sync for the staging namespace
[rollouts]: https://argoproj.github.io/rollouts/
---
## Enabling auto-sync
- In the web UI, go to *Applications* and click on *kubercoins-stg*
- Click on the "DETAILS" button (top left, might be just a "i" sign on narrow displays)
- Click on "ENABLE AUTO-SYNC" (under "SYNC POLICY")
- After a few minutes the changes should show up!
---
## Rolling back
- If we deploy a broken version, how do we recover?
- "The GitOps way": revert the changes in source control
(see next slide)
- Emergency rollback:
- disable auto-sync (if it was enabled)
- on the app page, click on "HISTORY AND ROLLBACK"
<br/>
(with the clock-with-backward-arrow icon)
- click on the "..." button next to the button we want to roll back to
- click "Rollback" and confirm
---
## Rolling back with GitOps
- The correct way to roll back is rolling back the code in source control
```bash
git checkout staging
git revert HEAD
git push origin staging
```
---
## Working with Helm
- ArgoCD supports different tools to process Kubernetes manifests:
Kustomize, Helm, Jsonnet, and [Config Management Plugins][cmp]
- Let's see how to deploy Helm charts with ArgoCD!
- In the [kubercoins] repository, there is a branch called [helm]
- It provides a generic Helm chart, in the [generic-service] directory
- There are service-specific values YAML files in the [values] directory
- Let's create one application for each of the 5 components of our app!
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
[kubercoins]: https://github.com/jpetazzo/kubercoins
[helm]: https://github.com/jpetazzo/kubercoins/tree/helm
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
---
## Creating a Helm Application
- The example below uses "upstream" kubercoins
- Feel free to use your own fork instead!
.lab[
- Create an Application for `hasher`:
```bash
argocd app create hasher \
--repo https://github.com/jpetazzo/kubercoins.git \
--path generic-service --revision helm \
--dest-server https://kubernetes.default.svc \
--dest-namespace kubercoins-helm \
--sync-option CreateNamespace=true \
--values ../values/hasher.yaml \
--sync-policy=auto
```
]
---
## Deploying the rest of the application
- Option 1: repeat the previous command (updating app name and values)
- Option 2: author YAML manifests and apply them
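
For Option 2, a sketch of what one of those manifests could look like, transposing the CLI flags used for `hasher` above into an `Application` resource; the component name `rng` and its values file are illustrative:

```bash
# Illustrative Application manifest, equivalent to the CLI invocation above
# but for another component; apply it to the cluster where ArgoCD runs.
kubectl apply -f- <<EOF
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: rng
  namespace: argocd
spec:
  project: default
  source:
    repoURL: https://github.com/jpetazzo/kubercoins.git
    targetRevision: helm
    path: generic-service
    helm:
      valueFiles:
      - ../values/rng.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: kubercoins-helm
  syncPolicy:
    automated: {}
    syncOptions:
    - CreateNamespace=true
EOF
```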
---
## Additional considerations
- When running in production, ArgoCD can be integrated with an [SSO provider][sso]
- ArgoCD embeds and bundles [Dex] to delegate authentication
- it can also use an existing OIDC provider (Okta, Keycloak...)
- A single ArgoCD instance can manage multiple clusters
(but it's also fine to have one ArgoCD per cluster)
- ArgoCD can be complemented with [Argo Rollouts][rollouts] for advanced rollout control
(blue/green, canary...)
[sso]: https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/#sso
[Dex]: https://github.com/dexidp/dex
[rollouts]: https://argoproj.github.io/argo-rollouts/
---
## Acknowledgements
Many thanks to
Anton (Ant) Weiss ([antweiss.com](https://antweiss.com), [@antweiss](https://twitter.com/antweiss))
and
Guilhem Lettron
for contributing an initial version and suggestions to this ArgoCD chapter.
All remaining typos, mistakes, or approximations are mine (Jérôme Petazzoni).
???
:EN:- Implementing gitops with ArgoCD
:FR:- Workflow gitops avec ArgoCD

View File

@@ -856,7 +856,7 @@ class: extra-details
- To learn more about Kubernetes attacks and threat models around RBAC:
📽️ [Hacking into Kubernetes Security for Beginners](https://www.youtube.com/watch?v=mLsCm9GVIQg)
by [V Körbes](https://twitter.com/veekorbes)
by [Ellen Körbes](https://twitter.com/ellenkorbes)
and [Tabitha Sable](https://twitter.com/TabbySable)
---

View File

@@ -81,7 +81,7 @@
## What version are we running anyway?
- When I say, "I'm running Kubernetes 1.28", is that the version of:
- When I say, "I'm running Kubernetes 1.22", is that the version of:
- kubectl
@@ -129,15 +129,15 @@
## Kubernetes uses semantic versioning
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.28.9:
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.22.17:
- MAJOR = 1
- MINOR = 28
- PATCH = 9
- MINOR = 22
- PATCH = 17
- It's always possible to mix and match different PATCH releases
(e.g. 1.28.9 and 1.28.13 are compatible)
(e.g. 1.22.17 and 1.22.5 are compatible)
- It is recommended to run the latest PATCH release
@@ -153,9 +153,9 @@
- All components support a difference of one¹ MINOR version
- This allows live upgrades (since we can mix e.g. 1.28 and 1.29)
- This allows live upgrades (since we can mix e.g. 1.22 and 1.23)
- It also means that going from 1.28 to 1.30 requires going through 1.29
- It also means that going from 1.22 to 1.24 requires going through 1.23
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
and kubectl, which can be one MINOR ahead or behind API server.]
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
```
- Look for the `image:` line, and update it to e.g. `v1.30.1`
- Look for the `image:` line, and update it to e.g. `v1.24.1`
]
@@ -320,29 +320,53 @@ Note 2: kubeadm itself is still version 1.22.1..
- First things first: we need to upgrade kubeadm
- The Kubernetes package repositories are now split by minor versions
.lab[
(i.e. there is one repository for 1.28, another for 1.29, etc.)
- Upgrade kubeadm:
```
sudo apt install kubeadm=1.27.0-00
```
- This avoids accidentally upgrading from one minor version to another
- Check what kubeadm tells us:
```
sudo kubeadm upgrade plan
```
(e.g. with unattended upgrades or if packages haven't been held/pinned)
]
- We'll need to add the new package repository and unpin packages!
Problem: kubeadm doesn't know know how to handle
upgrades from version 1.22.
This is because we installed version 1.27.
We need to install kubeadm version 1.23.X.
---
## Installing the new packages
## Downgrading kubeadm
- Edit `/etc/apt/sources.list.d/kubernetes.list`
- We need to go back to kubeadm version 1.23.X.
(or copy it to e.g. `kubernetes-1.29.list` and edit that)
.lab[
- `apt-get update`
- View available versions for package `kubeadm`:
```bash
apt show kubeadm -a | grep ^Version | grep 1.23
```
- Now edit (or remove) `/etc/apt/preferences.d/kubernetes`
- Downgrade kubeadm:
```
sudo apt install kubeadm=1.23.0-00
```
- `apt-get install kubeadm` should now upgrade `kubeadm` correctly! 🎉
- Check what kubeadm tells us:
```
sudo kubeadm upgrade plan
```
]
kubeadm should now agree to upgrade to 1.23.X.
---
@@ -361,7 +385,7 @@ Note 2: kubeadm itself is still version 1.22.1..
- Look for the `image:` line, and restore it to the original value
(e.g. `v1.28.9`)
(e.g. `v1.22.17`)
- Wait for the control plane to come back up
@@ -375,14 +399,9 @@ Note 2: kubeadm itself is still version 1.22.1..
.lab[
- Check the upgrade plan:
```bash
sudo kubeadm upgrade plan
```
- Perform the upgrade:
```bash
sudo kubeadm upgrade apply v1.29.0
sudo kubeadm upgrade apply v1.23.0
```
]
@@ -399,9 +418,15 @@ Note 2: kubeadm itself is still version 1.22.1..
- Log into node `oldversion2`
- Update package lists and APT pins like we did before
- View available versions for package `kubelet`:
```bash
apt show kubelet -a | grep ^Version
```
- Then upgrade kubelet
- Upgrade kubelet:
```bash
sudo apt install kubelet=1.23.0-00
```
]
@@ -454,16 +479,13 @@ Note 2: kubeadm itself is still version 1.22.1..
.lab[
- Execute the whole upgrade procedure on each node:
- Download the configuration on each node, and upgrade kubelet:
```bash
for N in 1 2 3; do
ssh oldversion$N "
sudo sed -i s/1.28/1.29/ /etc/apt/sources.list.d/kubernetes.list &&
sudo rm /etc/apt/preferences.d/kubernetes &&
sudo apt update &&
sudo apt install kubeadm -y &&
sudo apt install kubeadm=1.23.0-00 &&
sudo kubeadm upgrade node &&
sudo apt install kubelet -y"
sudo apt install kubelet=1.23.0-00"
done
```
]
@@ -472,7 +494,7 @@ Note 2: kubeadm itself is still version 1.22.1..
## Checking what we've done
- All our nodes should now be updated to version 1.29
- All our nodes should now be updated to version 1.23.0
.lab[
@@ -485,115 +507,17 @@ Note 2: kubeadm itself is still version 1.22.1..
---
## And now, was that a good idea?
--
**Almost!**
--
- The official recommendation is to *drain* a node before performing node maintenance
(migrate all workloads off the node before upgrading it)
- How do we do that?
- Is it really necessary?
- Let's see!
---
## Draining a node
- This can be achieved with the `kubectl drain` command, which will:
- *cordon* the node (prevent new pods from being scheduled there)
- *evict* all the pods running on the node (delete them gracefully)
- the evicted pods will automatically be recreated somewhere else
- evictions might be blocked in some cases (Pod Disruption Budgets, `emptyDir` volumes)
- Once the node is drained, it can safely be upgraded, restarted...
- Once it's ready, it can be put back in commission with `kubectl uncordon`
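
As a concrete sketch of that workflow (the node name is illustrative, taken from the examples above; the flags shown are the ones typically needed to get past DaemonSet pods and emptyDir volumes):

```bash
# Drain the node before maintenance:
kubectl drain oldversion2 --ignore-daemonsets --delete-emptydir-data
# (DaemonSet pods can't be evicted, and pods using emptyDir lose that data,
#  hence the two flags; evictions can still be blocked by PodDisruptionBudgets.)

# ...upgrade packages / kubelet / kernel, reboot if needed...

# Put the node back in rotation:
kubectl uncordon oldversion2
```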
---
## Is it necessary?
- When upgrading kubelet from one patch-level version to another:
- it's *probably fine*
- When upgrading system packages:
- it's *probably fine*
- except [when it's not][datadog-systemd-outage]
- When upgrading the kernel:
- it's *probably fine*
- ...as long as we can tolerate a restart of the containers on the node
- ...and that they will be unavailable for a few minutes (during the reboot)
[datadog-systemd-outage]: https://www.datadoghq.com/blog/engineering/2023-03-08-deep-dive-into-platform-level-impact/
---
## Is it necessary?
- When upgrading kubelet from one minor version to another:
- it *may or may not be fine*
- in some cases (e.g. migrating from Docker to containerd) it *will not*
- Here's what [the documentation][node-upgrade-docs] says:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- Do it at your own risk, and if you do, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
## Database operators to the rescue
- Moving stateful pods (e.g.: database server) can cause downtime
- Database replication can help:
- if a node contains database servers, we make sure these servers aren't primaries
- if they are primaries, we execute a *switch over*
- Some database operators (e.g. [CNPG]) will do that switch over automatically
(when they detect that a node has been *cordoned*)
[CNPG]: https://cloudnative-pg.io/
---
class: extra-details
## Skipping versions
- This example worked because we went from 1.28 to 1.29
- This example worked because we went from 1.22 to 1.23
- If you are upgrading from e.g. 1.26, you will have to go through 1.27 first
- If you are upgrading from e.g. 1.21, you will have to go through 1.22 first
- This means upgrading kubeadm to 1.27.X, then using it to upgrade the cluster
- This means upgrading kubeadm to 1.22.X, then using it to upgrade the cluster
- Then upgrading kubeadm to 1.28.X, etc.
- Then upgrading kubeadm to 1.23.X, etc.
- **Make sure to read the release notes before upgrading!**

View File

@@ -24,32 +24,6 @@
---
## A bit of history
Things related to Custom Resource Definitions:
- Kubernetes 1.7: `apiextensions.k8s.io/v1beta1` introduced
- Kubernetes 1.16: `apiextensions.k8s.io/v1` introduced
- Kubernetes 1.22: `apiextensions.k8s.io/v1beta1` [removed][changes-in-122]
- Kubernetes 1.25: [CEL validation rules available in beta][crd-validation-rules-beta]
- Kubernetes 1.28: [validation ratcheting][validation-ratcheting] in [alpha][feature-gates]
- Kubernetes 1.29: [CEL validation rules available in GA][cel-validation-rules]
- Kubernetes 1.30: [validation ratcheting][validation-ratcheting] in [beta][feature-gates]; enabled by default
[crd-validation-rules-beta]: https://kubernetes.io/blog/2022/09/23/crd-validation-rules-beta/
[cel-validation-rules]: https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules
[validation-ratcheting]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/4008-crd-ratcheting
[feature-gates]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features
[changes-in-122]: https://kubernetes.io/blog/2021/07/14/upcoming-changes-in-kubernetes-1-22/
---
## First slice of pizza
```yaml
@@ -68,6 +42,8 @@ Things related to Custom Resource Definitions:
(a few optional things become mandatory, see [this guide](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#customresourcedefinition-v122) for details)
- `apiextensions.k8s.io/v1` is available since Kubernetes 1.16
---
## Second slice of pizza
@@ -120,9 +96,9 @@ The YAML below defines a resource using the CRD that we just created:
kind: Pizza
apiVersion: container.training/v1alpha1
metadata:
name: hawaiian
name: napolitana
spec:
toppings: [ cheese, ham, pineapple ]
toppings: [ mozzarella ]
```
.lab[
@@ -138,7 +114,11 @@ spec:
## Type validation
- Recent versions of Kubernetes will issue errors about unknown fields
- Older versions of Kubernetes will accept our pizza definition as is
- Newer versions, however, will issue warnings about unknown fields
(and if we use `--validate=false`, these fields will simply be dropped)
- We need to improve our OpenAPI schema
@@ -146,28 +126,6 @@ spec:
---
## Creating a bland pizza
- Let's try to create a pizza anyway!
.lab[
- Only provide the most basic YAML manifest:
```bash
kubectl create -f- <<EOF
kind: Pizza
apiVersion: container.training/v1alpha1
metadata:
name: hawaiian
EOF
```
]
- That should work! (As long as we don't try to add pineapple😁)
---
## Third slice of pizza
- Let's add a full OpenAPI v3 schema to our Pizza CRD
@@ -250,42 +208,24 @@ Note: we can update a CRD without having to re-create the corresponding resource
---
## Validation woes
## Better data validation
- Let's check what happens if we try to update our pizzas
- Let's change the data schema so that the sauce can only be `red` or `white`
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
.lab[
- Try to add a label:
- Update the Pizza CRD:
```bash
kubectl label pizza --all deliciousness=9001
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
```
]
--
- It works for the pizzas that have `sauce` and `toppings`, but not the other one!
- The other one doesn't pass validation, and *can't be modified*
---
## First, let's fix this!
- Option 1: delete the pizza
*(deletion isn't subject to validation)*
- Option 2: update the pizza to add `sauce` and `toppings`
*(writing a pizza that passes validation is fine)*
- Option 3: relax the validation rules
---
## Next, explain what's happening
## Validation *a posteriori*
- Some of the pizzas that we defined earlier *do not* pass validation
@@ -341,8 +281,6 @@ Note: we can update a CRD without having to re-create the corresponding resource
---
class: extra-details
## Migrating database content
- We need to *serve* a version as long as we *store* objects in that version
@@ -357,58 +295,6 @@ class: extra-details
---
## Validation ratcheting
- Good news: it's not always necessary to introduce new versions
(and to write the associated conversion webhooks)
- *Validation ratcheting allows updates to custom resources that fail validation to succeed if the validation errors were on unchanged keypaths*
- In other words: allow changes that don't introduce further validation errors
- This was introduced in Kubernetes 1.28 (alpha), enabled by default in 1.30 (beta)
- The rules are actually a bit more complex
- Another (maybe more accurate) explanation: it allows tightening or loosening some field definitions
---
## Validation ratcheting example
- Let's change the data schema so that the sauce can only be `red` or `white`
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
.lab[
- Update the Pizza CRD:
```bash
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
```
]
---
## Testing validation ratcheting
- This should work with Kubernetes 1.30 and above
(but give an error for the `brownie` pizza with previous versions of K8S)
.lab[
- Add another label:
```bash
kubectl label pizzas --all food=definitely
```
]
---
## Even better data validation
- If we need more complex data validation, we can use a validating webhook

View File

@@ -1,513 +0,0 @@
# Disruptions
In a perfect world...
- hardware never fails
- software never has bugs
- ...and never needs to be updated
- ...and uses a predictable amount of resources
- ...and these resources are infinite anyways
- network latency and packet loss are zero
- humans never make mistakes
--
😬
---
## Disruptions
In the real world...
- hardware will fail randomly (without advance notice)
- software has bugs
- ...and we constantly add new features
- ...and will sometimes use more resources than expected
- ...and these resources are limited
- network latency and packet loss are NOT zero
- humans make mistakes (shutting down the wrong machine, the wrong app...)
---
## Disruptions
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
- There are **voluntary** and **involuntary** disruptions
- voluntary = directly initiated by humans (including by mistake!)
- involuntary = everything else
- In this section, we're going to see what they are and how to prevent them
(or at least, mitigate their effects)
---
## Node outage
- Example: hardware failure (server or network), low-level error
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
- **Involuntary** disruption (even if it results from human error!)
- Consequence: all workloads on that node become unresponsive
- Mitigations:
- scale workloads to at least 2 replicas (or more if quorum is needed)
- add anti-affinity scheduling constraints (to avoid having all pods on the same node)
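- A sketch of the anti-affinity mitigation (hypothetical app label `myapp`; "preferred" rather than "required" so pods still schedule on small clusters):
```bash
kubectl apply -f- <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: myapp
spec:
  replicas: 2
  selector:
    matchLabels:
      app: myapp
  template:
    metadata:
      labels:
        app: myapp
    spec:
      affinity:
        podAntiAffinity:
          # Prefer spreading replicas across different nodes
          preferredDuringSchedulingIgnoredDuringExecution:
          - weight: 100
            podAffinityTerm:
              topologyKey: kubernetes.io/hostname
              labelSelector:
                matchLabels:
                  app: myapp
      containers:
      - name: web
        image: nginx
EOF
```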
---
## Node outage play-by-play
- Node goes down (or disconnected from network)
- Its lease (in Namespace `kube-node-lease`) doesn't get renewed
- Controller manager detects that and marks the node as "unreachable"
(this adds both `NoSchedule` and `NoExecute` taints to the node)
- Eventually, the `NoExecute` taint will evict the pods running on the node
- This will trigger creation of replacement pods by owner controllers
(except for pods with a stable network identity, e.g. in a Stateful Set!)
---
## Node outage notes
- By default, pods will tolerate the `unreachable:NoExecute` taint for 5 minutes
(toleration automatically added by Admission controller `DefaultTolerationSeconds`)
- Pods of a Stateful Set don't recover automatically:
- as long as the Pod exists, a replacement Pod can't be created
- the Pod will exist as long as its Node exists
- deleting the Node (manually or automatically) will recover the Pod
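- For instance, a pod can override that 5-minute default to fail over faster (a sketch; the 30-second value is arbitrary):
```bash
kubectl apply -f- <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: fast-failover
spec:
  tolerations:
  # Overrides the toleration added by DefaultTolerationSeconds
  - key: node.kubernetes.io/unreachable
    operator: Exists
    effect: NoExecute
    tolerationSeconds: 30
  containers:
  - name: web
    image: nginx
EOF
```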
---
## Memory/disk pressure
- Example: available memory on a node goes below a specific threshold
(because a pod is using too much memory and no limit was set)
- **Involuntary** disruption
- Consequence: kubelet starts to *evict* some pods
- Mitigations:
- set *resource limits* on containers to prevent them from using too much resources
- set *resource requests* on containers to make sure they don't get evicted
<br/>
(as long as they use less than what they requested)
- make sure that apps don't use more resources than what they've requested
---
## Memory/disk pressure play-by-play
- Memory leak in an application container, slowly causing very high memory usage
- Overall free memory on the node goes below the *soft* or the *hard* threshold
(default hard threshold = 100Mi; default soft threshold = none)
- When reaching the *soft* threshold:
- kubelet waits until the "eviction soft grace period" expires
- then (if resource usage is still above the threshold) it gracefully evicts pods
- When reaching the *hard* threshold:
- kubelet immediately and forcefully evicts pods
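- These thresholds map to `KubeletConfiguration` fields; a sketch with arbitrary values (to be merged into the node's kubelet config, followed by a kubelet restart):
```bash
cat > kubelet-eviction-example.yaml <<EOF
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
evictionHard:
  memory.available: "100Mi"
evictionSoft:
  memory.available: "500Mi"
evictionSoftGracePeriod:
  memory.available: "1m"
evictionPressureTransitionPeriod: "5m"  # default value; see a few slides later
EOF
```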
---
## Which pods are evicted?
- Kubelet only considers pods that are using *more* than what they requested
(and only for the resource that is under pressure, e.g. RAM or disk usage)
- First, it sorts pods by *priority¹* (as set with the `priorityClassName` in the pod spec)
- Then, by how much their resource usage exceeds their request
(again, for the resource that is under pressure)
- It evicts pods until enough resources have been freed up
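- Priorities are declared with PriorityClass objects, referenced in pod specs; a sketch (names and values are arbitrary):
```bash
kubectl apply -f- <<EOF
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: important
value: 100000
---
apiVersion: v1
kind: Pod
metadata:
  name: important-pod
spec:
  priorityClassName: important
  containers:
  - name: web
    image: nginx
EOF
```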
---
## Soft (graceful) vs hard (forceful) eviction
- Soft eviction = graceful shutdown of the pod
(honors the pod's `terminationGracePeriodSeconds` timeout)
- Hard eviction = immediate shutdown of the pod
(kills all containers immediately)
---
## Memory/disk pressure notes
- If resource usage increases *very fast*, kubelet might not catch it fast enough
- For memory: this will trigger the kernel out-of-memory killer
- containers killed by OOM are automatically restarted (no eviction)
- eviction might happen at a later point though (if memory usage stays high)
- For disk: there is no "out-of-disk" killer, but writes will fail
- the `write` system call fails with `errno = ENOSPC` / `No space left on device`
- eviction typically happens shortly after (when kubelet catches up)
- When relying on disk/memory bursts a lot, using `priorityClasses` might help
---
## Memory/disk pressure delays
- By default, no soft threshold is defined
- Defining it requires setting both the threshold and the grace period
- Grace periods can be different for the different types of resources
- When a node is under pressure, kubelet places a `NoSchedule` taint
(to avoid adding more pods while the node is under pressure)
- Once the node is no longer under pressure, kubelet clears the taint
(after waiting an extra timeout, `evictionPressureTransitionPeriod`, 5 min by default)
---
## Accidental deletion
- Example: developer deletes the wrong Deployment, the wrong Namespace...
- **Voluntary** disruption
(from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- only deploy to production systems through e.g. gitops workflows
- enforce peer review of changes
- only give users limited (e.g. read-only) access to production systems
- use canary deployments (might not catch all mistakes though!)
---
## Bad code deployment
- Example: critical bug introduced, application crashes immediately or is non-functional
- **Voluntary** disruption
(again, from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- readiness probes can mitigate immediate crashes
<br/>
(rolling update continues only when enough pods are ready)
- delayed crashes will require a rollback
<br/>
(manual intervention, or automated by a canary system)
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- terminate workloads one at a time, coordinating with users
--
🤔
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- ~~terminate workloads one at a time, coordinating with users~~
- use Pod Disruption Budgets
---
## Pod Disruption Budgets
- A PDB is a kind of *contract* between:
- "admins" = folks maintaining the cluster (e.g. adding/removing/updating nodes)
- "users" = folks deploying apps and workloads on the cluster
- A PDB expresses something like:
*in that particular set of pods, do not "disrupt" more than X at a time*
- Examples:
- in that set of frontend pods, do not disrupt more than 1 at a time
- in that set of worker pods, always have at least 10 ready
<br/>
(do not disrupt them if it would bring down the number of ready pods below 10)
---
## PDB - user side
- Cluster users create a PDB with a manifest like this one:
```yaml
@@INCLUDE[k8s/pod-disruption-budget.yaml]
```
- The PDB must indicate either `minAvailable` or `maxUnavailable`
---
## Rounding logic
- Percentages are rounded **up**
- When specifying `maxUnavailable` as a percentage, this can result in a higher percentage
(e.g. `maxUnavailable: 50%` with 3 pods can result in 2 pods being unavailable!)
---
## Unmanaged pods
- Specifying `minAvailable: X` works all the time
- Specifying `minAvailable: X%` or `maxUnavailable` requires *managed pods*
(pods that belong to a controller, e.g. Replica Set, Stateful Set...)
- This is because the PDB controller needs to know the total number of pods
(given by the `replicas` field, not merely by counting pod objects)
- The PDB controller will try to resolve the controller using the pod selector
- If that fails, the PDB controller will emit warning events
(visible with `kubectl describe pdb ...`)
---
## Zero
- `maxUnavailable: 0` means "do not disrupt my pods"
- Same thing if `minAvailable` is greater than or equal to the number of pods
- In that case, cluster admins are supposed to get in touch with cluster users
- This will prevent fully automated operation
(and some cluster admins' automated systems might not honor that request)
---
## PDB - admin side
- As a cluster admin, we need to follow certain rules
- Only shut down (or restart) a node when no pods are running on that node
(except system pods belonging to Daemon Sets)
- To remove pods running on a node, we should use the *eviction API*
(which will check PDB constraints and honor them)
- To prevent new pods from being scheduled on a node, we can use a *taint*
- These operations are streamlined by `kubectl drain`, which will:
- *cordon* the node (add a `NoSchedule` taint)
- invoke the *eviction API* to remove pods while respecting their PDBs
---
## Theory vs practice
- `kubectl drain` won't evict pods using `emptyDir` volumes
(unless the `--delete-emptydir-data` flag is passed as well)
- Make sure that `emptyDir` volumes don't hold anything important
(they shouldn't, but... who knows!)
- Kubernetes lacks a standard way for users to express:
*this `emptyDir` volume can/cannot be safely deleted*
- If a PDB forbids an eviction, this requires manual coordination
---
class: extra-details
## Unhealthy pod eviction policy
- By default, unhealthy pods can only be evicted if PDB allows it
(unhealthy = running, but not ready)
- In many cases, unhealthy pods aren't serving requests anyway, and can be removed
- This behavior is enabled by setting the appropriate field in the PDB manifest:
```yaml
spec:
unhealthyPodEvictionPolicy: AlwaysAllow
```
---
## Node upgrade
- Example: upgrading kubelet or the Linux kernel on a node
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are temporarily interrupted, and restarted
- this might disrupt these workloads
- Mitigations:
- migrate workloads off the node first (as if we were shutting it down)
---
## Node upgrade notes
- Is it necessary to drain a node before doing an upgrade?
- From [the documentation][node-upgrade-docs]:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- It's *probably* safe to upgrade in-place for:
- kernel upgrades
- kubelet patch-level upgrades (1.X.Y → 1.X.Z)
- It's *probably* better to drain the node for minor-revision kubelet upgrades (1.X → 1.Y)
- In doubt, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
## Manual rescheduling
- Example: moving workloads around to accommodate noisy neighbors or other issues
(e.g. pod X is doing a lot of disk I/O and this is starving other pods)
- **Voluntary** disruption
- Consequence:
- the moved workloads are temporarily interrupted
- Mitigations:
- define an appropriate number of replicas, declare PDBs
- use the [eviction API][eviction-API] to move workloads
[eviction-API]: https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/
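- A sketch of a raw call to the eviction API (pod name and namespace are placeholders):
```bash
cat > eviction.json <<EOF
{
  "apiVersion": "policy/v1",
  "kind": "Eviction",
  "metadata": {"name": "noisy-pod", "namespace": "default"}
}
EOF
# POST to the "eviction" subresource of the pod; this honors PDBs
kubectl create --raw /api/v1/namespaces/default/pods/noisy-pod/eviction -f eviction.json
```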
???
:EN:- Voluntary and involuntary disruptions
:EN:- Pod Disruption Budgets
:FR:- "Disruptions" volontaires et involontaires
:FR:- Pod Disruption Budgets

View File

@@ -368,30 +368,6 @@ class: extra-details
[ciliumwithoutkubeproxy]: https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#kubeproxy-free
---
class: extra-details
## About the API server certificate...
- In the previous sections, we've skipped API server certificate verification
- To generate a proper certificate, we need to include a `subjectAltName` extension
- And make sure that the CA includes the extension in the certificate
```bash
openssl genrsa -out apiserver.key 4096
openssl req -new -key apiserver.key -subj /CN=kubernetes/ \
-addext "subjectAltName = DNS:kubernetes.default.svc, \
DNS:kubernetes.default, DNS:kubernetes, \
DNS:localhost, DNS:polykube1" -out apiserver.csr
openssl x509 -req -in apiserver.csr -CAkey ca.key -CA ca.cert \
-out apiserver.crt -copy_extensions copy
```
???
:EN:- Connecting nodes and pods

View File

@@ -462,7 +462,7 @@ The "context" section references the "cluster" and "credentials" that we defined
---
## Review the kubeconfig file
## Review the kubeconfig filfe
The kubeconfig file should look like this:

View File

@@ -339,12 +339,34 @@ class: extra-details
---
## Service catalog
- *Service catalog* is another extension mechanism
- It's not extending the Kubernetes API strictly speaking
(but it still provides new features!)
- It doesn't create new types; it uses:
- ClusterServiceBroker
- ClusterServiceClass
- ClusterServicePlan
- ServiceInstance
- ServiceBinding
- It uses the Open service broker API
---
## Documentation
- [Custom Resource Definitions: when to use them](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
- [Custom Resources Definitions: how to use them](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/)
- [Service Catalog](https://kubernetes.io/docs/concepts/extend-kubernetes/service-catalog/)
- [Built-in Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/)
- [Dynamic Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/)

View File

@@ -1,508 +0,0 @@
# FluxCD
- We're going to implement a basic GitOps workflow with Flux
- Pushing to `main` will automatically deploy to the clusters
- There will be two clusters (`dev` and `prod`)
- The two clusters will have similar (but slightly different) workloads
---
## Repository structure
This is (approximately) what we're going to do:
```
@@INCLUDE[slides/k8s/gitopstree.txt]
```
---
## Getting ready
- Let's make sure we have two clusters
- It's OK to use local clusters (kind, minikube...)
- We might run into resource limits, though
(pay attention to `Pending` pods!)
- We need to install the Flux CLI ([packages], [binaries])
- **Highly recommended:** set up CLI completion!
- Of course we'll need a Git service, too
(we're going to use GitHub here)
[packages]: https://fluxcd.io/flux/get-started/
[binaries]: https://github.com/fluxcd/flux2/releases
---
## GitHub setup
- Generate a GitHub token:
https://github.com/settings/tokens/new
- Give it "repo" access
- This token will be used by the `flux bootstrap github` command later
- It will create a repository and configure it (SSH key...)
- The token can be revoked afterwards
---
## Flux bootstrap
.lab[
- Let's set a few variables for convenience, and create our repository:
```bash
export GITHUB_TOKEN=...
export GITHUB_USER=changeme
export GITHUB_REPO=alsochangeme
export FLUX_CLUSTER=dev
flux bootstrap github \
--owner=$GITHUB_USER \
--repository=$GITHUB_REPO \
--branch=main \
--path=./clusters/$FLUX_CLUSTER \
--personal --public
```
]
Problems? check next slide!
---
## What could go wrong?
- `flux bootstrap` will create or update the repository on GitHub
- Then it will install Flux controllers to our cluster
- Then it waits for these controllers to be up and running and ready
- Check pod status in `flux-system`
- If pods are `Pending`, check that you have enough resources on your cluster
- For testing purposes, it should be fine to lower or remove Flux `requests`!
(but don't do that in production!)
- If anything goes wrong, don't worry, we can just re-run the bootstrap
---
class: extra-details
## Idempotence
- It's OK to run that same `flux bootstrap` command multiple times!
- If the repository already exists, it will re-use it
(it won't destroy or empty it)
- If the path `./clusters/$FLUX_CLUSTER` already exists, it will update it
- It's totally fine to re-run `flux bootstrap` if something fails
- It's totally fine to run it multiple times on different clusters
- Or even to run it multiple times for the *same* cluster
(to reinstall Flux on that cluster after a cluster wipe / reinstall)
---
## What do we get?
- Let's look at what `flux bootstrap` installed on the cluster
.lab[
- Look inside the `flux-system` namespace:
```bash
kubectl get all --namespace flux-system
```
- Look at `kustomizations` custom resources:
```bash
kubectl get kustomizations --all-namespaces
```
- See what the `flux` CLI tells us:
```bash
flux get all
```
]
---
## Deploying with GitOps
- We'll need to add/edit files on the repository
- We can do it by using `git clone`, local edits, `git commit`, `git push`
- Or by editing online on the GitHub website
.lab[
- Create a manifest; for instance `clusters/dev/flux-system/blue.yaml`
- Add that manifest to `clusters/dev/flux-system/kustomization.yaml`
- Commit and push both changes to the repository
]
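- For example, `blue.yaml` could be as simple as this sketch (any valid manifest will do):
```bash
cat > clusters/dev/flux-system/blue.yaml <<EOF
apiVersion: apps/v1
kind: Deployment
metadata:
  name: blue
spec:
  replicas: 1
  selector:
    matchLabels:
      app: blue
  template:
    metadata:
      labels:
        app: blue
    spec:
      containers:
      - name: blue
        image: nginx
EOF
```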
---
## Waiting for reconciliation
- Compare the git hash that we pushed and the one shown with `kubectl get gitrepositories`
- Option 1: wait for Flux to pick up the changes in the repository
(the default interval for git repositories is 1 minute, so that's fast)
- Option 2: use `flux reconcile source git flux-system`
(this puts an annotation on the appropriate resource, triggering an immediate check)
- Option 3: set up receiver webhooks
(so that git updates trigger immediate reconciliation)
---
## Checking progress
- `flux logs`
- `kubectl get gitrepositories --all-namespaces`
- `kubectl get kustomizations --all-namespaces`
---
## Did it work?
--
- No!
--
- Why?
--
- We need to indicate the namespace where the app should be deployed
- Either in the YAML manifests
- Or in the `kustomization` custom resource
(using field `spec.targetNamespace`)
- Add the namespace to the manifest and try again!
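- A sketch of the second option (in a real GitOps flow this would be committed to the repo; `apiVersion` may be `v1beta2` on older Flux releases):
```bash
cat <<EOF
apiVersion: kustomize.toolkit.fluxcd.io/v1
kind: Kustomization
metadata:
  name: blue
  namespace: flux-system
spec:
  interval: 1m
  prune: true
  sourceRef:
    kind: GitRepository
    name: flux-system
  path: ./clusters/dev
  targetNamespace: default
EOF
```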
---
## Adding an app in a reusable way
- Let's see a technique to add a whole app
(with multiple resource manifests)
- We want to minimize code repetition
(i.e. easy to add on multiple clusters with minimal changes)
---
## The plan
- Add the app manifests in a directory
(e.g.: `apps/myappname/manifests`)
- Create a kustomization manifest for the app and its namespace
(e.g.: `apps/myappname/flux.yaml`)
- The kustomization manifest will refer to the app manifest
- Add the kustomization manifest to the top-level `flux-system` kustomization
---
## Creating the manifests
- All commands below should be executed at the root of the repository
.lab[
- Put application manifests in their directory:
```bash
mkdir -p apps/dockercoins/manifests
cp ~/container.training/k8s/dockercoins.yaml apps/dockercoins/manifests/
```
- Create kustomization manifest:
```bash
flux create kustomization dockercoins \
--source=GitRepository/flux-system \
--path=./apps/dockercoins/manifests/ \
--target-namespace=dockercoins \
--prune=true --export > apps/dockercoins/flux.yaml
```
]
---
## Creating the target namespace
- When deploying *helm releases*, it is possible to automatically create the namespace
- When deploying *kustomizations*, we need to create it explicitly
- Let's put the namespace with the kustomization manifest
(so that the whole app can be managed through a single manifest)
.lab[
- Add the target namespace to the kustomization manifest:
```bash
echo "---
kind: Namespace
apiVersion: v1
metadata:
name: dockercoins" >> apps/dockercoins/flux.yaml
```
]
---
## Linking the kustomization manifest
- Edit `clusters/dev/flux-system/kustomization.yaml`
- Add a line to reference the kustomization manifest that we created:
```yaml
- ../../../apps/dockercoins/flux.yaml
```
- `git add` our manifests, `git commit`, `git push`
(check with `git status` that we haven't forgotten anything!)
- `flux reconcile` or wait for the changes to be picked up
---
## Installing with Helm
- We're going to see two different workflows:
- installing a third-party chart
<br/>
(e.g. something we found on the Artifact Hub)
- installing one of our own charts
<br/>
(e.g. a chart we authored ourselves)
- The procedures are very similar
---
## Installing from a public Helm repository
- Let's install [kube-prometheus-stack][kps]
.lab[
- Create the Flux manifests:
```bash
mkdir -p apps/kube-prometheus-stack
flux create source helm kube-prometheus-stack \
--url=https://prometheus-community.github.io/helm-charts \
--export >> apps/kube-prometheus-stack/flux.yaml
flux create helmrelease kube-prometheus-stack \
--source=HelmRepository/kube-prometheus-stack \
--chart=kube-prometheus-stack --release-name=kube-prometheus-stack \
--target-namespace=kube-prometheus-stack --create-target-namespace \
--export >> apps/kube-prometheus-stack/flux.yaml
```
]
[kps]: https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack
---
## Enable the app
- Just like before, link the manifest from the top-level kustomization
(`flux-system` in namespace `flux-system`)
- `git add` / `git commit` / `git push`
- We should now have a Prometheus+Grafana observability stack!
---
## Installing from a Helm chart in a git repo
- In this example, the chart will be in the same repo
- In the real world, it will typically be in a different repo!
.lab[
- Generate a basic Helm chart:
```bash
mkdir -p charts
helm create charts/myapp
```
]
(This generates a chart which installs NGINX. A lot of things can be customized, though.)
---
## Creating the Flux manifests
- The invocation is very similar to our first example
.lab[
- Generate the Flux manifest for the Helm release:
```bash
mkdir apps/myapp
flux create helmrelease myapp \
--source=GitRepository/flux-system \
--chart=charts/myapp \
--target-namespace=myapp --create-target-namespace \
--export > apps/myapp/flux.yaml
```
- Add a reference to that manifest to the top-level kustomization
- `git add` / `git commit` / `git push` the chart, manifest, and kustomization
]
---
## Passing values
- We can also configure our Helm releases with values
- Using an existing `myvalues.yaml` file:
`flux create helmrelease ... --values=myvalues.yaml`
- Referencing an existing ConfigMap or Secret with a `values.yaml` key:
`flux create helmrelease ... --values-from=ConfigMap/myapp`
---
## Gotchas
- When creating a HelmRelease using a chart stored in a git repository, you must:
- either bump the chart version (in `Chart.yaml`) after each change,
- or set `spec.chart.spec.reconcileStrategy` to `Revision`
- Why?
- Flux installs helm releases using packaged artifacts
- Artifacts are updated only when the Helm chart version changes
- Unless `reconcileStrategy` is set to `Revision` (instead of the default `ChartVersion`)
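- The corresponding HelmRelease excerpt (a sketch; `v2beta1` was the current API version at the time of writing):
```bash
cat <<EOF
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: myapp
  namespace: flux-system
spec:
  interval: 5m
  chart:
    spec:
      chart: charts/myapp
      reconcileStrategy: Revision   # default is ChartVersion
      sourceRef:
        kind: GitRepository
        name: flux-system
EOF
```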
---
## More gotchas
- There is a bug in Flux that prevents using identical subcharts with aliases
- See [fluxcd/flux2#2505][flux2505] for details
[flux2505]: https://github.com/fluxcd/flux2/discussions/2505
---
## Things that we didn't talk about...
- Bucket sources
- Image automation controller
- Image reflector controller
- And more!
???
:EN:- Implementing gitops with Flux
:FR:- Workflow gitops avec Flux
<!--
helm upgrade --install --repo https://dl.gitea.io/charts --namespace gitea --create-namespace gitea gitea \
--set persistence.enabled=false \
--set redis-cluster.enabled=false \
--set postgresql-ha.enabled=false \
--set postgresql.enabled=true \
--set gitea.config.session.PROVIDER=db \
--set gitea.config.cache.ADAPTER=memory \
#
### Bootstrap Flux controllers
```bash
mkdir -p flux/flux-system
flux install --export > flux/flux-system/gotk-components.yaml
kubectl apply -f flux/flux-system/gotk-components.yaml
```
### Bootstrap GitRepository/Kustomization
```bash
export REPO_URL="<gitlab_url>" DEPLOY_USERNAME="<username>"
read -s DEPLOY_TOKEN
flux create secret git flux-system --url="${REPO_URL}" --username="${DEPLOY_USERNAME}" --password="${DEPLOY_TOKEN}"
flux create source git flux-system --url=$REPO_URL --branch=main --secret-ref flux-system --ignore-paths='/*,!/flux' --export > flux/flux-system/gotk-sync.yaml
flux create kustomization flux-system --source=GitRepository/flux-system --path="./flux" --prune=true --export >> flux/flux-system/gotk-sync.yaml
git add flux/ && git commit -m 'feat: Setup Flux' flux/ && git push
kubectl apply -f flux/flux-system/gotk-sync.yaml
```
-->

View File

@@ -1,13 +0,0 @@
├── charts/ <--- could also be in separate app repos
│ ├── dockercoins/
│ └── color/
├── apps/ <--- YAML manifests for GitOps resources
│ ├── dockercoins/ (might reference the "charts" above,
│ ├── blue/ and/or include environment-specific
│ ├── green/ manifests to create e.g. namespaces,
│ ├── kube-prometheus-stack/ configmaps, secrets...)
│ ├── cert-manager/
│ └── traefik/
└── clusters/ <--- per-cluster; will typically reference
├── prod/ the "apps" above, possibly extending
└── dev/ or adding configuration resources too

View File

@@ -1,4 +1,4 @@
# Git-based workflows (GitOps)
# Git-based workflows
- Deploying with `kubectl` has downsides:
@@ -22,7 +22,7 @@
- These resources have a perfect YAML representation
- All we do is manipulate these YAML representations
- All we do is manipulating these YAML representations
(`kubectl run` generates a YAML file that gets applied)
@@ -34,232 +34,229 @@
- control who can push to which branches
- have formal review processes, pull requests, test gates...
- have formal review processes, pull requests ...
---
## Enabling git-based workflows
- There are a many tools out there to help us do that; with different approaches
- There are a few tools out there to help us do that
- "Git host centric" approach: GitHub Actions, GitLab...
- We'll see demos of two of them: [Flux] and [Gitkube]
*the workflows/action are directly initiated by the git platform*
- There are *many* other tools, some of them with even more features
- "Kubernetes cluster centric" approach: [ArgoCD], [FluxCD]..
- There are also *many* integrations with popular CI/CD systems
*controllers run on our clusters and trigger on repo updates*
(e.g.: GitLab, Jenkins, ...)
- This is not an exhaustive list (see also: Jenkins)
- We're going to talk mostly about "Kubernetes cluster centric" approaches here
[ArgoCD]: https://argoproj.github.io/cd/
[Flux]: https://fluxcd.io/
[Flux]: https://www.weave.works/oss/flux/
[Gitkube]: https://gitkube.sh/
---
## The road to production
## Flux overview
In no specific order, we need to at least:
- We put our Kubernetes resources as YAML files in a git repository
- Choose a tool
- Flux polls that repository regularly (every 5 minutes by default)
- Choose a cluster / app / namespace layout
<br/>
(one cluster per app, different clusters for prod/staging...)
- The resources described by the YAML files are created/updated automatically
- Choose a repository layout
<br/>
(different repositories, directories, branches per app, env, cluster...)
- Choose an installation / bootstrap method
- Choose how new apps / environments / versions will be deployed
- Choose how new images will be built
- Changes are made by updating the code in the repository
---
## Flux vs ArgoCD (1/2)
## Preparing a repository for Flux
- Flux:
- We need a repository with Kubernetes YAML files
- fancy setup with an (optional) dedicated `flux bootstrap` command
<br/>
(with support for specific git providers, repo creation...)
- I have one: https://github.com/jpetazzo/kubercoins
- deploying an app requires multiple CRDs
<br/>
(Kustomization, HelmRelease, GitRepository...)
- Fork it to your GitHub account
- supports Helm charts, Kustomize, raw YAML
- Create a new branch in your fork; e.g. `prod`
- ArgoCD:
(e.g. with "branch" dropdown through the GitHub web UI)
- simple setup (just apply YAMLs / install Helm chart)
- fewer CRDs (basic workflow can be implement with a single "Application" resource)
- supports Helm charts, Jsonnet, Kustomize, raw YAML, and arbitrary plugins
- This is the branch that we are going to use for deployment
---
## Flux vs ArgoCD (2/2)
## Setting up Flux with kustomize
- Flux:
- Clone the Flux repository:
```bash
git clone https://github.com/fluxcd/flux
cd flux
```
- sync interval is configurable per app
- no web UI out of the box
- CLI relies on Kubernetes API access
- CLI can easily generate custom resource manifests (with `--export`)
- self-hosted (flux controllers are managed by flux itself by default)
- one flux instance manages a single cluster
- Edit `deploy/flux-deployment.yaml`
- ArgoCD:
- Change the `--git-url` and `--git-branch` parameters:
```yaml
- --git-url=git@github.com:your-git-username/kubercoins
- --git-branch=prod
```
- sync interval is configured globally
- comes with a web UI
- CLI can use Kubernetes API or separate API and authentication system
- one ArgoCD instance can manage multiple clusters
- Apply all the YAML:
```bash
kubectl apply -k deploy/
```
---
## Cluster, app, namespace layout
## Setting up Flux with Helm
- One cluster per app, different namespaces for environments?
- Add Flux helm repo:
```bash
helm repo add fluxcd https://charts.fluxcd.io
```
- One cluster per environment, different namespaces for apps?
- Everything on a single cluster? One cluster per combination?
- Something in between:
- prod cluster, database cluster, dev/staging/etc cluster
- prod+db cluster per app, shared dev/staging/etc cluster
- And more!
Note: this decision isn't really tied to GitOps!
- Install Flux:
```bash
kubectl create namespace flux
helm upgrade --install flux \
--set git.url=git@github.com:your-git-username/kubercoins \
--set git.branch=prod \
--namespace flux \
fluxcd/flux
```
---
## Repository layout
## Allowing Flux to access the repository
So many different possibilities!
- When it starts, Flux generates an SSH key
- Source repos
- Display that key:
```bash
kubectl -n flux logs deployment/flux | grep identity.pub | cut -d '"' -f2
```
- Cluster/infra repos/branches/directories
- Then add that key to the repository, giving it **write** access
- "Deployment" repos (with manifests, charts)
(some Flux features require write access)
- Different repos/branches/directories for environments
🤔 How to decide?
- After a minute or so, DockerCoins will be deployed to the current namespace
---
## Permissions
## Making changes
- Different teams/companies = different repos
- Make changes (on the `prod` branch), e.g. change `replicas` in `worker`
- separate platform team → separate "infra" vs "apps" repos
- teams working on different apps → different repos per app
- Branches can be "protected" (`production`, `main`...)
(don't need separate repos for separate environments)
- Directories will typically have the same permissions
- Managing directories is easier than branches
- But branches are more "powerful" (cherrypicking, rebasing...)
- After a few minutes, the changes will be picked up by Flux and applied
---
## Resource hierarchy
## Other features
- Git-based deployments are managed by Kubernetes resources
- Flux can keep a list of all the tags of all the images we're running
(e.g. Kustomization, HelmRelease with Flux; Application with ArgoCD)
- The `fluxctl` tool can show us if we're running the latest images
- We will call these resources "GitOps resources"
- We can also "automate" a resource (i.e. automatically deploy new images)
- These resources need to be managed like any other Kubernetes resource
(YAML manifests, Kustomizations, Helm charts)
- They can be managed with Git workflows too!
- And much more!
---
## Cluster / infra management
## Gitkube overview
- How do we provision clusters?
- We put our Kubernetes resources as YAML files in a git repository
- Manual "one-shot" provisioning (CLI, web UI...)
- Gitkube is a git server (or "git remote")
- Automation with Terraform, Ansible...
- After making changes to the repository, we push to Gitkube
- Kubernetes-driven systems (Crossplane, CAPI)
- Infrastructure can also be managed with GitOps
- Gitkube applies the resources to the cluster
---
## Example 1
## Setting up Gitkube
- Managed with YAML/Charts:
- Install the CLI:
```bash
sudo curl -L -o /usr/local/bin/gitkube \
https://github.com/hasura/gitkube/releases/download/v0.2.1/gitkube_linux_amd64
sudo chmod +x /usr/local/bin/gitkube
```
- core components (CNI, CSI, Ingress, logging, monitoring...)
- GitOps controllers
- critical application foundations (database operator, databases)
- GitOps manifests
- Managed with GitOps:
- applications
- staging databases
- Install Gitkube on the cluster:
```bash
gitkube install --expose ClusterIP
```
---
## Example 2
## Creating a Remote
- Managed with YAML/Charts:
- Gitkube provides a new type of API resource: *Remote*
- essential components (CNI, CoreDNS)
(this is using a mechanism called Custom Resource Definitions or CRD)
- initial installation of GitOps controllers
- Create and apply a YAML file containing the following manifest:
```yaml
apiVersion: gitkube.sh/v1alpha1
kind: Remote
metadata:
name: example
spec:
authorizedKeys:
- "ssh-rsa AAA..."
manifests:
path: "."
```
- Managed with GitOps:
- upgrades of GitOps controllers
- core components (CSI, Ingress, logging, monitoring...)
- operators, databases
- more GitOps manifests for applications!
(replace the `ssh-rsa AAA...` section with the content of `~/.ssh/id_rsa.pub`)
---
## Concrete example
## Pushing to our remote
- Source code repository (not shown here)
- Get the `gitkubed` IP address:
```bash
kubectl -n kube-system get svc gitkubed
IP=$(kubectl -n kube-system get svc gitkubed -o json |
jq -r .spec.clusterIP)
```
- Infrastructure repository (shown below), single branch
- Get ourselves a sample repository with resource YAML files:
```bash
git clone git://github.com/jpetazzo/kubercoins
cd kubercoins
```
```
@@INCLUDE[slides/k8s/gitopstree.txt]
```
- Add the remote and push to it:
```bash
git remote add k8s ssh://default-example@$IP/~/git/default-example
git push k8s master
```
---
## Making changes
- Edit a local file
- Commit
- Push!
- Make sure that you push to the `k8s` remote
---
## Other features
- Gitkube can also build container images for us
(see the [documentation](https://github.com/hasura/gitkube/blob/master/docs/remote.md) for more details)
- Gitkube can also deploy Helm charts
(instead of raw YAML files)
???

View File

@@ -51,7 +51,7 @@
- instructions indicating to users "please tweak this and that in the YAML"
- That's where using something like
[CUE](https://github.com/cue-labs/cue-by-example/tree/main/003_kubernetes_tutorial),
[CUE](https://github.com/cuelang/cue/blob/v0.3.2/doc/tutorial/kubernetes/README.md),
[Kustomize](https://kustomize.io/),
or [Helm](https://helm.sh/) can help!
@@ -86,6 +86,8 @@
- On April 30th 2020, Helm was the 10th project to *graduate* within the CNCF
🎉
(alongside Containerd, Prometheus, and Kubernetes itself)
- This is an acknowledgement by the CNCF for projects that
@@ -97,8 +99,6 @@
- See [CNCF announcement](https://www.cncf.io/announcement/2020/04/30/cloud-native-computing-foundation-announces-helm-graduation/)
and [Helm announcement](https://helm.sh/blog/celebrating-helms-cncf-graduation/)
- In other words: Helm is here to stay
---
## Helm concepts
@@ -173,13 +173,11 @@ or `apt` tools).
- Helm 3 doesn't use `tiller` at all, making it simpler (yay!)
- If you see references to `tiller` in a tutorial, documentation... that doc is obsolete!
---
class: extra-details
## What was the problem with `tiller`?
## With or without `tiller`
- With Helm 3:
@@ -195,7 +193,9 @@ class: extra-details
- This indirect model caused significant permissions headaches
- It also made it more complicated to embed Helm in other tools
(`tiller` required very broad permissions to function)
- `tiller` was removed in Helm 3 to simplify the security aspects
---
@@ -222,6 +222,59 @@ class: extra-details
---
class: extra-details
## Only if using Helm 2 ...
- We need to install Tiller and give it some permissions
- Tiller is composed of a *service* and a *deployment* in the `kube-system` namespace
- They can be managed (installed, upgraded...) with the `helm` CLI
.lab[
- Deploy Tiller:
```bash
helm init
```
]
At the end of the install process, you will see:
```
Happy Helming!
```
---
class: extra-details
## Only if using Helm 2 ...
- Tiller needs permissions to create Kubernetes resources
- In a more realistic deployment, you might create per-user or per-team
service accounts, roles, and role bindings
.lab[
- Grant `cluster-admin` role to `kube-system:default` service account:
```bash
kubectl create clusterrolebinding add-on-cluster-admin \
--clusterrole=cluster-admin --serviceaccount=kube-system:default
```
]
(Defining the exact roles and permissions on your cluster requires
a deeper knowledge of Kubernetes' RBAC model. The command above is
fine for personal and development clusters.)
---
## Charts and repositories
- A *repository* (or repo in short) is a collection of charts
@@ -240,7 +293,27 @@ class: extra-details
---
## How to find charts
class: extra-details
## How to find charts, the old way
- Helm 2 came with one pre-configured repo, the "stable" repo
(located at https://charts.helm.sh/stable)
- Helm 3 doesn't have any pre-configured repo
- The "stable" repo mentioned above is now being deprecated
- The new approach is to have fully decentralized repos
- Repos can be indexed in the Artifact Hub
(which supersedes the Helm Hub)
---
## How to find charts, the new way
- Go to the [Artifact Hub](https://artifacthub.io/packages/search?kind=0) (https://artifacthub.io)
@@ -336,6 +409,24 @@ Note: it is also possible to install directly a chart, with `--repo https://...`
---
class: extra-details
## Searching and installing with Helm 2
- Helm 2 doesn't have support for the Helm Hub
- The `helm search` command only takes a search string argument
(e.g. `helm search juice-shop`)
- With Helm 2, the name is optional:
`helm install juice/juice-shop` will automatically generate a name
`helm install --name my-juice-shop juice/juice-shop` will specify a name
---
## Viewing resources of a release
- This specific chart labels all its resources with a `release` label
@@ -451,11 +542,11 @@ All unspecified values will take the default values defined in the chart.
:EN:- Helm concepts
:EN:- Installing software with Helm
:EN:- Finding charts on the Artifact Hub
:EN:- Helm 2, Helm 3, and the Helm Hub
:FR:- Fonctionnement général de Helm
:FR:- Installer des composants via Helm
:FR:- Trouver des *charts* sur *Artifact Hub*
:FR:- Helm 2, Helm 3, et le *Helm Hub*
:T: Getting started with Helm and its concepts

View File

@@ -572,7 +572,7 @@ This is normal: we haven't provided any ingress rule yet.
- Create a prefix match rule for the `blue` service:
```bash
kubectl create ingress bluestar --rule=/blue*=blue:80
kubectl create ingress bluestar --rule=/blue*:blue:80
```
- Check that it works:

View File

@@ -109,7 +109,7 @@ class: extra-details
- Install Go
(on our VMs: `sudo snap install go --classic` or `sudo apk add go`)
(on our VMs: `sudo snap install go --classic`)
- Install kubebuilder
@@ -250,7 +250,7 @@ spec:
## Loading an object
Open `internal/controllers/machine_controller.go`.
Open `controllers/machine_controller.go`.
Add that code in the `Reconcile` method, at the `TODO(user)` location:
@@ -505,7 +505,7 @@ if machine.Spec.SwitchPosition != "down" {
changeAt := machine.Status.SeenAt.Time.Add(5 * time.Second)
if now.Time.After(changeAt) {
machine.Spec.SwitchPosition = "down"
machine.Status.SeenAt = nil
machine.Status.SeenAt = nil
if err := r.Update(ctx, &machine); err != nil {
logger.Info("error updating switch position")
return ctrl.Result{}, client.IgnoreNotFound(err)
@@ -629,17 +629,17 @@ Note: this time, only create a new custom resource; not a new controller.
- We can retrieve associated switches like this:
```go
var switches uselessv1alpha1.SwitchList
var switches uselessv1alpha1.SwitchList
if err := r.List(ctx, &switches,
client.InNamespace(req.Namespace),
client.MatchingLabels{"machine": req.Name},
); err != nil {
logger.Error(err, "unable to list switches of the machine")
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if err := r.List(ctx, &switches,
client.InNamespace(req.Namespace),
client.MatchingLabels{"machine": req.Name},
); err != nil {
logger.Error(err, "unable to list switches of the machine")
return ctrl.Result{}, client.IgnoreNotFound(err)
}
logger.Info("Found switches", "switches", switches)
logger.Info("Found switches", "switches", switches)
```
---
@@ -649,13 +649,13 @@ Note: this time, only create a new custom resource; not a new controller.
- Each time we reconcile a Machine, let's update its status:
```go
status := ""
for _, sw := range switches.Items {
status += string(sw.Spec.Position[0])
}
machine.Status.Positions = status
if err := r.Status().Update(ctx, &machine); err != nil {
...
status := ""
for _, sw := range switches.Items {
status += string(sw.Spec.Position[0])
}
machine.Status.Positions = status
if err := r.Status().Update(ctx, &machine); err != nil {
...
```
- Run the controller and check that POSITIONS gets updated
@@ -721,7 +721,7 @@ if err := r.Create(ctx, &sw); err != nil { ...
Define the following helper function:
```go
func (r *MachineReconciler) machineOfSwitch(ctx context.Context, obj client.Object) []ctrl.Request {
func (r *MachineReconciler) machineOfSwitch(obj client.Object) []ctrl.Request {
return []ctrl.Request{
ctrl.Request{
NamespacedName: types.NamespacedName{
@@ -746,7 +746,7 @@ func (r *MachineReconciler) SetupWithManager(mgr ctrl.Manager) error {
For(&uselessv1alpha1.Machine{}).
Owns(&uselessv1alpha1.Switch{}).
Watches(
&uselessv1alpha1.Switch{},
&source.Kind{Type: &uselessv1alpha1.Switch{}},
handler.EnqueueRequestsFromMapFunc(r.machineOfSwitch),
).
Complete(r)

View File

@@ -128,9 +128,7 @@ configMapGenerator:
- A *variant* is the final outcome of applying bases + overlays
(See the [kustomize glossary][glossary] for more definitions!)
[glossary]: https://kubectl.docs.kubernetes.io/references/kustomize/glossary/
(See the [kustomize glossary](https://github.com/kubernetes-sigs/kustomize/blob/master/docs/glossary.md) for more definitions!)
---
@@ -230,7 +228,7 @@ General workflow:
3. `kustomize edit add patch` to add patches to said resources
4. `kustomize edit add ...` or `kustomize edit set ...` (many options!)
4. `kustomized edit add ...` or `kustomize edit set ...` (many options!)
5. `kustomize build | kubectl apply -f-` or `kubectl apply -k .`
@@ -246,7 +244,7 @@ General workflow:
(just add `--help` after any command to see possible options!)
- Make sure to install the completion and try e.g. `kustomize edit add [TAB][TAB]`
- Make sure to install the completion and try e.g. `kustomize eidt add [TAB][TAB]`
---
@@ -339,7 +337,7 @@ kustomize edit add label app.kubernetes.io/name:dockercoins
- Assuming that `commonLabels` have been set as shown on the previous slide:
```bash
kubectl apply -k . --prune --selector app.kubernetes.io/name=dockercoins
kubectl apply -k . --prune --selector app.kubernetes.io.name=dockercoins
```
- ... This command removes resources that have been removed from the kustomization

View File

@@ -536,12 +536,12 @@ Note: the `apiVersion` field appears to be optional.
- Excerpt:
```yaml
generate:
kind: LimitRange
name: default-limitrange
namespace: "{{request.object.metadata.name}}"
data:
spec:
limits:
kind: LimitRange
name: default-limitrange
namespace: "{{request.object.metadata.name}}"
data:
spec:
limits:
```
- Note that we have to specify the `namespace`

View File

@@ -2,7 +2,7 @@
- "New" policies
(available in alpha since Kubernetes 1.22, and GA since Kubernetes 1.25)
(available in alpha since Kubernetes 1.22)
- Easier to use
@@ -66,6 +66,50 @@ class: extra-details
---
## PSA in practice
- Step 1: enable the PodSecurity admission plugin
- Step 2: label some Namespaces
- Step 3: provide an AdmissionConfiguration (optional)
- Step 4: profit!
---
## Enabling PodSecurity
- This requires Kubernetes 1.22 or later
- This requires the ability to reconfigure the API server
- The following slides assume that we're using `kubeadm`
(and have write access to `/etc/kubernetes/manifests`)
---
## Reconfiguring the API server
- In Kubernetes 1.22, we need to enable the `PodSecurity` feature gate
- In later versions, this might be enabled automatically
.lab[
- Edit `/etc/kubernetes/manifests/kube-apiserver.yaml`
- In the `command` list, add `--feature-gates=PodSecurity=true`
- Save, quit, wait for the API server to be back up again
]
Note: for bonus points, edit the `kubeadm-config` ConfigMap instead!
---
## Namespace labels
- Three optional labels can be added to namespaces:
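- For reference, these are the `pod-security.kubernetes.io/enforce`, `audit`, and `warn` labels; for example (hypothetical namespace name):
```bash
kubectl label namespace mynamespace \
  pod-security.kubernetes.io/enforce=baseline \
  pod-security.kubernetes.io/warn=restricted
```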
@@ -233,6 +277,14 @@ Let's use @@LINK[k8s/admission-configuration.yaml]:
- But the Pods don't get created
---
## Clean up
- We probably want to remove the API server flags that we added
(the feature gate and the admission configuration)
???
:EN:- Preventing privilege escalation with Pod Security Admission

View File

@@ -124,7 +124,7 @@
## Admission plugins
- [PodSecurityPolicy](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) (was removed in Kubernetes 1.25)
- [PodSecurityPolicy](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) (will be removed in Kubernetes 1.25)
- create PodSecurityPolicy resources
@@ -132,7 +132,7 @@
- create RoleBinding that grants the Role to a user or ServiceAccount
- [PodSecurityAdmission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) (alpha since Kubernetes 1.22, stable since 1.25)
- [PodSecurityAdmission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) (alpha since Kubernetes 1.22)
- use pre-defined policies (privileged, baseline, restricted)
@@ -162,31 +162,9 @@
---
## Validating Admission Policies
- Alternative to validating admission webhooks
- Evaluated in the API server
(don't require an external server; don't add network latency)
- Written in CEL (Common Expression Language)
- alpha in K8S 1.26; beta in K8S 1.28; GA in K8S 1.30
- Can replace validating webhooks at least in simple cases
- Can extend Pod Security Admission
- Check [the documentation][vapdoc] for examples
[vapdoc]: https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/
---
## Acronym salad
- PSP = Pod Security Policy **(deprecated)**
- PSP = Pod Security Policy
- an admission plugin called PodSecurityPolicy

View File

@@ -2,15 +2,11 @@
- "Legacy" policies
(deprecated since Kubernetes 1.21; removed in 1.25)
(deprecated since Kubernetes 1.21; will be removed in 1.25)
- Superseded by Pod Security Standards + Pod Security Admission
(available in alpha since Kubernetes 1.22; stable since 1.25)
- **Since Kubernetes 1.24 was EOL in July 2023, nobody should use PSPs anymore!**
- This section is here mostly for historical purposes, and can be skipped
(available in alpha since Kubernetes 1.22)
---

View File

@@ -6,53 +6,11 @@
- We can specify *limits* and/or *requests*
- We can specify quantities of CPU and/or memory and/or ephemeral storage
- We can specify quantities of CPU and/or memory
---
## Requests vs limits
- *Requests* are *guaranteed reservations* of resources
- They are used for scheduling purposes
- Kubelet will use cgroups to e.g. guarantee a minimum amount of CPU time
- A container **can** use more than its requested resources
- A container using *less* than what it requested should never be killed or throttled
- A node **cannot** be overcommitted with requests
(the sum of all requests **cannot** be higher than resources available on the node)
- A small amount of resources is set aside for system components
(this explains why there is a difference between "capacity" and "allocatable")
---
## Requests vs limits
- *Limits* are "hard limits" (a container **cannot** exceed its limits)
- They aren't taken into account by the scheduler
- A container exceeding its memory limit is killed instantly
(by the kernel out-of-memory killer)
- A container exceeding its CPU limit is throttled
- A container exceeding its disk limit is killed
(usually with a small delay, since this is checked periodically by kubelet)
- On a given node, the sum of all limits **can** be higher than the node size
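- A sketch of how requests and limits are declared on a container (values are arbitrary):
```bash
kubectl apply -f- <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: sized-pod
spec:
  containers:
  - name: web
    image: nginx
    resources:
      requests:       # used for scheduling; guaranteed reservation
        cpu: 100m
        memory: 128Mi
      limits:         # hard caps; exceeding the memory limit means OOM kill
        cpu: 500m
        memory: 256Mi
EOF
```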
---
## Compressible vs incompressible resources
## CPU vs memory
- CPU is a *compressible resource*
@@ -66,29 +24,7 @@
- if we have N GB RAM and need 2N, we might run at... 0.1% speed!
- Disk is also an *incompressible resource*
- when the disk is full, writes will fail
- applications may or may not crash, but persistent apps will be in trouble
---
## Running low on CPU
- Two ways for a container to "run low" on CPU:
- it's hitting its CPU limit
- all CPUs on the node are at 100% utilization
- The app in the container will run slower
(compared to running without a limit, or if CPU cycles were available)
- No other consequence
(but this could affect SLA/SLO for latency-sensitive applications!)
- As a result, exceeding limits will have different consequences for CPU and memory
---
@@ -200,7 +136,9 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
## Running low on memory
- When the kernel runs low on memory, it starts to reclaim used memory
- When the system runs low on memory, it starts to reclaim used memory
(we talk about "memory pressure")
- Option 1: free up some buffers and caches
@@ -224,91 +162,71 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
- If a container exceeds its memory *limit*, it gets killed immediately
- If a node memory usage gets too high, it will *evict* some pods
- If a node is overcommitted and under memory pressure, it will terminate some pods
(we say that the node is "under pressure", more on that in a bit!)
(see next slide for some details about what "overcommit" means here!)
[KEP 2400]: https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2400-node-swap/README.md#implementation-history
---
## Overcommitting resources
- *Limits* are "hard limits" (a container *cannot* exceed its limits)
  - a container exceeding its memory limit is killed
  - a container exceeding its CPU limit is throttled
- On a given node, the sum of pod *limits* can be higher than the node size
- *Requests* are used for scheduling purposes
  - a container can use more than its requested CPU or RAM amounts
  - a container using *less* than what it requested should never be killed or throttled
- On a given node, the sum of pod *requests* cannot be higher than the node size
---
## Running low on disk
- When the kubelet runs low on disk, it starts to reclaim disk space
  (similarly to what the kernel does, but in different categories)
- Option 1: garbage collect dead pods and containers
  (no consequence, but their logs will be deleted)
- Option 2: remove unused images
  (no consequence, but these images will have to be repulled if we need them later)
- Option 3: evict pods and remove them to reclaim their disk usage
- Note: this only applies to *ephemeral storage*, not to e.g. Persistent Volumes!
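As a sketch, here is how a pod's scratch space can be capped (names and sizes are arbitrary; exceeding `sizeLimit` gets the pod evicted):
```
# Hypothetical pod with bounded ephemeral storage usage
apiVersion: v1
kind: Pod
metadata:
  name: scratch-demo
spec:
  containers:
  - name: worker
    image: alpine
    command: ["sleep", "3600"]
    resources:
      limits:
        ephemeral-storage: 2Gi  # writable layer + logs + emptyDir volumes
    volumeMounts:
    - name: scratch
      mountPath: /scratch
  volumes:
  - name: scratch
    emptyDir:
      sizeLimit: 1Gi  # kubelet evicts the pod if this is exceeded
```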
---
## Ephemeral storage?
- This includes:
  - the *read-write layer* of the container
    <br/>
    (any file creation/modification outside of its volumes)
  - `emptyDir` volumes mounted in the container
  - the container logs stored on the node
- This does not include:
  - the container image
  - other types of volumes (e.g. Persistent Volumes, `hostPath`, or `local` volumes)
---
## Pod quality of service
Each pod is assigned a QoS class (visible in `status.qosClass`).
- If limits = requests:
  - as long as the container uses less than the limit, it won't be affected
  - if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
- If requests &lt; limits:
  - as long as the container uses less than the request, it won't be affected
  - otherwise, it might be killed/evicted if the node gets overloaded
  - if at least one container has *(requests&lt;limits)*, QoS is considered "Burstable"
- If a pod doesn't have any request or limit, QoS is considered "BestEffort"
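For example, here is a sketch of a pod that would be "Guaranteed" (image and values are arbitrary):
```
# limits = requests for every container => QoS class "Guaranteed"
apiVersion: v1
kind: Pod
metadata:
  name: qos-demo
spec:
  containers:
  - name: app
    image: nginx
    resources:
      requests:
        cpu: 500m
        memory: 256Mi
      limits:
        cpu: 500m
        memory: 256Mi
```
The class can then be checked with e.g. `kubectl get pod qos-demo -o jsonpath={.status.qosClass}`.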
---
## Quality of service impact
- When a node is overloaded, BestEffort pods are killed first
- Then, Burstable pods that exceed their requests
- Burstable and Guaranteed pods below their requests are never killed
  (except if their node fails)
- If we only use Guaranteed pods, no pod should ever be killed
  (as long as they stay within their limits)
---
class: extra-details
## Disk limit enforcement
- Disk usage is periodically measured by kubelet
  (with something equivalent to `du`)
- There can be a small delay before pod termination when disk limit is exceeded
- It's also possible to enable filesystem *project quotas*
  (e.g. with EXT4 or XFS)
- Remember that container logs are also accounted for!
  (container log rotation/retention is managed by kubelet)
---
class: extra-details
## `nodefs` and `imagefs`
- `nodefs` is the main filesystem of the node
(holding, notably, `emptyDir` volumes and container logs)
- Optionally, the container engine can be configured to use an `imagefs`
- `imagefs` will store container images and container writable layers
- When there is a separate `imagefs`, its disk usage is tracked independently
- If `imagefs` usage gets too high, kubelet will remove old images first
(conversely, if `nodefs` usage gets too high, kubelet won't remove old images)
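These thresholds live in the kubelet configuration; a sketch with arbitrary values (field names from the KubeletConfiguration API):
```
# Excerpt of a KubeletConfiguration (thresholds are arbitrary)
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
evictionHard:
  nodefs.available: "10%"   # evict pods when nodefs runs low
  imagefs.available: "15%"  # reclaim images when imagefs runs low
imageGCHighThresholdPercent: 85  # image GC starts above this usage
imageGCLowThresholdPercent: 80   # image GC stops below this usage
```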
---
@@ -386,46 +304,6 @@ class: extra-details
---
## Pod quality of service
Each pod is assigned a QoS class (visible in `status.qosClass`).
- If limits = requests:
- as long as the container uses less than the limit, it won't be affected
- if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
- If requests &lt; limits:
- as long as the container uses less than the request, it won't be affected
- otherwise, it might be killed/evicted if the node gets overloaded
- if at least one container has *(requests&lt;limits)*, QoS is considered "Burstable"
- If a pod doesn't have any request or limit, QoS is considered "BestEffort"
---
## Quality of service impact
- When a node is overloaded, BestEffort pods are killed first
- Then, Burstable pods that exceed their requests
- Burstable and Guaranteed pods below their requests are never killed
(except if their node fails)
- If we only use Guaranteed pods, no pod should ever be killed
(as long as they stay within their limits)
(Pod QoS is also explained in [this page](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/) of the Kubernetes documentation and in [this blog post](https://medium.com/google-cloud/quality-of-service-class-qos-in-kubernetes-bb76a89eb2c6).)
---
## Specifying resources
- Resource requests are expressed at the *container* level
@@ -438,9 +316,9 @@ Each pod is assigned a QoS class (visible in `status.qosClass`).
(so 100m = 0.1)
- Memory and ephemeral disk storage are expressed in bytes
- Memory is expressed in bytes
- These can have k, M, G, T, Ki, Mi, Gi, Ti suffixes
- Memory can be expressed with k, M, G, T, Ki, Mi, Gi, Ti suffixes
(corresponding to 10^3, 10^6, 10^9, 10^12, 2^10, 2^20, 2^30, 2^40)
@@ -456,13 +334,11 @@ containers:
image: jpetazzo/color
resources:
  limits:
    memory: "100Mi"
    cpu: "100m"
    ephemeral-storage: 10M
  requests:
    memory: "100Mi"
    cpu: "10m"
    ephemeral-storage: 10M
```
This set of resources makes sure that this service won't be killed (as long as it stays below 100 MB of RAM), but allows its CPU usage to be throttled if necessary.
@@ -489,7 +365,7 @@ This set of resources makes sure that this service won't be killed (as long as i
---
## We need to specify resource values
## We need default resource values
- If we do not set resource values at all:
@@ -503,33 +379,9 @@ This set of resources makes sure that this service won't be killed (as long as i
- if the request is zero, the scheduler can't make a smart placement decision
- This is fine when learning/testing, but absolutely not in production!
- To address this, we can set default values for resources
---
## How should we set resources?
- Option 1: manually, for each container
- simple, effective, but tedious
- Option 2: automatically, with the [Vertical Pod Autoscaler (VPA)][vpa]
- relatively simple, very minimal involvement beyond initial setup
- not compatible with HPAv1, can disrupt long-running workloads (see [limitations][vpa-limitations])
- Option 3: semi-automatically, with tools like [Robusta KRR][robusta]
- good compromise between manual work and automation
- Option 4: by creating LimitRanges in our Namespaces
- relatively simple, but "one-size-fits-all" approach might not always work
[robusta]: https://github.com/robusta-dev/krr
[vpa]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
[vpa-limitations]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler#known-limitations
- This is done with a LimitRange object
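For the record, a sketch of a LimitRange providing defaults (numbers are arbitrary):
```
# Containers created without explicit resources in this namespace
# will inherit these values.
apiVersion: v1
kind: LimitRange
metadata:
  name: my-defaults
spec:
  limits:
  - type: Container
    defaultRequest:   # applied when a container sets no requests
      cpu: 100m
      memory: 128Mi
    default:          # applied when a container sets no limits
      cpu: "1"
      memory: 512Mi
```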
---
@@ -784,7 +636,7 @@ class: extra-details
- ResourceQuota per namespace
- Let's see one possible strategy to get started with resource limits
- Let's see a simple recommendation to get started with resource limits
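For reference, a hypothetical ResourceQuota capping the total footprint of one namespace (values are arbitrary):
```
apiVersion: v1
kind: ResourceQuota
metadata:
  name: compute-quota
spec:
  hard:
    requests.cpu: "10"
    requests.memory: 20Gi
    limits.cpu: "20"
    limits.memory: 40Gi
    pods: "50"
```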
---

View File

@@ -352,87 +352,6 @@ class: pic
class: pic
![](images/kubernetes-services/64-ING-nolocal.png)
---
class: extra-details
## Traffic engineering
- By default, connections to a ClusterIP or a NodePort are load balanced
across all the backends of their Service
- This can incur extra network hops (which add latency)
- To remove that extra hop, multiple mechanisms are available:
- `spec.externalTrafficPolicy`
- `spec.internalTrafficPolicy`
- [Topology aware routing](https://kubernetes.io/docs/concepts/services-networking/topology-aware-routing/) annotation (beta)
- `spec.trafficDistribution` (alpha in 1.30, beta in 1.31)
---
## `internal / externalTrafficPolicy`
- Applies respectively to `ClusterIP` and `NodePort` connections
- Can be set to `Cluster` or `Local`
- `Cluster`: load balance connections across all backends (default)
- `Local`: load balance connections to local backends (on the same node)
- With `Local`, if there is no local backend, the connection will fail!
(the parameter expresses a "hard rule", not a preference)
- Example: `externalTrafficPolicy: Local` for Ingress controllers
(as shown on earlier diagrams)
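A sketch of what this looks like on a Service (assuming a hypothetical ingress controller labeled `app: ingress-nginx`):
```
apiVersion: v1
kind: Service
metadata:
  name: ingress-nginx
spec:
  type: LoadBalancer
  externalTrafficPolicy: Local  # only use backends on the receiving node
  selector:
    app: ingress-nginx
  ports:
  - port: 80
    targetPort: 80
```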
---
class: extra-details
## Topology aware routing
- In beta since Kubernetes 1.23
- Enabled with annotation `service.kubernetes.io/topology-mode=Auto`
- Relies on the node label `topology.kubernetes.io/zone`
- Kubernetes service proxy will try to keep connections within a zone
(connections made by a pod in zone `a` will be sent to pods in zone `a`)
- ...Except if there are no pods in the zone (then fallback to all zones)
- This can mess up autoscaling!
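Opting in is a one-line change on the Service (sketch; the `web` app is hypothetical):
```
apiVersion: v1
kind: Service
metadata:
  name: web
  annotations:
    service.kubernetes.io/topology-mode: Auto  # enable topology aware routing
spec:
  selector:
    app: web
  ports:
  - port: 80
```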
---
class: extra-details
## `spec.trafficDistribution`
- [KEP4444, Traffic Distribution for Services][kep4444]
- In alpha since Kubernetes 1.30, beta since Kubernetes 1.31
- Should eventually supersede topology aware routing
- Can be set to `PreferClose` (more values might be supported later)
- The meaning of `PreferClose` is implementation dependent
(with kube-proxy, it should work like topology aware routing: stay in a zone)
[kep4444]: https://github.com/kubernetes/enhancements/issues/4444
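A sketch, assuming a cluster recent enough to support the field (the `web` app is hypothetical):
```
apiVersion: v1
kind: Service
metadata:
  name: web
spec:
  trafficDistribution: PreferClose  # implementation-dependent "stay close"
  selector:
    app: web
  ports:
  - port: 80
```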
???
:EN:- Service types: ClusterIP, NodePort, LoadBalancer

View File

@@ -144,30 +144,6 @@
---
## [Orbstack](https://orbstack.dev/)
- Mac only
- Runs Docker containers, Kubernetes, and Linux virtual machines
- Emphasis on speed and energy usage (battery life)
- Great support for `ClusterIP` and `LoadBalancer` services
- Free for personal use; paid product otherwise
---
## [Podman Desktop](https://podman-desktop.io/)
- Available on Linux, Mac, and Windows
- Free and open-source
- Doesn't support Kubernetes directly, but [supports KinD](https://podman-desktop.io/docs/kind)
---
## [Rancher Desktop](https://rancherdesktop.io/)
- Available on Linux, Mac, and Windows
@@ -182,6 +158,8 @@
- Emphasis on ease of use (like Docker Desktop)
- Relatively young product (first release in May 2021)
- Based on k3s and other proven components
---

View File

@@ -166,15 +166,17 @@
- [Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way) by Kelsey Hightower
*step by step guide to install Kubernetes on GCP, with certificates, HA...*
- step by step guide to install Kubernetes on Google Cloud
- covers certificates, high availability ...
- *“Kubernetes The Hard Way is optimized for learning, which means taking the long route to ensure you understand each task required to bootstrap a Kubernetes cluster.”*
- [Deep Dive into Kubernetes Internals for Builders and Operators](https://www.youtube.com/watch?v=3KtEAa7_duA)
*conference talk setting up a simplified Kubernetes cluster - no security or HA*
- conference presentation showing step-by-step control plane setup
- 🇫🇷[Démystifions les composants internes de Kubernetes](https://www.youtube.com/watch?v=OCMNA0dSAzc)
*improved version of the previous one, with certs and recent k8s versions*
- emphasis on simplicity, not on security and availability
---

View File

@@ -67,7 +67,6 @@ content:
- - k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/disruptions.md
- k8s/horizontal-pod-autoscaler.md
- - k8s/prometheus.md
#- k8s/prometheus-stack.md

View File

@@ -124,10 +124,6 @@ content:
- k8s/portworx.md
- k8s/openebs.md
- k8s/stateful-failover.md
-
- k8s/gitworkflows.md
- k8s/flux.md
- k8s/argocd.md
-
- k8s/logs-centralized.md
- k8s/prometheus.md
@@ -135,7 +131,6 @@ content:
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/disruptions.md
- k8s/cluster-autoscaler.md
- k8s/horizontal-pod-autoscaler.md
- k8s/hpa-v2.md
@@ -168,6 +163,7 @@ content:
- k8s/cluster-upgrade.md
- k8s/cluster-backup.md
- k8s/cloud-controller-manager.md
- k8s/gitworkflows.md
-
- k8s/lastwords.md
- k8s/links.md

View File

@@ -4,7 +4,7 @@
- On stage: Ludovic
- Backstage: Alexandre, Antoine, Aurélien (x2), Benjamin (x2), David, Kostas, Nicolas, Paul, Sébastien, Thibault...
- Backstage: Alexandre, Antoine, Aurélien (x2), Benji, David, Julien, Kostas, Nicolas, Paul, Sébastien, Thibault...
- Schedule: every day, 9am to 1pm
@@ -73,4 +73,4 @@
[qdnd]: https://www.youtube.com/channel/UCOAhkxpryr_BKybt9wIw-NQ
[ndeloof]: https://github.com/ndeloof
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerMai2024
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerMai2023

View File

@@ -4,7 +4,7 @@
- On stage: Jérôme ([@jpetazzo@hachyderm.io])
- Backstage: Alexandre, Antoine, Aurélien (x2), Benjamin, David, Kostas, Nicolas, Paul, Sébastien, Thibault...
- Backstage: Alexandre, Antoine, Aurélien (x2), Benji, David, Julien, Kostas, Nicolas, Paul, Sébastien, Thibault...
- Schedule: every day, 9am to 1pm
@@ -61,11 +61,11 @@
(except on the last day)
- One hour of open Q&A!
- Tuesday: 4:30pm-5:30pm
- Wednesday: 3:00pm-4:00pm
- Wednesday: 3:30pm-4:30pm
- Thursday: 4:00pm-5:00pm
- Thursday: 2:30pm-3:30pm
- On [Jitsi][jitsi] (the "visioconf" link on the training portal)
@@ -73,4 +73,4 @@
[qdnd]: https://www.youtube.com/channel/UCOAhkxpryr_BKybt9wIw-NQ
[ndeloof]: https://github.com/ndeloof
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/HighFiveAutomne2024
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerAutomne2023

View File

@@ -15,7 +15,6 @@ h1, h2, h3, h4, h5, h6 {
font-weight: bold;
font-size: 45px !important;
margin-top: 0.5em;
margin-bottom: 0.75em;
}
code {

View File

@@ -1,11 +1,3 @@
# Note: Ngrok doesn't have an "anonymous" mode anymore.
# This means that it requires an authentication token.
# That said, all you need is a free account; so if you're
# doing the labs on admission webhooks and want to try
# this Compose file, I highly recommend that you create
# an Ngrok account and set the NGROK_AUTHTOKEN environment
# variable to your authentication token.
version: "3"
services:
@@ -13,8 +5,6 @@ services:
ngrok-echo:
image: ngrok/ngrok
command: http --log=stdout localhost:3000
environment:
- NGROK_AUTHTOKEN
ports:
- 3000
@@ -26,8 +16,6 @@ services:
ngrok-flask:
image: ngrok/ngrok
command: http --log=stdout localhost:5000
environment:
- NGROK_AUTHTOKEN
ports:
- 5000