mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-03-02 01:10:20 +00:00
Compare commits
95 Commits
2023-05-en
...
2024-10-en
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f0b5d4ba8 | ||
|
|
8724ab2835 | ||
|
|
a669b15313 | ||
|
|
76067dca97 | ||
|
|
e665dad1b8 | ||
|
|
543204b905 | ||
|
|
c3b81baa06 | ||
|
|
41e5467063 | ||
|
|
96f03066f9 | ||
|
|
a3d543c6fe | ||
|
|
e573d520e9 | ||
|
|
e7b8337dd5 | ||
|
|
8b554c02d3 | ||
|
|
99348d8a2b | ||
|
|
1ea72f2179 | ||
|
|
ff7cbb2e19 | ||
|
|
5d65cf2ef6 | ||
|
|
3fb2c1e9d1 | ||
|
|
59a569e9e7 | ||
|
|
0b95eac799 | ||
|
|
ce13afa0d4 | ||
|
|
e97c93e451 | ||
|
|
3eb0378d13 | ||
|
|
f98192ac76 | ||
|
|
3488f5ad7b | ||
|
|
51f9b2db3b | ||
|
|
787be94cb6 | ||
|
|
86d4dfa775 | ||
|
|
c550ea6553 | ||
|
|
0d761409d7 | ||
|
|
ea16766fd7 | ||
|
|
e5d0e3ef85 | ||
|
|
81026d9d41 | ||
|
|
8788012880 | ||
|
|
ab6ed864e3 | ||
|
|
21f08cf3bd | ||
|
|
00b126ff20 | ||
|
|
d5b462653e | ||
|
|
560be57017 | ||
|
|
303cf459c4 | ||
|
|
2f009de2db | ||
|
|
06ca097b52 | ||
|
|
b4383156a5 | ||
|
|
624ec14763 | ||
|
|
a5e270b756 | ||
|
|
41330f8302 | ||
|
|
4fcd490b30 | ||
|
|
633c29b62c | ||
|
|
0802701f11 | ||
|
|
c407e178d5 | ||
|
|
cb574d7cdd | ||
|
|
84988644df | ||
|
|
3ab64d79e4 | ||
|
|
6391b4d896 | ||
|
|
57e8c6ee2f | ||
|
|
42443df0dc | ||
|
|
9289d453bc | ||
|
|
3d8059c631 | ||
|
|
7ff17fbabd | ||
|
|
dbfda8b458 | ||
|
|
c8fc67c995 | ||
|
|
28222db2e4 | ||
|
|
a38f930858 | ||
|
|
2cef200726 | ||
|
|
1f77a52137 | ||
|
|
b188e0f8a9 | ||
|
|
ac203a128d | ||
|
|
a9920e5cf0 | ||
|
|
d1047f950d | ||
|
|
e380509ffe | ||
|
|
b5c754211e | ||
|
|
cc57d983b2 | ||
|
|
fd86e6079d | ||
|
|
08f2e76082 | ||
|
|
db848767c1 | ||
|
|
c07f52c493 | ||
|
|
016c8fc863 | ||
|
|
b9bbccb346 | ||
|
|
311a2aaf32 | ||
|
|
a19585a587 | ||
|
|
354bd9542e | ||
|
|
0c73e91e6f | ||
|
|
23064b5d26 | ||
|
|
971314a84f | ||
|
|
c0689cc5df | ||
|
|
033873064a | ||
|
|
1ed3af6eff | ||
|
|
33ddfce3fa | ||
|
|
943783c8fb | ||
|
|
46b3aa23bf | ||
|
|
4498dc41a4 | ||
|
|
58de0d31f8 | ||
|
|
d32d986a9e | ||
|
|
fcb922628c | ||
|
|
77ceba7f5b |
@@ -1,6 +1,6 @@
|
||||
FROM ruby:alpine
|
||||
RUN apk add --update build-base curl
|
||||
RUN gem install sinatra
|
||||
RUN gem install sinatra --version '~> 3'
|
||||
RUN gem install thin
|
||||
ADD hasher.rb /
|
||||
CMD ["ruby", "hasher.rb"]
|
||||
|
||||
@@ -16,8 +16,7 @@ spec:
|
||||
hostPath:
|
||||
path: /root
|
||||
tolerations:
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
- operator: Exists
|
||||
initContainers:
|
||||
- name: hacktheplanet
|
||||
image: alpine
|
||||
@@ -27,7 +26,7 @@ spec:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys > /root/.ssh/authorized_keys"
|
||||
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys >> /root/.ssh/authorized_keys"
|
||||
containers:
|
||||
- name: web
|
||||
image: nginx
|
||||
|
||||
13
k8s/pod-disruption-budget.yaml
Normal file
13
k8s/pod-disruption-budget.yaml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
name: my-pdb
|
||||
spec:
|
||||
#minAvailable: 2
|
||||
#minAvailable: 90%
|
||||
maxUnavailable: 1
|
||||
#maxUnavailable: 10%
|
||||
selector:
|
||||
matchLabels:
|
||||
app: my-app
|
||||
|
||||
27
k8s/sysctl.yaml
Normal file
27
k8s/sysctl.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: sysctl
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: sysctl
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: sysctl
|
||||
spec:
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
initContainers:
|
||||
- name: sysctl
|
||||
image: alpine
|
||||
securityContext:
|
||||
privileged: true
|
||||
command:
|
||||
- sysctl
|
||||
- fs.inotify.max_user_instances=99999
|
||||
containers:
|
||||
- name: pause
|
||||
image: registry.k8s.io/pause:3.8
|
||||
|
||||
@@ -59,6 +59,27 @@ You don't **have to** install the CLI tools of the cloud provider(s) that you wa
|
||||
|
||||
If you want to provide your cloud credentials through other means, you will have to adjust the Terraform configuration files in `terraform/provider-config` accordingly.
|
||||
|
||||
Here is where we look for credentials for each provider:
|
||||
|
||||
- AWS: Terraform defaults; see [AWS provider documentation][creds-aws] (for instance, you can use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables, or AWS config and profile files)
|
||||
- Azure: Terraform defaults; see [AzureRM provider documentation][creds-azure] (typically, you can authenticate with the `az` CLI and Terraform will pick it up automatically)
|
||||
- Civo: CLI configuration file (`~/.civo.json`)
|
||||
- Digital Ocean: CLI configuration file (`~/.config/doctl/config.yaml`)
|
||||
- Exoscale: CLI configuration file (`~/.config/exoscale/exoscale.toml`)
|
||||
- Google Cloud: FIXME, note that the project name is currently hard-coded to `prepare-tf`
|
||||
- Hetzner: CLI configuration file (`~/.config/hcloud/cli.toml`)
|
||||
- Linode: CLI configuration file (`~/.config/linode-cli`)
|
||||
- OpenStack: you will need to write a tfvars file (check [that example](terraform/virtual-machines/openstack/tfvars.example))
|
||||
- Oracle: Terraform defaults; see [OCI provider documentation][creds-oci] (for instance, you can set up API keys; or you can use a short-lived token generated by the OCI CLI with `oci session authenticate`)
|
||||
- OVH: Terraform defaults; see [OVH provider documentation][creds-ovh] (this typically involves setting up 5 `OVH_...` environment variables)
|
||||
- Scaleway: Terraform defaults; see [Scaleway provider documentation][creds-scw] (for instance, you can set environment variables, but it will also automatically pick up CLI authentication from `~/.config/scw/config.yaml`)
|
||||
|
||||
[creds-aws]: https://registry.terraform.io/providers/hashicorp/aws/latest/docs#authentication-and-configuration
|
||||
[creds-azure]: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs#authenticating-to-azure
|
||||
[creds-oci]: https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/terraformproviderconfiguration.htm#authentication
|
||||
[creds-ovh]: https://registry.terraform.io/providers/ovh/ovh/latest/docs#provider-configuration
|
||||
[creds-scw]: https://registry.terraform.io/providers/scaleway/scaleway/latest/docs#authentication
|
||||
|
||||
## General Workflow
|
||||
|
||||
- fork/clone repo
|
||||
|
||||
@@ -21,6 +21,11 @@ digitalocean-pvc)
|
||||
jq '.[] | select(.name | startswith("pvc-")) | .id' |
|
||||
xargs -n1 -P10 doctl compute volume delete --force
|
||||
;;
|
||||
scaleway-pvc)
|
||||
scw instance volume list --output json |
|
||||
jq '.[] | select(.name | contains("_pvc-")) | .id' |
|
||||
xargs -n1 -P10 scw instance volume delete
|
||||
;;
|
||||
*)
|
||||
echo "Unknown combination of provider ('$1') and resource ('$2')."
|
||||
;;
|
||||
|
||||
@@ -10,13 +10,22 @@ fi
|
||||
. ~/creds/creds.cloudflare.dns
|
||||
|
||||
cloudflare() {
|
||||
case "$1" in
|
||||
GET|POST|DELETE)
|
||||
METHOD="$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
METHOD=""
|
||||
;;
|
||||
esac
|
||||
URI=$1
|
||||
shift
|
||||
http https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
|
||||
http --ignore-stdin $METHOD https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
|
||||
}
|
||||
|
||||
_list_zones() {
|
||||
cloudflare zones | jq -r .result[].name
|
||||
cloudflare zones?per_page=100 | jq -r .result[].name
|
||||
}
|
||||
|
||||
_get_zone_id() {
|
||||
@@ -32,6 +41,15 @@ _populate_zone() {
|
||||
done
|
||||
}
|
||||
|
||||
_clear_zone() {
|
||||
ZONE_ID=$(_get_zone_id $1)
|
||||
for RECORD_ID in $(
|
||||
cloudflare zones/$ZONE_ID/dns_records | jq -r .result[].id
|
||||
); do
|
||||
cloudflare DELETE zones/$ZONE_ID/dns_records/$RECORD_ID
|
||||
done
|
||||
}
|
||||
|
||||
_add_zone() {
|
||||
cloudflare zones "name=$1"
|
||||
}
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -eu
|
||||
|
||||
# https://open-api.netlify.com/#tag/dnsZone
|
||||
[ "$1" ] || {
|
||||
[ "${1-}" ] || {
|
||||
echo ""
|
||||
echo "Add a record in Netlify DNS."
|
||||
echo "This script is hardcoded to add a record to container.training".
|
||||
@@ -12,13 +14,13 @@
|
||||
echo "$0 del <recordid>"
|
||||
echo ""
|
||||
echo "Example to create a A record for eu.container.training:"
|
||||
echo "$0 add eu 185.145.250.0"
|
||||
echo "$0 add eu A 185.145.250.0"
|
||||
echo ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
NETLIFY_CONFIG_FILE=~/.config/netlify/config.json
|
||||
if ! [ "$DOMAIN" ]; then
|
||||
if ! [ "${DOMAIN-}" ]; then
|
||||
DOMAIN=container.training
|
||||
fi
|
||||
|
||||
@@ -49,27 +51,29 @@ ZONE_ID=$(netlify dns_zones |
|
||||
|
||||
_list() {
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
|
||||
jq -r '.[] | select(.type=="A" or .type=="AAAA") | [.hostname, .type, .value, .id] | @tsv' |
|
||||
sort |
|
||||
column --table
|
||||
}
|
||||
|
||||
_add() {
|
||||
NAME=$1.$DOMAIN
|
||||
ADDR=$2
|
||||
|
||||
TYPE=$2
|
||||
VALUE=$3
|
||||
|
||||
# It looks like if we create two identical records, then delete one of them,
|
||||
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
|
||||
# though it's still visible through the API and the website?)
|
||||
|
||||
if netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="'$TYPE'" and .value=="'$VALUE'")' |
|
||||
grep .
|
||||
then
|
||||
echo "It looks like that record already exists. Refusing to create it."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=$TYPE hostname=$NAME value=$VALUE ttl=300
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'")'
|
||||
@@ -88,7 +92,7 @@ case "$1" in
|
||||
_list
|
||||
;;
|
||||
add)
|
||||
_add $2 $3
|
||||
_add $2 $3 $4
|
||||
;;
|
||||
del)
|
||||
_del $2
|
||||
|
||||
@@ -1,13 +1,29 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Baseline resource usage per vcluster in our usecase:
|
||||
# 500 MB RAM
|
||||
# 10% CPU
|
||||
# (See https://docs.google.com/document/d/1n0lwp6rQKQUIuo_A5LQ1dgCzrmjkDjmDtNj1Jn92UrI)
|
||||
# PRO2-XS = 4 core, 16 gb
|
||||
|
||||
# deploy big cluster
|
||||
TF_VAR_node_size=g6-standard-6 \
|
||||
TF_VAR_nodes_per_cluster=5 \
|
||||
TF_VAR_location=eu-west \
|
||||
./labctl create --mode mk8s --settings settings/mk8s.env --provider linode --tag konk
|
||||
PROVIDER=scaleway
|
||||
|
||||
case "$PROVIDER" in
|
||||
linode)
|
||||
export TF_VAR_node_size=g6-standard-6
|
||||
export TF_VAR_location=eu-west
|
||||
;;
|
||||
scaleway)
|
||||
export TF_VAR_node_size=PRO2-XS
|
||||
export TF_VAR_location=fr-par-2
|
||||
;;
|
||||
esac
|
||||
|
||||
./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk
|
||||
|
||||
# set kubeconfig file
|
||||
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
|
||||
export KUBECONFIG=~/kubeconfig
|
||||
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG
|
||||
|
||||
# set external_ip labels
|
||||
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
|
||||
@@ -16,4 +32,12 @@ while read node address; do
|
||||
done
|
||||
|
||||
# vcluster all the things
|
||||
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 27
|
||||
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 50
|
||||
|
||||
# install prometheus stack because that's cool
|
||||
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
|
||||
--namespace prom-system --create-namespace \
|
||||
kube-prometheus-stack kube-prometheus-stack
|
||||
|
||||
# and also fix sysctl
|
||||
kubectl apply -f ../k8s/sysctl.yaml --namespace kube-system
|
||||
|
||||
@@ -126,6 +126,7 @@ set number
|
||||
set shiftwidth=2
|
||||
set softtabstop=2
|
||||
set nowrap
|
||||
set laststatus=2
|
||||
SQRL
|
||||
|
||||
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
|
||||
@@ -320,6 +321,7 @@ _cmd_clusterize() {
|
||||
pssh "
|
||||
set -e
|
||||
grep PSSH_ /etc/ssh/sshd_config || echo 'AcceptEnv PSSH_*' | sudo tee -a /etc/ssh/sshd_config
|
||||
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
|
||||
sudo systemctl restart ssh.service"
|
||||
|
||||
pssh -I < tags/$TAG/clusters.txt "
|
||||
@@ -391,7 +393,7 @@ _cmd_docker() {
|
||||
##VERSION## https://github.com/docker/compose/releases
|
||||
COMPOSE_VERSION=v2.11.1
|
||||
COMPOSE_PLATFORM='linux-$(uname -m)'
|
||||
|
||||
|
||||
# Just in case you need Compose 1.X, you can use the following lines.
|
||||
# (But it will probably only work for x86_64 machines.)
|
||||
#COMPOSE_VERSION=1.29.2
|
||||
@@ -420,10 +422,23 @@ _cmd_kubebins() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
if [ "$KUBEVERSION" = "" ]; then
|
||||
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
||||
fi
|
||||
|
||||
##VERSION##
|
||||
ETCD_VERSION=v3.4.13
|
||||
K8SBIN_VERSION=v1.19.11 # Can't go to 1.20 because it requires a serviceaccount signing key.
|
||||
CNI_VERSION=v0.8.7
|
||||
case "$KUBEVERSION" in
|
||||
1.19.*)
|
||||
ETCD_VERSION=v3.4.13
|
||||
CNI_VERSION=v0.8.7
|
||||
;;
|
||||
*)
|
||||
ETCD_VERSION=v3.5.10
|
||||
CNI_VERSION=v1.3.0
|
||||
;;
|
||||
esac
|
||||
|
||||
K8SBIN_VERSION="v$KUBEVERSION"
|
||||
ARCH=${ARCHITECTURE-amd64}
|
||||
pssh --timeout 300 "
|
||||
set -e
|
||||
@@ -447,30 +462,41 @@ _cmd_kubebins() {
|
||||
"
|
||||
}
|
||||
|
||||
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
|
||||
_cmd_kube() {
|
||||
_cmd kubepkgs "Install Kubernetes packages (kubectl, kubeadm, kubelet)"
|
||||
_cmd_kubepkgs() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
if [ "$KUBEVERSION" ]; then
|
||||
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
|
||||
pssh "
|
||||
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
||||
# Prior to September 2023, there was a single Kubernetes package repo that
|
||||
# contained packages for all versions, so we could just add that repo
|
||||
# and install whatever was the latest version available there.
|
||||
# Things have changed (versions after September 2023, e.g. 1.28.3, are
|
||||
# not in the old repo) and now there is a different repo for each
|
||||
# minor version, so we need to figure out what minor version we are
|
||||
# installing to add the corresponding repo.
|
||||
if [ "$KUBEVERSION" = "" ]; then
|
||||
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
||||
fi
|
||||
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
|
||||
|
||||
# Since the new repo doesn't have older versions, add a safety check here.
|
||||
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
|
||||
if [ "$MINORVERSION" -lt 24 ]; then
|
||||
die "Cannot install kubepkgs for versions before 1.24."
|
||||
fi
|
||||
|
||||
pssh "
|
||||
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
||||
Package: kubectl kubeadm kubelet
|
||||
Pin: version $KUBEVERSION-*
|
||||
Pin-Priority: 1000
|
||||
EOF"
|
||||
fi
|
||||
|
||||
# As of February 27th, 2023, packages.cloud.google.com seems broken
|
||||
# (serves HTTP 500 errors for the GPG key), so let's pre-load that key.
|
||||
pssh -I "sudo apt-key add -" < lib/kubernetes-apt-key.gpg
|
||||
|
||||
# Install packages
|
||||
pssh --timeout 200 "
|
||||
#curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
|
||||
#sudo apt-key add - &&
|
||||
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
|
||||
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
|
||||
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
|
||||
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
|
||||
sudo tee /etc/apt/sources.list.d/kubernetes.list"
|
||||
pssh --timeout 200 "
|
||||
sudo apt-get update -q &&
|
||||
@@ -478,8 +504,21 @@ EOF"
|
||||
sudo apt-mark hold kubelet kubeadm kubectl &&
|
||||
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
|
||||
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
|
||||
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
|
||||
echo 'alias k=kubecolor' | sudo tee /etc/bash_completion.d/k &&
|
||||
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
|
||||
}
|
||||
|
||||
_cmd kubeadm "Setup kubernetes clusters with kubeadm"
|
||||
_cmd_kubeadm() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
if [ "$KUBEVERSION" ]; then
|
||||
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
|
||||
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
|
||||
IGNORE_SWAP="- Swap"
|
||||
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
|
||||
fi
|
||||
|
||||
# Install a valid configuration for containerd
|
||||
# (first, the CRI interface needs to be re-enabled;
|
||||
@@ -500,6 +539,9 @@ bootstrapTokens:
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
$IGNORE_SYSTEMVERIFICATION
|
||||
$IGNORE_SWAP
|
||||
$IGNORE_IPTABLES
|
||||
---
|
||||
kind: JoinConfiguration
|
||||
apiVersion: kubeadm.k8s.io/v1beta3
|
||||
@@ -511,6 +553,9 @@ discovery:
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
$IGNORE_SYSTEMVERIFICATION
|
||||
$IGNORE_SWAP
|
||||
$IGNORE_IPTABLES
|
||||
---
|
||||
kind: KubeletConfiguration
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
@@ -593,6 +638,31 @@ _cmd_kubetools() {
|
||||
;;
|
||||
esac
|
||||
|
||||
# Install ArgoCD CLI
|
||||
##VERSION## https://github.com/argoproj/argo-cd/releases/latest
|
||||
URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-${ARCH}
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/argocd ]; then
|
||||
sudo curl -o /usr/local/bin/argocd -fsSL $URL
|
||||
sudo chmod +x /usr/local/bin/argocd
|
||||
argocd completion bash | sudo tee /etc/bash_completion.d/argocd
|
||||
argocd version --client
|
||||
fi"
|
||||
|
||||
# Install Flux CLI
|
||||
##VERSION## https://github.com/fluxcd/flux2/releases
|
||||
FLUX_VERSION=2.3.0
|
||||
FILENAME=flux_${FLUX_VERSION}_linux_${ARCH}
|
||||
URL=https://github.com/fluxcd/flux2/releases/download/v$FLUX_VERSION/$FILENAME.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/flux ]; then
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx flux
|
||||
sudo chmod +x /usr/local/bin/flux
|
||||
flux completion bash | sudo tee /etc/bash_completion.d/flux
|
||||
flux --version
|
||||
fi"
|
||||
|
||||
# Install kubectx and kubens
|
||||
pssh "
|
||||
set -e
|
||||
@@ -624,7 +694,7 @@ EOF
|
||||
|
||||
# Install stern
|
||||
##VERSION## https://github.com/stern/stern/releases
|
||||
STERN_VERSION=1.22.0
|
||||
STERN_VERSION=1.29.0
|
||||
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
|
||||
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
|
||||
pssh "
|
||||
@@ -646,7 +716,7 @@ EOF
|
||||
|
||||
# Install kustomize
|
||||
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
|
||||
KUSTOMIZE_VERSION=v4.5.7
|
||||
KUSTOMIZE_VERSION=v5.4.1
|
||||
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kustomize ]; then
|
||||
@@ -677,6 +747,16 @@ EOF
|
||||
aws-iam-authenticator version
|
||||
fi"
|
||||
|
||||
# Install jless (jless.io)
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/jless ]; then
|
||||
##VERSION##
|
||||
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
|
||||
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
|
||||
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
|
||||
sudo mv jless /usr/local/bin
|
||||
fi"
|
||||
|
||||
# Install the krew package manager
|
||||
pssh "
|
||||
if [ ! -d /home/$USER_LOGIN/.krew ]; then
|
||||
@@ -688,21 +768,31 @@ EOF
|
||||
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
|
||||
fi"
|
||||
|
||||
# Install kubecolor
|
||||
KUBECOLOR_VERSION=0.4.0
|
||||
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubecolor ]; then
|
||||
##VERSION##
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx kubecolor
|
||||
fi"
|
||||
|
||||
# Install k9s
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/k9s ]; then
|
||||
FILENAME=k9s_Linux_$ARCH.tar.gz &&
|
||||
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
|
||||
sudo tar -zxvf- -C /usr/local/bin k9s
|
||||
sudo tar -C /usr/local/bin -zx k9s
|
||||
k9s version
|
||||
fi"
|
||||
|
||||
# Install popeye
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/popeye ]; then
|
||||
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
|
||||
FILENAME=popeye_Linux_$ARCH.tar.gz &&
|
||||
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
|
||||
sudo tar -zxvf- -C /usr/local/bin popeye
|
||||
sudo tar -C /usr/local/bin -zx popeye
|
||||
popeye version
|
||||
fi"
|
||||
|
||||
@@ -712,10 +802,10 @@ EOF
|
||||
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/tilt ]; then
|
||||
TILT_VERSION=0.22.15
|
||||
TILT_VERSION=0.33.13
|
||||
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
|
||||
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
|
||||
sudo tar -zxvf- -C /usr/local/bin tilt
|
||||
sudo tar -C /usr/local/bin -zx tilt
|
||||
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
|
||||
tilt version
|
||||
fi"
|
||||
@@ -757,7 +847,8 @@ EOF
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
|
||||
KUBESEAL_VERSION=0.17.4
|
||||
KUBESEAL_VERSION=0.26.2
|
||||
URL=https://github.com/bitnami-labs/sealed-secrets/releases/download/v${KUBESEAL_VERSION}/kubeseal-${KUBESEAL_VERSION}-linux-${ARCH}.tar.gz
|
||||
#case $ARCH in
|
||||
#amd64) FILENAME=kubeseal-linux-amd64;;
|
||||
#arm64) FILENAME=kubeseal-arm64;;
|
||||
@@ -765,13 +856,13 @@ EOF
|
||||
#esac
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubeseal ]; then
|
||||
curl -fsSL https://github.com/bitnami-labs/sealed-secrets/releases/download/v$KUBESEAL_VERSION/kubeseal-$KUBESEAL_VERSION-linux-$ARCH.tar.gz |
|
||||
sudo tar -zxvf- -C /usr/local/bin kubeseal
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx kubeseal
|
||||
kubeseal --version
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/vmware-tanzu/velero/releases
|
||||
VELERO_VERSION=1.11.0
|
||||
VELERO_VERSION=1.13.2
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/velero ]; then
|
||||
curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v$VELERO_VERSION/velero-v$VELERO_VERSION-linux-$ARCH.tar.gz |
|
||||
@@ -781,13 +872,21 @@ EOF
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/doitintl/kube-no-trouble/releases
|
||||
KUBENT_VERSION=0.7.0
|
||||
KUBENT_VERSION=0.7.2
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubent ]; then
|
||||
curl -fsSL https://github.com/doitintl/kube-no-trouble/releases/download/${KUBENT_VERSION}/kubent-${KUBENT_VERSION}-linux-$ARCH.tar.gz |
|
||||
sudo tar -zxvf- -C /usr/local/bin kubent
|
||||
kubent --version
|
||||
fi"
|
||||
|
||||
# Ngrok. Note that unfortunately, this is the x86_64 binary.
|
||||
# We might have to rethink how to handle this for multi-arch environments.
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/ngrok ]; then
|
||||
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
|
||||
sudo tar -zxvf- -C /usr/local/bin ngrok
|
||||
fi"
|
||||
}
|
||||
|
||||
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
|
||||
@@ -921,12 +1020,19 @@ _cmd_standardize() {
|
||||
# Disable unattended upgrades so that they don't mess up with the subsequent steps
|
||||
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
|
||||
# Digital Ocean's cloud init disables password authentication; re-enable it.
|
||||
# Some cloud providers think that it's smart to disable password authentication.
|
||||
# We need to re-enable it, though.
|
||||
# Digital Ocean
|
||||
pssh "
|
||||
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
# AWS
|
||||
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
|
||||
# Special case for oracle since their iptables blocks everything but SSH
|
||||
pssh "
|
||||
@@ -962,11 +1068,12 @@ _cmd_tailhist () {
|
||||
# halfway through and we're actually trying to download it again.
|
||||
pssh "
|
||||
set -e
|
||||
sudo apt-get install unzip -y
|
||||
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
|
||||
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
|
||||
sudo mv websocketd /usr/local/bin/websocketd
|
||||
sudo mkdir -p /tmp/tailhist
|
||||
sudo tee /root/tailhist.service <<EOF
|
||||
sudo mkdir -p /opt/tailhist
|
||||
sudo tee /opt/tailhist.service <<EOF
|
||||
[Unit]
|
||||
Description=tailhist
|
||||
|
||||
@@ -974,16 +1081,16 @@ Description=tailhist
|
||||
WantedBy=multi-user.target
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/tmp/tailhist
|
||||
WorkingDirectory=/opt/tailhist
|
||||
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
|
||||
User=nobody
|
||||
Group=nogroup
|
||||
Restart=always
|
||||
EOF
|
||||
sudo systemctl enable /root/tailhist.service --now
|
||||
sudo systemctl enable /opt/tailhist.service --now
|
||||
"
|
||||
|
||||
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
|
||||
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
|
||||
}
|
||||
|
||||
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
|
||||
|
||||
Binary file not shown.
@@ -17,6 +17,12 @@ pssh() {
|
||||
|
||||
echo "[parallel-ssh] $@"
|
||||
|
||||
# There are some routers that really struggle with the number of TCP
|
||||
# connections that we open when deploying large fleets of clusters.
|
||||
# We're adding a 1 second delay here, but this can be cranked up if
|
||||
# necessary - or down to zero, too.
|
||||
sleep ${PSSH_DELAY_PRE-1}
|
||||
|
||||
$(which pssh || which parallel-ssh) -h $HOSTFILE -l ubuntu \
|
||||
--par ${PSSH_PARALLEL_CONNECTIONS-100} \
|
||||
--timeout 300 \
|
||||
|
||||
16
prepare-labs/map-dns.sh
Executable file
16
prepare-labs/map-dns.sh
Executable file
@@ -0,0 +1,16 @@
|
||||
#!/bin/sh
|
||||
|
||||
DOMAINS=domains.txt
|
||||
IPS=ips.txt
|
||||
|
||||
. ./dns-cloudflare.sh
|
||||
|
||||
paste "$DOMAINS" "$IPS" | while read domain ips; do
|
||||
if ! [ "$domain" ]; then
|
||||
echo "⚠️ No more domains!"
|
||||
exit 1
|
||||
fi
|
||||
_clear_zone "$domain"
|
||||
_populate_zone "$domain" $ips
|
||||
done
|
||||
echo "✅ All done."
|
||||
26
prepare-labs/settings/admin-monokube.env
Normal file
26
prepare-labs/settings/admin-monokube.env
Normal file
@@ -0,0 +1,26 @@
|
||||
CLUSTERSIZE=1
|
||||
|
||||
CLUSTERPREFIX=monokube
|
||||
|
||||
# We're sticking to this in the first DMUC lab,
|
||||
# because it still works with Docker, and doesn't
|
||||
# require a ServiceAccount signing key.
|
||||
KUBEVERSION=1.19.11
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=training
|
||||
|
||||
STEPS="
|
||||
wait
|
||||
standardize
|
||||
clusterize
|
||||
tools
|
||||
docker
|
||||
disabledocker
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubebins
|
||||
kubetools
|
||||
ips
|
||||
"
|
||||
@@ -7,7 +7,7 @@ USER_PASSWORD=training
|
||||
|
||||
# For a list of old versions, check:
|
||||
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
|
||||
KUBEVERSION=1.22.5
|
||||
KUBEVERSION=1.28.9
|
||||
|
||||
STEPS="
|
||||
wait
|
||||
@@ -18,7 +18,8 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kube
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
CLUSTERSIZE=1
|
||||
CLUSTERSIZE=3
|
||||
|
||||
CLUSTERPREFIX=dmuc
|
||||
CLUSTERPREFIX=polykube
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=training
|
||||
@@ -10,12 +10,11 @@ STEPS="
|
||||
standardize
|
||||
clusterize
|
||||
tools
|
||||
docker
|
||||
disabledocker
|
||||
kubepkgs
|
||||
kubebins
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubebins
|
||||
kubetools
|
||||
ips
|
||||
"
|
||||
@@ -14,7 +14,8 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kube
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
"
|
||||
|
||||
6
prepare-labs/settings/konk.env
Normal file
6
prepare-labs/settings/konk.env
Normal file
@@ -0,0 +1,6 @@
|
||||
CLUSTERSIZE=5
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=
|
||||
|
||||
STEPS="stage2"
|
||||
@@ -14,7 +14,8 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kube
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
"
|
||||
|
||||
@@ -15,7 +15,8 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kube
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
|
||||
@@ -1,3 +1,8 @@
|
||||
#export TF_VAR_node_size=GP2.4
|
||||
#export TF_VAR_node_size=g6-standard-6
|
||||
#export TF_VAR_node_size=m7i.xlarge
|
||||
|
||||
|
||||
CLUSTERSIZE=1
|
||||
|
||||
CLUSTERPREFIX=CHANGEME
|
||||
|
||||
@@ -7,7 +7,7 @@ STUDENTS=2
|
||||
#export TF_VAR_location=eu-north-1
|
||||
export TF_VAR_node_size=S
|
||||
|
||||
SETTINGS=admin-dmuc
|
||||
SETTINGS=admin-monokube
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
@@ -15,15 +15,7 @@ TAG=$PREFIX-$SETTINGS
|
||||
--settings settings/$SETTINGS.env \
|
||||
--students $STUDENTS
|
||||
|
||||
SETTINGS=admin-kubenet
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
--provider $PROVIDER \
|
||||
--settings settings/$SETTINGS.env \
|
||||
--students $STUDENTS
|
||||
|
||||
SETTINGS=admin-kuberouter
|
||||
SETTINGS=admin-polykube
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
|
||||
2
prepare-labs/terraform/list-locations/exoscale
Executable file
2
prepare-labs/terraform/list-locations/exoscale
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
|
||||
exo zone
|
||||
@@ -1,7 +1,8 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.47.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
1
prepare-labs/terraform/one-kubernetes/azure/common.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/azure/common.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../common.tf
|
||||
1
prepare-labs/terraform/one-kubernetes/azure/config.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/azure/config.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../../providers/azure/config.tf
|
||||
22
prepare-labs/terraform/one-kubernetes/azure/main.tf
Normal file
22
prepare-labs/terraform/one-kubernetes/azure/main.tf
Normal file
@@ -0,0 +1,22 @@
|
||||
resource "azurerm_resource_group" "_" {
|
||||
name = var.cluster_name
|
||||
location = var.location
|
||||
}
|
||||
|
||||
resource "azurerm_kubernetes_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
location = var.location
|
||||
dns_prefix = var.cluster_name
|
||||
identity {
|
||||
type = "SystemAssigned"
|
||||
}
|
||||
resource_group_name = azurerm_resource_group._.name
|
||||
default_node_pool {
|
||||
name = "x86"
|
||||
node_count = var.min_nodes_per_pool
|
||||
min_count = var.min_nodes_per_pool
|
||||
max_count = var.max_nodes_per_pool
|
||||
vm_size = local.node_size
|
||||
enable_auto_scaling = true
|
||||
}
|
||||
}
|
||||
12
prepare-labs/terraform/one-kubernetes/azure/outputs.tf
Normal file
12
prepare-labs/terraform/one-kubernetes/azure/outputs.tf
Normal file
@@ -0,0 +1,12 @@
|
||||
output "cluster_id" {
|
||||
value = azurerm_kubernetes_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = true
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
value = azurerm_kubernetes_cluster._.kube_config_raw
|
||||
sensitive = true
|
||||
}
|
||||
7
prepare-labs/terraform/one-kubernetes/azure/provider.tf
Normal file
7
prepare-labs/terraform/one-kubernetes/azure/provider.tf
Normal file
@@ -0,0 +1,7 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
azurerm = {
|
||||
source = "hashicorp/azurerm"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
prepare-labs/terraform/one-kubernetes/azure/variables.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/azure/variables.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../../providers/azure/variables.tf
|
||||
@@ -11,17 +11,23 @@ data "oci_containerengine_cluster_option" "_" {
|
||||
locals {
|
||||
compartment_id = oci_identity_compartment._.id
|
||||
kubernetes_version = data.oci_containerengine_cluster_option._.kubernetes_versions[0]
|
||||
images = [
|
||||
for image in data.oci_containerengine_node_pool_option._.sources : image
|
||||
if can(regex("OKE", image.source_name))
|
||||
&& can(regex(substr(local.kubernetes_version, 1, -1), image.source_name))
|
||||
&& !can(regex("GPU", image.source_name))
|
||||
&& !can(regex("aarch64", image.source_name))
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
data "oci_identity_availability_domains" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
}
|
||||
|
||||
data "oci_core_images" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
operating_system = "Oracle Linux"
|
||||
operating_system_version = "8"
|
||||
shape = local.shape
|
||||
data "oci_containerengine_node_pool_option" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
node_pool_option_id = oci_containerengine_cluster._.id
|
||||
}
|
||||
|
||||
resource "oci_containerengine_cluster" "_" {
|
||||
@@ -56,7 +62,7 @@ resource "oci_containerengine_node_pool" "_" {
|
||||
}
|
||||
}
|
||||
node_source_details {
|
||||
image_id = data.oci_core_images._.images[0].id
|
||||
image_id = local.images[0].image_id
|
||||
source_type = "image"
|
||||
}
|
||||
}
|
||||
|
||||
1
prepare-labs/terraform/one-kubernetes/ovh/common.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/ovh/common.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../common.tf
|
||||
1
prepare-labs/terraform/one-kubernetes/ovh/config.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/ovh/config.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../../providers/ovh/config.tf
|
||||
18
prepare-labs/terraform/one-kubernetes/ovh/main.tf
Normal file
18
prepare-labs/terraform/one-kubernetes/ovh/main.tf
Normal file
@@ -0,0 +1,18 @@
|
||||
resource "ovh_cloud_project_kube" "_" {
|
||||
name = var.cluster_name
|
||||
region = var.location
|
||||
version = local.k8s_version
|
||||
}
|
||||
|
||||
resource "ovh_cloud_project_kube_nodepool" "_" {
|
||||
kube_id = ovh_cloud_project_kube._.id
|
||||
name = "x86"
|
||||
flavor_name = local.node_size
|
||||
desired_nodes = var.min_nodes_per_pool
|
||||
min_nodes = var.min_nodes_per_pool
|
||||
max_nodes = var.max_nodes_per_pool
|
||||
}
|
||||
|
||||
locals {
|
||||
k8s_version = "1.26"
|
||||
}
|
||||
12
prepare-labs/terraform/one-kubernetes/ovh/outputs.tf
Normal file
12
prepare-labs/terraform/one-kubernetes/ovh/outputs.tf
Normal file
@@ -0,0 +1,12 @@
|
||||
output "cluster_id" {
|
||||
value = ovh_cloud_project_kube._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = false
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
sensitive = true
|
||||
value = ovh_cloud_project_kube._.kubeconfig
|
||||
}
|
||||
7
prepare-labs/terraform/one-kubernetes/ovh/provider.tf
Normal file
7
prepare-labs/terraform/one-kubernetes/ovh/provider.tf
Normal file
@@ -0,0 +1,7 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
ovh = {
|
||||
source = "ovh/ovh"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
prepare-labs/terraform/one-kubernetes/ovh/variables.tf
Symbolic link
1
prepare-labs/terraform/one-kubernetes/ovh/variables.tf
Symbolic link
@@ -0,0 +1 @@
|
||||
../../providers/ovh/variables.tf
|
||||
@@ -1,10 +1,23 @@
|
||||
resource "scaleway_vpc_private_network" "_" {
|
||||
}
|
||||
|
||||
# This is a kind of hack to use a custom security group with Kapsule.
|
||||
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
|
||||
|
||||
resource "scaleway_instance_security_group" "_" {
|
||||
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
|
||||
inbound_default_policy = "accept"
|
||||
outbound_default_policy = "accept"
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
#region = var.location
|
||||
name = var.cluster_name
|
||||
tags = var.common_tags
|
||||
version = local.k8s_version
|
||||
type = "kapsule"
|
||||
cni = "cilium"
|
||||
delete_additional_resources = true
|
||||
private_network_id = scaleway_vpc_private_network._.id
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_pool" "_" {
|
||||
@@ -17,6 +30,7 @@ resource "scaleway_k8s_pool" "_" {
|
||||
max_size = var.max_nodes_per_pool
|
||||
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
|
||||
autohealing = true
|
||||
depends_on = [ scaleway_instance_security_group._ ]
|
||||
}
|
||||
|
||||
data "scaleway_k8s_version" "_" {
|
||||
|
||||
@@ -4,6 +4,7 @@ resource "helm_release" "_" {
|
||||
create_namespace = true
|
||||
repository = "https://charts.loft.sh"
|
||||
chart = "vcluster"
|
||||
version = "0.19.7"
|
||||
set {
|
||||
name = "service.type"
|
||||
value = "NodePort"
|
||||
|
||||
@@ -44,5 +44,5 @@ locals {
|
||||
guest_api_server_port = local.node_port
|
||||
guest_api_server_url_new = "https://${local.guest_api_server_host}:${local.guest_api_server_port}"
|
||||
guest_api_server_url_old = yamldecode(local.kubeconfig_raw).clusters[0].cluster.server
|
||||
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
|
||||
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
|
||||
}
|
||||
|
||||
@@ -14,9 +14,9 @@ $ hcloud server-type list | grep shared
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {
|
||||
S = "cx11"
|
||||
M = "cx21"
|
||||
L = "cx31"
|
||||
S = "cpx11"
|
||||
M = "cpx21"
|
||||
L = "cpx31"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
0
prepare-labs/terraform/providers/ovh/config.tf
Normal file
0
prepare-labs/terraform/providers/ovh/config.tf
Normal file
13
prepare-labs/terraform/providers/ovh/variables.tf
Normal file
13
prepare-labs/terraform/providers/ovh/variables.tf
Normal file
@@ -0,0 +1,13 @@
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {
|
||||
S = "d2-4"
|
||||
M = "d2-4"
|
||||
L = "d2-8"
|
||||
}
|
||||
}
|
||||
|
||||
variable "location" {
|
||||
type = string
|
||||
default = "BHS5"
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
type = map(any)
|
||||
default = {}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,14 +1,22 @@
|
||||
resource "openstack_compute_instance_v2" "_" {
|
||||
for_each = local.nodes
|
||||
name = each.value.node_name
|
||||
image_name = var.image
|
||||
image_name = data.openstack_images_image_v2._.name
|
||||
flavor_name = each.value.node_size
|
||||
key_pair = openstack_compute_keypair_v2._.name
|
||||
key_pair = openstack_compute_keypair_v2._.name
|
||||
network {
|
||||
port = openstack_networking_port_v2._[each.key].id
|
||||
}
|
||||
}
|
||||
|
||||
data "openstack_images_image_v2" "_" {
|
||||
most_recent = true
|
||||
properties = {
|
||||
os = "ubuntu"
|
||||
version = "24.04"
|
||||
}
|
||||
}
|
||||
|
||||
resource "openstack_networking_port_v2" "_" {
|
||||
for_each = local.nodes
|
||||
network_id = openstack_networking_network_v2._.id
|
||||
|
||||
@@ -31,10 +31,6 @@ variable "external_network_id" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "image" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {}
|
||||
|
||||
@@ -4,6 +4,11 @@
|
||||
# another set of clusters while a first one is still running)
|
||||
# you should set the TF_VAR_cluster_name environment variable.
|
||||
|
||||
if ! [ "$TF_VAR_cluster_name" ]; then
|
||||
echo "Please set TF_VAR_cluster_name. Thanks."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd terraform/one-kubernetes
|
||||
|
||||
case "$1" in
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
title: |
|
||||
Docker Intensif
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
slides: https://2024-10-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
title: |
|
||||
Fondamentaux Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
slides: https://2024-10-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
|
||||
@@ -2,11 +2,11 @@ title: |
|
||||
Packaging d'applications
|
||||
pour Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
slides: https://2024-10-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
@@ -15,7 +15,7 @@ exclude:
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics-julien.md
|
||||
- logistics.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
- k8s/prereqs-advanced.md
|
||||
@@ -40,4 +40,7 @@ content:
|
||||
- exercises/helm-umbrella-chart-details.md
|
||||
-
|
||||
- k8s/ytt.md
|
||||
- k8s/gitworkflows.md
|
||||
- k8s/flux.md
|
||||
- k8s/argocd.md
|
||||
- shared/thankyou.md
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
title: |
|
||||
Kubernetes Avancé
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
slides: https://2024-10-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
@@ -27,6 +27,7 @@ content:
|
||||
- exercises/netpol-brief.md
|
||||
- exercises/sealed-secrets-brief.md
|
||||
- exercises/kyverno-ingress-domain-name-brief.md
|
||||
- exercises/reqlim-brief.md
|
||||
- #1
|
||||
- k8s/demo-apps.md
|
||||
- k8s/netpol.md
|
||||
@@ -53,6 +54,7 @@ content:
|
||||
- k8s/apiserver-deepdive.md
|
||||
- k8s/aggregation-layer.md
|
||||
- k8s/hpa-v2.md
|
||||
- exercises/reqlim-details.md
|
||||
- #4
|
||||
- k8s/statefulsets.md
|
||||
- k8s/consul.md
|
||||
|
||||
14
slides/5.yml
14
slides/5.yml
@@ -1,11 +1,11 @@
|
||||
title: |
|
||||
Opérer Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
slides: https://2024-10-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
@@ -27,14 +27,14 @@ content:
|
||||
- shared/handson.md
|
||||
- k8s/architecture.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc.md
|
||||
-
|
||||
- k8s/multinode.md
|
||||
- k8s/cni.md
|
||||
- k8s/interco.md
|
||||
- k8s/dmuc-easy.md
|
||||
-
|
||||
- k8s/dmuc-medium.md
|
||||
- k8s/dmuc-hard.md
|
||||
- k8s/cni-internals.md
|
||||
#- k8s/interco.md
|
||||
- k8s/apilb.md
|
||||
-
|
||||
- k8s/internal-apis.md
|
||||
- k8s/staticpods.md
|
||||
- k8s/cluster-upgrade.md
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
# Shortlinks for next training in English and French
|
||||
#/next https://www.eventbrite.com/e/livestream-intensive-kubernetes-bootcamp-tickets-103262336428
|
||||
/next https://skillsmatter.com/courses/700-advanced-kubernetes-concepts-workshop-jerome-petazzoni
|
||||
/next https://qconsf.com/training/nov2024/asynchronous-architecture-patterns-scale-ml-and-other-high-latency-workloads
|
||||
/hi5 https://enix.io/fr/services/formation/online/
|
||||
/us https://www.ardanlabs.com/live-training-events/deploying-microservices-and-traditional-applications-with-kubernetes-march-28-2022.html
|
||||
/uk https://skillsmatter.com/workshops/827-deploying-microservices-and-traditional-applications-with-kubernetes-with-jerome-petazzoni
|
||||
|
||||
814
slides/autopilot/package-lock.json
generated
814
slides/autopilot/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -2,8 +2,8 @@
|
||||
"name": "container-training-pub-sub-server",
|
||||
"version": "0.0.1",
|
||||
"dependencies": {
|
||||
"express": "^4.16.2",
|
||||
"socket.io": "^4.6.1",
|
||||
"socket.io-client": "^4.5.1"
|
||||
"express": "^4.21.1",
|
||||
"socket.io": "^4.8.0",
|
||||
"socket.io-client": "^4.7.5"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,22 +113,16 @@ class: pic
|
||||
## Results
|
||||
|
||||
* [Dev-to-prod reduced from 9 months to 15 minutes (ING)](
|
||||
https://www.docker.com/sites/default/files/CS_ING_01.25.2015_1.pdf)
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_ING_01.25.2015_1.pdf)
|
||||
|
||||
* [Continuous integration job time reduced by more than 60% (BBC)](
|
||||
https://www.docker.com/sites/default/files/CS_BBCNews_01.25.2015_1.pdf)
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_BBCNews_01.25.2015_1.pdf)
|
||||
|
||||
* [Deploy 100 times a day instead of once a week (GILT)](
|
||||
https://www.docker.com/sites/default/files/CS_Gilt%20Groupe_03.18.2015_0.pdf)
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_Gilt_Groupe_03.18.2015_0.pdf)
|
||||
|
||||
* [70% infrastructure consolidation (MetLife)](
|
||||
https://www.docker.com/customers/metlife-transforms-customer-experience-legacy-and-microservices-mashup)
|
||||
|
||||
* [60% infrastructure consolidation (Intesa Sanpaolo)](
|
||||
https://blog.docker.com/2017/11/intesa-sanpaolo-builds-resilient-foundation-banking-docker-enterprise-edition/)
|
||||
|
||||
* [14x application density; 60% of legacy datacenter migrated in 4 months (GE Appliances)](
|
||||
https://www.docker.com/customers/ge-uses-docker-enable-self-service-their-developers)
|
||||
https://www.youtube.com/watch?v=Bwt3xigvlj0)
|
||||
|
||||
* etc.
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
## Exercise — Ingress
|
||||
## Exercise — Ingress Controller
|
||||
|
||||
- Add an ingress controller to a Kubernetes cluster
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Exercise — Ingress
|
||||
# Exercise — Ingress Controller
|
||||
|
||||
- We want to expose a couple of web apps through an ingress controller
|
||||
|
||||
@@ -128,4 +128,4 @@ This is similar to the previous scenario, but with two significant changes:
|
||||
|
||||
1. We only want to run the ingress controller on nodes that have the role `ingress`.
|
||||
|
||||
2. We don't want to use `hostNetwork`, but a list of `externalIPs` instead.
|
||||
2. We want to either use `hostPort`, or a list of `externalIPs` (not `hostNetwork`).
|
||||
@@ -1,6 +1,6 @@
|
||||
# Exercise — Network Policies
|
||||
|
||||
We want to to implement a generic network security mechanism.
|
||||
We want to implement a generic network security mechanism.
|
||||
|
||||
Instead of creating one policy per service, we want to
|
||||
create a fixed number of policies, and use a single label
|
||||
|
||||
11
slides/exercises/polykuberbac-brief.md
Normal file
11
slides/exercises/polykuberbac-brief.md
Normal file
@@ -0,0 +1,11 @@
|
||||
## Exercise — Enable RBAC
|
||||
|
||||
- Enable RBAC on a manually-deployed control plane
|
||||
|
||||
- This involves:
|
||||
|
||||
- generating different certificates
|
||||
|
||||
- distributing the certificates to the controllers
|
||||
|
||||
- enabling the proper authorizers in API server
|
||||
117
slides/exercises/polykuberbac-details.md
Normal file
117
slides/exercises/polykuberbac-details.md
Normal file
@@ -0,0 +1,117 @@
|
||||
# Exercise — Enable RBAC
|
||||
|
||||
- We want to enable RBAC on the "polykube" cluster
|
||||
|
||||
(it doesn't matter whether we have 1 or multiple nodes)
|
||||
|
||||
- Ideally, we want to have, for instance:
|
||||
|
||||
- one key, certificate, and kubeconfig for a cluster admin
|
||||
|
||||
- one key, certificate, and kubeconfig for a user
|
||||
<br/>
|
||||
(with permissions in a single namespace)
|
||||
|
||||
- Bonus points: enable the NodeAuthorizer too!
|
||||
|
||||
- Check the following slides for hints
|
||||
|
||||
---
|
||||
|
||||
## Step 1
|
||||
|
||||
- Enable RBAC itself!
|
||||
|
||||
--
|
||||
|
||||
- This is done with an API server command-line flag
|
||||
|
||||
--
|
||||
|
||||
- Check [the documentation][kube-apiserver-doc] to see the flag
|
||||
|
||||
--
|
||||
|
||||
- For now, only enable `--authorization-mode=RBAC`
|
||||
|
||||
[kube-apiserver-doc]: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
|
||||
|
||||
---
|
||||
|
||||
## Step 2
|
||||
|
||||
- Our certificate doesn't work anymore, we need to generate a new one
|
||||
|
||||
--
|
||||
|
||||
- We need a certificate that will have *some* (ideally *all*) permissions
|
||||
|
||||
--
|
||||
|
||||
- Two options:
|
||||
|
||||
- use the equivalent of "root" (identity that completely skips permission checks)
|
||||
|
||||
- use a "non-root" identity that is granted permissions with RBAC
|
||||
|
||||
--
|
||||
|
||||
- The "non-root" option looks nice, but to grant permissions, we need permissions
|
||||
|
||||
- So let's start with the equivalent of "root"!
|
||||
|
||||
--
|
||||
|
||||
- The Kubernetes equivalent of `root` is the group `system:masters`
|
||||
|
||||
---
|
||||
|
||||
## Step 2, continued
|
||||
|
||||
- We need to generate a certificate for a user belonging to group `system:masters`
|
||||
|
||||
--
|
||||
|
||||
- In Kubernetes certificates, groups are encoded with the "organization" field
|
||||
|
||||
--
|
||||
|
||||
- That corresponds to `O=system:masters`
|
||||
|
||||
--
|
||||
|
||||
- In other words, we need to generate a new certificate, but with a subject of:
|
||||
|
||||
`/CN=admin/O=system:masters/` (the `CN` doesn't matter)
|
||||
|
||||
- That certificate should be able to interact with the API server, like before
|
||||
|
||||
---
|
||||
|
||||
## Step 3
|
||||
|
||||
- Now, all our controllers have permissions issues
|
||||
|
||||
- We need to either:
|
||||
|
||||
- use that `system:masters` cert everywhere
|
||||
|
||||
- generate different certs for every controller, with the proper identities
|
||||
|
||||
- Suggestion: use `system:masters` everywhere to begin with
|
||||
|
||||
(and make sure the cluster is back on its feet)
|
||||
|
||||
---
|
||||
|
||||
## Step 4
|
||||
|
||||
At this point, there are two possible forks in the road:
|
||||
|
||||
1. Generate certs for the control plane controllers
|
||||
|
||||
(`kube-controller-manager`, `kube-scheduler`)
|
||||
|
||||
2. Generate cert(s) for the node(s) and enable `NodeAuthorizer`
|
||||
|
||||
Good luck!
|
||||
7
slides/exercises/reqlim-brief.md
Normal file
7
slides/exercises/reqlim-brief.md
Normal file
@@ -0,0 +1,7 @@
|
||||
## Exercise — Requests and Limits
|
||||
|
||||
- Check current resource allocation and utilization
|
||||
|
||||
- Make sure that all workloads have requests (and perhaps limits)
|
||||
|
||||
- Make sure that all *future* workloads have them too!
|
||||
55
slides/exercises/reqlim-details.md
Normal file
55
slides/exercises/reqlim-details.md
Normal file
@@ -0,0 +1,55 @@
|
||||
# Exercise — Requests and Limits
|
||||
|
||||
By default, if we don't specify *resource requests*,
|
||||
our workloads will run in `BestEffort` quality of service.
|
||||
|
||||
`BestEffort` is very bad for production workloads,
|
||||
because the scheduler has no idea of the actual resource
|
||||
requirements of our apps, and won't be able to make
|
||||
smart decisions about workload placement.
|
||||
|
||||
As a result, when the cluster gets overloaded,
|
||||
containers will be killed, pods will be evicted,
|
||||
and service disruptions will happen.
|
||||
|
||||
Let's solve this!
|
||||
|
||||
---
|
||||
|
||||
## Check current state
|
||||
|
||||
- Check *allocations*
|
||||
|
||||
(i.e. which pods have requests and limits for CPU and memory)
|
||||
|
||||
- Then check *utilization*
|
||||
|
||||
(i.e. actual resource usage)
|
||||
|
||||
- Possible tools: `kubectl`, plugins like `view-allocations`, Prometheus...
|
||||
|
||||
---
|
||||
|
||||
## Follow best practices
|
||||
|
||||
- We want to make sure that *all* workloads have requests
|
||||
|
||||
(and perhaps limits, too!)
|
||||
|
||||
- Depending on the workload:
|
||||
|
||||
- edit its YAML manifest
|
||||
|
||||
- adjust its Helm values
|
||||
|
||||
- add LimitRange in its Namespace
|
||||
|
||||
- Then check again to confirm that the job has been done properly!
|
||||
|
||||
---
|
||||
|
||||
## Be future-proof!
|
||||
|
||||
- We want to make sure that *future* workloads will have requests, too
|
||||
|
||||
- How can that be implemented?
|
||||
@@ -10,108 +10,120 @@
|
||||
</head>
|
||||
<body>
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 9 mai 2023</td>
|
||||
<td>Mardi 24 septembre 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 10 mai 2023</td>
|
||||
<td>Mercredi 25 septembre 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 11 mai 2023</td>
|
||||
<td>Jeudi 26 septembre 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 12 mai 2023</td>
|
||||
<td>Vendredi 27 septembre 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Lundi 15 mai 2023</td>
|
||||
<td>Mardi 1er octobre 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 16 mai 2023</td>
|
||||
<td>Mercredi 2 octobre 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 17 mai 2023</td>
|
||||
<td>Jeudi 3 octobre 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lundi 22 mai 2023</td>
|
||||
<td>Vendredi 4 octobre 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 23 mai 2023</td>
|
||||
<td>Lundi 7 octobre 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 24 mai 2023</td>
|
||||
<td>Mardi 8 octobre 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 25 mai 2023</td>
|
||||
<td>Mercredi 9 octobre 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 26 mai 2023</td>
|
||||
<td>Jeudi 10 octobre 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 30 mai 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 31 mai 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 1er juin 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 6 juin 2023</td>
|
||||
<td>Vendredi 11 octobre 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 7 juin 2023</td>
|
||||
<td>Lundi 14 octobre 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 15 octobre 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mercredi 16 octobre 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 17 octobre 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 18 octobre 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
BIN
slides/images/argocd_architecture.png
Normal file
BIN
slides/images/argocd_architecture.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 103 KiB |
BIN
slides/images/argocdlogo.png
Normal file
BIN
slides/images/argocdlogo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 22 KiB |
@@ -981,10 +981,6 @@
|
||||
# event: LISA
|
||||
# title: Deploying and Scaling Applications with Docker Swarm
|
||||
|
||||
#2015-09-24-strangeloop
|
||||
|
||||
|
||||
|
||||
- title: Introduction to Docker and Containers
|
||||
slides: intro-selfpaced.yml.html
|
||||
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
https://gallant-turing-d0d520.netlify.com/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/ShippingContainerSFBay.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/aerial-view-of-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/blue-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/chinook-helicopter-container.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/container-cranes.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/container-housing.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/containers-by-the-water.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/distillery-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/lots-of-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/plastic-containers.JPG
|
||||
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-1.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-2.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/two-containers-on-a-truck.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/wall-of-containers.jpeg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/catene-de-conteneurs.jpg
|
||||
https://prettypictures.container.training/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
|
||||
https://prettypictures.container.training/containers/ShippingContainerSFBay.jpg
|
||||
https://prettypictures.container.training/containers/aerial-view-of-containers.jpg
|
||||
https://prettypictures.container.training/containers/blue-containers.jpg
|
||||
https://prettypictures.container.training/containers/chinook-helicopter-container.jpg
|
||||
https://prettypictures.container.training/containers/container-cranes.jpg
|
||||
https://prettypictures.container.training/containers/container-housing.jpg
|
||||
https://prettypictures.container.training/containers/containers-by-the-water.jpg
|
||||
https://prettypictures.container.training/containers/distillery-containers.jpg
|
||||
https://prettypictures.container.training/containers/lots-of-containers.jpg
|
||||
https://prettypictures.container.training/containers/plastic-containers.JPG
|
||||
https://prettypictures.container.training/containers/train-of-containers-1.jpg
|
||||
https://prettypictures.container.training/containers/train-of-containers-2.jpg
|
||||
https://prettypictures.container.training/containers/two-containers-on-a-truck.jpg
|
||||
https://prettypictures.container.training/containers/wall-of-containers.jpeg
|
||||
https://prettypictures.container.training/containers/catene-de-conteneurs.jpg
|
||||
|
||||
@@ -20,19 +20,21 @@
|
||||
|
||||
## Use cases
|
||||
|
||||
Some examples ...
|
||||
- Defaulting
|
||||
|
||||
- Stand-alone admission controllers
|
||||
*injecting image pull secrets, sidecars, environment variables...*
|
||||
|
||||
*validating:* policy enforcement (e.g. quotas, naming conventions ...)
|
||||
- Policy enforcement and best practices
|
||||
|
||||
*mutating:* inject or provide default values (e.g. pod presets)
|
||||
*prevent: `latest` images, deprecated APIs...*
|
||||
|
||||
- Admission controllers part of a greater system
|
||||
*require: PDBs, resource requests/limits, labels/annotations, local registry...*
|
||||
|
||||
*validating:* advanced typing for operators
|
||||
- Problem mitigation
|
||||
|
||||
*mutating:* inject sidecars for service meshes
|
||||
*block nodes with vulnerable kernels, inject log4j mitigations...*
|
||||
|
||||
- Extended validation for operators
|
||||
|
||||
---
|
||||
|
||||
@@ -198,6 +200,64 @@ Some examples ...
|
||||
|
||||
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
|
||||
|
||||
- We will need an ngrok account for the tunnels
|
||||
|
||||
(a free account is fine)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Ngrok in production
|
||||
|
||||
- Ngrok was initially known for its local webhook development features
|
||||
|
||||
- It now supports production scenarios as well
|
||||
|
||||
(load balancing, WAF, authentication, circuit-breaking...)
|
||||
|
||||
- Including some that are very relevant to Kubernetes
|
||||
|
||||
(e.g. [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
|
||||
|
||||
---
|
||||
|
||||
## Ngrok tokens
|
||||
|
||||
- If you're attending a live training, you might have an ngrok token
|
||||
|
||||
- Look in `~/ngrok.env` and if that file exists, copy it to the stack:
|
||||
|
||||
.lab[
|
||||
|
||||
```bash
|
||||
cp ~/ngrok.env ~/container.training/webhooks/admission/.env
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Starting the whole stack
|
||||
|
||||
.lab[
|
||||
|
||||
- Go to the webhook directory:
|
||||
@@ -216,28 +276,6 @@ Some examples ...
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
- Probably not for production, though
|
||||
|
||||
(webhook requests and responses now pass through the ngrok platform)
|
||||
|
||||
---
|
||||
|
||||
## Update the webhook configuration
|
||||
|
||||
- We have a webhook configuration in `k8s/webhook-configuration.yaml`
|
||||
@@ -543,6 +581,23 @@ Shell to the rescue!
|
||||
|
||||
(it should only allow values of `red`, `green`, `blue`)
|
||||
|
||||
---
|
||||
|
||||
## Coming soon...
|
||||
|
||||
- Kubernetes Validating Admission Policies
|
||||
|
||||
- Integrated with the Kubernetes API server
|
||||
|
||||
- Lets us define policies using [CEL (Common Expression Language)][cel-spec]
|
||||
|
||||
- Available in beta in Kubernetes 1.28 <!-- ##VERSION## -->
|
||||
|
||||
- Check this [CNCF Blog Post][cncf-blog-vap] for more details
|
||||
|
||||
[cncf-blog-vap]: https://www.cncf.io/blog/2023/09/14/policy-management-in-kubernetes-is-changing/
|
||||
[cel-spec]: https://github.com/google/cel-spec
|
||||
|
||||
???
|
||||
|
||||
:EN:- Dynamic admission control with webhooks
|
||||
|
||||
@@ -141,12 +141,6 @@ class: pic
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
@@ -157,6 +151,12 @@ class: pic
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
# The Kubernetes API
|
||||
|
||||
[
|
||||
|
||||
601
slides/k8s/argocd.md
Normal file
601
slides/k8s/argocd.md
Normal file
@@ -0,0 +1,601 @@
|
||||
# ArgoCD
|
||||
|
||||
- We're going to implement a basic GitOps workflow with ArgoCD
|
||||
|
||||
- Pushing to the default branch will automatically deploy to our clusters
|
||||
|
||||
- There will be two clusters (`dev` and `prod`)
|
||||
|
||||
- The two clusters will have similar (but slightly different) workloads
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## ArgoCD concepts
|
||||
|
||||
ArgoCD manages **applications** by **syncing** their **live state** with their **target state**.
|
||||
|
||||
- **Application**: a group of Kubernetes resources managed by ArgoCD.
|
||||
<br/>
|
||||
Also a custom resource (`kind: Application`) managing that group of resources.
|
||||
|
||||
- **Application source type**: the **Tool** used to build the application (Kustomize, Helm...)
|
||||
|
||||
- **Target state**: the desired state of an **application**, as represented by the git repository.
|
||||
|
||||
- **Live state**: the current state of the application on the cluster.
|
||||
|
||||
- **Sync status**: whether or not the live state matches the target state.
|
||||
|
||||
- **Sync**: the process of making an application move to its target state.
|
||||
<br/>
|
||||
(e.g. by applying changes to a Kubernetes cluster)
|
||||
|
||||
(Check [ArgoCD core concepts](https://argo-cd.readthedocs.io/en/stable/core_concepts/) for more definitions!)
|
||||
|
||||
---
|
||||
|
||||
## Getting ready
|
||||
|
||||
- Let's make sure we have two clusters
|
||||
|
||||
- It's OK to use local clusters (kind, minikube...)
|
||||
|
||||
- We need to install the ArgoCD CLI ([packages], [binaries])
|
||||
|
||||
- **Highly recommended:** set up CLI completion!
|
||||
|
||||
- Of course we'll need a Git service, too
|
||||
|
||||
[packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
|
||||
[binaries]: https://github.com/argoproj/argo-cd/releases/latest
|
||||
|
||||
---
|
||||
|
||||
## Setting up ArgoCD
|
||||
|
||||
- The easiest way is to use upstream YAML manifests
|
||||
|
||||
- There is also a [Helm chart][argohelmchart] if we need more customization
|
||||
|
||||
.lab[
|
||||
|
||||
- Create a namespace for ArgoCD and install it there:
|
||||
```bash
|
||||
kubectl create namespace argocd
|
||||
kubectl apply --namespace argocd -f \
|
||||
https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
[argohelmchart]: https://artifacthub.io/packages/helm/argo/argo-cd
|
||||
|
||||
---
|
||||
|
||||
## Logging in with the ArgoCD CLI
|
||||
|
||||
- The CLI can talk to the ArgoCD API server or to the Kubernetes API server
|
||||
|
||||
- For simplicity, we're going to authenticate and communicate with the Kubernetes API
|
||||
|
||||
.lab[
|
||||
|
||||
- Authenticate with the Kubernetes API (that's what the `--core` flag does):
|
||||
```bash
|
||||
argocd login --core
|
||||
```
|
||||
|
||||
- Check that everything is fine:
|
||||
```bash
|
||||
argocd version
|
||||
```
|
||||
]
|
||||
|
||||
--
|
||||
|
||||
🤔 `FATA[0000] error retrieving argocd-cm: configmap "argocd-cm" not found`
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD CLI shortcomings
|
||||
|
||||
- When using "core" authentication, the ArgoCD CLI uses our current Kubernetes context
|
||||
|
||||
(as defined in our kubeconfig file)
|
||||
|
||||
- That context needs to point to the correct namespace
|
||||
|
||||
(the namespace where we installed ArgoCD)
|
||||
|
||||
- In fact, `argocd login --core` doesn't communicate at all with ArgoCD!
|
||||
|
||||
(it only updates a local ArgoCD configuration file)
|
||||
|
||||
---
|
||||
|
||||
## Trying again in the right namespace
|
||||
|
||||
- We will need to run all `argocd` commands in the `argocd` namespace
|
||||
|
||||
(this limitation only applies to "core" authentication; see [issue 14167][issue14167])
|
||||
|
||||
.lab[
|
||||
|
||||
- Switch to the `argocd` namespace:
|
||||
```bash
|
||||
kubectl config set-context --current --namespace argocd
|
||||
```
|
||||
|
||||
- Check that we can communicate with the ArgoCD API now:
|
||||
```bash
|
||||
argocd version
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- Let's have a look at ArgoCD architecture!
|
||||
|
||||
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## ArgoCD API Server
|
||||
|
||||
The API server is a gRPC/REST server which exposes the API consumed by the Web UI, CLI, and CI/CD systems. It has the following responsibilities:
|
||||
|
||||
- application management and status reporting
|
||||
|
||||
- invoking of application operations (e.g. sync, rollback, user-defined actions)
|
||||
|
||||
- repository and cluster credential management (stored as K8s secrets)
|
||||
|
||||
- authentication and auth delegation to external identity providers
|
||||
|
||||
- RBAC enforcement
|
||||
|
||||
- listener/forwarder for Git webhook events
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD Repository Server
|
||||
|
||||
The repository server is an internal service which maintains a local cache of the Git repositories holding the application manifests. It is responsible for generating and returning the Kubernetes manifests when provided the following inputs:
|
||||
|
||||
- repository URL
|
||||
|
||||
- revision (commit, tag, branch)
|
||||
|
||||
- application path
|
||||
|
||||
- template specific settings: parameters, helm values...
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD Application Controller
|
||||
|
||||
The application controller is a Kubernetes controller which continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo).
|
||||
|
||||
It detects *OutOfSync* application state and optionally takes corrective action.
|
||||
|
||||
It is responsible for invoking any user-defined hooks for lifecycle events (*PreSync, Sync, PostSync*).
|
||||
|
||||
---
|
||||
|
||||
## Preparing a repository for ArgoCD
|
||||
|
||||
- We need a repository with Kubernetes YAML manifests
|
||||
|
||||
- You can fork [kubercoins] or create a new, empty repository
|
||||
|
||||
- If you create a new, empty repository, add some manifests to it
|
||||
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
|
||||
---
|
||||
|
||||
## Add an Application
|
||||
|
||||
- An Application can be added to ArgoCD via the web UI or the CLI
|
||||
|
||||
(either way, this will create a custom resource of `kind: Application`)
|
||||
|
||||
- The Application should then automatically be deployed to our cluster
|
||||
|
||||
(the application manifests will be "applied" to the cluster)
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's use the CLI to add an Application:
|
||||
```bash
|
||||
argocd app create kubercoins \
|
||||
--repo https://github.com/`<your_user>/<your_repo>`.git \
|
||||
--path . --revision `<branch>` \
|
||||
--dest-server https://kubernetes.default.svc \
|
||||
--dest-namespace kubercoins-prod
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Checking progress
|
||||
|
||||
- We can see sync status in the web UI or with the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's check app status with the CLI:
|
||||
```bash
|
||||
argocd app list
|
||||
```
|
||||
|
||||
- We can also check directly with the Kubernetes CLI:
|
||||
```bash
|
||||
kubectl get applications
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- The app is there and it is `OutOfSync`!
|
||||
|
||||
---
|
||||
|
||||
## Manual sync with the CLI
|
||||
|
||||
- By default the "sync policy" is `manual`
|
||||
|
||||
- It can also be set to `auto`, which would check the git repository every 3 minutes
|
||||
|
||||
(this interval can be [configured globally][pollinginterval])
|
||||
|
||||
- Manual sync can be triggered with the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's force an immediate sync of our app:
|
||||
```bash
|
||||
argocd app sync kubercoins
|
||||
```
|
||||
]
|
||||
|
||||
🤔 We're getting errors!
|
||||
|
||||
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
|
||||
|
||||
---
|
||||
|
||||
## Sync failed
|
||||
|
||||
We should receive a failure:
|
||||
|
||||
`FATA[0000] Operation has completed with phase: Failed`
|
||||
|
||||
And in the output, we see more details:
|
||||
|
||||
`Message: one or more objects failed to apply,`
|
||||
<br/>
|
||||
`reason: namespaces "kubercoins-prod" not found`
|
||||
|
||||
---
|
||||
|
||||
## Creating the namespace
|
||||
|
||||
- There are multiple ways to achieve that
|
||||
|
||||
- We could generate a YAML manifest for the namespace and add it to the git repository
|
||||
|
||||
- Or we could use "Sync Options" so that ArgoCD creates it automatically!
|
||||
|
||||
- ArgoCD provides many "Sync Options" to handle various edge cases
|
||||
|
||||
- Some [others](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/) are: `FailOnSharedResource`, `PruneLast`, `PrunePropagationPolicy`...
|
||||
|
||||
---
|
||||
|
||||
## Editing the app's sync options
|
||||
|
||||
- This can be done through the web UI or the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's use the CLI once again:
|
||||
```bash
|
||||
argocd app edit kubercoins
|
||||
```
|
||||
|
||||
- Add the following to the YAML manifest, at the root level:
|
||||
```yaml
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Sync again
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's retry the sync operation:
|
||||
```bash
|
||||
argocd app sync kubercoins
|
||||
```
|
||||
|
||||
- And check the application status:
|
||||
```bash
|
||||
argocd app list
|
||||
kubectl get applications
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- It should show `Synced` and `Progressing`
|
||||
|
||||
- After a while (when all pods are running correctly) it should be `Healthy`
|
||||
|
||||
---
|
||||
|
||||
## Managing Applications via the Web UI
|
||||
|
||||
- ArgoCD is popular in large part due to its browser-based UI
|
||||
|
||||
- Let's see how to manage Applications in the web UI
|
||||
|
||||
.lab[
|
||||
|
||||
- Expose the web dashboard on a local port:
|
||||
```bash
|
||||
argocd admin dashboard
|
||||
```
|
||||
|
||||
- This command will show the dashboard URL; open it in a browser
|
||||
|
||||
- Authentication should be automatic
|
||||
|
||||
]
|
||||
|
||||
Note: `argocd admin dashboard` is similar to `kubectl port-forward` or `kubectl proxy`.
|
||||
|
||||
(The dashboard remains available as long as `argocd admin dashboard` is running.)
|
||||
|
||||
---
|
||||
|
||||
## Adding a staging Application
|
||||
|
||||
- Let's add another Application for a staging environment
|
||||
|
||||
- First, create a new branch (e.g. `staging`) in our kubercoins fork
|
||||
|
||||
- Then, in the ArgoCD web UI, click on the "+ NEW APP" button
|
||||
|
||||
(on a narrow display, it might just be "+", right next to buttons looking like 🔄 and ↩️)
|
||||
|
||||
- See next slides for details about that form!
|
||||
|
||||
---
|
||||
|
||||
## Defining the Application
|
||||
|
||||
| Field | Value |
|
||||
|------------------|--------------------------------------------|
|
||||
| Application Name | `kubercoins-stg` |
|
||||
| Project Name | `default` |
|
||||
| Sync policy | `Manual` |
|
||||
| Sync options | check `auto-create namespace` |
|
||||
| Repository URL | `https://github.com/<username>/<reponame>` |
|
||||
| Revision | `<branchname>` |
|
||||
| Path | `.` |
|
||||
| Cluster URL | `https://kubernetes.default.svc` |
|
||||
| Namespace | `kubercoins-stg` |
|
||||
|
||||
Then click on the "CREATE" button (top left).
|
||||
|
||||
---
|
||||
|
||||
## Synchronizing the Application
|
||||
|
||||
- After creating the app, it should now show up in the app tiles
|
||||
|
||||
(with a yellow outline to indicate that it's out of sync)
|
||||
|
||||
- Click on the "SYNC" button on the app tile to show the sync panel
|
||||
|
||||
- In the sync panel, click on "SYNCHRONIZE"
|
||||
|
||||
- The app will start to synchronize, and should become healthy after a little while
|
||||
|
||||
---
|
||||
|
||||
## Making changes
|
||||
|
||||
- Let's make changes to our application manifests and see what happens
|
||||
|
||||
.lab[
|
||||
|
||||
- Make a change to a manifest
|
||||
|
||||
(for instance, change the number of replicas of a Deployment)
|
||||
|
||||
- Commit that change and push it to the staging branch
|
||||
|
||||
- Check the application sync status:
|
||||
```bash
|
||||
argocd app list
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- After a short period of time (a few minutes max) the app should show up "out of sync"
|
||||
|
||||
---
|
||||
|
||||
## Automated synchronization
|
||||
|
||||
- We don't want to manually sync after every change
|
||||
|
||||
(that wouldn't be true continuous deployment!)
|
||||
|
||||
- We're going to enable "auto sync"
|
||||
|
||||
- Note that this requires much more rigorous testing and observability!
|
||||
|
||||
(we need to be sure that our changes won't crash our app or even our cluster)
|
||||
|
||||
- Argo project also provides [Argo Rollouts][rollouts]
|
||||
|
||||
(a controller and CRDs to provide blue-green, canary deployments...)
|
||||
|
||||
- Today we'll just turn on automated sync for the staging namespace
|
||||
|
||||
[rollouts]: https://argoproj.github.io/rollouts/
|
||||
|
||||
---
|
||||
|
||||
## Enabling auto-sync
|
||||
|
||||
- In the web UI, go to *Applications* and click on *kubercoins-stg*
|
||||
|
||||
- Click on the "DETAILS" button (top left, might be just an "i" sign on narrow displays)
|
||||
|
||||
- Click on "ENABLE AUTO-SYNC" (under "SYNC POLICY")
|
||||
|
||||
- After a few minutes the changes should show up!
|
||||
|
||||
---
|
||||
|
||||
## Rolling back
|
||||
|
||||
- If we deploy a broken version, how do we recover?
|
||||
|
||||
- "The GitOps way": revert the changes in source control
|
||||
|
||||
(see next slide)
|
||||
|
||||
- Emergency rollback:
|
||||
|
||||
- disable auto-sync (if it was enabled)
|
||||
|
||||
- on the app page, click on "HISTORY AND ROLLBACK"
|
||||
<br/>
|
||||
(with the clock-with-backward-arrow icon)
|
||||
|
||||
- click on the "..." button next to the revision we want to roll back to
|
||||
|
||||
- click "Rollback" and confirm
|
||||
|
||||
---
|
||||
|
||||
## Rolling back with GitOps
|
||||
|
||||
- The correct way to roll back is rolling back the code in source control
|
||||
|
||||
```bash
|
||||
git checkout staging
|
||||
git revert HEAD
|
||||
git push origin staging
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Working with Helm
|
||||
|
||||
- ArgoCD supports different tools to process Kubernetes manifests:
|
||||
|
||||
Kustomize, Helm, Jsonnet, and [Config Management Plugins][cmp]
|
||||
|
||||
- Let's see how to deploy Helm charts with ArgoCD!
|
||||
|
||||
- In the [kubercoins] repository, there is a branch called [helm]
|
||||
|
||||
- It provides a generic Helm chart, in the [generic-service] directory
|
||||
|
||||
- There are service-specific values YAML files in the [values] directory
|
||||
|
||||
- Let's create one application for each of the 5 components of our app!
|
||||
|
||||
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
[helm]: https://github.com/jpetazzo/kubercoins/tree/helm
|
||||
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
|
||||
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
|
||||
|
||||
---
|
||||
|
||||
## Creating a Helm Application
|
||||
|
||||
- The example below uses "upstream" kubercoins
|
||||
|
||||
- Feel free to use your own fork instead!
|
||||
|
||||
.lab[
|
||||
|
||||
- Create an Application for `hasher`:
|
||||
```bash
|
||||
argocd app create hasher \
|
||||
--repo https://github.com/jpetazzo/kubercoins.git \
|
||||
--path generic-service --revision helm \
|
||||
--dest-server https://kubernetes.default.svc \
|
||||
--dest-namespace kubercoins-helm \
|
||||
--sync-option CreateNamespace=true \
|
||||
--values ../values/hasher.yaml \
|
||||
--sync-policy=auto
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Deploying the rest of the application
|
||||
|
||||
- Option 1: repeat the previous command (updating app name and values)
|
||||
|
||||
- Option 2: author YAML manifests and apply them
|
||||
|
||||
---
|
||||
|
||||
## Additional considerations
|
||||
|
||||
- When running in production, ArgoCD can be integrated with an [SSO provider][sso]
|
||||
|
||||
- ArgoCD embeds and bundles [Dex] to delegate authentication
|
||||
|
||||
- it can also use an existing OIDC provider (Okta, Keycloak...)
|
||||
|
||||
- A single ArgoCD instance can manage multiple clusters
|
||||
|
||||
(but it's also fine to have one ArgoCD per cluster)
|
||||
|
||||
- ArgoCD can be complemented with [Argo Rollouts][rollouts] for advanced rollout control
|
||||
|
||||
(blue/green, canary...)
|
||||
|
||||
[sso]: https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/#sso
|
||||
[Dex]: https://github.com/dexidp/dex
|
||||
[rollouts]: https://argoproj.github.io/argo-rollouts/
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
Many thanks to
|
||||
Anton (Ant) Weiss ([antweiss.com](https://antweiss.com), [@antweiss](https://twitter.com/antweiss))
|
||||
and
|
||||
Guilhem Lettron
|
||||
for contributing an initial version and suggestions to this ArgoCD chapter.
|
||||
|
||||
All remaining typos, mistakes, or approximations are mine (Jérôme Petazzoni).
|
||||
|
||||
???
|
||||
|
||||
:EN:- Implementing gitops with ArgoCD
|
||||
:FR:- Workflow gitops avec ArgoCD
|
||||
@@ -856,7 +856,7 @@ class: extra-details
|
||||
- To learn more about Kubernetes attacks and threat models around RBAC:
|
||||
|
||||
📽️ [Hacking into Kubernetes Security for Beginners](https://www.youtube.com/watch?v=mLsCm9GVIQg)
|
||||
by [Ellen Körbes](https://twitter.com/ellenkorbes)
|
||||
by [V Körbes](https://twitter.com/veekorbes)
|
||||
and [Tabitha Sable](https://twitter.com/TabbySable)
|
||||
|
||||
---
|
||||
|
||||
@@ -81,7 +81,7 @@
|
||||
|
||||
## What version are we running anyway?
|
||||
|
||||
- When I say, "I'm running Kubernetes 1.22", is that the version of:
|
||||
- When I say, "I'm running Kubernetes 1.28", is that the version of:
|
||||
|
||||
- kubectl
|
||||
|
||||
@@ -111,6 +111,73 @@
|
||||
|
||||
---
|
||||
|
||||
## Important questions
|
||||
|
||||
- Should we upgrade the control plane before or after the kubelets?
|
||||
|
||||
- Within the control plane, should we upgrade the API server first or last?
|
||||
|
||||
- How often should we upgrade?
|
||||
|
||||
- How long are versions maintained?
|
||||
|
||||
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
|
||||
|
||||
- Let's review the key elements together ...
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.28.9:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 28
|
||||
- PATCH = 9
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.28.9 and 1.28.13 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
(but it's mandatory only when there is a security advisory)
|
||||
|
||||
---
|
||||
|
||||
## Version skew
|
||||
|
||||
- API server must be more recent than its clients (kubelet and control plane)
|
||||
|
||||
- ... Which means it must always be upgraded first
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.28 and 1.29)
|
||||
|
||||
- It also means that going from 1.28 to 1.30 requires going through 1.29
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
---
|
||||
|
||||
## Release cycle
|
||||
|
||||
- There is a new PATCH release whenever necessary
|
||||
|
||||
(every few weeks, or "ASAP" when there is a security vulnerability)
|
||||
|
||||
- There is a new MINOR release every 3 months (approximately)
|
||||
|
||||
- At any given time, three MINOR releases are maintained
|
||||
|
||||
- ... Which means that MINOR releases are maintained approximately 9 months
|
||||
|
||||
- We should expect to upgrade at least every 3 months (on average)
|
||||
|
||||
---
|
||||
|
||||
## General guidelines
|
||||
|
||||
- To update a component, use whatever was used to install it
|
||||
@@ -139,73 +206,6 @@
|
||||
|
||||
---
|
||||
|
||||
## Important questions
|
||||
|
||||
- Should we upgrade the control plane before or after the kubelets?
|
||||
|
||||
- Within the control plane, should we upgrade the API server first or last?
|
||||
|
||||
- How often should we upgrade?
|
||||
|
||||
- How long are versions maintained?
|
||||
|
||||
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
|
||||
|
||||
- Let's review the key elements together ...
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.22.17:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 22
|
||||
- PATCH = 17
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.22.17 and 1.22.5 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
(but it's mandatory only when there is a security advisory)
|
||||
|
||||
---
|
||||
|
||||
## Version skew
|
||||
|
||||
- API server must be more recent than its clients (kubelet and control plane)
|
||||
|
||||
- ... Which means it must always be upgraded first
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.22 and 1.23)
|
||||
|
||||
- It also means that going from 1.22 to 1.24 requires going through 1.23
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
---
|
||||
|
||||
## Release cycle
|
||||
|
||||
- There is a new PATCH release whenever necessary
|
||||
|
||||
(every few weeks, or "ASAP" when there is a security vulnerability)
|
||||
|
||||
- There is a new MINOR release every 3 months (approximately)
|
||||
|
||||
- At any given time, three MINOR releases are maintained
|
||||
|
||||
- ... Which means that MINOR releases are maintained approximately 9 months
|
||||
|
||||
- We should expect to upgrade at least every 3 months (on average)
|
||||
|
||||
---
|
||||
|
||||
## In practice
|
||||
|
||||
- We are going to update a few cluster components
|
||||
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
|
||||
```
|
||||
|
||||
- Look for the `image:` line, and update it to e.g. `v1.24.1`
|
||||
- Look for the `image:` line, and update it to e.g. `v1.30.1`
|
||||
|
||||
]
|
||||
|
||||
@@ -320,53 +320,29 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
- First things first: we need to upgrade kubeadm
|
||||
|
||||
.lab[
|
||||
- The Kubernetes package repositories are now split by minor versions
|
||||
|
||||
- Upgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.27.0-00
|
||||
```
|
||||
(i.e. there is one repository for 1.28, another for 1.29, etc.)
|
||||
|
||||
- Check what kubeadm tells us:
|
||||
```
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
- This avoids accidentally upgrading from one minor version to another
|
||||
|
||||
]
|
||||
(e.g. with unattended upgrades or if packages haven't been held/pinned)
|
||||
|
||||
Problem: kubeadm doesn't know how to handle
|
||||
upgrades from version 1.22.
|
||||
|
||||
This is because we installed version 1.27.
|
||||
|
||||
We need to install kubeadm version 1.23.X.
|
||||
- We'll need to add the new package repository and unpin packages!
|
||||
|
||||
---
|
||||
|
||||
## Downgrading kubeadm
|
||||
## Installing the new packages
|
||||
|
||||
- We need to go back to kubeadm version 1.23.X.
|
||||
- Edit `/etc/apt/sources.list.d/kubernetes.list`
|
||||
|
||||
.lab[
|
||||
(or copy it to e.g. `kubernetes-1.29.list` and edit that)
|
||||
|
||||
- View available versions for package `kubeadm`:
|
||||
```bash
|
||||
apt show kubeadm -a | grep ^Version | grep 1.23
|
||||
```
|
||||
- `apt-get update`
|
||||
|
||||
- Downgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.23.0-00
|
||||
```
|
||||
- Now edit (or remove) `/etc/apt/preferences.d/kubernetes`
|
||||
|
||||
- Check what kubeadm tells us:
|
||||
```
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
kubeadm should now agree to upgrade to 1.23.X.
|
||||
- `apt-get install kubeadm` should now upgrade `kubeadm` correctly! 🎉
|
||||
|
||||
---
|
||||
|
||||
@@ -385,7 +361,7 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
- Look for the `image:` line, and restore it to the original value
|
||||
|
||||
(e.g. `v1.22.17`)
|
||||
(e.g. `v1.28.9`)
|
||||
|
||||
- Wait for the control plane to come back up
|
||||
|
||||
@@ -399,9 +375,14 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
.lab[
|
||||
|
||||
- Check the upgrade plan:
|
||||
```bash
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
- Perform the upgrade:
|
||||
```bash
|
||||
sudo kubeadm upgrade apply v1.23.0
|
||||
sudo kubeadm upgrade apply v1.29.0
|
||||
```
|
||||
|
||||
]
|
||||
@@ -418,15 +399,9 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
- Log into node `oldversion2`
|
||||
|
||||
- View available versions for package `kubelet`:
|
||||
```bash
|
||||
apt show kubelet -a | grep ^Version
|
||||
```
|
||||
- Update package lists and APT pins like we did before
|
||||
|
||||
- Upgrade kubelet:
|
||||
```bash
|
||||
sudo apt install kubelet=1.23.0-00
|
||||
```
|
||||
- Then upgrade kubelet
|
||||
|
||||
]
|
||||
|
||||
@@ -479,13 +454,16 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
.lab[
|
||||
|
||||
- Download the configuration on each node, and upgrade kubelet:
|
||||
- Execute the whole upgrade procedure on each node:
|
||||
```bash
|
||||
for N in 1 2 3; do
|
||||
ssh oldversion$N "
|
||||
sudo apt install kubeadm=1.23.0-00 &&
|
||||
sudo sed -i s/1.28/1.29/ /etc/apt/sources.list.d/kubernetes.list &&
|
||||
sudo rm /etc/apt/preferences.d/kubernetes &&
|
||||
sudo apt update &&
|
||||
sudo apt install kubeadm -y &&
|
||||
sudo kubeadm upgrade node &&
|
||||
sudo apt install kubelet=1.23.0-00"
|
||||
sudo apt install kubelet -y"
|
||||
done
|
||||
```
|
||||
]
|
||||
@@ -494,7 +472,7 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
## Checking what we've done
|
||||
|
||||
- All our nodes should now be updated to version 1.23.0
|
||||
- All our nodes should now be updated to version 1.29
|
||||
|
||||
.lab[
|
||||
|
||||
@@ -507,17 +485,115 @@ kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
---
|
||||
|
||||
## And now, was that a good idea?
|
||||
|
||||
--
|
||||
|
||||
**Almost!**
|
||||
|
||||
--
|
||||
|
||||
- The official recommendation is to *drain* a node before performing node maintenance
|
||||
|
||||
(migrate all workloads off the node before upgrading it)
|
||||
|
||||
- How do we do that?
|
||||
|
||||
- Is it really necessary?
|
||||
|
||||
- Let's see!
|
||||
|
||||
---
|
||||
|
||||
## Draining a node
|
||||
|
||||
- This can be achieved with the `kubectl drain` command, which will:
|
||||
|
||||
- *cordon* the node (prevent new pods from being scheduled there)
|
||||
|
||||
- *evict* all the pods running on the node (delete them gracefully)
|
||||
|
||||
- the evicted pods will automatically be recreated somewhere else
|
||||
|
||||
- evictions might be blocked in some cases (Pod Disruption Budgets, `emptyDir` volumes)
|
||||
|
||||
- Once the node is drained, it can safely be upgraded, restarted...
|
||||
|
||||
- Once it's ready, it can be put back in commission with `kubectl uncordon`
|
||||
|
||||
---
|
||||
|
||||
## Is it necessary?
|
||||
|
||||
- When upgrading kubelet from one patch-level version to another:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- When upgrading system packages:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- except [when it's not][datadog-systemd-outage]
|
||||
|
||||
- When upgrading the kernel:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- ...as long as we can tolerate a restart of the containers on the node
|
||||
|
||||
- ...and that they will be unavailable for a few minutes (during the reboot)
|
||||
|
||||
[datadog-systemd-outage]: https://www.datadoghq.com/blog/engineering/2023-03-08-deep-dive-into-platform-level-impact/
|
||||
|
||||
---
|
||||
|
||||
## Is it necessary?
|
||||
|
||||
- When upgrading kubelet from one minor version to another:
|
||||
|
||||
- it *may or may not be fine*
|
||||
|
||||
- in some cases (e.g. migrating from Docker to containerd) it *will not*
|
||||
|
||||
- Here's what [the documentation][node-upgrade-docs] says:
|
||||
|
||||
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
|
||||
|
||||
- Do it at your own risk, and if you do, test extensively in staging environments!
|
||||
|
||||
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
|
||||
|
||||
---
|
||||
|
||||
## Database operators to the rescue
|
||||
|
||||
- Moving stateful pods (e.g.: database server) can cause downtime
|
||||
|
||||
- Database replication can help:
|
||||
|
||||
- if a node contains database servers, we make sure these servers aren't primaries
|
||||
|
||||
- if they are primaries, we execute a *switch over*
|
||||
|
||||
- Some database operators (e.g. [CNPG]) will do that switch over automatically
|
||||
|
||||
(when they detect that a node has been *cordoned*)
|
||||
|
||||
[CNPG]: https://cloudnative-pg.io/
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Skipping versions
|
||||
|
||||
- This example worked because we went from 1.22 to 1.23
|
||||
- This example worked because we went from 1.28 to 1.29
|
||||
|
||||
- If you are upgrading from e.g. 1.21, you will have to go through 1.22 first
|
||||
- If you are upgrading from e.g. 1.26, you will have to go through 1.27 first
|
||||
|
||||
- This means upgrading kubeadm to 1.22.X, then using it to upgrade the cluster
|
||||
- This means upgrading kubeadm to 1.27.X, then using it to upgrade the cluster
|
||||
|
||||
- Then upgrading kubeadm to 1.23.X, etc.
|
||||
- Then upgrading kubeadm to 1.28.X, etc.
|
||||
|
||||
- **Make sure to read the release notes before upgrading!**
|
||||
|
||||
|
||||
@@ -24,6 +24,32 @@
|
||||
|
||||
---
|
||||
|
||||
## A bit of history
|
||||
|
||||
Things related to Custom Resource Definitions:
|
||||
|
||||
- Kubernetes 1.??: `apiextensions.k8s.io/v1beta1` introduced
|
||||
|
||||
- Kubernetes 1.16: `apiextensions.k8s.io/v1` introduced
|
||||
|
||||
- Kubernetes 1.22: `apiextensions.k8s.io/v1beta1` [removed][changes-in-122]
|
||||
|
||||
- Kubernetes 1.25: [CEL validation rules available in beta][crd-validation-rules-beta]
|
||||
|
||||
- Kubernetes 1.28: [validation ratcheting][validation-ratcheting] in [alpha][feature-gates]
|
||||
|
||||
- Kubernetes 1.29: [CEL validation rules available in GA][cel-validation-rules]
|
||||
|
||||
- Kubernetes 1.30: [validation ratcheting][validation-ratcheting] in [beta][feature-gates]; enabled by default
|
||||
|
||||
[crd-validation-rules-beta]: https://kubernetes.io/blog/2022/09/23/crd-validation-rules-beta/
|
||||
[cel-validation-rules]: https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules
|
||||
[validation-ratcheting]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/4008-crd-ratcheting
|
||||
[feature-gates]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features
|
||||
[changes-in-122]: https://kubernetes.io/blog/2021/07/14/upcoming-changes-in-kubernetes-1-22/
|
||||
|
||||
---
|
||||
|
||||
## First slice of pizza
|
||||
|
||||
```yaml
|
||||
@@ -42,8 +68,6 @@
|
||||
|
||||
(a few optional things become mandatory, see [this guide](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#customresourcedefinition-v122) for details)
|
||||
|
||||
- `apiextensions.k8s.io/v1beta1` is available since Kubernetes 1.16
|
||||
|
||||
---
|
||||
|
||||
## Second slice of pizza
|
||||
@@ -96,9 +120,9 @@ The YAML below defines a resource using the CRD that we just created:
|
||||
kind: Pizza
|
||||
apiVersion: container.training/v1alpha1
|
||||
metadata:
|
||||
name: napolitana
|
||||
name: hawaiian
|
||||
spec:
|
||||
toppings: [ mozzarella ]
|
||||
toppings: [ cheese, ham, pineapple ]
|
||||
```
|
||||
|
||||
.lab[
|
||||
@@ -114,11 +138,7 @@ spec:
|
||||
|
||||
## Type validation
|
||||
|
||||
- Older versions of Kubernetes will accept our pizza definition as is
|
||||
|
||||
- Newer versions, however, will issue warnings about unknown fields
|
||||
|
||||
(and if we use `--validate=false`, these fields will simply be dropped)
|
||||
- Recent versions of Kubernetes will issue errors about unknown fields
|
||||
|
||||
- We need to improve our OpenAPI schema
|
||||
|
||||
@@ -126,6 +146,28 @@ spec:
|
||||
|
||||
---
|
||||
|
||||
## Creating a bland pizza
|
||||
|
||||
- Let's try to create a pizza anyway!
|
||||
|
||||
.lab[
|
||||
|
||||
- Only provide the most basic YAML manifest:
|
||||
```bash
|
||||
kubectl create -f- <<EOF
|
||||
kind: Pizza
|
||||
apiVersion: container.training/v1alpha1
|
||||
metadata:
|
||||
name: hawaiian
|
||||
EOF
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- That should work! (As long as we don't try to add pineapple😁)
|
||||
|
||||
---
|
||||
|
||||
## Third slice of pizza
|
||||
|
||||
- Let's add a full OpenAPI v3 schema to our Pizza CRD
|
||||
@@ -208,24 +250,42 @@ Note: we can update a CRD without having to re-create the corresponding resource
|
||||
|
||||
---
|
||||
|
||||
## Better data validation
|
||||
## Validation woes
|
||||
|
||||
- Let's change the data schema so that the sauce can only be `red` or `white`
|
||||
|
||||
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
|
||||
- Let's check what happens if we try to update our pizzas
|
||||
|
||||
.lab[
|
||||
|
||||
- Update the Pizza CRD:
|
||||
- Try to add a label:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
|
||||
kubectl label pizza --all deliciousness=9001
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
--
|
||||
|
||||
- It works for the pizzas that have `sauce` and `toppings`, but not the other one!
|
||||
|
||||
- The other one doesn't pass validation, and *can't be modified*
|
||||
|
||||
---
|
||||
|
||||
## Validation *a posteriori*
|
||||
## First, let's fix this!
|
||||
|
||||
- Option 1: delete the pizza
|
||||
|
||||
*(deletion isn't subject to validation)*
|
||||
|
||||
- Option 2: update the pizza to add `sauce` and `toppings`
|
||||
|
||||
*(writing a pizza that passes validation is fine)*
|
||||
|
||||
- Option 3: relax the validation rules
|
||||
|
||||
---
|
||||
|
||||
## Next, explain what's happening
|
||||
|
||||
- Some of the pizzas that we defined earlier *do not* pass validation
|
||||
|
||||
@@ -281,6 +341,8 @@ Note: we can update a CRD without having to re-create the corresponding resource
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Migrating database content
|
||||
|
||||
- We need to *serve* a version as long as we *store* objects in that version
|
||||
@@ -295,6 +357,58 @@ Note: we can update a CRD without having to re-create the corresponding resource
|
||||
|
||||
---
|
||||
|
||||
## Validation ratcheting
|
||||
|
||||
- Good news: it's not always necessary to introduce new versions
|
||||
|
||||
(and to write the associated conversion webhooks)
|
||||
|
||||
- *Validation ratcheting allows updates to custom resources that fail validation to succeed if the validation errors were on unchanged keypaths*
|
||||
|
||||
- In other words: allow changes that don't introduce further validation errors
|
||||
|
||||
- This was introduced in Kubernetes 1.28 (alpha), enabled by default in 1.30 (beta)
|
||||
|
||||
- The rules are actually a bit more complex
|
||||
|
||||
- Another (maybe more accurate) explanation: it allows tightening or loosening some field definitions
|
||||
|
||||
---
|
||||
|
||||
## Validation ratcheting example
|
||||
|
||||
- Let's change the data schema so that the sauce can only be `red` or `white`
|
||||
|
||||
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
|
||||
|
||||
.lab[
|
||||
|
||||
- Update the Pizza CRD:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Testing validation ratcheting
|
||||
|
||||
- This should work with Kubernetes 1.30 and above
|
||||
|
||||
(but give an error for the `brownie` pizza with previous versions of K8S)
|
||||
|
||||
.lab[
|
||||
|
||||
- Add another label:
|
||||
```bash
|
||||
kubectl label pizzas --all food=definitely
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Even better data validation
|
||||
|
||||
- If we need more complex data validation, we can use a validating webhook
|
||||
|
||||
513
slides/k8s/disruptions.md
Normal file
513
slides/k8s/disruptions.md
Normal file
@@ -0,0 +1,513 @@
|
||||
# Disruptions
|
||||
|
||||
In a perfect world...
|
||||
|
||||
- hardware never fails
|
||||
|
||||
- software never has bugs
|
||||
|
||||
- ...and never needs to be updated
|
||||
|
||||
- ...and uses a predictable amount of resources
|
||||
|
||||
- ...and these resources are infinite anyways
|
||||
|
||||
- network latency and packet loss are zero
|
||||
|
||||
- humans never make mistakes
|
||||
|
||||
--
|
||||
|
||||
😬
|
||||
|
||||
---
|
||||
|
||||
## Disruptions
|
||||
|
||||
In the real world...
|
||||
|
||||
- hardware will fail randomly (without advance notice)
|
||||
|
||||
- software has bugs
|
||||
|
||||
- ...and we constantly add new features
|
||||
|
||||
- ...and will sometimes use more resources than expected
|
||||
|
||||
- ...and these resources are limited
|
||||
|
||||
- network latency and packet loss are NOT zero
|
||||
|
||||
- humans make mistakes (shutting down the wrong machine, the wrong app...)
|
||||
|
||||
---
|
||||
|
||||
## Disruptions
|
||||
|
||||
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
|
||||
|
||||
- There are **voluntary** and **involuntary** disruptions
|
||||
|
||||
- voluntary = directly initiated by humans (including by mistake!)
|
||||
|
||||
- involuntary = everything else
|
||||
|
||||
- In this section, we're going to see what they are and how to prevent them
|
||||
|
||||
(or at least, mitigate their effects)
|
||||
|
||||
---
|
||||
|
||||
## Node outage
|
||||
|
||||
- Example: hardware failure (server or network), low-level error
|
||||
|
||||
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
|
||||
|
||||
- **Involuntary** disruption (even if it results from human error!)
|
||||
|
||||
- Consequence: all workloads on that node become unresponsive
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- scale workloads to at least 2 replicas (or more if quorum is needed)
|
||||
|
||||
- add anti-affinity scheduling constraints (to avoid having all pods on the same node)
|
||||
|
||||
---
|
||||
|
||||
## Node outage play-by-play
|
||||
|
||||
- Node goes down (or gets disconnected from the network)
|
||||
|
||||
- Its lease (in Namespace `kube-node-lease`) doesn't get renewed
|
||||
|
||||
- Controller manager detects that and marks the node as "unreachable"
|
||||
|
||||
(this adds both `NoSchedule` and `NoExecute` taints to the node)
|
||||
|
||||
- Eventually, the `NoExecute` taint will evict the pods running on that node
|
||||
|
||||
- This will trigger creation of replacement pods by owner controllers
|
||||
|
||||
(except for pods with a stable network identity, e.g. in a Stateful Set!)
|
||||
|
||||
---
|
||||
|
||||
## Node outage notes
|
||||
|
||||
- By default, pods will tolerate the `unreachable:NoExecute` taint for 5 minutes
|
||||
|
||||
(toleration automatically added by Admission controller `DefaultTolerationSeconds`)
|
||||
|
||||
- Pods of a Stateful Set don't recover automatically:
|
||||
|
||||
- as long as the Pod exists, a replacement Pod can't be created
|
||||
|
||||
- the Pod will exist as long as its Node exists
|
||||
|
||||
- deleting the Node (manually or automatically) will recover the Pod
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure
|
||||
|
||||
- Example: available memory on a node goes below a specific threshold
|
||||
|
||||
(because a pod is using too much memory and no limit was set)
|
||||
|
||||
- **Involuntary** disruption
|
||||
|
||||
- Consequence: kubelet starts to *evict* some pods
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- set *resource limits* on containers to prevent them from using too many resources
|
||||
|
||||
- set *resource requests* on containers to make sure they don't get evicted
|
||||
<br/>
|
||||
(as long as they use less than what they requested)
|
||||
|
||||
- make sure that apps don't use more resources than what they've requested
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure play-by-play
|
||||
|
||||
- Memory leak in an application container, slowly causing very high memory usage
|
||||
|
||||
- Overall free memory on the node goes below the *soft* or the *hard* threshold
|
||||
|
||||
(default hard threshold = 100Mi; default soft threshold = none)
|
||||
|
||||
- When reaching the *soft* threshold:
|
||||
|
||||
- kubelet waits until the "eviction soft grace period" expires
|
||||
|
||||
- then (if resource usage is still above the threshold) it gracefully evicts pods
|
||||
|
||||
- When reaching the *hard* threshold:
|
||||
|
||||
- kubelet immediately and forcefully evicts pods
|
||||
|
||||
---
|
||||
|
||||
## Which pods are evicted?
|
||||
|
||||
- Kubelet only considers pods that are using *more* than what they requested
|
||||
|
||||
(and only for the resource that is under pressure, e.g. RAM or disk usage)
|
||||
|
||||
- First, it sorts pods by *priority¹* (as set with the `priorityClassName` in the pod spec)
|
||||
|
||||
- Then, by how much their resource usage exceeds their request
|
||||
|
||||
(again, for the resource that is under pressure)
|
||||
|
||||
- It evicts pods until enough resources have been freed up
|
||||
|
||||
---
|
||||
|
||||
## Soft (graceful) vs hard (forceful) eviction
|
||||
|
||||
- Soft eviction = graceful shutdown of the pod
|
||||
|
||||
(honors the pod's `terminationGracePeriodSeconds` timeout)
|
||||
|
||||
- Hard eviction = immediate shutdown of the pod
|
||||
|
||||
(kills all containers immediately)
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure notes
|
||||
|
||||
- If resource usage increases *very fast*, kubelet might not catch it fast enough
|
||||
|
||||
- For memory: this will trigger the kernel out-of-memory killer
|
||||
|
||||
- containers killed by OOM are automatically restarted (no eviction)
|
||||
|
||||
- eviction might happen at a later point though (if memory usage stays high)
|
||||
|
||||
- For disk: there is no "out-of-disk" killer, but writes will fail
|
||||
|
||||
- the `write` system call fails with `errno = ENOSPC` / `No space left on device`
|
||||
|
||||
- eviction typically happens shortly after (when kubelet catches up)
|
||||
|
||||
- When relying on disk/memory bursts a lot, using `priorityClasses` might help
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure delays
|
||||
|
||||
- By default, no soft threshold is defined
|
||||
|
||||
- Defining it requires setting both the threshold and the grace period
|
||||
|
||||
- Grace periods can be different for the different types of resources
|
||||
|
||||
- When a node is under pressure, kubelet places a `NoSchedule` taint
|
||||
|
||||
(to avoid adding more pods while the node is under pressure)
|
||||
|
||||
- Once the node is no longer under pressure, kubelet clears the taint
|
||||
|
||||
(after waiting an extra timeout, `evictionPressureTransitionPeriod`, 5 min by default)
|
||||
|
||||
---
|
||||
|
||||
## Accidental deletion
|
||||
|
||||
- Example: developer deletes the wrong Deployment, the wrong Namespace...
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
(from Kubernetes' perspective!)
|
||||
|
||||
- Consequence: application is down
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- only deploy to production systems through e.g. gitops workflows
|
||||
|
||||
- enforce peer review of changes
|
||||
|
||||
- only give users limited (e.g. read-only) access to production systems
|
||||
|
||||
- use canary deployments (might not catch all mistakes though!)
|
||||
|
||||
---
|
||||
|
||||
## Bad code deployment
|
||||
|
||||
- Example: critical bug introduced, application crashes immediately or is non-functional
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
(again, from Kubernetes' perspective!)
|
||||
|
||||
- Consequence: application is down
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- readiness probes can mitigate immediate crashes
|
||||
<br/>
|
||||
(rolling update continues only when enough pods are ready)
|
||||
|
||||
- delayed crashes will require a rollback
|
||||
<br/>
|
||||
(manual intervention, or automated by a canary system)
|
||||
|
||||
---
|
||||
|
||||
## Node shutdown
|
||||
|
||||
- Example: scaling down a cluster to save money
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are terminated
|
||||
|
||||
- this might disrupt workloads that have too many replicas on that node
|
||||
|
||||
- or workloads that should not be interrupted at all
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- terminate workloads one at a time, coordinating with users
|
||||
|
||||
--
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## Node shutdown
|
||||
|
||||
- Example: scaling down a cluster to save money
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are terminated
|
||||
|
||||
- this might disrupt workloads that have too many replicas on that node
|
||||
|
||||
- or workloads that should not be interrupted at all
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- ~~terminate workloads one at a time, coordinating with users~~
|
||||
|
||||
- use Pod Disruption Budgets
|
||||
|
||||
---
|
||||
|
||||
## Pod Disruption Budgets
|
||||
|
||||
- A PDB is a kind of *contract* between:
|
||||
|
||||
- "admins" = folks maintaining the cluster (e.g. adding/removing/updating nodes)
|
||||
|
||||
- "users" = folks deploying apps and workloads on the cluster
|
||||
|
||||
- A PDB expresses something like:
|
||||
|
||||
*in that particular set of pods, do not "disrupt" more than X at a time*
|
||||
|
||||
- Examples:
|
||||
|
||||
- in that set of frontend pods, do not disrupt more than 1 at a time
|
||||
|
||||
- in that set of worker pods, always have at least 10 ready
|
||||
<br/>
|
||||
(do not disrupt them if it would bring down the number of ready pods below 10)
|
||||
|
||||
---
|
||||
|
||||
## PDB - user side
|
||||
|
||||
- Cluster users create a PDB with a manifest like this one:
|
||||
|
||||
```yaml
|
||||
@@INCLUDE[k8s/pod-disruption-budget.yaml]
|
||||
```
|
||||
|
||||
- The PDB must indicate either `minAvailable` or `maxUnavailable`
|
||||
|
||||
---
|
||||
|
||||
## Rounding logic
|
||||
|
||||
- Percentages are rounded **up**
|
||||
|
||||
- When specifying `maxUnavailable` as a percentage, this can result in a higher percentage
|
||||
|
||||
(e.g. `maxUnavailable: 50%` with 3 pods can result in 2 pods being unavailable!)
|
||||
|
||||
---
|
||||
|
||||
## Unmanaged pods
|
||||
|
||||
- Specifying `minAvailable: X` works all the time
|
||||
|
||||
- Specifying `minAvailable: X%` or `maxUnavailable` requires *managed pods*
|
||||
|
||||
(pods that belong to a controller, e.g. Replica Set, Stateful Set...)
|
||||
|
||||
- This is because the PDB controller needs to know the total number of pods
|
||||
|
||||
(given by the `replicas` field, not merely by counting pod objects)
|
||||
|
||||
- The PDB controller will try to resolve the controller using the pod selector
|
||||
|
||||
- If that fails, the PDB controller will emit warning events
|
||||
|
||||
(visible with `kubectl describe pdb ...`)
|
||||
|
||||
---
|
||||
|
||||
## Zero
|
||||
|
||||
- `maxUnavailable: 0` means "do not disrupt my pods"
|
||||
|
||||
- Same thing if `minAvailable` is greater than or equal to the number of pods
|
||||
|
||||
- In that case, cluster admins are supposed to get in touch with cluster users
|
||||
|
||||
- This will prevent fully automated operation
|
||||
|
||||
(and some cluster admins' automated systems might not honor that request)
|
||||
|
||||
---
|
||||
|
||||
## PDB - admin side
|
||||
|
||||
- As a cluster admin, we need to follow certain rules
|
||||
|
||||
- Only shut down (or restart) a node when no pods are running on that node
|
||||
|
||||
(except system pods belonging to Daemon Sets)
|
||||
|
||||
- To remove pods running on a node, we should use the *eviction API*
|
||||
|
||||
(which will check PDB constraints and honor them)
|
||||
|
||||
- To prevent new pods from being scheduled on a node, we can use a *taint*
|
||||
|
||||
- These operations are streamlined by `kubectl drain`, which will:
|
||||
|
||||
- *cordon* the node (add a `NoSchedule` taint)
|
||||
|
||||
- invoke the *eviction API* to remove pods while respecting their PDBs
|
||||
|
||||
---
|
||||
|
||||
## Theory vs practice
|
||||
|
||||
- `kubectl drain` won't evict pods using `emptyDir` volumes
|
||||
|
||||
(unless the `--delete-emptydir-data` flag is passed as well)
|
||||
|
||||
- Make sure that `emptyDir` volumes don't hold anything important
|
||||
|
||||
(they shouldn't, but... who knows!)
|
||||
|
||||
- Kubernetes lacks a standard way for users to express:
|
||||
|
||||
*this `emptyDir` volume can/cannot be safely deleted*
|
||||
|
||||
- If a PDB forbids an eviction, this requires manual coordination
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Unhealthy pod eviction policy
|
||||
|
||||
- By default, unhealthy pods can only be evicted if PDB allows it
|
||||
|
||||
(unhealthy = running, but not ready)
|
||||
|
||||
- In many cases, unhealthy pods aren't available anyway, and can be removed
|
||||
|
||||
- This behavior is enabled by setting the appropriate field in the PDB manifest:
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
unhealthyPodEvictionPolicy: AlwaysAllow
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Node upgrade
|
||||
|
||||
- Example: upgrading kubelet or the Linux kernel on a node
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are temporarily interrupted, and restarted
|
||||
|
||||
- this might disrupt these workloads
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- migrate workloads off the node first (as if we were shutting it down)
|
||||
|
||||
---
|
||||
|
||||
## Node upgrade notes
|
||||
|
||||
- Is it necessary to drain a node before doing an upgrade?
|
||||
|
||||
- From [the documentation][node-upgrade-docs]:
|
||||
|
||||
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
|
||||
|
||||
- It's *probably* safe to upgrade in-place for:
|
||||
|
||||
- kernel upgrades
|
||||
|
||||
- kubelet patch-level upgrades (1.X.Y → 1.X.Z)
|
||||
|
||||
- It's *probably* better to drain the node for minor-version kubelet upgrades (1.X → 1.Y)
|
||||
|
||||
- When in doubt, test extensively in staging environments!
|
||||
|
||||
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
|
||||
|
||||
---
|
||||
|
||||
## Manual rescheduling
|
||||
|
||||
- Example: moving workloads around to accommodate noisy neighbors or other issues
|
||||
|
||||
(e.g. pod X is doing a lot of disk I/O and this is starving other pods)
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- the moved workloads are temporarily interrupted
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- define an appropriate number of replicas, declare PDBs
|
||||
|
||||
- use the [eviction API][eviction-API] to move workloads
|
||||
|
||||
[eviction-API]: https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/
|
||||
|
||||
???
|
||||
|
||||
:EN:- Voluntary and involuntary disruptions
|
||||
:EN:- Pod Disruption Budgets
|
||||
:FR:- "Disruptions" volontaires et involontaires
|
||||
:FR:- Pod Disruption Budgets
|
||||
@@ -1,4 +1,4 @@
|
||||
# Building our own cluster
|
||||
# Building our own cluster (easy)
|
||||
|
||||
- Let's build our own cluster!
|
||||
|
||||
@@ -33,10 +33,7 @@
|
||||
|
||||
## Our environment
|
||||
|
||||
- We will use the machine indicated as `dmuc1`
|
||||
|
||||
(this stands for "Dessine Moi Un Cluster" or "Draw Me A Sheep",
|
||||
<br/>in homage to Saint-Exupery's "The Little Prince")
|
||||
- We will use the machine indicated as `monokube1`
|
||||
|
||||
- This machine:
|
||||
|
||||
@@ -48,13 +45,33 @@
|
||||
|
||||
---
|
||||
|
||||
## The fine print
|
||||
|
||||
- We're going to use a *very old* version of Kubernetes
|
||||
|
||||
(specifically, 1.19)
|
||||
|
||||
- Why?
|
||||
|
||||
- It's much easier to set up than recent versions
|
||||
|
||||
- it's compatible with Docker (no need to set up CNI)
|
||||
|
||||
- it doesn't require a ServiceAccount keypair
|
||||
|
||||
- it can be exposed over plain HTTP (insecure but easier)
|
||||
|
||||
- We'll do that, and later, move to recent versions of Kubernetes!
|
||||
|
||||
---
|
||||
|
||||
## Checking our environment
|
||||
|
||||
- Let's make sure we have everything we need first
|
||||
|
||||
.lab[
|
||||
|
||||
- Log into the `dmuc1` machine
|
||||
- Log into the `monokube1` machine
|
||||
|
||||
- Get root:
|
||||
```bash
|
||||
@@ -528,7 +545,38 @@ clusters:
|
||||
|
||||
]
|
||||
|
||||
Success!
|
||||
If it works: great!
|
||||
|
||||
If it complains about a "cgroup driver", check the next slide.
|
||||
|
||||
---
|
||||
|
||||
## Cgroup drivers
|
||||
|
||||
- Cgroups ("control groups") are a Linux kernel feature
|
||||
|
||||
- They're used to account and limit resources
|
||||
|
||||
(e.g.: memory, CPU, block I/O...)
|
||||
|
||||
- There are multiple ways to manipulate cgroups, including:
|
||||
|
||||
- through a pseudo-filesystem (typically mounted in /sys/fs/cgroup)
|
||||
|
||||
- through systemd
|
||||
|
||||
- Kubelet and the container engine need to agree on which method to use
|
||||
|
||||
---
|
||||
|
||||
## Setting the cgroup driver
|
||||
|
||||
- If kubelet refused to start, mentioning a cgroup driver issue, try:
|
||||
```bash
|
||||
kubelet --kubeconfig ~/.kube/config --cgroup-driver=systemd
|
||||
```
|
||||
|
||||
- That *should* do the trick!
|
||||
|
||||
---
|
||||
|
||||
@@ -547,7 +595,7 @@ Success!
|
||||
|
||||
Our node should show up.
|
||||
|
||||
Its name will be its hostname (it should be `dmuc1`).
|
||||
Its name will be its hostname (it should be `monokube1`).
|
||||
|
||||
---
|
||||
|
||||
398
slides/k8s/dmuc-hard.md
Normal file
398
slides/k8s/dmuc-hard.md
Normal file
@@ -0,0 +1,398 @@
|
||||
# Building our own cluster (hard)
|
||||
|
||||
- This section assumes that you already went through
|
||||
|
||||
*“Building our own cluster (medium)”*
|
||||
|
||||
- In that previous section, we built a cluster with a single node
|
||||
|
||||
- In this new section, we're going to add more nodes to the cluster
|
||||
|
||||
- Note: we will need the lab environment of that previous section
|
||||
|
||||
- If you haven't done it yet, you should go through that section first
|
||||
|
||||
---
|
||||
|
||||
## Our environment
|
||||
|
||||
- On `polykube1`, we should have our Kubernetes control plane
|
||||
|
||||
- We're also assuming that we have the kubeconfig file created earlier
|
||||
|
||||
(in `~/.kube/config`)
|
||||
|
||||
- We're going to work on `polykube2` and add it to the cluster
|
||||
|
||||
- This machine has exactly the same setup as `polykube1`
|
||||
|
||||
(Ubuntu LTS with CNI, etcd, and Kubernetes binaries installed)
|
||||
|
||||
- Note that we won't need the etcd binaries here
|
||||
|
||||
(the control plane will run solely on `polykube1`)
|
||||
|
||||
---
|
||||
|
||||
## Checklist
|
||||
|
||||
We need to:
|
||||
|
||||
- generate the kubeconfig file for `polykube2`
|
||||
|
||||
- install a container engine
|
||||
|
||||
- generate a CNI configuration file
|
||||
|
||||
- start kubelet
|
||||
|
||||
---
|
||||
|
||||
## Generating the kubeconfig file
|
||||
|
||||
- Ideally, we should generate a key pair and certificate for `polykube2`...
|
||||
|
||||
- ...and generate a kubeconfig file using these
|
||||
|
||||
- At the moment, for simplicity, we'll use the same key pair and certificate as earlier
|
||||
|
||||
- We have a couple of options:
|
||||
|
||||
- copy the required files (kubeconfig, key pair, certificate)
|
||||
|
||||
- "flatten" the kubeconfig file (embed the key and certificate within)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## To flatten or not to flatten?
|
||||
|
||||
- "Flattening" the kubeconfig file can seem easier
|
||||
|
||||
(because it means we'll only have one file to move around)
|
||||
|
||||
- But it's easier to rotate the key or renew the certificate when they're in separate files
|
||||
|
||||
---
|
||||
|
||||
## Flatten and copy the kubeconfig file
|
||||
|
||||
- We'll flatten the file and copy it over
|
||||
|
||||
.lab[
|
||||
|
||||
- On `polykube1`, flatten the kubeconfig file:
|
||||
```bash
|
||||
kubectl config view --flatten > kubeconfig
|
||||
```
|
||||
|
||||
- Then copy it to `polykube2`:
|
||||
```bash
|
||||
scp kubeconfig polykube2:
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Generate CNI configuration
|
||||
|
||||
Back on `polykube2`, put the following in `/etc/cni/net.d/kube.conf`:
|
||||
|
||||
```json
|
||||
{
|
||||
"cniVersion": "0.3.1",
|
||||
"name": "kube",
|
||||
"type": "bridge",
|
||||
"bridge": "cni0",
|
||||
"isDefaultGateway": true,
|
||||
"ipMasq": true,
|
||||
"hairpinMode": true,
|
||||
"ipam": {
|
||||
"type": "host-local",
|
||||
"subnet": `"10.1.2.0/24"`
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Note how we changed the subnet!
|
||||
|
||||
---
|
||||
|
||||
## Install container engine and start `kubelet`
|
||||
|
||||
.lab[
|
||||
|
||||
- Install `containerd`:
|
||||
```bash
|
||||
sudo apt-get install containerd -y
|
||||
```
|
||||
|
||||
- Start `containerd`:
|
||||
```bash
|
||||
sudo systemctl start containerd
|
||||
```
|
||||
|
||||
- Start `kubelet`:
|
||||
```bash
|
||||
sudo kubelet --kubeconfig kubeconfig
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We're getting errors looking like:
|
||||
```
|
||||
"Post \"https://localhost:6443/api/v1/nodes\": ... connect: connection refused"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Updating the kubeconfig file
|
||||
|
||||
- Our kubeconfig file still references `localhost:6443`
|
||||
|
||||
- This was fine on `polykube1`
|
||||
|
||||
(where `kubelet` was connecting to the control plane running locally)
|
||||
|
||||
- On `polykube2`, we need to change that and put the address of the API server
|
||||
|
||||
(i.e. the address of `polykube1`)
|
||||
|
||||
.lab[
|
||||
|
||||
- Update the `kubeconfig` file:
|
||||
```bash
|
||||
sed -i s/localhost:6443/polykube1:6443/ kubeconfig
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Starting `kubelet`
|
||||
|
||||
- `kubelet` should now start correctly (hopefully!)
|
||||
|
||||
.lab[
|
||||
|
||||
- On `polykube2`, start `kubelet`:
|
||||
```bash
|
||||
sudo kubelet --kubeconfig kubeconfig
|
||||
```
|
||||
|
||||
- On `polykube1`, check that `polykube2` shows up and is `Ready`:
|
||||
```bash
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Testing connectivity
|
||||
|
||||
- From `polykube1`, can we connect to Pods running on `polykube2`? 🤔
|
||||
|
||||
.lab[
|
||||
|
||||
- Scale the test Deployment:
|
||||
```bash
|
||||
kubectl scale deployment blue --replicas=5
|
||||
```
|
||||
|
||||
- Get the IP addresses of the Pods:
|
||||
```bash
|
||||
kubectl get pods -o wide
|
||||
```
|
||||
|
||||
- Pick a Pod on `polykube2` and try to connect to it:
|
||||
```bash
|
||||
curl `10.1.2.2`
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
--
|
||||
|
||||
At that point, it doesn't work.
|
||||
|
||||
---
|
||||
|
||||
## Refresher on the *pod network*
|
||||
|
||||
- The *pod network* (or *pod-to-pod network*) has a few responsibilities:
|
||||
|
||||
- allocating and managing Pod IP addresses
|
||||
|
||||
- connecting Pods and Nodes
|
||||
|
||||
- connecting Pods together on a given node
|
||||
|
||||
- *connecting Pods together across nodes*
|
||||
|
||||
- That last part is the one that's not functioning in our cluster
|
||||
|
||||
- It typically requires some combination of routing, tunneling, bridging...
|
||||
|
||||
---
|
||||
|
||||
## Connecting networks together
|
||||
|
||||
- We can add manual routes between our nodes
|
||||
|
||||
- This requires adding `N x (N-1)` routes
|
||||
|
||||
(on each node, add a route to every other node)
|
||||
|
||||
- This will work on home labs where nodes are directly connected
|
||||
|
||||
(e.g. on an Ethernet switch, or same WiFi network, or a bridge between local VMs)
|
||||
|
||||
- ...Or on clouds where IP address filtering has been disabled
|
||||
|
||||
(by default, most cloud providers will discard packets going to unknown IP addresses)
|
||||
|
||||
- If IP address filtering is enabled, you'll have to use e.g. tunneling or overlay networks
|
||||
|
||||
---
|
||||
|
||||
## Important warning
|
||||
|
||||
- The technique that we are about to use doesn't work everywhere
|
||||
|
||||
- It only works if:
|
||||
|
||||
- all the nodes are directly connected to each other (at layer 2)
|
||||
|
||||
- the underlying network allows the IP addresses of our pods
|
||||
|
||||
- If we are on physical machines connected by a switch: OK
|
||||
|
||||
- If we are on virtual machines in a public cloud: NOT OK
|
||||
|
||||
- on AWS, we need to disable "source and destination checks" on our instances
|
||||
|
||||
- on OpenStack, we need to disable "port security" on our network ports
|
||||
|
||||
---
|
||||
|
||||
## Routing basics
|
||||
|
||||
- We need to tell *each* node:
|
||||
|
||||
"The subnet 10.1.N.0/24 is located on node N" (for all values of N)
|
||||
|
||||
- This is how we add a route on Linux:
|
||||
```bash
|
||||
ip route add 10.1.N.0/24 via W.X.Y.Z
|
||||
```
|
||||
|
||||
(where `W.X.Y.Z` is the internal IP address of node N)
|
||||
|
||||
- We can see the internal IP addresses of our nodes with:
|
||||
```bash
|
||||
kubectl get nodes -o wide
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Adding our route
|
||||
|
||||
- Let's add a route from `polykube1` to `polykube2`
|
||||
|
||||
.lab[
|
||||
|
||||
- Check the internal address of `polykube2`:
|
||||
```bash
|
||||
kubectl get node polykube2 -o wide
|
||||
```
|
||||
|
||||
- Now, on `polykube1`, add the route to the Pods running on `polykube2`:
|
||||
```bash
|
||||
sudo ip route add 10.1.2.0/24 via `A.B.C.D`
|
||||
```
|
||||
|
||||
- Finally, check that we can now connect to a Pod running on `polykube2`:
|
||||
```bash
|
||||
curl 10.1.2.2
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## What's next?
|
||||
|
||||
- The network configuration feels very manual:
|
||||
|
||||
- we had to generate the CNI configuration file (in `/etc/cni/net.d`)
|
||||
|
||||
- we had to manually update the nodes' routing tables
|
||||
|
||||
- Can we automate that?
|
||||
|
||||
**YES!**
|
||||
|
||||
- We could install something like [kube-router](https://www.kube-router.io/)
|
||||
|
||||
(which specifically takes care of the CNI configuration file and populates routing tables)
|
||||
|
||||
- Or we could also go with e.g. [Cilium](https://cilium.io/)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## If you want to try Cilium...
|
||||
|
||||
- Add the `--root-ca-file` flag to the controller manager:
|
||||
|
||||
- use the certificate automatically generated by the API server
|
||||
<br/>
|
||||
(it should be in `/var/run/kubernetes/apiserver.crt`)
|
||||
|
||||
- or generate a key pair and certificate for the API server and point to
|
||||
that certificate
|
||||
|
||||
- without that, you'll get certificate validation errors
|
||||
<br/>
|
||||
(because in our Pods, the `ca.crt` file used to validate the API server will be empty)
|
||||
|
||||
- Check the Cilium [without kube-proxy][ciliumwithoutkubeproxy] instructions
|
||||
|
||||
(make sure to pass the API server IP address and port!)
|
||||
|
||||
- Other pod-to-pod network implementations might also require additional steps
|
||||
|
||||
[ciliumwithoutkubeproxy]: https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#kubeproxy-free
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## About the API server certificate...
|
||||
|
||||
- In the previous sections, we've skipped API server certificate verification
|
||||
|
||||
- To generate a proper certificate, we need to include a `subjectAltName` extension
|
||||
|
||||
- And make sure that the CA includes the extension in the certificate
|
||||
|
||||
```bash
|
||||
openssl genrsa -out apiserver.key 4096
|
||||
|
||||
openssl req -new -key apiserver.key -subj /CN=kubernetes/ \
|
||||
-addext "subjectAltName = DNS:kubernetes.default.svc, \
|
||||
DNS:kubernetes.default, DNS:kubernetes, \
|
||||
DNS:localhost, DNS:polykube1" -out apiserver.csr
|
||||
|
||||
openssl x509 -req -in apiserver.csr -CAkey ca.key -CA ca.cert \
|
||||
-out apiserver.crt -copy_extensions copy
|
||||
```
|
||||
|
||||
???
|
||||
|
||||
:EN:- Connecting nodes and pods
|
||||
:FR:- Interconnecter les nœuds et les pods
|
||||
891
slides/k8s/dmuc-medium.md
Normal file
891
slides/k8s/dmuc-medium.md
Normal file
@@ -0,0 +1,891 @@
|
||||
# Building our own cluster (medium)
|
||||
|
||||
- This section assumes that you already went through
|
||||
|
||||
*“Building our own cluster (easy)”*
|
||||
|
||||
- In that section, we saw how to run each control plane component manually...
|
||||
|
||||
...but with an older version of Kubernetes (1.19)
|
||||
|
||||
- In this section, we're going to do something similar...
|
||||
|
||||
...but with recent versions of Kubernetes!
|
||||
|
||||
- Note: we won't need the lab environment of that previous section
|
||||
|
||||
(we're going to build a new cluster from scratch)
|
||||
|
||||
---
|
||||
|
||||
## What remains the same
|
||||
|
||||
- We'll use machines with Kubernetes binaries pre-downloaded
|
||||
|
||||
- We'll run individual components by hand
|
||||
|
||||
(etcd, API server, controller manager, scheduler, kubelet)
|
||||
|
||||
- We'll run on a single node
|
||||
|
||||
(but we'll be laying the groundwork to add more nodes)
|
||||
|
||||
- We'll get the cluster to the point where we can run and expose pods
|
||||
|
||||
---
|
||||
|
||||
## What's different
|
||||
|
||||
- We'll need to generate TLS keys and certificates
|
||||
|
||||
(because it's mandatory with recent versions of Kubernetes)
|
||||
|
||||
- Things will be *a little bit more* secure
|
||||
|
||||
(but still not 100% secure, far from it!)
|
||||
|
||||
- We'll use containerd instead of Docker
|
||||
|
||||
(you could probably try with CRI-O or another CRI engine, too)
|
||||
|
||||
- We'll need to set up CNI for networking
|
||||
|
||||
- *And we won't do everything as root this time (but we might use `sudo` a lot)*
|
||||
|
||||
---
|
||||
|
||||
## Our environment
|
||||
|
||||
- We will use the machine indicated as `polykube1`
|
||||
|
||||
- This machine:
|
||||
|
||||
- runs Ubuntu LTS
|
||||
|
||||
- has Kubernetes, etcd, and CNI binaries installed
|
||||
|
||||
- but nothing is running
|
||||
|
||||
---
|
||||
|
||||
## Checking our environment
|
||||
|
||||
- Let's make sure we have everything we need first
|
||||
|
||||
.lab[
|
||||
|
||||
- Log into the `polykube1` machine
|
||||
|
||||
- Check available versions:
|
||||
```bash
|
||||
etcd -version
|
||||
kube-apiserver --version
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## The plan
|
||||
|
||||
We'll follow the same methodology as for the "easy" section
|
||||
|
||||
1. Start API server
|
||||
|
||||
2. Interact with it (create Deployment and Service)
|
||||
|
||||
3. See what's broken
|
||||
|
||||
4. Fix it and go back to step 2 until it works!
|
||||
|
||||
---
|
||||
|
||||
## Dealing with multiple processes
|
||||
|
||||
- Again, we are going to start many processes
|
||||
|
||||
- Depending on what you're comfortable with, you can:
|
||||
|
||||
- open multiple windows and multiple SSH connections
|
||||
|
||||
- use a terminal multiplexer like screen or tmux
|
||||
|
||||
- put processes in the background with `&`
|
||||
<br/>(warning: log output might get confusing to read!)
|
||||
|
||||
---
|
||||
|
||||
## Starting API server
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to start the API server:
|
||||
```bash
|
||||
kube-apiserver
|
||||
# It will complain about permission to /var/run/kubernetes
|
||||
|
||||
sudo kube-apiserver
|
||||
# Now it will complain about a bunch of missing flags, including:
|
||||
# --etcd-servers
|
||||
# --service-account-issuer
|
||||
# --service-account-signing-key-file
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
Just like before, we'll need to start etcd.
|
||||
|
||||
But we'll also need some TLS keys!
|
||||
|
||||
---
|
||||
|
||||
## Generating TLS keys
|
||||
|
||||
- There are many ways to generate TLS keys (and certificates)
|
||||
|
||||
- A very popular and modern tool to do that is [cfssl]
|
||||
|
||||
- We're going to use the old-fashioned [openssl] CLI
|
||||
|
||||
- Feel free to use cfssl or any other tool if you prefer!
|
||||
|
||||
[cfssl]: https://github.com/cloudflare/cfssl#using-the-command-line-tool
|
||||
[openssl]: https://www.openssl.org/docs/man3.0/man1/
|
||||
|
||||
---
|
||||
|
||||
## How many keys do we need?
|
||||
|
||||
At the very least, we need the following two keys:
|
||||
|
||||
- ServiceAccount key pair
|
||||
|
||||
- API client key pair, aka "CA key"
|
||||
|
||||
(technically, we will need a *certificate* for that key pair)
|
||||
|
||||
But if we wanted to tighten the cluster security, we'd need many more...
|
||||
|
||||
---
|
||||
|
||||
## The other keys
|
||||
|
||||
These keys are not strictly necessary at this point:
|
||||
|
||||
- etcd key pair
|
||||
|
||||
*without that key, communication with etcd will be insecure*
|
||||
|
||||
- API server endpoint key pair
|
||||
|
||||
*the API server will generate this one automatically if we don't*
|
||||
|
||||
- kubelet key pair (used by API server to connect to kubelets)
|
||||
|
||||
*without that key, commands like kubectl logs/exec will be insecure*
|
||||
|
||||
---
|
||||
|
||||
## Would you like some auth with that?
|
||||
|
||||
If we want to enable authentication and authorization, we also need various API client key pairs signed by the "CA key" mentioned earlier. That would include (non-exhaustive list):
|
||||
|
||||
- controller manager key pair
|
||||
|
||||
- scheduler key pair
|
||||
|
||||
- in most cases: kube-proxy (or equivalent) key pair
|
||||
|
||||
- in most cases: key pairs for the nodes joining the cluster
|
||||
|
||||
(these might be generated through TLS bootstrap tokens)
|
||||
|
||||
- key pairs for users that will interact with the clusters
|
||||
|
||||
(unless another authentication mechanism like OIDC is used)
|
||||
|
||||
---
|
||||
|
||||
## Generating our keys and certificates
|
||||
|
||||
.lab[
|
||||
|
||||
- Generate the ServiceAccount key pair:
|
||||
```bash
|
||||
openssl genrsa -out sa.key 2048
|
||||
```
|
||||
|
||||
- Generate the CA key pair:
|
||||
```bash
|
||||
openssl genrsa -out ca.key 2048
|
||||
```
|
||||
|
||||
- Generate a self-signed certificate for the CA key:
|
||||
```bash
|
||||
openssl x509 -new -key ca.key -out ca.cert -subj /CN=kubernetes/
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Starting etcd
|
||||
|
||||
- This one is easy!
|
||||
|
||||
.lab[
|
||||
|
||||
- Start etcd:
|
||||
```bash
|
||||
etcd
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
Note: if you want a bit of extra challenge, you can try
|
||||
to generate the etcd key pair and use it.
|
||||
|
||||
(You will need to pass it to etcd and to the API server.)
|
||||
|
||||
---
|
||||
|
||||
## Starting API server
|
||||
|
||||
- We need to use the keys and certificate that we just generated
|
||||
|
||||
.lab[
|
||||
|
||||
- Start the API server:
|
||||
```bash
|
||||
sudo kube-apiserver \
|
||||
--etcd-servers=http://localhost:2379 \
|
||||
--service-account-signing-key-file=sa.key \
|
||||
--service-account-issuer=https://kubernetes \
|
||||
--service-account-key-file=sa.key \
|
||||
--client-ca-file=ca.cert
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
The API server should now start.
|
||||
|
||||
But can we really use it? 🤔
|
||||
|
||||
---
|
||||
|
||||
## Trying `kubectl`
|
||||
|
||||
- Let's try a simple `kubectl` command
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to list Namespaces:
|
||||
```bash
|
||||
kubectl get namespaces
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We're getting an error message like this one:
|
||||
|
||||
```
|
||||
The connection to the server localhost:8080 was refused -
|
||||
did you specify the right host or port?
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## What's going on?
|
||||
|
||||
- Recent versions of Kubernetes don't support unauthenticated API access
|
||||
|
||||
- The API server doesn't support listening on plain HTTP anymore
|
||||
|
||||
- `kubectl` still tries to connect to `localhost:8080` by default
|
||||
|
||||
- But there is nothing listening there
|
||||
|
||||
- Our API server listens on port 6443, using TLS
|
||||
|
||||
---
|
||||
|
||||
## Trying to access the API server
|
||||
|
||||
- Let's use `curl` first to confirm that everything works correctly
|
||||
|
||||
(and then we will move to `kubectl`)
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to connect with `curl`:
|
||||
```bash
|
||||
curl https://localhost:6443
|
||||
# This will fail because the API server certificate is unknown.
|
||||
```
|
||||
|
||||
- Try again, skipping certificate verification:
|
||||
```bash
|
||||
curl --insecure https://localhost:6443
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should now see an `Unauthorized` Kubernetes API error message.
|
||||
<br/>
|
||||
We need to authenticate with our key and certificate.
|
||||
|
||||
---
|
||||
|
||||
## Authenticating with the API server
|
||||
|
||||
- For the time being, we can use the CA key and cert directly
|
||||
|
||||
- In a real world scenario, we would *never* do that!
|
||||
|
||||
(because we don't want the CA key to be out there in the wild)
|
||||
|
||||
.lab[
|
||||
|
||||
- Try again, skipping cert verification, and using the CA key and cert:
|
||||
```bash
|
||||
curl --insecure --key ca.key --cert ca.cert https://localhost:6443
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should see a list of API routes.
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Doing it right
|
||||
|
||||
In the future, instead of using the CA key and certificate,
|
||||
we should generate a new key, and a certificate for that key,
|
||||
signed by the CA key.
|
||||
|
||||
Then we can use that new key and certificate to authenticate.
|
||||
|
||||
Example:
|
||||
|
||||
```
|
||||
### Generate a key pair
|
||||
openssl genrsa -out user.key
|
||||
|
||||
### Extract the public key
|
||||
openssl pkey -in user.key -out user.pub -pubout
|
||||
|
||||
### Generate a certificate signed by the CA key
|
||||
openssl x509 -new -key ca.key -force_pubkey user.pub -out user.cert \
|
||||
-subj /CN=kubernetes-user/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Writing a kubeconfig file
|
||||
|
||||
- We now want to use `kubectl` instead of `curl`
|
||||
|
||||
- We'll need to write a kubeconfig file for `kubectl`
|
||||
|
||||
- There are many ways to do that; here, we're going to use `kubectl config`
|
||||
|
||||
- We'll need to:
|
||||
|
||||
- set the "cluster" (API server endpoint)
|
||||
|
||||
- set the "credentials" (the key and certificate)
|
||||
|
||||
- set the "context" (referencing the cluster and credentials)
|
||||
|
||||
- use that context (make it the default that `kubectl` will use)
|
||||
|
||||
---
|
||||
|
||||
## Set the cluster
|
||||
|
||||
The "cluster" section holds the API server endpoint.
|
||||
|
||||
.lab[
|
||||
|
||||
- Set the API server endpoint:
|
||||
```bash
|
||||
kubectl config set-cluster polykube --server=https://localhost:6443
|
||||
```
|
||||
|
||||
- Don't verify the API server certificate:
|
||||
```bash
|
||||
kubectl config set-cluster polykube --insecure-skip-tls-verify
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Set the credentials
|
||||
|
||||
The "credentials" section can hold a TLS key and certificate, or a token, or configuration information for a plugin (for instance, when using AWS EKS or GCP GKE, they use a plugin).
|
||||
|
||||
.lab[
|
||||
|
||||
- Set the client key and certificate:
|
||||
```bash
|
||||
kubectl config set-credentials polykube \
|
||||
--client-key ca.key \
|
||||
--client-certificate ca.cert
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Set and use the context
|
||||
|
||||
The "context" section references the "cluster" and "credentials" that we defined earlier.
|
||||
|
||||
(It can also optionally reference a Namespace.)
|
||||
|
||||
.lab[
|
||||
|
||||
- Set the "context":
|
||||
```bash
|
||||
kubectl config set-context polykube --cluster polykube --user polykube
|
||||
```
|
||||
|
||||
- Set that context to be the default context:
|
||||
```bash
|
||||
kubectl config use-context polykube
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Review the kubeconfig file
|
||||
|
||||
The kubeconfig file should look like this:
|
||||
|
||||
.small[
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
clusters:
|
||||
- cluster:
|
||||
insecure-skip-tls-verify: true
|
||||
server: https://localhost:6443
|
||||
name: polykube
|
||||
contexts:
|
||||
- context:
|
||||
cluster: polykube
|
||||
user: polykube
|
||||
name: polykube
|
||||
current-context: polykube
|
||||
kind: Config
|
||||
preferences: {}
|
||||
users:
|
||||
- name: polykube
|
||||
user:
|
||||
client-certificate: /root/ca.cert
|
||||
client-key: /root/ca.key
|
||||
```
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Trying the kubeconfig file
|
||||
|
||||
- We should now be able to access our cluster's API!
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to list Namespaces:
|
||||
```bash
|
||||
kubectl get namespaces
|
||||
```
|
||||
]
|
||||
|
||||
This should show the classic `default`, `kube-system`, etc.
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Do we need `--client-ca-file` ?
|
||||
|
||||
Technically, we didn't need to specify the `--client-ca-file` flag!
|
||||
|
||||
But without that flag, no client can be authenticated.
|
||||
|
||||
Which means that we wouldn't be able to issue any API request!
|
||||
|
||||
---
|
||||
|
||||
## Running pods
|
||||
|
||||
- We can now try to create a Deployment
|
||||
|
||||
.lab[
|
||||
|
||||
- Create a Deployment:
|
||||
```bash
|
||||
kubectl create deployment blue --image=jpetazzo/color
|
||||
```
|
||||
|
||||
- Check the results:
|
||||
```bash
|
||||
kubectl get deployments,replicasets,pods
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
Our Deployment exists, but not the Replica Set or Pod.
|
||||
|
||||
We need to run the controller manager.
|
||||
|
||||
---
|
||||
|
||||
## Running the controller manager
|
||||
|
||||
- Previously, we used the `--master` flag to pass the API server address
|
||||
|
||||
- Now, we need to authenticate properly
|
||||
|
||||
- The simplest way at this point is probably to use the same kubeconfig file!
|
||||
|
||||
.lab[
|
||||
|
||||
- Start the controller manager:
|
||||
```bash
|
||||
kube-controller-manager --kubeconfig .kube/config
|
||||
```
|
||||
|
||||
- Check the results:
|
||||
```bash
|
||||
kubectl get deployments,replicasets,pods
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## What's next?
|
||||
|
||||
- Normally, the last commands showed us a Pod in `Pending` state
|
||||
|
||||
- We need two things to continue:
|
||||
|
||||
- the scheduler (to assign the Pod to a Node)
|
||||
|
||||
- a Node!
|
||||
|
||||
- We're going to run `kubelet` to register the Node with the cluster
|
||||
|
||||
---
|
||||
|
||||
## Running `kubelet`
|
||||
|
||||
- Let's try to run `kubelet` and see what happens!
|
||||
|
||||
.lab[
|
||||
|
||||
- Start `kubelet`:
|
||||
```bash
|
||||
sudo kubelet
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should see an error about connecting to `containerd.sock`.
|
||||
|
||||
We need to run a container engine!
|
||||
|
||||
(For instance, `containerd`.)
|
||||
|
||||
---
|
||||
|
||||
## Running `containerd`
|
||||
|
||||
- We need to install and start `containerd`
|
||||
|
||||
- You could try another engine if you wanted
|
||||
|
||||
(but there might be complications!)
|
||||
|
||||
.lab[
|
||||
|
||||
- Install `containerd`:
|
||||
```bash
|
||||
sudo apt-get install containerd
|
||||
```
|
||||
|
||||
- Start `containerd`:
|
||||
```bash
|
||||
sudo containerd
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Configuring `containerd`
|
||||
|
||||
Depending on how we install `containerd`, it might need a bit of extra configuration.
|
||||
|
||||
Watch for the following symptoms:
|
||||
|
||||
- `containerd` refuses to start
|
||||
|
||||
(rare, unless there is an *invalid* configuration)
|
||||
|
||||
- `containerd` starts but `kubelet` can't connect
|
||||
|
||||
(could be the case if the configuration disables the CRI socket)
|
||||
|
||||
- `containerd` starts and things work but Pods keep being killed
|
||||
|
||||
(may happen if there is a mismatch in the cgroups driver)
|
||||
|
||||
---
|
||||
|
||||
## Starting `kubelet` for good
|
||||
|
||||
- Now that `containerd` is running, `kubelet` should start!
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to start `kubelet`:
|
||||
```bash
|
||||
sudo kubelet
|
||||
```
|
||||
|
||||
- In another terminal, check if our Node is now visible:
|
||||
```bash
|
||||
sudo kubectl get nodes
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
`kubelet` should now start, but our Node doesn't show up in `kubectl get nodes`!
|
||||
|
||||
This is because without a kubeconfig file, `kubelet` runs in standalone mode:
|
||||
<br/>
|
||||
it will not connect to a Kubernetes API server, and will only start *static pods*.
|
||||
|
||||
---
|
||||
|
||||
## Passing the kubeconfig file
|
||||
|
||||
- Let's start `kubelet` again, with our kubeconfig file
|
||||
|
||||
.lab[
|
||||
|
||||
- Stop `kubelet` (e.g. with `Ctrl-C`)
|
||||
|
||||
- Restart it with the kubeconfig file:
|
||||
```bash
|
||||
sudo kubelet --kubeconfig .kube/config
|
||||
```
|
||||
|
||||
- Check our list of Nodes:
|
||||
```bash
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
This time, our Node should show up!
|
||||
|
||||
---
|
||||
|
||||
## Node readiness
|
||||
|
||||
- However, our Node shows up as `NotReady`
|
||||
|
||||
- If we wait a few minutes, the `kubelet` logs will tell us why:
|
||||
|
||||
*we're missing a CNI configuration!*
|
||||
|
||||
- As a result, the containers can't be connected to the network
|
||||
|
||||
- `kubelet` detects that and doesn't become `Ready` until this is fixed
|
||||
|
||||
---
|
||||
|
||||
## CNI configuration
|
||||
|
||||
- We need to provide a CNI configuration
|
||||
|
||||
- This is a file in `/etc/cni/net.d`
|
||||
|
||||
(the name of the file doesn't matter; the first file in lexicographic order will be used)
|
||||
|
||||
- Usually, when installing a "CNI plugin¹", this file gets installed automatically
|
||||
|
||||
- Here, we are going to write that file manually
|
||||
|
||||
.footnote[¹Technically, a *pod network*; typically running as a DaemonSet, which will install the file with a `hostPath` volume.]
|
||||
|
||||
---
|
||||
|
||||
## Our CNI configuration
|
||||
|
||||
Create the following file in e.g. `/etc/cni/net.d/kube.conf`:
|
||||
|
||||
```json
|
||||
{
|
||||
"cniVersion": "0.3.1",
|
||||
"name": "kube",
|
||||
"type": "bridge",
|
||||
"bridge": "cni0",
|
||||
"isDefaultGateway": true,
|
||||
"ipMasq": true,
|
||||
"hairpinMode": true,
|
||||
"ipam": {
|
||||
"type": "host-local",
|
||||
"subnet": "10.1.1.0/24"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
That's all we need - `kubelet` will detect and validate the file automatically!
|
||||
|
||||
---
|
||||
|
||||
## Checking our Node again
|
||||
|
||||
- After a short time (typically about 10 seconds) the Node should be `Ready`
|
||||
|
||||
.lab[
|
||||
|
||||
- Wait until the Node is `Ready`:
|
||||
```bash
|
||||
kubectl get nodes
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
If the Node doesn't show up as `Ready`, check the `kubelet` logs.
|
||||
|
||||
---
|
||||
|
||||
## What's next?
|
||||
|
||||
- At this point, we have a `Pending` Pod and a `Ready` Node
|
||||
|
||||
- All we need is the scheduler to bind the former to the latter
|
||||
|
||||
.lab[
|
||||
|
||||
- Run the scheduler:
|
||||
```bash
|
||||
kube-scheduler --kubeconfig .kube/config
|
||||
```
|
||||
|
||||
- Check that the Pod gets assigned to the Node and becomes `Running`:
|
||||
```bash
|
||||
kubectl get pods
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Check network access
|
||||
|
||||
- Let's check that we can connect to our Pod, and that the Pod can connect outside
|
||||
|
||||
.lab[
|
||||
|
||||
- Get the Pod's IP address:
|
||||
```bash
|
||||
kubectl get pods -o wide
|
||||
```
|
||||
|
||||
- Connect to the Pod (make sure to update the IP address):
|
||||
```bash
|
||||
curl `10.1.1.2`
|
||||
```
|
||||
|
||||
- Check that the Pod has external connectivity too:
|
||||
```bash
|
||||
kubectl exec `blue-xxxxxxxxxx-yyyyy` -- ping -c3 1.1
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Expose our Deployment
|
||||
|
||||
- We can now try to expose the Deployment and connect to the ClusterIP
|
||||
|
||||
.lab[
|
||||
|
||||
- Expose the Deployment:
|
||||
```bash
|
||||
kubectl expose deployment blue --port=80
|
||||
```
|
||||
|
||||
- Retrieve the ClusterIP:
|
||||
```bash
|
||||
kubectl get services
|
||||
```
|
||||
|
||||
- Try to connect to the ClusterIP:
|
||||
```bash
|
||||
curl `10.0.0.42`
|
||||
```
|
||||
]
|
||||
|
||||
At this point, it won't work - we need to run `kube-proxy`!
|
||||
|
||||
---
|
||||
|
||||
## Running `kube-proxy`
|
||||
|
||||
- We need to run `kube-proxy`
|
||||
|
||||
(also passing it our kubeconfig file)
|
||||
|
||||
.lab[
|
||||
|
||||
- Run `kube-proxy`:
|
||||
```bash
|
||||
sudo kube-proxy --kubeconfig .kube/config
|
||||
```
|
||||
|
||||
- Try again to connect to the ClusterIP:
|
||||
```bash
|
||||
curl `10.0.0.42`
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
This time, it should work.
|
||||
|
||||
---
|
||||
|
||||
## What's next?
|
||||
|
||||
- Scale up the Deployment, and check that load balancing works properly
|
||||
|
||||
- Enable RBAC, and generate individual certificates for each controller
|
||||
|
||||
(check the [certificate paths][certpath] section in the Kubernetes documentation
|
||||
for a detailed list of all the certificates and keys that are used by the
|
||||
control plane, and which flags are used by which components to configure them!)
|
||||
|
||||
- Add more nodes to the cluster
|
||||
|
||||
*Feel free to try these if you want to get additional hands-on experience!*
|
||||
|
||||
[certpath]: https://kubernetes.io/docs/setup/best-practices/certificates/#certificate-paths
|
||||
|
||||
???
|
||||
|
||||
:EN:- Setting up control plane certificates
|
||||
:EN:- Implementing a basic CNI configuration
|
||||
:FR:- Mettre en place les certificats du plan de contrôle
|
||||
:FR:- Réaliser une configuration CNI basique
|
||||
@@ -339,34 +339,12 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## Service catalog
|
||||
|
||||
- *Service catalog* is another extension mechanism
|
||||
|
||||
- It's not extending the Kubernetes API strictly speaking
|
||||
|
||||
(but it still provides new features!)
|
||||
|
||||
- It doesn't create new types; it uses:
|
||||
|
||||
- ClusterServiceBroker
|
||||
- ClusterServiceClass
|
||||
- ClusterServicePlan
|
||||
- ServiceInstance
|
||||
- ServiceBinding
|
||||
|
||||
- It uses the Open service broker API
|
||||
|
||||
---
|
||||
|
||||
## Documentation
|
||||
|
||||
- [Custom Resource Definitions: when to use them](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
|
||||
|
||||
- [Custom Resource Definitions: how to use them](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/)
|
||||
|
||||
- [Service Catalog](https://kubernetes.io/docs/concepts/extend-kubernetes/service-catalog/)
|
||||
|
||||
- [Built-in Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/)
|
||||
|
||||
- [Dynamic Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/)
|
||||
|
||||
508
slides/k8s/flux.md
Normal file
508
slides/k8s/flux.md
Normal file
@@ -0,0 +1,508 @@
|
||||
# FluxCD
|
||||
|
||||
- We're going to implement a basic GitOps workflow with Flux
|
||||
|
||||
- Pushing to `main` will automatically deploy to the clusters
|
||||
|
||||
- There will be two clusters (`dev` and `prod`)
|
||||
|
||||
- The two clusters will have similar (but slightly different) workloads
|
||||
|
||||
---
|
||||
|
||||
## Repository structure
|
||||
|
||||
This is (approximately) what we're going to do:
|
||||
|
||||
```
|
||||
@@INCLUDE[slides/k8s/gitopstree.txt]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Getting ready
|
||||
|
||||
- Let's make sure we have two clusters
|
||||
|
||||
- It's OK to use local clusters (kind, minikube...)
|
||||
|
||||
- We might run into resource limits, though
|
||||
|
||||
(pay attention to `Pending` pods!)
|
||||
|
||||
- We need to install the Flux CLI ([packages], [binaries])
|
||||
|
||||
- **Highly recommended:** set up CLI completion!
|
||||
|
||||
- Of course we'll need a Git service, too
|
||||
|
||||
(we're going to use GitHub here)
|
||||
|
||||
[packages]: https://fluxcd.io/flux/get-started/
|
||||
[binaries]: https://github.com/fluxcd/flux2/releases
|
||||
|
||||
---
|
||||
|
||||
## GitHub setup
|
||||
|
||||
- Generate a GitHub token:
|
||||
|
||||
https://github.com/settings/tokens/new
|
||||
|
||||
- Give it "repo" access
|
||||
|
||||
- This token will be used by the `flux bootstrap github` command later
|
||||
|
||||
- It will create a repository and configure it (SSH key...)
|
||||
|
||||
- The token can be revoked afterwards
|
||||
|
||||
---
|
||||
|
||||
## Flux bootstrap
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's set a few variables for convenience, and create our repository:
|
||||
```bash
|
||||
export GITHUB_TOKEN=...
|
||||
export GITHUB_USER=changeme
|
||||
export GITHUB_REPO=alsochangeme
|
||||
export FLUX_CLUSTER=dev
|
||||
|
||||
flux bootstrap github \
|
||||
--owner=$GITHUB_USER \
|
||||
--repository=$GITHUB_REPO \
|
||||
--branch=main \
|
||||
--path=./clusters/$FLUX_CLUSTER \
|
||||
--personal --public
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
Problems? Check the next slide!
|
||||
|
||||
---
|
||||
|
||||
## What could go wrong?
|
||||
|
||||
- `flux bootstrap` will create or update the repository on GitHub
|
||||
|
||||
- Then it will install Flux controllers to our cluster
|
||||
|
||||
- Then it waits for these controllers to be up and running and ready
|
||||
|
||||
- Check pod status in `flux-system`
|
||||
|
||||
- If pods are `Pending`, check that you have enough resources on your cluster
|
||||
|
||||
- For testing purposes, it should be fine to lower or remove Flux `requests`!
|
||||
|
||||
(but don't do that in production!)
|
||||
|
||||
- If anything goes wrong, don't worry, we can just re-run the bootstrap
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Idempotence
|
||||
|
||||
- It's OK to run that same `flux bootstrap` command multiple times!
|
||||
|
||||
- If the repository already exists, it will re-use it
|
||||
|
||||
(it won't destroy or empty it)
|
||||
|
||||
- If the path `./clusters/$FLUX_CLUSTER` already exists, it will update it
|
||||
|
||||
- It's totally fine to re-run `flux bootstrap` if something fails
|
||||
|
||||
- It's totally fine to run it multiple times on different clusters
|
||||
|
||||
- Or even to run it multiple times for the *same* cluster
|
||||
|
||||
(to reinstall Flux on that cluster after a cluster wipe / reinstall)
|
||||
|
||||
---
|
||||
|
||||
## What do we get?
|
||||
|
||||
- Let's look at what `flux bootstrap` installed on the cluster
|
||||
|
||||
.lab[
|
||||
|
||||
- Look inside the `flux-system` namespace:
|
||||
```bash
|
||||
kubectl get all --namespace flux-system
|
||||
```
|
||||
|
||||
- Look at `kustomizations` custom resources:
|
||||
```bash
|
||||
kubectl get kustomizations --all-namespaces
|
||||
```
|
||||
|
||||
- See what the `flux` CLI tells us:
|
||||
```bash
|
||||
flux get all
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Deploying with GitOps
|
||||
|
||||
- We'll need to add/edit files on the repository
|
||||
|
||||
- We can do it by using `git clone`, local edits, `git commit`, `git push`
|
||||
|
||||
- Or by editing online on the GitHub website
|
||||
|
||||
.lab[
|
||||
|
||||
- Create a manifest; for instance `clusters/dev/flux-system/blue.yaml`
|
||||
|
||||
- Add that manifest to `clusters/dev/flux-system/kustomization.yaml`
|
||||
|
||||
- Commit and push both changes to the repository
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Waiting for reconciliation
|
||||
|
||||
- Compare the git hash that we pushed and the one shown with `kubectl get gitrepositories`
|
||||
|
||||
- Option 1: wait for Flux to pick up the changes in the repository
|
||||
|
||||
(the default interval for git repositories is 1 minute, so that's fast)
|
||||
|
||||
- Option 2: use `flux reconcile source git flux-system`
|
||||
|
||||
(this puts an annotation on the appropriate resource, triggering an immediate check)
|
||||
|
||||
- Option 3: set up receiver webhooks
|
||||
|
||||
(so that git updates trigger immediate reconciliation)
|
||||
|
||||
---
|
||||
|
||||
## Checking progress
|
||||
|
||||
- `flux logs`
|
||||
|
||||
- `kubectl get gitrepositories --all-namespaces`
|
||||
|
||||
- `kubectl get kustomizations --all-namespaces`
|
||||
|
||||
---
|
||||
|
||||
## Did it work?
|
||||
|
||||
--
|
||||
|
||||
- No!
|
||||
|
||||
--
|
||||
|
||||
- Why?
|
||||
|
||||
--
|
||||
|
||||
- We need to indicate the namespace where the app should be deployed
|
||||
|
||||
- Either in the YAML manifests
|
||||
|
||||
- Or in the `kustomization` custom resource
|
||||
|
||||
(using field `spec.targetNamespace`)
|
||||
|
||||
- Add the namespace to the manifest and try again!
|
||||
|
||||
---
|
||||
|
||||
## Adding an app in a reusable way
|
||||
|
||||
- Let's see a technique to add a whole app
|
||||
|
||||
(with multiple resource manifests)
|
||||
|
||||
- We want to minimize code repetition
|
||||
|
||||
(i.e. easy to add on multiple clusters with minimal changes)
|
||||
|
||||
---
|
||||
|
||||
## The plan
|
||||
|
||||
- Add the app manifests in a directory
|
||||
|
||||
(e.g.: `apps/myappname/manifests`)
|
||||
|
||||
- Create a kustomization manifest for the app and its namespace
|
||||
|
||||
(e.g.: `apps/myappname/flux.yaml`)
|
||||
|
||||
- The kustomization manifest will refer to the app manifest
|
||||
|
||||
- Add the kustomization manifest to the top-level `flux-system` kustomization
|
||||
|
||||
---
|
||||
|
||||
## Creating the manifests
|
||||
|
||||
- All commands below should be executed at the root of the repository
|
||||
|
||||
.lab[
|
||||
|
||||
- Put application manifests in their directory:
|
||||
```bash
|
||||
mkdir -p apps/dockercoins/manifests
|
||||
cp ~/container.training/k8s/dockercoins.yaml apps/dockercoins/manifests/
|
||||
```
|
||||
|
||||
- Create kustomization manifest:
|
||||
```bash
|
||||
flux create kustomization dockercoins \
|
||||
--source=GitRepository/flux-system \
|
||||
--path=./apps/dockercoins/manifests/ \
|
||||
--target-namespace=dockercoins \
|
||||
--prune=true --export > apps/dockercoins/flux.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Creating the target namespace
|
||||
|
||||
- When deploying *helm releases*, it is possible to automatically create the namespace
|
||||
|
||||
- When deploying *kustomizations*, we need to create it explicitly
|
||||
|
||||
- Let's put the namespace with the kustomization manifest
|
||||
|
||||
(so that the whole app can be mediated through a single manifest)
|
||||
|
||||
.lab[
|
||||
|
||||
- Add the target namespace to the kustomization manifest:
|
||||
```bash
|
||||
echo "---
|
||||
kind: Namespace
|
||||
apiVersion: v1
|
||||
metadata:
|
||||
name: dockercoins" >> apps/dockercoins/flux.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Linking the kustomization manifest
|
||||
|
||||
- Edit `clusters/dev/flux-system/kustomization.yaml`
|
||||
|
||||
- Add a line to reference the kustomization manifest that we created:
|
||||
```yaml
|
||||
- ../../../apps/dockercoins/flux.yaml
|
||||
```
|
||||
|
||||
- `git add` our manifests, `git commit`, `git push`
|
||||
|
||||
(check with `git status` that we haven't forgotten anything!)
|
||||
|
||||
- `flux reconcile` or wait for the changes to be picked up
|
||||
|
||||
---
|
||||
|
||||
## Installing with Helm
|
||||
|
||||
- We're going to see two different workflows:
|
||||
|
||||
- installing a third-party chart
|
||||
<br/>
|
||||
(e.g. something we found on the Artifact Hub)
|
||||
|
||||
- installing one of our own charts
|
||||
<br/>
|
||||
(e.g. a chart we authored ourselves)
|
||||
|
||||
- The procedures are very similar
|
||||
|
||||
---
|
||||
|
||||
## Installing from a public Helm repository
|
||||
|
||||
- Let's install [kube-prometheus-stack][kps]
|
||||
|
||||
.lab[
|
||||
|
||||
- Create the Flux manifests:
|
||||
```bash
|
||||
mkdir -p apps/kube-prometheus-stack
|
||||
flux create source helm kube-prometheus-stack \
|
||||
--url=https://prometheus-community.github.io/helm-charts \
|
||||
--export >> apps/kube-prometheus-stack/flux.yaml
|
||||
flux create helmrelease kube-prometheus-stack \
|
||||
--source=HelmRepository/kube-prometheus-stack \
|
||||
--chart=kube-prometheus-stack --release-name=kube-prometheus-stack \
|
||||
--target-namespace=kube-prometheus-stack --create-target-namespace \
|
||||
--export >> apps/kube-prometheus-stack/flux.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
[kps]: https://artifacthub.io/packages/helm/prometheus-community/kube-prometheus-stack
|
||||
|
||||
---
|
||||
|
||||
## Enable the app
|
||||
|
||||
- Just like before, link the manifest from the top-level kustomization
|
||||
|
||||
(`flux-system` in namespace `flux-system`)
|
||||
|
||||
- `git add` / `git commit` / `git push`
|
||||
|
||||
- We should now have a Prometheus+Grafana observability stack!
|
||||
|
||||
---
|
||||
|
||||
## Installing from a Helm chart in a git repo
|
||||
|
||||
- In this example, the chart will be in the same repo
|
||||
|
||||
- In the real world, it will typically be in a different repo!
|
||||
|
||||
.lab[
|
||||
|
||||
- Generate a basic Helm chart:
|
||||
```bash
|
||||
mkdir -p charts
|
||||
helm create charts/myapp
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
(This generates a chart which installs NGINX. A lot of things can be customized, though.)
|
||||
|
||||
---
|
||||
|
||||
## Creating the Flux manifests
|
||||
|
||||
- The invocation is very similar to our first example
|
||||
|
||||
.lab[
|
||||
|
||||
- Generate the Flux manifest for the Helm release:
|
||||
```bash
|
||||
mkdir apps/myapp
|
||||
flux create helmrelease myapp \
|
||||
--source=GitRepository/flux-system \
|
||||
--chart=charts/myapp \
|
||||
--target-namespace=myapp --create-target-namespace \
|
||||
--export > apps/myapp/flux.yaml
|
||||
```
|
||||
|
||||
- Add a reference to that manifest to the top-level kustomization
|
||||
|
||||
- `git add` / `git commit` / `git push` the chart, manifest, and kustomization
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Passing values
|
||||
|
||||
- We can also configure our Helm releases with values
|
||||
|
||||
- Using an existing `myvalues.yaml` file:
|
||||
|
||||
`flux create helmrelease ... --values=myvalues.yaml`
|
||||
|
||||
- Referencing an existing ConfigMap or Secret with a `values.yaml` key:
|
||||
|
||||
`flux create helmrelease ... --values-from=ConfigMap/myapp`
|
||||
|
||||
---
|
||||
|
||||
## Gotchas
|
||||
|
||||
- When creating a HelmRelease using a chart stored in a git repository, you must:
|
||||
|
||||
- either bump the chart version (in `Chart.yaml`) after each change,
|
||||
|
||||
- or set `spec.chart.spec.reconcileStrategy` to `Revision`
|
||||
|
||||
- Why?
|
||||
|
||||
- Flux installs helm releases using packaged artifacts
|
||||
|
||||
- Artifacts are updated only when the Helm chart version changes
|
||||
|
||||
- Unless `reconcileStrategy` is set to `Revision` (instead of the default `ChartVersion`)
|
||||
|
||||
---
|
||||
|
||||
## More gotchas
|
||||
|
||||
- There is a bug in Flux that prevents using identical subcharts with aliases
|
||||
|
||||
- See [fluxcd/flux2#2505][flux2505] for details
|
||||
|
||||
[flux2505]: https://github.com/fluxcd/flux2/discussions/2505
|
||||
|
||||
---
|
||||
|
||||
## Things that we didn't talk about...
|
||||
|
||||
- Bucket sources
|
||||
|
||||
- Image automation controller
|
||||
|
||||
- Image reflector controller
|
||||
|
||||
- And more!
|
||||
|
||||
???
|
||||
|
||||
:EN:- Implementing gitops with Flux
|
||||
:FR:- Workflow gitops avec Flux
|
||||
|
||||
<!--
|
||||
|
||||
helm upgrade --install --repo https://dl.gitea.io/charts --namespace gitea --create-namespace gitea gitea \
|
||||
--set persistence.enabled=false \
|
||||
--set redis-cluster.enabled=false \
|
||||
--set postgresql-ha.enabled=false \
|
||||
--set postgresql.enabled=true \
|
||||
--set gitea.config.session.PROVIDER=db \
|
||||
--set gitea.config.cache.ADAPTER=memory \
|
||||
#
|
||||
|
||||
### Bootstrap Flux controllers
|
||||
|
||||
```bash
|
||||
mkdir -p flux/flux-system
|
||||
flux install --export > flux/flux-system/gotk-components.yaml
|
||||
kubectl apply -f flux/flux-system/gotk-components.yaml
|
||||
```
|
||||
|
||||
### Bootstrap GitRepository/Kustomization
|
||||
|
||||
```bash
|
||||
export REPO_URL="<gitlab_url>" DEPLOY_USERNAME="<username>"
|
||||
read -s DEPLOY_TOKEN
|
||||
flux create secret git flux-system --url="${REPO_URL}" --username="${DEPLOY_USERNAME}" --password="${DEPLOY_TOKEN}"
|
||||
flux create source git flux-system --url=$REPO_URL --branch=main --secret-ref flux-system --ignore-paths='/*,!/flux' --export > flux/flux-system/gotk-sync.yaml
|
||||
flux create kustomization flux-system --source=GitRepository/flux-system --path="./flux" --prune=true --export >> flux/flux-system/gotk-sync.yaml
|
||||
|
||||
git add flux/ && git commit -m 'feat: Setup Flux' flux/ && git push
|
||||
kubectl apply -f flux/flux-system/gotk-sync.yaml
|
||||
```
|
||||
|
||||
-->
|
||||
|
||||
13
slides/k8s/gitopstree.txt
Normal file
13
slides/k8s/gitopstree.txt
Normal file
@@ -0,0 +1,13 @@
|
||||
├── charts/ <--- could also be in separate app repos
|
||||
│ ├── dockercoins/
|
||||
│ └── color/
|
||||
├── apps/ <--- YAML manifests for GitOps resources
|
||||
│ ├── dockercoins/ (might reference the "charts" above,
|
||||
│ ├── blue/ and/or include environment-specific
|
||||
│ ├── green/ manifests to create e.g. namespaces,
|
||||
│ ├── kube-prometheus-stack/ configmaps, secrets...)
|
||||
│ ├── cert-manager/
|
||||
│ └── traefik/
|
||||
└── clusters/ <--- per-cluster; will typically reference
|
||||
├── prod/ the "apps" above, possibly extending
|
||||
└── dev/ or adding configuration resources too
|
||||
@@ -1,4 +1,4 @@
|
||||
# Git-based workflows
|
||||
# Git-based workflows (GitOps)
|
||||
|
||||
- Deploying with `kubectl` has downsides:
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
|
||||
- These resources have a perfect YAML representation
|
||||
|
||||
- All we do is manipulating these YAML representations
|
||||
- All we do is manipulate these YAML representations
|
||||
|
||||
(`kubectl run` generates a YAML file that gets applied)
|
||||
|
||||
@@ -34,229 +34,232 @@
|
||||
|
||||
- control who can push to which branches
|
||||
|
||||
- have formal review processes, pull requests ...
|
||||
- have formal review processes, pull requests, test gates...
|
||||
|
||||
---
|
||||
|
||||
## Enabling git-based workflows
|
||||
|
||||
- There are a few tools out there to help us do that
|
||||
- There are many tools out there to help us do that, with different approaches
|
||||
|
||||
- We'll see demos of two of them: [Flux] and [Gitkube]
|
||||
- "Git host centric" approach: GitHub Actions, GitLab...
|
||||
|
||||
- There are *many* other tools, some of them with even more features
|
||||
*the workflows/actions are directly initiated by the git platform*
|
||||
|
||||
- There are also *many* integrations with popular CI/CD systems
|
||||
- "Kubernetes cluster centric" approach: [ArgoCD], [FluxCD][Flux]...
|
||||
|
||||
(e.g.: GitLab, Jenkins, ...)
|
||||
*controllers run on our clusters and trigger on repo updates*
|
||||
|
||||
[Flux]: https://www.weave.works/oss/flux/
|
||||
[Gitkube]: https://gitkube.sh/
|
||||
- This is not an exhaustive list (see also: Jenkins)
|
||||
|
||||
- We're going to talk mostly about "Kubernetes cluster centric" approaches here
|
||||
|
||||
[ArgoCD]: https://argoproj.github.io/cd/
|
||||
[Flux]: https://fluxcd.io/
|
||||
|
||||
---
|
||||
|
||||
## Flux overview
|
||||
## The road to production
|
||||
|
||||
- We put our Kubernetes resources as YAML files in a git repository
|
||||
In no specific order, we need to at least:
|
||||
|
||||
- Flux polls that repository regularly (every 5 minutes by default)
|
||||
- Choose a tool
|
||||
|
||||
- The resources described by the YAML files are created/updated automatically
|
||||
- Choose a cluster / app / namespace layout
|
||||
<br/>
|
||||
(one cluster per app, different clusters for prod/staging...)
|
||||
|
||||
- Changes are made by updating the code in the repository
|
||||
- Choose a repository layout
|
||||
<br/>
|
||||
(different repositories, directories, branches per app, env, cluster...)
|
||||
|
||||
- Choose an installation / bootstrap method
|
||||
|
||||
- Choose how new apps / environments / versions will be deployed
|
||||
|
||||
- Choose how new images will be built
|
||||
|
||||
---
|
||||
|
||||
## Preparing a repository for Flux
|
||||
## Flux vs ArgoCD (1/2)
|
||||
|
||||
- We need a repository with Kubernetes YAML files
|
||||
- Flux:
|
||||
|
||||
- I have one: https://github.com/jpetazzo/kubercoins
|
||||
- fancy setup with an (optional) dedicated `flux bootstrap` command
|
||||
<br/>
|
||||
(with support for specific git providers, repo creation...)
|
||||
|
||||
- Fork it to your GitHub account
|
||||
- deploying an app requires multiple CRDs
|
||||
<br/>
|
||||
(Kustomization, HelmRelease, GitRepository...)
|
||||
|
||||
- Create a new branch in your fork; e.g. `prod`
|
||||
- supports Helm charts, Kustomize, raw YAML
|
||||
|
||||
(e.g. with "branch" dropdown through the GitHub web UI)
|
||||
- ArgoCD:
|
||||
|
||||
- This is the branch that we are going to use for deployment
|
||||
- simple setup (just apply YAMLs / install Helm chart)
|
||||
|
||||
- fewer CRDs (basic workflow can be implemented with a single "Application" resource)
|
||||
|
||||
- supports Helm charts, Jsonnet, Kustomize, raw YAML, and arbitrary plugins
|
||||
|
||||
---
|
||||
|
||||
## Setting up Flux with kustomize
|
||||
## Flux vs ArgoCD (2/2)
|
||||
|
||||
- Clone the Flux repository:
|
||||
```bash
|
||||
git clone https://github.com/fluxcd/flux
|
||||
cd flux
|
||||
```
|
||||
- Flux:
|
||||
|
||||
- Edit `deploy/flux-deployment.yaml`
|
||||
- sync interval is configurable per app
|
||||
- no web UI out of the box
|
||||
- CLI relies on Kubernetes API access
|
||||
- CLI can easily generate custom resource manifests (with `--export`)
|
||||
- self-hosted (flux controllers are managed by flux itself by default)
|
||||
- one flux instance manages a single cluster
|
||||
|
||||
- Change the `--git-url` and `--git-branch` parameters:
|
||||
```yaml
|
||||
- --git-url=git@github.com:your-git-username/kubercoins
|
||||
- --git-branch=prod
|
||||
```
|
||||
- ArgoCD:
|
||||
|
||||
- Apply all the YAML:
|
||||
```bash
|
||||
kubectl apply -k deploy/
|
||||
```
|
||||
- sync interval is configured globally
|
||||
- comes with a web UI
|
||||
- CLI can use Kubernetes API or separate API and authentication system
|
||||
- one ArgoCD instance can manage multiple clusters
|
||||
|
||||
---
|
||||
|
||||
## Setting up Flux with Helm
|
||||
## Cluster, app, namespace layout
|
||||
|
||||
- Add Flux helm repo:
|
||||
```bash
|
||||
helm repo add fluxcd https://charts.fluxcd.io
|
||||
```
|
||||
- One cluster per app, different namespaces for environments?
|
||||
|
||||
- Install Flux:
|
||||
```bash
|
||||
kubectl create namespace flux
|
||||
helm upgrade --install flux \
|
||||
--set git.url=git@github.com:your-git-username/kubercoins \
|
||||
--set git.branch=prod \
|
||||
--namespace flux \
|
||||
fluxcd/flux
|
||||
```
|
||||
- One cluster per environment, different namespaces for apps?
|
||||
|
||||
- Everything on a single cluster? One cluster per combination?
|
||||
|
||||
- Something in between:
|
||||
|
||||
- prod cluster, database cluster, dev/staging/etc cluster
|
||||
|
||||
- prod+db cluster per app, shared dev/staging/etc cluster
|
||||
|
||||
- And more!
|
||||
|
||||
Note: this decision isn't really tied to GitOps!
|
||||
|
||||
---
|
||||
|
||||
## Allowing Flux to access the repository
|
||||
## Repository layout
|
||||
|
||||
- When it starts, Flux generates an SSH key
|
||||
So many different possibilities!
|
||||
|
||||
- Display that key:
|
||||
```bash
|
||||
kubectl -n flux logs deployment/flux | grep identity.pub | cut -d '"' -f2
|
||||
```
|
||||
- Source repos
|
||||
|
||||
- Then add that key to the repository, giving it **write** access
|
||||
- Cluster/infra repos/branches/directories
|
||||
|
||||
(some Flux features require write access)
|
||||
- "Deployment" repos (with manifests, charts)
|
||||
|
||||
- After a minute or so, DockerCoins will be deployed to the current namespace
|
||||
- Different repos/branches/directories for environments
|
||||
|
||||
🤔 How to decide?
|
||||
|
||||
---
|
||||
|
||||
## Making changes
|
||||
## Permissions
|
||||
|
||||
- Make changes (on the `prod` branch), e.g. change `replicas` in `worker`
|
||||
- Different teams/companies = different repos
|
||||
|
||||
- After a few minutes, the changes will be picked up by Flux and applied
|
||||
- separate platform team → separate "infra" vs "apps" repos
|
||||
|
||||
- teams working on different apps → different repos per app
|
||||
|
||||
- Branches can be "protected" (`production`, `main`...)
|
||||
|
||||
(don't need separate repos for separate environments)
|
||||
|
||||
- Directories will typically have the same permissions
|
||||
|
||||
- Managing directories is easier than branches
|
||||
|
||||
- But branches are more "powerful" (cherrypicking, rebasing...)
|
||||
|
||||
---
|
||||
|
||||
## Other features
|
||||
## Resource hierarchy
|
||||
|
||||
- Flux can keep a list of all the tags of all the images we're running
|
||||
- Git-based deployments are managed by Kubernetes resources
|
||||
|
||||
- The `fluxctl` tool can show us if we're running the latest images
|
||||
(e.g. Kustomization, HelmRelease with Flux; Application with ArgoCD)
|
||||
|
||||
- We can also "automate" a resource (i.e. automatically deploy new images)
|
||||
- We will call these resources "GitOps resources"
|
||||
|
||||
- And much more!
|
||||
- These resources need to be managed like any other Kubernetes resource
|
||||
|
||||
(YAML manifests, Kustomizations, Helm charts)
|
||||
|
||||
- They can be managed with Git workflows too!
|
||||
|
||||
---
|
||||
|
||||
## Gitkube overview
|
||||
## Cluster / infra management
|
||||
|
||||
- We put our Kubernetes resources as YAML files in a git repository
|
||||
- How do we provision clusters?
|
||||
|
||||
- Gitkube is a git server (or "git remote")
|
||||
- Manual "one-shot" provisioning (CLI, web UI...)
|
||||
|
||||
- After making changes to the repository, we push to Gitkube
|
||||
- Automation with Terraform, Ansible...
|
||||
|
||||
- Gitkube applies the resources to the cluster
|
||||
- Kubernetes-driven systems (Crossplane, CAPI)
|
||||
|
||||
- Infrastructure can also be managed with GitOps
|
||||
|
||||
---
|
||||
|
||||
## Setting up Gitkube
|
||||
## Example 1
|
||||
|
||||
- Install the CLI:
|
||||
```bash
|
||||
sudo curl -L -o /usr/local/bin/gitkube \
|
||||
https://github.com/hasura/gitkube/releases/download/v0.2.1/gitkube_linux_amd64
|
||||
sudo chmod +x /usr/local/bin/gitkube
|
||||
```
|
||||
- Managed with YAML/Charts:
|
||||
|
||||
- Install Gitkube on the cluster:
|
||||
```bash
|
||||
gitkube install --expose ClusterIP
|
||||
```
|
||||
- core components (CNI, CSI, Ingress, logging, monitoring...)
|
||||
|
||||
- GitOps controllers
|
||||
|
||||
- critical application foundations (database operator, databases)
|
||||
|
||||
- GitOps manifests
|
||||
|
||||
- Managed with GitOps:
|
||||
|
||||
- applications
|
||||
|
||||
- staging databases
|
||||
|
||||
---
|
||||
|
||||
## Creating a Remote
|
||||
## Example 2
|
||||
|
||||
- Gitkube provides a new type of API resource: *Remote*
|
||||
- Managed with YAML/Charts:
|
||||
|
||||
(this is using a mechanism called Custom Resource Definitions or CRD)
|
||||
- essential components (CNI, CoreDNS)
|
||||
|
||||
- Create and apply a YAML file containing the following manifest:
|
||||
```yaml
|
||||
apiVersion: gitkube.sh/v1alpha1
|
||||
kind: Remote
|
||||
metadata:
|
||||
name: example
|
||||
spec:
|
||||
authorizedKeys:
|
||||
- `ssh-rsa AAA...`
|
||||
manifests:
|
||||
path: "."
|
||||
```
|
||||
- initial installation of GitOps controllers
|
||||
|
||||
(replace the `ssh-rsa AAA...` section with the content of `~/.ssh/id_rsa.pub`)
|
||||
- Managed with GitOps:
|
||||
|
||||
- upgrades of GitOps controllers
|
||||
|
||||
- core components (CSI, Ingress, logging, monitoring...)
|
||||
|
||||
- operators, databases
|
||||
|
||||
- more GitOps manifests for applications!
|
||||
|
||||
---
|
||||
|
||||
## Pushing to our remote
|
||||
## Concrete example
|
||||
|
||||
- Get the `gitkubed` IP address:
|
||||
```bash
|
||||
kubectl -n kube-system get svc gitkubed
|
||||
IP=$(kubectl -n kube-system get svc gitkubed -o json |
|
||||
jq -r .spec.clusterIP)
|
||||
```
|
||||
- Source code repository (not shown here)
|
||||
|
||||
- Get ourselves a sample repository with resource YAML files:
|
||||
```bash
|
||||
git clone git://github.com/jpetazzo/kubercoins
|
||||
cd kubercoins
|
||||
```
|
||||
- Infrastructure repository (shown below), single branch
|
||||
|
||||
- Add the remote and push to it:
|
||||
```bash
|
||||
git remote add k8s ssh://default-example@$IP/~/git/default-example
|
||||
git push k8s master
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Making changes
|
||||
|
||||
- Edit a local file
|
||||
|
||||
- Commit
|
||||
|
||||
- Push!
|
||||
|
||||
- Make sure that you push to the `k8s` remote
|
||||
|
||||
---
|
||||
|
||||
## Other features
|
||||
|
||||
- Gitkube can also build container images for us
|
||||
|
||||
(see the [documentation](https://github.com/hasura/gitkube/blob/master/docs/remote.md) for more details)
|
||||
|
||||
- Gitkube can also deploy Helm charts
|
||||
|
||||
(instead of raw YAML files)
|
||||
```
|
||||
@@INCLUDE[slides/k8s/gitopstree.txt]
|
||||
```
|
||||
|
||||
???
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
- instructions indicating to users "please tweak this and that in the YAML"
|
||||
|
||||
- That's where using something like
|
||||
[CUE](https://github.com/cuelang/cue/blob/v0.3.2/doc/tutorial/kubernetes/README.md),
|
||||
[CUE](https://github.com/cue-labs/cue-by-example/tree/main/003_kubernetes_tutorial),
|
||||
[Kustomize](https://kustomize.io/),
|
||||
or [Helm](https://helm.sh/) can help!
|
||||
|
||||
@@ -86,8 +86,6 @@
|
||||
|
||||
- On April 30th 2020, Helm was the 10th project to *graduate* within the CNCF
|
||||
|
||||
🎉
|
||||
|
||||
(alongside Containerd, Prometheus, and Kubernetes itself)
|
||||
|
||||
- This is an acknowledgement by the CNCF for projects that
|
||||
@@ -99,6 +97,8 @@
|
||||
- See [CNCF announcement](https://www.cncf.io/announcement/2020/04/30/cloud-native-computing-foundation-announces-helm-graduation/)
|
||||
and [Helm announcement](https://helm.sh/blog/celebrating-helms-cncf-graduation/)
|
||||
|
||||
- In other words: Helm is here to stay
|
||||
|
||||
---
|
||||
|
||||
## Helm concepts
|
||||
@@ -173,11 +173,13 @@ or `apt` tools).
|
||||
|
||||
- Helm 3 doesn't use `tiller` at all, making it simpler (yay!)
|
||||
|
||||
- If you see references to `tiller` in a tutorial, documentation... that doc is obsolete!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## With or without `tiller`
|
||||
## What was the problem with `tiller`?
|
||||
|
||||
- With Helm 3:
|
||||
|
||||
@@ -193,9 +195,7 @@ class: extra-details
|
||||
|
||||
- This indirect model caused significant permissions headaches
|
||||
|
||||
(`tiller` required very broad permissions to function)
|
||||
|
||||
- `tiller` was removed in Helm 3 to simplify the security aspects
|
||||
- It also made it more complicated to embed Helm in other tools
|
||||
|
||||
---
|
||||
|
||||
@@ -222,59 +222,6 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Only if using Helm 2 ...
|
||||
|
||||
- We need to install Tiller and give it some permissions
|
||||
|
||||
- Tiller is composed of a *service* and a *deployment* in the `kube-system` namespace
|
||||
|
||||
- They can be managed (installed, upgraded...) with the `helm` CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Deploy Tiller:
|
||||
```bash
|
||||
helm init
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
At the end of the install process, you will see:
|
||||
|
||||
```
|
||||
Happy Helming!
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Only if using Helm 2 ...
|
||||
|
||||
- Tiller needs permissions to create Kubernetes resources
|
||||
|
||||
- In a more realistic deployment, you might create per-user or per-team
|
||||
service accounts, roles, and role bindings
|
||||
|
||||
.lab[
|
||||
|
||||
- Grant `cluster-admin` role to `kube-system:default` service account:
|
||||
```bash
|
||||
kubectl create clusterrolebinding add-on-cluster-admin \
|
||||
--clusterrole=cluster-admin --serviceaccount=kube-system:default
|
||||
```
|
||||
|
||||
|
||||
]
|
||||
|
||||
(Defining the exact roles and permissions on your cluster requires
|
||||
a deeper knowledge of Kubernetes' RBAC model. The command above is
|
||||
fine for personal and development clusters.)
|
||||
|
||||
---
|
||||
|
||||
## Charts and repositories
|
||||
|
||||
- A *repository* (or repo in short) is a collection of charts
|
||||
@@ -293,27 +240,7 @@ fine for personal and development clusters.)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## How to find charts, the old way
|
||||
|
||||
- Helm 2 came with one pre-configured repo, the "stable" repo
|
||||
|
||||
(located at https://charts.helm.sh/stable)
|
||||
|
||||
- Helm 3 doesn't have any pre-configured repo
|
||||
|
||||
- The "stable" repo mentioned above is now being deprecated
|
||||
|
||||
- The new approach is to have fully decentralized repos
|
||||
|
||||
- Repos can be indexed in the Artifact Hub
|
||||
|
||||
(which supersedes the Helm Hub)
|
||||
|
||||
---
|
||||
|
||||
## How to find charts, the new way
|
||||
## How to find charts
|
||||
|
||||
- Go to the [Artifact Hub](https://artifacthub.io/packages/search?kind=0) (https://artifacthub.io)
|
||||
|
||||
@@ -409,24 +336,6 @@ Note: it is also possible to install directly a chart, with `--repo https://...`
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Searching and installing with Helm 2
|
||||
|
||||
- Helm 2 doesn't have support for the Helm Hub
|
||||
|
||||
- The `helm search` command only takes a search string argument
|
||||
|
||||
(e.g. `helm search juice-shop`)
|
||||
|
||||
- With Helm 2, the name is optional:
|
||||
|
||||
`helm install juice/juice-shop` will automatically generate a name
|
||||
|
||||
`helm install --name my-juice-shop juice/juice-shop` will specify a name
|
||||
|
||||
---
|
||||
|
||||
## Viewing resources of a release
|
||||
|
||||
- This specific chart labels all its resources with a `release` label
|
||||
@@ -542,11 +451,11 @@ All unspecified values will take the default values defined in the chart.
|
||||
|
||||
:EN:- Helm concepts
|
||||
:EN:- Installing software with Helm
|
||||
:EN:- Helm 2, Helm 3, and the Helm Hub
|
||||
:EN:- Finding charts on the Artifact Hub
|
||||
|
||||
:FR:- Fonctionnement général de Helm
|
||||
:FR:- Installer des composants via Helm
|
||||
:FR:- Helm 2, Helm 3, et le *Helm Hub*
|
||||
:FR:- Trouver des *charts* sur *Artifact Hub*
|
||||
|
||||
:T: Getting started with Helm and its concepts
|
||||
|
||||
|
||||
@@ -100,7 +100,7 @@ class: extra-details
|
||||
|
||||
- We present 3 methods to obtain a certificate
|
||||
|
||||
- We suggest that we use method 1 (self-signed certificate)
|
||||
- We suggest that you use method 1 (self-signed certificate)
|
||||
|
||||
- it's the simplest and fastest method
|
||||
|
||||
|
||||
@@ -572,7 +572,7 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
|
||||
- Create a prefix match rule for the `blue` service:
|
||||
```bash
|
||||
kubectl create ingress bluestar --rule=/blue*:blue:80
|
||||
kubectl create ingress bluestar --rule=/blue*=blue:80
|
||||
```
|
||||
|
||||
- Check that it works:
|
||||
|
||||
@@ -109,7 +109,7 @@ class: extra-details
|
||||
|
||||
- Install Go
|
||||
|
||||
(on our VMs: `sudo snap install go --classic`)
|
||||
(on our VMs: `sudo snap install go --classic` or `sudo apk add go`)
|
||||
|
||||
- Install kubebuilder
|
||||
|
||||
@@ -250,7 +250,7 @@ spec:
|
||||
|
||||
## Loading an object
|
||||
|
||||
Open `controllers/machine_controller.go`.
|
||||
Open `internal/controllers/machine_controller.go`.
|
||||
|
||||
Add that code in the `Reconcile` method, at the `TODO(user)` location:
|
||||
|
||||
@@ -505,7 +505,7 @@ if machine.Spec.SwitchPosition != "down" {
|
||||
changeAt := machine.Status.SeenAt.Time.Add(5 * time.Second)
|
||||
if now.Time.After(changeAt) {
|
||||
machine.Spec.SwitchPosition = "down"
|
||||
machine.Status.SeenAt = nil
|
||||
machine.Status.SeenAt = nil
|
||||
if err := r.Update(ctx, &machine); err != nil {
|
||||
logger.Info("error updating switch position")
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
@@ -629,17 +629,17 @@ Note: this time, only create a new custom resource; not a new controller.
|
||||
- We can retrieve associated switches like this:
|
||||
|
||||
```go
|
||||
var switches uselessv1alpha1.SwitchList
|
||||
var switches uselessv1alpha1.SwitchList
|
||||
|
||||
if err := r.List(ctx, &switches,
|
||||
client.InNamespace(req.Namespace),
|
||||
client.MatchingLabels{"machine": req.Name},
|
||||
); err != nil {
|
||||
logger.Error(err, "unable to list switches of the machine")
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
if err := r.List(ctx, &switches,
|
||||
client.InNamespace(req.Namespace),
|
||||
client.MatchingLabels{"machine": req.Name},
|
||||
); err != nil {
|
||||
logger.Error(err, "unable to list switches of the machine")
|
||||
return ctrl.Result{}, client.IgnoreNotFound(err)
|
||||
}
|
||||
|
||||
logger.Info("Found switches", "switches", switches)
|
||||
logger.Info("Found switches", "switches", switches)
|
||||
```
|
||||
|
||||
---
|
||||
@@ -649,13 +649,13 @@ Note: this time, only create a new custom resource; not a new controller.
|
||||
- Each time we reconcile a Machine, let's update its status:
|
||||
|
||||
```go
|
||||
status := ""
|
||||
for _, sw := range switches.Items {
|
||||
status += string(sw.Spec.Position[0])
|
||||
}
|
||||
machine.Status.Positions = status
|
||||
if err := r.Status().Update(ctx, &machine); err != nil {
|
||||
...
|
||||
status := ""
|
||||
for _, sw := range switches.Items {
|
||||
status += string(sw.Spec.Position[0])
|
||||
}
|
||||
machine.Status.Positions = status
|
||||
if err := r.Status().Update(ctx, &machine); err != nil {
|
||||
...
|
||||
```
|
||||
|
||||
- Run the controller and check that POSITIONS gets updated
|
||||
@@ -721,7 +721,7 @@ if err := r.Create(ctx, &sw); err != nil { ...
|
||||
Define the following helper function:
|
||||
|
||||
```go
|
||||
func (r *MachineReconciler) machineOfSwitch(obj client.Object) []ctrl.Request {
|
||||
func (r *MachineReconciler) machineOfSwitch(ctx context.Context, obj client.Object) []ctrl.Request {
|
||||
return []ctrl.Request{
|
||||
ctrl.Request{
|
||||
NamespacedName: types.NamespacedName{
|
||||
@@ -746,7 +746,7 @@ func (r *MachineReconciler) SetupWithManager(mgr ctrl.Manager) error {
|
||||
For(&uselessv1alpha1.Machine{}).
|
||||
Owns(&uselessv1alpha1.Switch{}).
|
||||
Watches(
|
||||
&source.Kind{Type: &uselessv1alpha1.Switch{}},
|
||||
&uselessv1alpha1.Switch{},
|
||||
handler.EnqueueRequestsFromMapFunc(r.machineOfSwitch),
|
||||
).
|
||||
Complete(r)
|
||||
|
||||
@@ -128,7 +128,9 @@ configMapGenerator:
|
||||
|
||||
- A *variant* is the final outcome of applying bases + overlays
|
||||
|
||||
(See the [kustomize glossary](https://github.com/kubernetes-sigs/kustomize/blob/master/docs/glossary.md) for more definitions!)
|
||||
(See the [kustomize glossary][glossary] for more definitions!)
|
||||
|
||||
[glossary]: https://kubectl.docs.kubernetes.io/references/kustomize/glossary/
|
||||
|
||||
---
|
||||
|
||||
@@ -228,7 +230,7 @@ General workflow:
|
||||
|
||||
3. `kustomize edit add patch` to add patches to said resources
|
||||
|
||||
4. `kustomized edit add ...` or `kustomize edit set ...` (many options!)
|
||||
4. `kustomize edit add ...` or `kustomize edit set ...` (many options!)
|
||||
|
||||
5. `kustomize build | kubectl apply -f-` or `kubectl apply -k .`
|
||||
|
||||
@@ -244,7 +246,7 @@ General workflow:
|
||||
|
||||
(just add `--help` after any command to see possible options!)
|
||||
|
||||
- Make sure to install the completion and try e.g. `kustomize eidt add [TAB][TAB]`
|
||||
- Make sure to install the completion and try e.g. `kustomize edit add [TAB][TAB]`
|
||||
|
||||
---
|
||||
|
||||
@@ -337,7 +339,7 @@ kustomize edit add label app.kubernetes.io/name:dockercoins
|
||||
|
||||
- Assuming that `commonLabels` have been set as shown on the previous slide:
|
||||
```bash
|
||||
kubectl apply -k . --prune --selector app.kubernetes.io.name=dockercoins
|
||||
kubectl apply -k . --prune --selector app.kubernetes.io/name=dockercoins
|
||||
```
|
||||
|
||||
- ... This command removes resources that have been removed from the kustomization
|
||||
|
||||
@@ -536,12 +536,12 @@ Note: the `apiVersion` field appears to be optional.
|
||||
- Excerpt:
|
||||
```yaml
|
||||
generate:
|
||||
kind: LimitRange
|
||||
name: default-limitrange
|
||||
namespace: "{{request.object.metadata.name}}"
|
||||
data:
|
||||
spec:
|
||||
limits:
|
||||
kind: LimitRange
|
||||
name: default-limitrange
|
||||
namespace: "{{request.object.metadata.name}}"
|
||||
data:
|
||||
spec:
|
||||
limits:
|
||||
```
|
||||
|
||||
- Note that we have to specify the `namespace`
|
||||
|
||||
@@ -429,11 +429,11 @@ troubleshoot easily, without having to poke holes in our firewall.
|
||||
|
||||
- The API documentation has a lot of detail about the format of various objects: <!-- ##VERSION## -->
|
||||
|
||||
- [NetworkPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicy-v1-networking-k8s-io)
|
||||
- [NetworkPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicy-v1-networking-k8s-io)
|
||||
|
||||
- [NetworkPolicySpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicyspec-v1-networking-k8s-io)
|
||||
- [NetworkPolicySpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicyspec-v1-networking-k8s-io)
|
||||
|
||||
- [NetworkPolicyIngressRule](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicyingressrule-v1-networking-k8s-io)
|
||||
- [NetworkPolicyIngressRule](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicyingressrule-v1-networking-k8s-io)
|
||||
|
||||
- etc.
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
- "New" policies
|
||||
|
||||
(available in alpha since Kubernetes 1.22)
|
||||
(available in alpha since Kubernetes 1.22, and GA since Kubernetes 1.25)
|
||||
|
||||
- Easier to use
|
||||
|
||||
@@ -66,50 +66,6 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## PSA in practice
|
||||
|
||||
- Step 1: enable the PodSecurity admission plugin
|
||||
|
||||
- Step 2: label some Namespaces
|
||||
|
||||
- Step 3: provide an AdmissionConfiguration (optional)
|
||||
|
||||
- Step 4: profit!
|
||||
|
||||
---
|
||||
|
||||
## Enabling PodSecurity
|
||||
|
||||
- This requires Kubernetes 1.22 or later
|
||||
|
||||
- This requires the ability to reconfigure the API server
|
||||
|
||||
- The following slides assume that we're using `kubeadm`
|
||||
|
||||
(and have write access to `/etc/kubernetes/manifests`)
|
||||
|
||||
---
|
||||
|
||||
## Reconfiguring the API server
|
||||
|
||||
- In Kubernetes 1.22, we need to enable the `PodSecurity` feature gate
|
||||
|
||||
- In later versions, this might be enabled automatically
|
||||
|
||||
.lab[
|
||||
|
||||
- Edit `/etc/kubernetes/manifests/kube-apiserver.yaml`
|
||||
|
||||
- In the `command` list, add `--feature-gates=PodSecurity=true`
|
||||
|
||||
- Save, quit, wait for the API server to be back up again
|
||||
|
||||
]
|
||||
|
||||
Note: for bonus points, edit the `kubeadm-config` ConfigMap instead!
|
||||
|
||||
---
|
||||
|
||||
## Namespace labels
|
||||
|
||||
- Three optional labels can be added to namespaces:
|
||||
@@ -277,14 +233,6 @@ Let's use @@LINK[k8s/admission-configuration.yaml]:
|
||||
|
||||
- But the Pods don't get created
|
||||
|
||||
---
|
||||
|
||||
## Clean up
|
||||
|
||||
- We probably want to remove the API server flags that we added
|
||||
|
||||
(the feature gate and the admission configuration)
|
||||
|
||||
???
|
||||
|
||||
:EN:- Preventing privilege escalation with Pod Security Admission
|
||||
|
||||
@@ -114,7 +114,7 @@
|
||||
|
||||
- plugins (compiled in API server; enabled/disabled by reconfiguration)
|
||||
|
||||
- webhooks (registesred dynamically)
|
||||
- webhooks (registered dynamically)
|
||||
|
||||
- Admission control has many other uses
|
||||
|
||||
@@ -124,7 +124,7 @@
|
||||
|
||||
## Admission plugins
|
||||
|
||||
- [PodSecurityPolicy](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) (will be removed in Kubernetes 1.25)
|
||||
- [PodSecurityPolicy](https://kubernetes.io/docs/concepts/policy/pod-security-policy/) (was removed in Kubernetes 1.25)
|
||||
|
||||
- create PodSecurityPolicy resources
|
||||
|
||||
@@ -132,7 +132,7 @@
|
||||
|
||||
- create RoleBinding that grants the Role to a user or ServiceAccount
|
||||
|
||||
- [PodSecurityAdmission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) (alpha since Kubernetes 1.22)
|
||||
- [PodSecurityAdmission](https://kubernetes.io/docs/concepts/security/pod-security-admission/) (alpha since Kubernetes 1.22, stable since 1.25)
|
||||
|
||||
- use pre-defined policies (privileged, baseline, restricted)
|
||||
|
||||
@@ -162,9 +162,31 @@
|
||||
|
||||
---
|
||||
|
||||
## Validating Admission Policies
|
||||
|
||||
- Alternative to validating admission webhooks
|
||||
|
||||
- Evaluated in the API server
|
||||
|
||||
(don't require an external server; don't add network latency)
|
||||
|
||||
- Written in CEL (Common Expression Language)
|
||||
|
||||
- alpha in K8S 1.26; beta in K8S 1.28; GA in K8S 1.30
|
||||
|
||||
- Can replace validating webhooks at least in simple cases
|
||||
|
||||
- Can extend Pod Security Admission
|
||||
|
||||
- Check [the documentation][vapdoc] for examples
|
||||
|
||||
[vapdoc]: https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/
|
||||
|
||||
---
|
||||
|
||||
## Acronym salad
|
||||
|
||||
- PSP = Pod Security Policy
|
||||
- PSP = Pod Security Policy **(deprecated)**
|
||||
|
||||
- an admission plugin called PodSecurityPolicy
|
||||
|
||||
|
||||
@@ -2,11 +2,15 @@
|
||||
|
||||
- "Legacy" policies
|
||||
|
||||
(deprecated since Kubernetes 1.21; will be removed in 1.25)
|
||||
(deprecated since Kubernetes 1.21; removed in 1.25)
|
||||
|
||||
- Superseded by Pod Security Standards + Pod Security Admission
|
||||
|
||||
(available in alpha since Kubernetes 1.22)
|
||||
(available in alpha since Kubernetes 1.22; stable since 1.25)
|
||||
|
||||
- **Since Kubernetes 1.24 was EOL in July 2023, nobody should use PSPs anymore!**
|
||||
|
||||
- This section is here mostly for historical purposes, and can be skipped
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Pre-requirements
|
||||
## Pre-requirements
|
||||
|
||||
- Kubernetes concepts
|
||||
|
||||
|
||||
@@ -6,11 +6,53 @@
|
||||
|
||||
- We can specify *limits* and/or *requests*
|
||||
|
||||
- We can specify quantities of CPU and/or memory
|
||||
- We can specify quantities of CPU and/or memory and/or ephemeral storage
|
||||
|
||||
---
|
||||
|
||||
## CPU vs memory
|
||||
## Requests vs limits
|
||||
|
||||
- *Requests* are *guaranteed reservations* of resources
|
||||
|
||||
- They are used for scheduling purposes
|
||||
|
||||
- Kubelet will use cgroups to e.g. guarantee a minimum amount of CPU time
|
||||
|
||||
- A container **can** use more than its requested resources
|
||||
|
||||
- A container using *less* than what it requested should never be killed or throttled
|
||||
|
||||
- A node **cannot** be overcommitted with requests
|
||||
|
||||
(the sum of all requests **cannot** be higher than resources available on the node)
|
||||
|
||||
- A small amount of resources is set aside for system components
|
||||
|
||||
(this explains why there is a difference between "capacity" and "allocatable")
|
||||
|
||||
---
|
||||
|
||||
## Requests vs limits
|
||||
|
||||
- *Limits* are "hard limits" (a container **cannot** exceed its limits)
|
||||
|
||||
- They aren't taken into account by the scheduler
|
||||
|
||||
- A container exceeding its memory limit is killed instantly
|
||||
|
||||
(by the kernel out-of-memory killer)
|
||||
|
||||
- A container exceeding its CPU limit is throttled
|
||||
|
||||
- A container exceeding its disk limit is killed
|
||||
|
||||
(usually with a small delay, since this is checked periodically by kubelet)
|
||||
|
||||
- On a given node, the sum of all limits **can** be higher than the node size
|
||||
|
||||
---
|
||||
|
||||
## Compressible vs incompressible resources
|
||||
|
||||
- CPU is a *compressible resource*
|
||||
|
||||
@@ -24,7 +66,29 @@
|
||||
|
||||
- if we have N GB RAM and need 2N, we might run at... 0.1% speed!
|
||||
|
||||
- As a result, exceeding limits will have different consequences for CPU and memory
|
||||
- Disk is also an *incompressible resource*
|
||||
|
||||
- when the disk is full, writes will fail
|
||||
|
||||
- applications may or may not crash but persistent apps will be in trouble
|
||||
|
||||
---
|
||||
|
||||
## Running low on CPU
|
||||
|
||||
- Two ways for a container to "run low" on CPU:
|
||||
|
||||
- it's hitting its CPU limit
|
||||
|
||||
- all CPUs on the node are at 100% utilization
|
||||
|
||||
- The app in the container will run slower
|
||||
|
||||
(compared to running without a limit, or if CPU cycles were available)
|
||||
|
||||
- No other consequence
|
||||
|
||||
(but this could affect SLA/SLO for latency-sensitive applications!)
|
||||
|
||||
---
|
||||
|
||||
@@ -136,9 +200,7 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
|
||||
|
||||
## Running low on memory
|
||||
|
||||
- When the system runs low on memory, it starts to reclaim used memory
|
||||
|
||||
(we talk about "memory pressure")
|
||||
- When the kernel runs low on memory, it starts to reclaim used memory
|
||||
|
||||
- Option 1: free up some buffers and caches
|
||||
|
||||
@@ -162,71 +224,91 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
|
||||
|
||||
- If a container exceeds its memory *limit*, it gets killed immediately
|
||||
|
||||
- If a node is overcommitted and under memory pressure, it will terminate some pods
|
||||
- If a node's memory usage gets too high, it will *evict* some pods
|
||||
|
||||
(see next slide for some details about what "overcommit" means here!)
|
||||
(we say that the node is "under pressure", more on that in a bit!)
|
||||
|
||||
[KEP 2400]: https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2400-node-swap/README.md#implementation-history
|
||||
|
||||
---
|
||||
|
||||
## Overcommitting resources
|
||||
## Running low on disk
|
||||
|
||||
- *Limits* are "hard limits" (a container *cannot* exceed its limits)
|
||||
- When the kubelet runs low on disk, it starts to reclaim disk space
|
||||
|
||||
- a container exceeding its memory limit is killed
|
||||
(similarly to what the kernel does, but in different categories)
|
||||
|
||||
- a container exceeding its CPU limit is throttled
|
||||
- Option 1: garbage collect dead pods and containers
|
||||
|
||||
- On a given node, the sum of pod *limits* can be higher than the node size
|
||||
(no consequence, but their logs will be deleted)
|
||||
|
||||
- *Requests* are used for scheduling purposes
|
||||
- Option 2: remove unused images
|
||||
|
||||
- a container can use more than its requested CPU or RAM amounts
|
||||
(no consequence, but these images will have to be pulled again if we need them later)
|
||||
|
||||
- a container using *less* than what it requested should never be killed or throttled
|
||||
- Option 3: evict pods and remove them to reclaim their disk usage
|
||||
|
||||
- On a given node, the sum of pod *requests* cannot be higher than the node size
|
||||
- Note: this only applies to *ephemeral storage*, not to e.g. Persistent Volumes!
|
||||
|
||||
---
|
||||
|
||||
## Pod quality of service
|
||||
## Ephemeral storage?
|
||||
|
||||
Each pod is assigned a QoS class (visible in `status.qosClass`).
|
||||
- This includes:
|
||||
|
||||
- If limits = requests:
|
||||
- the *read-write layer* of the container
|
||||
<br/>
|
||||
(any file creation/modification outside of its volumes)
|
||||
|
||||
- as long as the container uses less than the limit, it won't be affected
|
||||
- `emptyDir` volumes mounted in the container
|
||||
|
||||
- if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
|
||||
- the container logs stored on the node
|
||||
|
||||
- If requests < limits:
|
||||
- This does not include:
|
||||
|
||||
- as long as the container uses less than the request, it won't be affected
|
||||
- the container image
|
||||
|
||||
- otherwise, it might be killed/evicted if the node gets overloaded
|
||||
|
||||
- if at least one container has *(requests<limits)*, QoS is considered "Burstable"
|
||||
|
||||
- If a pod doesn't have any request nor limit, QoS is considered "BestEffort"
|
||||
- other types of volumes (e.g. Persistent Volumes, `hostPath`, or `local` volumes)
|
||||
|
||||
---
|
||||
|
||||
## Quality of service impact
|
||||
class: extra-details
|
||||
|
||||
- When a node is overloaded, BestEffort pods are killed first
|
||||
## Disk limit enforcement
|
||||
|
||||
- Then, Burstable pods that exceed their requests
|
||||
- Disk usage is periodically measured by kubelet
|
||||
|
||||
- Burstable and Guaranteed pods below their requests are never killed
|
||||
(with something equivalent to `du`)
|
||||
|
||||
(except if their node fails)
|
||||
- There can be a small delay before pod termination when the disk limit is exceeded
|
||||
|
||||
- If we only use Guaranteed pods, no pod should ever be killed
|
||||
- It's also possible to enable filesystem *project quotas*
|
||||
|
||||
(as long as they stay within their limits)
|
||||
(e.g. with EXT4 or XFS)
|
||||
|
||||
(Pod QoS is also explained in [this page](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/) of the Kubernetes documentation and in [this blog post](https://medium.com/google-cloud/quality-of-service-class-qos-in-kubernetes-bb76a89eb2c6).)
|
||||
- Remember that container logs are also accounted for!
|
||||
|
||||
(container log rotation/retention is managed by kubelet)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## `nodefs` and `imagefs`
|
||||
|
||||
- `nodefs` is the main filesystem of the node
|
||||
|
||||
(holding, notably, `emptyDir` volumes and container logs)
|
||||
|
||||
- Optionally, the container engine can be configured to use an `imagefs`
|
||||
|
||||
- `imagefs` will store container images and container writable layers
|
||||
|
||||
- When there is a separate `imagefs`, its disk usage is tracked independently
|
||||
|
||||
- If `imagefs` usage gets too high, kubelet will remove old images first
|
||||
|
||||
(conversely, if `nodefs` usage gets too high, kubelet won't remove old images)
|
||||
|
||||
---
|
||||
|
||||
@@ -304,6 +386,46 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## Pod quality of service
|
||||
|
||||
Each pod is assigned a QoS class (visible in `status.qosClass`).
|
||||
|
||||
- If limits = requests:
|
||||
|
||||
- as long as the container uses less than the limit, it won't be affected
|
||||
|
||||
- if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
|
||||
|
||||
- If requests < limits:
|
||||
|
||||
- as long as the container uses less than the request, it won't be affected
|
||||
|
||||
- otherwise, it might be killed/evicted if the node gets overloaded
|
||||
|
||||
- if at least one container has *(requests<limits)*, QoS is considered "Burstable"
|
||||
|
||||
- If a pod doesn't have any request or limit, QoS is considered "BestEffort"
|
||||
|
||||
---
|
||||
|
||||
## Quality of service impact
|
||||
|
||||
- When a node is overloaded, BestEffort pods are killed first
|
||||
|
||||
- Then, Burstable pods that exceed their requests
|
||||
|
||||
- Burstable and Guaranteed pods below their requests are never killed
|
||||
|
||||
(except if their node fails)
|
||||
|
||||
- If we only use Guaranteed pods, no pod should ever be killed
|
||||
|
||||
(as long as they stay within their limits)
|
||||
|
||||
(Pod QoS is also explained in [this page](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/) of the Kubernetes documentation and in [this blog post](https://medium.com/google-cloud/quality-of-service-class-qos-in-kubernetes-bb76a89eb2c6).)
|
||||
|
||||
---
|
||||
|
||||
## Specifying resources
|
||||
|
||||
- Resource requests are expressed at the *container* level
|
||||
@@ -316,9 +438,9 @@ class: extra-details
|
||||
|
||||
(so 100m = 0.1)
|
||||
|
||||
- Memory is expressed in bytes
|
||||
- Memory and ephemeral disk storage are expressed in bytes
|
||||
|
||||
- Memory can be expressed with k, M, G, T, ki, Mi, Gi, Ti suffixes
|
||||
- These can have k, M, G, T, ki, Mi, Gi, Ti suffixes
|
||||
|
||||
(corresponding to 10^3, 10^6, 10^9, 10^12, 2^10, 2^20, 2^30, 2^40)
|
||||
|
||||
@@ -334,11 +456,13 @@ containers:
|
||||
image: jpetazzo/color
|
||||
resources:
|
||||
limits:
|
||||
memory: "100Mi"
|
||||
cpu: "100m"
|
||||
requests:
|
||||
ephemeral-storage: 10M
|
||||
memory: "100Mi"
|
||||
requests:
|
||||
cpu: "10m"
|
||||
ephemeral-storage: 10M
|
||||
memory: "100Mi"
|
||||
```
|
||||
|
||||
This set of resources makes sure that this service won't be killed (as long as it stays below 100 MB of RAM), but allows its CPU usage to be throttled if necessary.
|
||||
@@ -365,7 +489,7 @@ This set of resources makes sure that this service won't be killed (as long as i
|
||||
|
||||
---
|
||||
|
||||
## We need default resource values
|
||||
## We need to specify resource values
|
||||
|
||||
- If we do not set resource values at all:
|
||||
|
||||
@@ -379,9 +503,33 @@ This set of resources makes sure that this service won't be killed (as long as i
|
||||
|
||||
- if the request is zero, the scheduler can't make a smart placement decision
|
||||
|
||||
- To address this, we can set default values for resources
|
||||
- This is fine when learning/testing, absolutely not in production!
|
||||
|
||||
- This is done with a LimitRange object
|
||||
---
|
||||
|
||||
## How should we set resources?
|
||||
|
||||
- Option 1: manually, for each container
|
||||
|
||||
- simple, effective, but tedious
|
||||
|
||||
- Option 2: automatically, with the [Vertical Pod Autoscaler (VPA)][vpa]
|
||||
|
||||
- relatively simple, very minimal involvement beyond initial setup
|
||||
|
||||
- not compatible with HPAv1, can disrupt long-running workloads (see [limitations][vpa-limitations])
|
||||
|
||||
- Option 3: semi-automatically, with tools like [Robusta KRR][robusta]
|
||||
|
||||
- good compromise between manual work and automation
|
||||
|
||||
- Option 4: by creating LimitRanges in our Namespaces
|
||||
|
||||
- relatively simple, but "one-size-fits-all" approach might not always work
|
||||
|
||||
[robusta]: https://github.com/robusta-dev/krr
|
||||
[vpa]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
|
||||
[vpa-limitations]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler#known-limitations
|
||||
|
||||
---
|
||||
|
||||
@@ -636,7 +784,7 @@ class: extra-details
|
||||
|
||||
- ResourceQuota per namespace
|
||||
|
||||
- Let's see a simple recommendation to get started with resource limits
|
||||
- Let's see one possible strategy to get started with resource limits
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -352,6 +352,87 @@ class: pic
|
||||
class: pic
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Traffic engineering
|
||||
|
||||
- By default, connections to a ClusterIP or a NodePort are load balanced
|
||||
across all the backends of their Service
|
||||
|
||||
- This can incur extra network hops (which add latency)
|
||||
|
||||
- To remove that extra hop, multiple mechanisms are available:
|
||||
|
||||
- `spec.externalTrafficPolicy`
|
||||
|
||||
- `spec.internalTrafficPolicy`
|
||||
|
||||
- [Topology aware routing](https://kubernetes.io/docs/concepts/services-networking/topology-aware-routing/) annotation (beta)
|
||||
|
||||
- `spec.trafficDistribution` (alpha in 1.30, beta in 1.31)
|
||||
|
||||
---
|
||||
|
||||
## `internal / externalTrafficPolicy`
|
||||
|
||||
- Applies respectively to `ClusterIP` and `NodePort` connections
|
||||
|
||||
- Can be set to `Cluster` or `Local`
|
||||
|
||||
- `Cluster`: load balance connections across all backends (default)
|
||||
|
||||
- `Local`: load balance connections to local backends (on the same node)
|
||||
|
||||
- With `Local`, if there is no local backend, the connection will fail!
|
||||
|
||||
(the parameter expresses a "hard rule", not a preference)
|
||||
|
||||
- Example: `externalTrafficPolicy: Local` for Ingress controllers
|
||||
|
||||
(as shown on earlier diagrams)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Topology aware routing
|
||||
|
||||
- In beta since Kubernetes 1.23
|
||||
|
||||
- Enabled with annotation `service.kubernetes.io/topology-mode=Auto`
|
||||
|
||||
- Relies on node label `topology.kubernetes.io/zone`
|
||||
|
||||
- Kubernetes service proxy will try to keep connections within a zone
|
||||
|
||||
(connections made by a pod in zone `a` will be sent to pods in zone `a`)
|
||||
|
||||
- ...Except if there are no pods in the zone (then fallback to all zones)
|
||||
|
||||
- This can mess up autoscaling!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## `spec.trafficDistribution`
|
||||
|
||||
- [KEP4444, Traffic Distribution for Services][kep4444]
|
||||
|
||||
- In alpha since Kubernetes 1.30, beta since Kubernetes 1.31
|
||||
|
||||
- Should eventually supersede topology aware routing
|
||||
|
||||
- Can be set to `PreferClose` (more values might be supported later)
|
||||
|
||||
- The meaning of `PreferClose` is implementation dependent
|
||||
|
||||
(with kube-proxy, it should work like topology aware routing: stay in a zone)
|
||||
|
||||
[kep4444]: https://github.com/kubernetes/enhancements/issues/4444
|
||||
|
||||
???
|
||||
|
||||
:EN:- Service types: ClusterIP, NodePort, LoadBalancer
|
||||
|
||||
@@ -144,6 +144,30 @@
|
||||
|
||||
---
|
||||
|
||||
## [Orbstack](https://orbstack.dev/)
|
||||
|
||||
- Mac only
|
||||
|
||||
- Runs Docker containers, Kubernetes, and Linux virtual machines
|
||||
|
||||
- Emphasis on speed and energy usage (battery life)
|
||||
|
||||
- Great support for `ClusterIP` and `LoadBalancer` services
|
||||
|
||||
- Free for personal use; paid product otherwise
|
||||
|
||||
---
|
||||
|
||||
## [Podman Desktop](https://podman-desktop.io/)
|
||||
|
||||
- Available on Linux, Mac, and Windows
|
||||
|
||||
- Free and open-source
|
||||
|
||||
- Doesn't support Kubernetes directly, but [supports KinD](https://podman-desktop.io/docs/kind)
|
||||
|
||||
---
|
||||
|
||||
## [Rancher Desktop](https://rancherdesktop.io/)
|
||||
|
||||
- Available on Linux, Mac, and Windows
|
||||
@@ -158,8 +182,6 @@
|
||||
|
||||
- Emphasis on ease of use (like Docker Desktop)
|
||||
|
||||
- Relatively young product (first release in May 2021)
|
||||
|
||||
- Based on k3s and other proven components
|
||||
|
||||
---
|
||||
|
||||
@@ -166,17 +166,15 @@
|
||||
|
||||
- [Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way) by Kelsey Hightower
|
||||
|
||||
- step by step guide to install Kubernetes on Google Cloud
|
||||
|
||||
- covers certificates, high availability ...
|
||||
|
||||
- *“Kubernetes The Hard Way is optimized for learning, which means taking the long route to ensure you understand each task required to bootstrap a Kubernetes cluster.”*
|
||||
*step by step guide to install Kubernetes on GCP, with certificates, HA...*
|
||||
|
||||
- [Deep Dive into Kubernetes Internals for Builders and Operators](https://www.youtube.com/watch?v=3KtEAa7_duA)
|
||||
|
||||
- conference presentation showing step-by-step control plane setup
|
||||
*conference talk setting up a simplified Kubernetes cluster - no security or HA*
|
||||
|
||||
- emphasis on simplicity, not on security and availability
|
||||
- 🇫🇷[Démystifions les composants internes de Kubernetes](https://www.youtube.com/watch?v=OCMNA0dSAzc)
|
||||
|
||||
*improved version of the previous one, with certs and recent k8s versions*
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -32,12 +32,14 @@ content:
|
||||
- k8s/architecture.md
|
||||
#- k8s/internal-apis.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc.md
|
||||
- k8s/dmuc-easy.md
|
||||
-
|
||||
- k8s/multinode.md
|
||||
- k8s/cni.md
|
||||
- k8s/dmuc-medium.md
|
||||
- k8s/dmuc-hard.md
|
||||
#- k8s/multinode.md
|
||||
#- k8s/cni.md
|
||||
- k8s/cni-internals.md
|
||||
- k8s/interco.md
|
||||
#- k8s/interco.md
|
||||
-
|
||||
- k8s/apilb.md
|
||||
#- k8s/setup-overview.md
|
||||
|
||||
@@ -32,11 +32,13 @@ content:
|
||||
- k8s/architecture.md
|
||||
- k8s/internal-apis.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc.md
|
||||
- - k8s/multinode.md
|
||||
- k8s/cni.md
|
||||
- k8s/dmuc-easy.md
|
||||
- - k8s/dmuc-medium.md
|
||||
- k8s/dmuc-hard.md
|
||||
#- k8s/multinode.md
|
||||
#- k8s/cni.md
|
||||
- k8s/cni-internals.md
|
||||
- k8s/interco.md
|
||||
#- k8s/interco.md
|
||||
- - k8s/apilb.md
|
||||
- k8s/setup-overview.md
|
||||
#- k8s/setup-devel.md
|
||||
@@ -65,6 +67,7 @@ content:
|
||||
- - k8s/resource-limits.md
|
||||
- k8s/metrics-server.md
|
||||
- k8s/cluster-sizing.md
|
||||
- k8s/disruptions.md
|
||||
- k8s/horizontal-pod-autoscaler.md
|
||||
- - k8s/prometheus.md
|
||||
#- k8s/prometheus-stack.md
|
||||
|
||||
@@ -30,13 +30,15 @@ content:
|
||||
- k8s/architecture.md
|
||||
- k8s/internal-apis.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc.md
|
||||
- k8s/dmuc-easy.md
|
||||
- #2
|
||||
- k8s/multinode.md
|
||||
- k8s/cni.md
|
||||
- k8s/interco.md
|
||||
- #3
|
||||
- k8s/dmuc-medium.md
|
||||
- k8s/dmuc-hard.md
|
||||
#- k8s/multinode.md
|
||||
#- k8s/cni.md
|
||||
#- k8s/interco.md
|
||||
- k8s/cni-internals.md
|
||||
- #3
|
||||
- k8s/apilb.md
|
||||
- k8s/control-plane-auth.md
|
||||
- |
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user