Compare commits


2 Commits

Author             SHA1        Message                                              Date
Julien Girardin    6a8e00fc7d  Change last day schedule of Allo Docker for Julien   2023-05-30 15:44:33 +02:00
Jérôme Petazzoni   e8c2b29c8f  ⚛️ HighFive 2023Q2 content update                    2023-05-29 14:54:07 +02:00
156 changed files with 3240 additions and 9013 deletions

View File

@@ -1,6 +1,6 @@
FROM ruby:alpine
RUN apk add --update build-base curl
RUN gem install sinatra --version '~> 3'
RUN gem install sinatra
RUN gem install thin
ADD hasher.rb /
CMD ["ruby", "hasher.rb"]

View File

@@ -16,7 +16,8 @@ spec:
hostPath:
path: /root
tolerations:
- operator: Exists
- effect: NoSchedule
operator: Exists
initContainers:
- name: hacktheplanet
image: alpine
@@ -26,7 +27,7 @@ spec:
command:
- sh
- -c
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys >> /root/.ssh/authorized_keys"
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys > /root/.ssh/authorized_keys"
containers:
- name: web
image: nginx

View File

@@ -1,13 +0,0 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: my-pdb
spec:
#minAvailable: 2
#minAvailable: 90%
maxUnavailable: 1
#maxUnavailable: 10%
selector:
matchLabels:
app: my-app

View File

@@ -1,27 +0,0 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: sysctl
spec:
selector:
matchLabels:
app: sysctl
template:
metadata:
labels:
app: sysctl
spec:
tolerations:
- operator: Exists
initContainers:
- name: sysctl
image: alpine
securityContext:
privileged: true
command:
- sysctl
- fs.inotify.max_user_instances=99999
containers:
- name: pause
image: registry.k8s.io/pause:3.8

View File

@@ -59,27 +59,6 @@ You don't **have to** install the CLI tools of the cloud provider(s) that you wa
If you want to provide your cloud credentials through other means, you will have to adjust the Terraform configuration files in `terraform/provider-config` accordingly.
Here is where we look for credentials for each provider:
- AWS: Terraform defaults; see [AWS provider documentation][creds-aws] (for instance, you can use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables, or AWS config and profile files; see the example after this list)
- Azure: Terraform defaults; see [AzureRM provider documentation][creds-azure] (typically, you can authenticate with the `az` CLI and Terraform will pick it up automatically)
- Civo: CLI configuration file (`~/.civo.json`)
- Digital Ocean: CLI configuration file (`~/.config/doctl/config.yaml`)
- Exoscale: CLI configuration file (`~/.config/exoscale/exoscale.toml`)
- Google Cloud: FIXME, note that the project name is currently hard-coded to `prepare-tf`
- Hetzner: CLI configuration file (`~/.config/hcloud/cli.toml`)
- Linode: CLI configuration file (`~/.config/linode-cli`)
- OpenStack: you will need to write a tfvars file (check [this example](terraform/virtual-machines/openstack/tfvars.example))
- Oracle: Terraform defaults; see [OCI provider documentation][creds-oci] (for instance, you can set up API keys; or you can use a short-lived token generated by the OCI CLI with `oci session authenticate`)
- OVH: Terraform defaults; see [OVH provider documentation][creds-ovh] (this typically involves setting up 5 `OVH_...` environment variables)
- Scaleway: Terraform defaults; see [Scaleway provider documentation][creds-scw] (for instance, you can set environment variables, but it will also automatically pick up CLI authentication from `~/.config/scw/config.yaml`)
[creds-aws]: https://registry.terraform.io/providers/hashicorp/aws/latest/docs#authentication-and-configuration
[creds-azure]: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs#authenticating-to-azure
[creds-oci]: https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/terraformproviderconfiguration.htm#authentication
[creds-ovh]: https://registry.terraform.io/providers/ovh/ovh/latest/docs#provider-configuration
[creds-scw]: https://registry.terraform.io/providers/scaleway/scaleway/latest/docs#authentication
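For illustration, here is a minimal sketch of providing AWS credentials through environment variables before running the tooling (the key values below are placeholders, not real credentials, and the region is only an example):

    export AWS_ACCESS_KEY_ID=AKIAXXXXXXXXXXXXXXXX
    export AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    export AWS_DEFAULT_REGION=eu-north-1   # optional; any valid region works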
## General Workflow
- fork/clone repo

View File

@@ -21,11 +21,6 @@ digitalocean-pvc)
jq '.[] | select(.name | startswith("pvc-")) | .id' |
xargs -n1 -P10 doctl compute volume delete --force
;;
scaleway-pvc)
scw instance volume list --output json |
jq '.[] | select(.name | contains("_pvc-")) | .id' |
xargs -n1 -P10 scw instance volume delete
;;
*)
echo "Unknown combination of provider ('$1') and resource ('$2')."
;;

View File

@@ -10,22 +10,13 @@ fi
. ~/creds/creds.cloudflare.dns
cloudflare() {
case "$1" in
GET|POST|DELETE)
METHOD="$1"
shift
;;
*)
METHOD=""
;;
esac
URI=$1
shift
http --ignore-stdin $METHOD https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
http https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
}
_list_zones() {
cloudflare zones?per_page=100 | jq -r .result[].name
cloudflare zones | jq -r .result[].name
}
_get_zone_id() {
@@ -41,15 +32,6 @@ _populate_zone() {
done
}
_clear_zone() {
ZONE_ID=$(_get_zone_id $1)
for RECORD_ID in $(
cloudflare zones/$ZONE_ID/dns_records | jq -r .result[].id
); do
cloudflare DELETE zones/$ZONE_ID/dns_records/$RECORD_ID
done
}
_add_zone() {
cloudflare zones "name=$1"
}
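# Usage sketch (hypothetical zone name and documentation-range IPs):
#   _add_zone training.example.com                                  # create the zone
#   _populate_zone training.example.com 203.0.113.10 203.0.113.11   # add A records
#   _clear_zone training.example.com                                # delete all its records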

View File

@@ -1,9 +1,7 @@
#!/bin/sh
set -eu
# https://open-api.netlify.com/#tag/dnsZone
[ "${1-}" ] || {
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training".
@@ -14,13 +12,13 @@ set -eu
echo "$0 del <recordid>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu A 185.145.250.0"
echo "$0 add eu 185.145.250.0"
echo ""
exit 1
}
NETLIFY_CONFIG_FILE=~/.config/netlify/config.json
if ! [ "${DOMAIN-}" ]; then
if ! [ "$DOMAIN" ]; then
DOMAIN=container.training
fi
@@ -51,29 +49,27 @@ ZONE_ID=$(netlify dns_zones |
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A" or .type=="AAAA") | [.hostname, .type, .value, .id] | @tsv' |
sort |
column --table
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
_add() {
NAME=$1.$DOMAIN
TYPE=$2
VALUE=$3
ADDR=$2
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="'$TYPE'" and .value=="'$VALUE'")' |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=$TYPE hostname=$NAME value=$VALUE ttl=300
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
@@ -92,7 +88,7 @@ case "$1" in
_list
;;
add)
_add $2 $3 $4
_add $2 $3
;;
del)
_del $2

View File

@@ -1,29 +1,13 @@
#!/bin/sh
#
# Baseline resource usage per vcluster in our usecase:
# 500 MB RAM
# 10% CPU
# (See https://docs.google.com/document/d/1n0lwp6rQKQUIuo_A5LQ1dgCzrmjkDjmDtNj1Jn92UrI)
# PRO2-XS = 4 core, 16 gb
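# Rough capacity estimate (sketch; ignores system pods and other overhead):
# one PRO2-XS node (4 cores, 16 GB) fits about 16 GB / 500 MB ≈ 32 vclusters
# by RAM and 4 cores / 0.1 core ≈ 40 by CPU, so memory is the limiting factor.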
PROVIDER=scaleway
case "$PROVIDER" in
linode)
export TF_VAR_node_size=g6-standard-6
export TF_VAR_location=eu-west
;;
scaleway)
export TF_VAR_node_size=PRO2-XS
export TF_VAR_location=fr-par-2
;;
esac
./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk
# deploy big cluster
TF_VAR_node_size=g6-standard-6 \
TF_VAR_nodes_per_cluster=5 \
TF_VAR_location=eu-west \
./labctl create --mode mk8s --settings settings/mk8s.env --provider linode --tag konk
# set kubeconfig file
export KUBECONFIG=~/kubeconfig
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
# set external_ip labels
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
@@ -32,12 +16,4 @@ while read node address; do
done
# vcluster all the things
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 50
# install prometheus stack because that's cool
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
--namespace prom-system --create-namespace \
kube-prometheus-stack kube-prometheus-stack
# and also fix sysctl
kubectl apply -f ../k8s/sysctl.yaml --namespace kube-system
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 27

View File

@@ -57,7 +57,7 @@ need_tag() {
if [ ! -d "tags/$TAG" ]; then
die "Tag $TAG not found (directory tags/$TAG does not exist)."
fi
for FILE in mode provider settings.env status; do
for FILE in settings.env ips.txt; do
if [ ! -f "tags/$TAG/$FILE" ]; then
warning "File tags/$TAG/$FILE not found."
fi

View File

@@ -19,22 +19,20 @@ _cmd_cards() {
TAG=$1
need_tag
OPTIONS_FILE=$2
[ -f "$OPTIONS_FILE" ] || die "Please specify a YAML options file as 2nd argument."
OPTIONS_FILE_PATH="$(readlink -f "$OPTIONS_FILE")"
die FIXME
# This will process logins.jsonl to generate two files: cards.pdf and cards.html
# This will process ips.txt to generate two files: ips.pdf and ips.html
(
cd tags/$TAG
../../../lib/make-login-cards.py "$OPTIONS_FILE_PATH"
../../../lib/ips-txt-to-html.py settings.yaml
)
ln -sf ../tags/$TAG/cards.html www/$TAG.html
ln -sf ../tags/$TAG/cards.pdf www/$TAG.pdf
ln -sf ../tags/$TAG/ips.html www/$TAG.html
ln -sf ../tags/$TAG/ips.pdf www/$TAG.pdf
info "Cards created. You can view them with:"
info "xdg-open tags/$TAG/cards.html tags/$TAG/cards.pdf (on Linux)"
info "open tags/$TAG/cards.html (on macOS)"
info "xdg-open tags/$TAG/ips.html tags/$TAG/ips.pdf (on Linux)"
info "open tags/$TAG/ips.html (on macOS)"
info "Or you can start a web server with:"
info "$0 www"
}
@@ -128,7 +126,6 @@ set number
set shiftwidth=2
set softtabstop=2
set nowrap
set laststatus=2
SQRL
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
@@ -259,9 +256,7 @@ _cmd_create() {
terraform init
echo tag = \"$TAG\" >> terraform.tfvars
echo how_many_clusters = $STUDENTS >> terraform.tfvars
if [ "$CLUSTERSIZE" ]; then
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
fi
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
for RETRY in 1 2 3; do
if terraform apply -auto-approve; then
touch terraform.ok
@@ -325,11 +320,10 @@ _cmd_clusterize() {
pssh "
set -e
grep PSSH_ /etc/ssh/sshd_config || echo 'AcceptEnv PSSH_*' | sudo tee -a /etc/ssh/sshd_config
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
sudo systemctl restart ssh.service"
pssh -I < tags/$TAG/clusters.tsv "
grep -w \$PSSH_HOST | tr '\t' '\n' > /tmp/cluster"
pssh -I < tags/$TAG/clusters.txt "
grep -w \$PSSH_HOST | tr ' ' '\n' > /tmp/cluster"
pssh "
echo \$PSSH_HOST > /tmp/ipv4
head -n 1 /tmp/cluster | sudo tee /etc/ipv4_of_first_node
@@ -350,10 +344,6 @@ _cmd_clusterize() {
done < /tmp/cluster
"
while read line; do
printf '{"login": "%s", "password": "%s", "ipaddrs": "%s"}\n' "$USER_LOGIN" "$USER_PASSWORD" "$line"
done < tags/$TAG/clusters.tsv > tags/$TAG/logins.jsonl
echo cluster_ok > tags/$TAG/status
}
@@ -401,7 +391,7 @@ _cmd_docker() {
##VERSION## https://github.com/docker/compose/releases
COMPOSE_VERSION=v2.11.1
COMPOSE_PLATFORM='linux-$(uname -m)'
# Just in case you need Compose 1.X, you can use the following lines.
# (But it will probably only work for x86_64 machines.)
#COMPOSE_VERSION=1.29.2
@@ -430,23 +420,10 @@ _cmd_kubebins() {
TAG=$1
need_tag
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
##VERSION##
case "$KUBEVERSION" in
1.19.*)
ETCD_VERSION=v3.4.13
CNI_VERSION=v0.8.7
;;
*)
ETCD_VERSION=v3.5.10
CNI_VERSION=v1.3.0
;;
esac
K8SBIN_VERSION="v$KUBEVERSION"
ETCD_VERSION=v3.4.13
K8SBIN_VERSION=v1.19.11 # Can't go to 1.20 because it requires a serviceaccount signing key.
CNI_VERSION=v0.8.7
ARCH=${ARCHITECTURE-amd64}
pssh --timeout 300 "
set -e
@@ -470,41 +447,30 @@ _cmd_kubebins() {
"
}
_cmd kubepkgs "Install Kubernetes packages (kubectl, kubeadm, kubelet)"
_cmd_kubepkgs() {
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
_cmd_kube() {
TAG=$1
need_tag
# Prior to September 2023, there was a single Kubernetes package repo that
# contained packages for all versions, so we could just add that repo
# and install whatever was the latest version available there.
# Things have changed (versions after September 2023, e.g. 1.28.3 are
# not in the old repo) and now there is a different repo for each
# minor version, so we need to figure out what minor version we are
# installing to add the corresponding repo.
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
# Since the new repo doesn't have older versions, add a safety check here.
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
if [ "$MINORVERSION" -lt 24 ]; then
die "Cannot install kubepkgs for versions before 1.24."
fi
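# Example (illustration only): with KUBEVERSION=1.28.9, KUBEREPOVERSION is 1.28,
# the check above passes, and the repository added below is
# https://pkgs.k8s.io/core:/stable:/v1.28/deb/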
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
if [ "$KUBEVERSION" ]; then
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION-*
Pin-Priority: 1000
EOF"
fi
# As of February 27th, 2023, packages.cloud.google.com seems broken
# (serves HTTP 500 errors for the GPG key), so let's pre-load that key.
pssh -I "sudo apt-key add -" < lib/kubernetes-apt-key.gpg
# Install packages
pssh --timeout 200 "
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
#curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
#sudo apt-key add - &&
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
@@ -512,21 +478,8 @@ EOF"
sudo apt-mark hold kubelet kubeadm kubectl &&
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubecolor' | sudo tee /etc/bash_completion.d/k &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
}
_cmd kubeadm "Setup kubernetes clusters with kubeadm"
_cmd_kubeadm() {
TAG=$1
need_tag
if [ "$KUBEVERSION" ]; then
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
IGNORE_SWAP="- Swap"
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
fi
# Install a valid configuration for containerd
# (first, the CRI interface needs to be re-enabled;
@@ -547,9 +500,6 @@ bootstrapTokens:
nodeRegistration:
ignorePreflightErrors:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -561,9 +511,6 @@ discovery:
nodeRegistration:
ignorePreflightErrors:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
@@ -646,31 +593,6 @@ _cmd_kubetools() {
;;
esac
# Install ArgoCD CLI
##VERSION## https://github.com/argoproj/argo-cd/releases/latest
URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-${ARCH}
pssh "
if [ ! -x /usr/local/bin/argocd ]; then
sudo curl -o /usr/local/bin/argocd -fsSL $URL
sudo chmod +x /usr/local/bin/argocd
argocd completion bash | sudo tee /etc/bash_completion.d/argocd
argocd version --client
fi"
# Install Flux CLI
##VERSION## https://github.com/fluxcd/flux2/releases
FLUX_VERSION=2.3.0
FILENAME=flux_${FLUX_VERSION}_linux_${ARCH}
URL=https://github.com/fluxcd/flux2/releases/download/v$FLUX_VERSION/$FILENAME.tar.gz
pssh "
if [ ! -x /usr/local/bin/flux ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx flux
sudo chmod +x /usr/local/bin/flux
flux completion bash | sudo tee /etc/bash_completion.d/flux
flux --version
fi"
# Install kubectx and kubens
pssh "
set -e
@@ -702,7 +624,7 @@ EOF
# Install stern
##VERSION## https://github.com/stern/stern/releases
STERN_VERSION=1.29.0
STERN_VERSION=1.22.0
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
pssh "
@@ -724,7 +646,7 @@ EOF
# Install kustomize
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
KUSTOMIZE_VERSION=v5.4.1
KUSTOMIZE_VERSION=v4.5.7
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kustomize ]; then
@@ -755,16 +677,6 @@ EOF
aws-iam-authenticator version
fi"
# Install jless (jless.io)
pssh "
if [ ! -x /usr/local/bin/jless ]; then
##VERSION##
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
sudo mv jless /usr/local/bin
fi"
# Install the krew package manager
pssh "
if [ ! -d /home/$USER_LOGIN/.krew ]; then
@@ -776,31 +688,21 @@ EOF
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
fi"
# Install kubecolor
KUBECOLOR_VERSION=0.4.0
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kubecolor ]; then
##VERSION##
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kubecolor
fi"
# Install k9s
pssh "
if [ ! -x /usr/local/bin/k9s ]; then
FILENAME=k9s_Linux_$ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
sudo tar -C /usr/local/bin -zx k9s
sudo tar -zxvf- -C /usr/local/bin k9s
k9s version
fi"
# Install popeye
pssh "
if [ ! -x /usr/local/bin/popeye ]; then
FILENAME=popeye_Linux_$ARCH.tar.gz &&
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
sudo tar -C /usr/local/bin -zx popeye
sudo tar -zxvf- -C /usr/local/bin popeye
popeye version
fi"
@@ -810,10 +712,10 @@ EOF
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
pssh "
if [ ! -x /usr/local/bin/tilt ]; then
TILT_VERSION=0.33.13
TILT_VERSION=0.22.15
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
sudo tar -C /usr/local/bin -zx tilt
sudo tar -zxvf- -C /usr/local/bin tilt
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
tilt version
fi"
@@ -855,8 +757,7 @@ EOF
fi"
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
KUBESEAL_VERSION=0.26.2
URL=https://github.com/bitnami-labs/sealed-secrets/releases/download/v${KUBESEAL_VERSION}/kubeseal-${KUBESEAL_VERSION}-linux-${ARCH}.tar.gz
KUBESEAL_VERSION=0.17.4
#case $ARCH in
#amd64) FILENAME=kubeseal-linux-amd64;;
#arm64) FILENAME=kubeseal-arm64;;
@@ -864,13 +765,13 @@ EOF
#esac
pssh "
if [ ! -x /usr/local/bin/kubeseal ]; then
curl -fsSL $URL |
sudo tar -C /usr/local/bin -zx kubeseal
curl -fsSL https://github.com/bitnami-labs/sealed-secrets/releases/download/v$KUBESEAL_VERSION/kubeseal-$KUBESEAL_VERSION-linux-$ARCH.tar.gz |
sudo tar -zxvf- -C /usr/local/bin kubeseal
kubeseal --version
fi"
##VERSION## https://github.com/vmware-tanzu/velero/releases
VELERO_VERSION=1.13.2
VELERO_VERSION=1.11.0
pssh "
if [ ! -x /usr/local/bin/velero ]; then
curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v$VELERO_VERSION/velero-v$VELERO_VERSION-linux-$ARCH.tar.gz |
@@ -880,21 +781,13 @@ EOF
fi"
##VERSION## https://github.com/doitintl/kube-no-trouble/releases
KUBENT_VERSION=0.7.2
KUBENT_VERSION=0.7.0
pssh "
if [ ! -x /usr/local/bin/kubent ]; then
curl -fsSL https://github.com/doitintl/kube-no-trouble/releases/download/${KUBENT_VERSION}/kubent-${KUBENT_VERSION}-linux-$ARCH.tar.gz |
sudo tar -zxvf- -C /usr/local/bin kubent
kubent --version
fi"
# Ngrok. Note that unfortunately, this is the x86_64 binary.
# We might have to rethink how to handle this for multi-arch environments.
pssh "
if [ ! -x /usr/local/bin/ngrok ]; then
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
sudo tar -zxvf- -C /usr/local/bin ngrok
fi"
}
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
@@ -942,15 +835,6 @@ _cmd_inventory() {
FIXME
}
_cmd logins "Show login information for a group of instances"
_cmd_logins() {
TAG=$1
need_tag $TAG
cat tags/$TAG/logins.jsonl \
| jq -r '"\(.password)\tssh -l \(.login)\(if .port then " -p \(.port)" else "" end)\t\(.ipaddrs)"'
}
_cmd maketag "Generate a quasi-unique tag for a group of instances"
_cmd_maketag() {
if [ -z $USER ]; then
@@ -1001,9 +885,6 @@ _cmd_stage2() {
cd tags/$TAG/stage2
terraform init -upgrade
terraform apply -auto-approve
terraform output -raw logins_jsonl > ../logins.jsonl
terraform output -raw ips_txt > ../ips.txt
echo "stage2_ok" > status
}
_cmd standardize "Deal with non-standard Ubuntu cloud images"
@@ -1040,19 +921,12 @@ _cmd_standardize() {
# Disable unattended upgrades so that they don't mess up with the subsequent steps
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
# Some cloud providers think that it's smart to disable password authentication.
# We need to re-enable it, though.
# Digital Ocean
# Digital Ocean's cloud init disables password authentication; re-enable it.
pssh "
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
sudo systemctl restart ssh.service
fi"
# AWS
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
sudo systemctl restart ssh.service
fi"
# Special case for oracle since their iptables blocks everything but SSH
pssh "
@@ -1088,12 +962,11 @@ _cmd_tailhist () {
# halfway through and we're actually trying to download it again.
pssh "
set -e
sudo apt-get install unzip -y
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
sudo mv websocketd /usr/local/bin/websocketd
sudo mkdir -p /opt/tailhist
sudo tee /opt/tailhist.service <<EOF
sudo mkdir -p /tmp/tailhist
sudo tee /root/tailhist.service <<EOF
[Unit]
Description=tailhist
@@ -1101,16 +974,16 @@ Description=tailhist
WantedBy=multi-user.target
[Service]
WorkingDirectory=/opt/tailhist
WorkingDirectory=/tmp/tailhist
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /opt/tailhist.service --now
sudo systemctl enable /root/tailhist.service --now
"
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
}
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
@@ -1183,8 +1056,8 @@ _cmd_tags() {
cd tags
echo "[#] [Status] [Tag] [Mode] [Provider]"
for tag in *; do
if [ -f $tag/logins.jsonl ]; then
count="$(wc -l < $tag/logins.jsonl)"
if [ -f $tag/ips.txt ]; then
count="$(wc -l < $tag/ips.txt)"
else
count="?"
fi
@@ -1347,7 +1220,7 @@ EOF"
_cmd www "Run a web server to access card HTML and PDF"
_cmd_www() {
cd www
IPADDR=$(curl -fsSL canihazip.com/s || echo localhost)
IPADDR=$(curl -sL canihazip.com/s)
info "The following files are available:"
for F in *; do
echo "http://$IPADDR:8000/$F"

View File

@@ -1,22 +1,32 @@
#!/usr/bin/env python3
import json
import os
import sys
import yaml
import jinja2
# Read settings from user-provided settings file
context = yaml.safe_load(open(sys.argv[1]))
context["logins"] = []
for line in open("logins.jsonl"):
if line.strip():
context["logins"].append(json.loads(line))
ips = list(open("ips.txt"))
clustersize = context["clustersize"]
print("---------------------------------------------")
print(" Number of cards: {}".format(len(context["logins"])))
print(" Number of IPs: {}".format(len(ips)))
print(" VMs per cluster: {}".format(clustersize))
print("---------------------------------------------")
assert len(ips)%clustersize == 0
clusters = []
while ips:
cluster = ips[:clustersize]
ips = ips[clustersize:]
clusters.append(cluster)
context["clusters"] = clusters
template_file_name = context["cards_template"]
template_file_path = os.path.join(
os.path.dirname(__file__),
@@ -25,23 +35,23 @@ template_file_path = os.path.join(
template_file_name
)
template = jinja2.Template(open(template_file_path).read())
with open("cards.html", "w") as f:
f.write(template.render(**context))
print("Generated cards.html")
with open("ips.html", "w") as f:
f.write(template.render(**context))
print("Generated ips.html")
try:
import pdfkit
paper_size = context["paper_size"]
margin = {"A4": "0.5cm", "Letter": "0.2in"}[paper_size]
with open("cards.html") as f:
pdfkit.from_file(f, "cards.pdf", options={
with open("ips.html") as f:
pdfkit.from_file(f, "ips.pdf", options={
"page-size": paper_size,
"margin-top": margin,
"margin-bottom": margin,
"margin-left": margin,
"margin-right": margin,
})
print("Generated cards.pdf")
print("Generated ips.pdf")
except ImportError:
print("WARNING: could not import pdfkit; did not generate cards.pdf")
print("WARNING: could not import pdfkit; did not generate ips.pdf")

Binary file not shown.

View File

@@ -17,12 +17,6 @@ pssh() {
echo "[parallel-ssh] $@"
# There are some routers that really struggle with the number of TCP
# connections that we open when deploying large fleets of clusters.
# We're adding a 1 second delay here, but this can be cranked up if
# necessary - or down to zero, too.
sleep ${PSSH_DELAY_PRE-1}
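# Both knobs can be overridden from the environment of whatever invokes this
# wrapper, e.g. (sketch, reusing the konk.sh invocation shown in this changeset):
#   PSSH_DELAY_PRE=0 PSSH_PARALLEL_CONNECTIONS=50 \
#     ./labctl create --mode mk8s --settings settings/mk8s.env --provider linode --tag konk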
$(which pssh || which parallel-ssh) -h $HOSTFILE -l ubuntu \
--par ${PSSH_PARALLEL_CONNECTIONS-100} \
--timeout 300 \

View File

@@ -1,16 +0,0 @@
#!/bin/sh
DOMAINS=domains.txt
IPS=ips.txt
. ./dns-cloudflare.sh
paste "$DOMAINS" "$IPS" | while read domain ips; do
if ! [ "$domain" ]; then
echo "⚠️ No more domains!"
exit 1
fi
_clear_zone "$domain"
_populate_zone "$domain" $ips
done
echo "✅ All done."

View File

@@ -1,6 +1,6 @@
CLUSTERSIZE=3
CLUSTERSIZE=1
CLUSTERPREFIX=polykube
CLUSTERPREFIX=dmuc
USER_LOGIN=k8s
USER_PASSWORD=training
@@ -10,11 +10,12 @@ STEPS="
standardize
clusterize
tools
kubepkgs
kubebins
docker
disabledocker
createuser
webssh
tailhist
kubebins
kubetools
ips
"

View File

@@ -1,26 +0,0 @@
CLUSTERSIZE=1
CLUSTERPREFIX=monokube
# We're sticking to this in the first DMUC lab,
# because it still works with Docker, and doesn't
# require a ServiceAccount signing key.
KUBEVERSION=1.19.11
USER_LOGIN=k8s
USER_PASSWORD=training
STEPS="
wait
standardize
clusterize
tools
docker
disabledocker
createuser
webssh
tailhist
kubebins
kubetools
ips
"

View File

@@ -7,7 +7,7 @@ USER_PASSWORD=training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
KUBEVERSION=1.28.9
KUBEVERSION=1.22.5
STEPS="
wait
@@ -18,8 +18,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"

View File

@@ -14,8 +14,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"
"

View File

@@ -1,6 +0,0 @@
CLUSTERSIZE=5
USER_LOGIN=k8s
USER_PASSWORD=
STEPS="stage2"

View File

@@ -14,8 +14,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"
"

View File

@@ -15,8 +15,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"

View File

@@ -1,3 +1,5 @@
CLUSTERSIZE=2
USER_LOGIN=k8s
USER_PASSWORD=

View File

@@ -1,8 +1,3 @@
#export TF_VAR_node_size=GP2.4
#export TF_VAR_node_size=g6-standard-6
#export TF_VAR_node_size=m7i.xlarge
CLUSTERSIZE=1
CLUSTERPREFIX=CHANGEME

View File

@@ -7,7 +7,7 @@ STUDENTS=2
#export TF_VAR_location=eu-north-1
export TF_VAR_node_size=S
SETTINGS=admin-monokube
SETTINGS=admin-dmuc
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \
@@ -15,7 +15,15 @@ TAG=$PREFIX-$SETTINGS
--settings settings/$SETTINGS.env \
--students $STUDENTS
SETTINGS=admin-polykube
SETTINGS=admin-kubenet
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \
--provider $PROVIDER \
--settings settings/$SETTINGS.env \
--students $STUDENTS
SETTINGS=admin-kuberouter
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \

View File

@@ -7,7 +7,7 @@
{%- set url = url
| default("http://FIXME.container.training/") -%}
{%- set pagesize = pagesize
| default(10) -%}
| default(9) -%}
{%- set lang = lang
| default("en") -%}
{%- set event = event
@@ -15,36 +15,79 @@
{%- set backside = backside
| default(False) -%}
{%- set image = image
| default(False) -%}
| default("kube") -%}
{%- set clusternumber = clusternumber
| default(None) -%}
{%- set thing = thing
| default("lab environment") -%}
{%- if lang == "en" -%}
{%- set intro -%}
Here is the connection information to your very own
{{ thing }} for this {{ event }}.
You can connect to it with any SSH client.
{%- endset -%}
{%- if qrcode == True -%}
{%- set qrcode = "https://container.training/q" -%}
{%- elif qrcode -%}
{%- set qrcode = qrcode -%}
{%- endif -%}
{%- if lang == "fr" -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
{{ thing }} pour cette formation.
Vous pouvez vous y connecter
avec n'importe quel client SSH.
{%- endset -%}
{# You can also set img_bottom_src instead. #}
{%- set img_logo_src = {
"docker": "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png",
"swarm": "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png",
"kube": "https://avatars1.githubusercontent.com/u/13629408",
"enix": "https://enix.io/static/img/logos/logo-domain-cropped.png",
}[image] -%}
{%- if lang == "en" and clustersize == 1 -%}
{%- set intro -%}
Here is the connection information to your very own
machine for this {{ event }}.
You can connect to this VM with any SSH client.
{%- endset -%}
{%- set listhead -%}
Your machine is:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" and clustersize != 1 -%}
{%- set intro -%}
Here is the connection information to your very own
cluster for this {{ event }}.
You can connect to each VM with any SSH client.
{%- endset -%}
{%- set listhead -%}
Your machines are:
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" and clustersize == 1 -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
machine pour cette formation.
Vous pouvez vous connecter à cette machine virtuelle
avec n'importe quel client SSH.
{%- endset -%}
{%- set listhead -%}
Adresse IP:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" and clusterprefix != "node" -%}
{%- set intro -%}
Here is the connection information for the
<strong>{{ clusterprefix }}</strong> environment.
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" and clustersize != 1 -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
cluster pour cette formation.
Vous pouvez vous connecter à chaque machine virtuelle
avec n'importe quel client SSH.
{%- endset -%}
{%- set listhead -%}
Adresses IP:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" -%}
{%- set slides_are_at -%}
You can find the slides at:
{%- endset -%}
{%- set slides_are_at -%}
You can find the slides at:
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" -%}
{%- set slides_are_at -%}
Le support de formation est à l'adresse suivante :
{%- endset -%}
{%- set slides_are_at -%}
Le support de formation est à l'adresse suivante :
{%- endset -%}
{%- endif -%}
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
@@ -59,21 +102,25 @@
}
body {
/* this is A4 minus 0.5cm margins */
width: 20cm;
height: 28.7cm;
width: 20cm;
height: 28.7cm;
}
{% elif paper_size == "Letter" %}
@page {
size: Letter; /* 8.5in x 11in */
size: Letter;
margin: 0.2in;
}
body {
width: 6.75in; /* two cards wide */
margin-left: 0.875in; /* (8.5in - 6.75in)/2 */
margin-top: 0.1875in; /* (11in - 5 cards)/2 */
/* this is Letter minus 0.2in margins */
width: 8.6in;
height: 10.6in;
}
{% endif %}
body, table {
margin: 0;
padding: 0;
line-height: 1em;
font-size: 15px;
font-family: 'Slabo 27px';
@@ -87,45 +134,47 @@ table {
padding-left: 0.4em;
}
td:first-child {
width: 10.5em;
}
div.card {
div {
float: left;
border: 0.01in dotted black;
border: 1px dotted black;
{% if backside %}
height: 33%;
{% endif %}
/* columns * (width+left+right) < 100% */
/*
columns * (width+left+right) < 100%
height: 33%;
width: 24.8%;
width: 33%;
width: 24.8%;
*/
width: 3.355in; /* 3.375in minus two 0.01in borders */
height: 2.105in; /* 2.125in minus two 0.01in borders */
/**/
width: 33%;
/**/
}
p {
margin: 0.8em;
}
div.front {
{% if image %}
background-image: url("{{ image }}");
background-repeat: no-repeat;
background-size: 1in;
background-position-x: 2.8in;
background-position-y: center;
{% endif %}
div.back {
border: 1px dotted grey;
}
span.scale {
white-space: nowrap;
white-space: nowrap;
}
img.logo {
height: 4.5em;
float: right;
}
img.bottom {
height: 2.5em;
display: block;
margin: 0.5em auto;
}
.qrcode img {
height: 5.8em;
padding: 1em 1em 0.5em 1em;
float: left;
width: 40%;
margin: 1em;
}
.logpass {
@@ -140,97 +189,101 @@ span.scale {
height: 0;
}
</style>
<script type="text/javascript" src="qrcode.min.js"></script>
<script type="text/javascript" src="https://cdn.rawgit.com/davidshimjs/qrcodejs/gh-pages/qrcode.min.js"></script>
<script type="text/javascript">
function qrcodes() {
[].forEach.call(
document.getElementsByClassName("qrcode"),
(e, index) => {
new QRCode(e, {
text: "{{ qrcode }}",
correctLevel: QRCode.CorrectLevel.L
});
}
);
[].forEach.call(
document.getElementsByClassName("qrcode"),
(e, index) => {
new QRCode(e, {
text: "{{ qrcode }}",
correctLevel: QRCode.CorrectLevel.L
});
}
);
}
function scale() {
[].forEach.call(
document.getElementsByClassName("scale"),
(e, index) => {
var text_width = e.getBoundingClientRect().width;
var box_width = e.parentElement.getBoundingClientRect().width;
var percent = 100 * box_width / text_width + "%";
e.style.fontSize = percent;
}
);
[].forEach.call(
document.getElementsByClassName("scale"),
(e, index) => {
var text_width = e.getBoundingClientRect().width;
var box_width = e.parentElement.getBoundingClientRect().width;
var percent = 100 * box_width / text_width + "%";
e.style.fontSize = percent;
}
);
}
</script>
</head>
<body onload="qrcodes(); scale();">
{% for login in logins %}
<div class="card front">
{% for cluster in clusters %}
<div>
<p>{{ intro }}</p>
<p>
{% if img_logo_src %}
<img class="logo" src="{{ img_logo_src }}" />
{% endif %}
<table>
<tr>
<td>login:</td>
<td>password:</td>
</tr>
<tr>
<td class="logpass">{{ login.login }}</td>
<td class="logpass">{{ login.password }}</td>
</tr>
<tr>
<td>IP address:</td>
{% if login.port %}
<td>port:</td>
{% endif %}
</tr>
<tr>
<td class="logpass">{{ login.ipaddrs.split("\t")[0] }}</td>
{% if login.port %}
<td class="logpass">{{ login.port }}</td>
{% endif %}
</tr>
{% if clusternumber != None %}
<tr><td>cluster:</td></tr>
<tr><td class="logpass">{{ clusternumber + loop.index }}</td></tr>
{% endif %}
<tr><td>login:</td></tr>
<tr><td class="logpass">{{ user_login }}</td></tr>
<tr><td>password:</td></tr>
<tr><td class="logpass">{{ user_password }}</td></tr>
</table>
</p>
<p>
{{ listhead }}
<table>
{% for node in cluster %}
<tr>
<td>{{ clusterprefix }}{{ loop.index }}:</td>
<td>{{ node }}</td>
</tr>
{% endfor %}
</table>
</p>
<p>
{% if url %}
{{ slides_are_at }}
{{ slides_are_at }}
<p>
<span class="scale">{{ url }}</span>
</p>
{% endif %}
{% if img_bottom_src %}
<img class="bottom" src="{{ img_bottom_src }}" />
{% endif %}
</p>
</div>
{% if loop.index%pagesize==0 or loop.last %}
<span class="pagebreak"></span>
{% if backside %}
{% for x in range(pagesize) %}
<div class="card back">
{{ backside }}
{#
<p>Thanks for attending
"Getting Started With Kubernetes and Container Orchestration"
during CONFERENCE in Month YYYY!</p>
<p>If you liked that workshop,
I can train your team, in person or
online, with custom courses of
any length and any level.
</p>
{% if qrcode %}
<p>If you're interested, please scan that QR code to contact me:</p>
<span class="qrcode"></span>
{% for x in range(pagesize) %}
<div class="back">
<p>Thanks for attending
"Getting Started With Kubernetes and Container Orchestration"
during CONFERENCE in Month YYYY!</p>
<p>If you liked that workshop,
I can train your team, in person or
online, with custom courses of
any length and any level.
</p>
{% if qrcode %}
<p>If you're interested, please scan that QR code to contact me:</p>
<span class="qrcode"></span>
{% else %}
<p>If you're interested, you can contact me at:</p>
{% endif %}
<p>jerome.petazzoni@gmail.com</p>
#}
</div>
{% endfor %}
<span class="pagebreak"></span>
{% endif %}
<p>If you're interested, you can contact me at:</p>
{% endif %}
<p>jerome.petazzoni@gmail.com</p>
</div>
{% endfor %}
<span class="pagebreak"></span>
{% endif %}
{% endif %}
{% endfor %}
</body>

View File

@@ -1,19 +0,0 @@
cards_template: cards.html
paper_size: Letter
url: https://2024-11-qconsf.container.training
event: workshop
backside: |
<div class="qrcode"></div>
<p>
Thanks for attending the Asynchronous Architecture Patterns workshop at QCON!
</p>
<p>
<b>This QR code will give you my contact info</b> as well as a link to a feedback form.
</p>
<p>
If you liked this workshop, I can train your team, in person or online, with custom
courses of any length and any level, on Docker, Kubernetes, and MLops.
</p>
qrcode: https://2024-11-qconsf.container.training/#contact
thing: Kubernetes cluster
image: logo-kubernetes.png

View File

@@ -1,2 +0,0 @@
#!/bin/sh
exo zone

View File

@@ -8,8 +8,8 @@ resource "random_string" "_" {
resource "time_static" "_" {}
locals {
min_nodes_per_pool = var.min_nodes_per_cluster
max_nodes_per_pool = var.max_nodes_per_cluster
min_nodes_per_pool = var.nodes_per_cluster
max_nodes_per_pool = var.nodes_per_cluster * 2
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
tag = random_string._.result
# Common tags to be assigned to all resources

View File

@@ -217,27 +217,16 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
%{ endfor ~}
output "ips_txt" {
output "ip_addresses_of_nodes" {
value = join("\n", [
%{ for index, cluster in clusters ~}
join("\n", concat(
join("\t", concat(
[
random_string.shpod_${index}.result,
"ssh -l k8s -p $${kubernetes_service.shpod_${index}.spec[0].port[0].node_port}"
],
split(" ", file("./externalips.${index}"))
)),
%{ endfor ~}
""
])
}
output "logins_jsonl" {
value = join("\n", [
%{ for index, cluster in clusters ~}
jsonencode({
login = "k8s",
password = random_string.shpod_${index}.result,
port = kubernetes_service.shpod_${index}.spec[0].port[0].node_port,
ipaddrs = replace(file("./externalips.${index}"), " ", "\t"),
}),
%{ endfor ~}
""
])
}

View File

@@ -7,16 +7,11 @@ variable "how_many_clusters" {
default = 2
}
variable "min_nodes_per_cluster" {
variable "nodes_per_cluster" {
type = number
default = 2
}
variable "max_nodes_per_cluster" {
type = number
default = 4
}
variable "node_size" {
type = string
default = "M"

View File

@@ -1,8 +1,7 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 4.47.0"
source = "hashicorp/aws"
}
}
}

View File

@@ -1 +0,0 @@
../common.tf

View File

@@ -1 +0,0 @@
../../providers/azure/config.tf

View File

@@ -1,22 +0,0 @@
resource "azurerm_resource_group" "_" {
name = var.cluster_name
location = var.location
}
resource "azurerm_kubernetes_cluster" "_" {
name = var.cluster_name
location = var.location
dns_prefix = var.cluster_name
identity {
type = "SystemAssigned"
}
resource_group_name = azurerm_resource_group._.name
default_node_pool {
name = "x86"
node_count = var.min_nodes_per_pool
min_count = var.min_nodes_per_pool
max_count = var.max_nodes_per_pool
vm_size = local.node_size
enable_auto_scaling = true
}
}

View File

@@ -1,12 +0,0 @@
output "cluster_id" {
value = azurerm_kubernetes_cluster._.id
}
output "has_metrics_server" {
value = true
}
output "kubeconfig" {
value = azurerm_kubernetes_cluster._.kube_config_raw
sensitive = true
}

View File

@@ -1,7 +0,0 @@
terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
}
}
}

View File

@@ -1 +0,0 @@
../../providers/azure/variables.tf

View File

@@ -11,23 +11,17 @@ data "oci_containerengine_cluster_option" "_" {
locals {
compartment_id = oci_identity_compartment._.id
kubernetes_version = data.oci_containerengine_cluster_option._.kubernetes_versions[0]
images = [
for image in data.oci_containerengine_node_pool_option._.sources : image
if can(regex("OKE", image.source_name))
&& can(regex(substr(local.kubernetes_version, 1, -1), image.source_name))
&& !can(regex("GPU", image.source_name))
&& !can(regex("aarch64", image.source_name))
]
}
data "oci_identity_availability_domains" "_" {
compartment_id = local.compartment_id
}
data "oci_containerengine_node_pool_option" "_" {
compartment_id = local.compartment_id
node_pool_option_id = oci_containerengine_cluster._.id
data "oci_core_images" "_" {
compartment_id = local.compartment_id
operating_system = "Oracle Linux"
operating_system_version = "8"
shape = local.shape
}
resource "oci_containerengine_cluster" "_" {
@@ -62,7 +56,7 @@ resource "oci_containerengine_node_pool" "_" {
}
}
node_source_details {
image_id = local.images[0].image_id
image_id = data.oci_core_images._.images[0].id
source_type = "image"
}
}

View File

@@ -1 +0,0 @@
../common.tf

View File

@@ -1 +0,0 @@
../../providers/ovh/config.tf

View File

@@ -1,18 +0,0 @@
resource "ovh_cloud_project_kube" "_" {
name = var.cluster_name
region = var.location
version = local.k8s_version
}
resource "ovh_cloud_project_kube_nodepool" "_" {
kube_id = ovh_cloud_project_kube._.id
name = "x86"
flavor_name = local.node_size
desired_nodes = var.min_nodes_per_pool
min_nodes = var.min_nodes_per_pool
max_nodes = var.max_nodes_per_pool
}
locals {
k8s_version = "1.26"
}

View File

@@ -1,12 +0,0 @@
output "cluster_id" {
value = ovh_cloud_project_kube._.id
}
output "has_metrics_server" {
value = false
}
output "kubeconfig" {
sensitive = true
value = ovh_cloud_project_kube._.kubeconfig
}

View File

@@ -1,7 +0,0 @@
terraform {
required_providers {
ovh = {
source = "ovh/ovh"
}
}
}

View File

@@ -1 +0,0 @@
../../providers/ovh/variables.tf

View File

@@ -1,23 +1,10 @@
resource "scaleway_vpc_private_network" "_" {
}
# This is a kind of hack to use a custom security group with Kapsule.
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
resource "scaleway_instance_security_group" "_" {
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
inbound_default_policy = "accept"
outbound_default_policy = "accept"
}
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
name = var.cluster_name
#region = var.location
tags = var.common_tags
version = local.k8s_version
type = "kapsule"
cni = "cilium"
delete_additional_resources = true
private_network_id = scaleway_vpc_private_network._.id
}
resource "scaleway_k8s_pool" "_" {
@@ -30,7 +17,6 @@ resource "scaleway_k8s_pool" "_" {
max_size = var.max_nodes_per_pool
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
autohealing = true
depends_on = [ scaleway_instance_security_group._ ]
}
data "scaleway_k8s_version" "_" {

View File

@@ -4,7 +4,6 @@ resource "helm_release" "_" {
create_namespace = true
repository = "https://charts.loft.sh"
chart = "vcluster"
version = "0.19.7"
set {
name = "service.type"
value = "NodePort"

View File

@@ -44,5 +44,5 @@ locals {
guest_api_server_port = local.node_port
guest_api_server_url_new = "https://${local.guest_api_server_host}:${local.guest_api_server_port}"
guest_api_server_url_old = yamldecode(local.kubeconfig_raw).clusters[0].cluster.server
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
}

View File

@@ -14,9 +14,9 @@ $ hcloud server-type list | grep shared
variable "node_sizes" {
type = map(any)
default = {
S = "cpx11"
M = "cpx21"
L = "cpx31"
S = "cx11"
M = "cx21"
L = "cx31"
}
}

View File

@@ -1,13 +0,0 @@
variable "node_sizes" {
type = map(any)
default = {
S = "d2-4"
M = "d2-4"
L = "d2-8"
}
}
variable "location" {
type = string
default = "BHS5"
}

View File

@@ -1,5 +1,5 @@
variable "node_sizes" {
type = map(any)
type = map(any)
default = {}
}

View File

@@ -71,10 +71,10 @@ resource "local_file" "ip_addresses" {
resource "local_file" "clusters" {
content = join("", formatlist("%s\n", [
for cid in range(1, 1 + var.how_many_clusters) :
join("\t",
join(" ",
[for nid in range(1, 1 + var.nodes_per_cluster) :
local.ip_addresses[format("c%03dn%03d", cid, nid)]
])]))
filename = "clusters.tsv"
filename = "clusters.txt"
file_permission = "0600"
}

View File

@@ -1,22 +1,14 @@
resource "openstack_compute_instance_v2" "_" {
for_each = local.nodes
name = each.value.node_name
image_name = data.openstack_images_image_v2._.name
image_name = var.image
flavor_name = each.value.node_size
key_pair = openstack_compute_keypair_v2._.name
key_pair = openstack_compute_keypair_v2._.name
network {
port = openstack_networking_port_v2._[each.key].id
}
}
data "openstack_images_image_v2" "_" {
most_recent = true
properties = {
os = "ubuntu"
version = "24.04"
}
}
resource "openstack_networking_port_v2" "_" {
for_each = local.nodes
network_id = openstack_networking_network_v2._.id

View File

@@ -31,6 +31,10 @@ variable "external_network_id" {
type = string
}
variable "image" {
type = string
}
variable "node_sizes" {
type = map(any)
default = {}

View File

@@ -4,11 +4,6 @@
# another set of clusters while a first one is still running)
# you should set the TF_VAR_cluster_name environment variable.
if ! [ "$TF_VAR_cluster_name" ]; then
echo "Please set TF_VAR_cluster_name. Thanks."
exit 1
fi
cd terraform/one-kubernetes
case "$1" in

File diff suppressed because one or more lines are too long

Binary files not shown (images; previous sizes: 81 KiB and 31 KiB).

File diff suppressed because one or more lines are too long

slides/1.yml (new file, 68 lines)
View File

@@ -0,0 +1,68 @@
title: |
Docker Intensif
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
- # DAY 1
#- containers/Docker_Overview.md
#- containers/Docker_History.md
- containers/Training_Environment.md
#- containers/Installing_Docker.md
- containers/First_Containers.md
- containers/Background_Containers.md
- containers/Initial_Images.md
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
- # DAY 2
- containers/Container_Networking_Basics.md
- containers/Local_Development_Workflow.md
- containers/Container_Network_Model.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
- # DAY 3
- containers/Start_And_Attach.md
- containers/Naming_And_Inspecting.md
- containers/Labels.md
- containers/Getting_Inside.md
- containers/Dockerfile_Tips.md
- containers/Advanced_Dockerfiles.md
- containers/Multi_Stage_Builds.md
- containers/Publishing_To_Docker_Hub.md
- containers/Exercise_Dockerfile_Advanced.md
- # DAY 4
- containers/Buildkit.md
- containers/Network_Drivers.md
- containers/Namespaces_Cgroups.md
#- containers/Copy_On_Write.md
- containers/Orchestration_Overview.md
#- containers/Docker_Machine.md
#- containers/Init_Systems.md
#- containers/Application_Configuration.md
#- containers/Logging.md
#- containers/Containers_From_Scratch.md
#- containers/Container_Engines.md
#- containers/Pods_Anatomy.md
#- containers/Ecosystem.md
- shared/thankyou.md
#- containers/links.md

View File

@@ -1,11 +1,11 @@
title: |
Kubernetes
Fondamentaux Kubernetes
chat: "[Mattermost](https://training.enix.io/mattermost)"
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-10-boursorama.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -24,75 +24,69 @@ content:
- shared/handson.md
#- shared/webssh.md
- shared/connecting.md
- exercises/k8sfundamentals-brief.md
- exercises/yaml-brief.md
- exercises/localcluster-brief.md
- exercises/healthchecks-brief.md
- shared/toc.md
- # 1 Monday morning
- # 1
#- k8s/versions-k8s.md
- shared/sampleapp.md
#- shared/composescale.md
#- shared/hastyconclusions.md
- shared/composedown.md
- k8s/concepts-k8s.md
- k8s/kubectlget.md
- k8s/kubectl-run.md
- k8s/kubectlexpose.md
#- k8s/shippingimages.md
- k8s/service-types.md
- k8s/kubenet.md
- k8s/shippingimages.md
#- k8s/buildshiprun-selfhosted.md
- k8s/buildshiprun-dockerhub.md
- exercises/k8sfundamentals-details.md
- k8s/ourapponkube.md
- # 2 Monday afternoon
- k8s/service-types.md
- k8s/kubenet.md
#- k8s/exercise-wordsmith.md
- # 2
- shared/yaml.md
- k8s/labels-annotations.md
- k8s/kubectl-logs.md
- k8s/logs-cli.md
- # 3 Tuesday morning
- shared/yaml.md
- k8s/yamldeploy.md
- k8s/namespaces.md
- shared/declarative.md
- k8s/declarative.md
- k8s/deploymentslideshow.md
- k8s/ingress.md
- k8s/cert-manager.md
- k8s/setup-overview.md
- k8s/setup-devel.md
#- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
- k8s/kubectlproxy.md
- exercises/yaml-details.md
- exercises/ingress-details.md
- # 4 Tuesday afternoon
- k8s/volumes.md
#- k8s/exercise-configmap.md
- k8s/configuration.md
- k8s/secrets.md
- # 5 Wednesday morning
- exercises/localcluster-details.md
- # 3
#- k8s/kubectlscale.md
- k8s/scalingdockercoins.md
- shared/hastyconclusions.md
- k8s/daemonset.md
- k8s/rollout.md
- k8s/healthchecks.md
#- k8s/healthchecks-more.md
- k8s/dashboard.md
- k8s/k9s.md
- k8s/tilt.md
- exercises/healthchecks-details.md
- # 6 Wednesday afternoon
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
#- k8s/horizontal-pod-autoscaler.md
- exercises/reqlim-details.md
- # 7 Thursday morning
- k8s/authn-authz.md
- k8s/admission.md
- k8s/cainjector.md
- k8s/kyverno.md
- exercises/rbac-details.md
- exercises/kyverno-ingress-domain-name-details.md
- # 8 Thursday afternoon
- k8s/statefulsets.md
- k8s/consul.md
- k8s/pv-pvc-sc.md
- k8s/volume-claim-templates.md
- k8s/stateful-failover.md
- # 9 Friday morning
- k8s/helm-intro.md
- k8s/helm-chart-format.md
- k8s/helm-create-basic-chart.md
- exercises/helm-generic-chart-details.md
- # 10 Friday afternoon
- k8s/helm-create-better-chart.md
- k8s/helm-dependencies.md
- k8s/helm-values-schema-validation.md
- k8s/helm-secrets.md
- exercises/helm-umbrella-chart-details.md
- # 4
- k8s/ingress.md
#- k8s/ingress-tls.md
#- k8s/ingress-advanced.md
- k8s/volumes.md
#- k8s/exercise-configmap.md
#- k8s/build-with-docker.md
#- k8s/build-with-kaniko.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/batch-jobs.md
- shared/thankyou.md

slides/3.yml (new file, 43 lines)
View File

@@ -0,0 +1,43 @@
title: |
Packaging d'applications
pour Kubernetes
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics-julien.md
- k8s/intro.md
- shared/about-slides.md
- k8s/prereqs-advanced.md
- shared/handson.md
- shared/webssh.md
- shared/connecting.md
#- shared/chat-room-im.md
#- shared/chat-room-zoom.md
- shared/toc.md
-
- k8s/demo-apps.md
- k8s/kustomize.md
- k8s/helm-intro.md
- k8s/helm-chart-format.md
- k8s/helm-create-basic-chart.md
- exercises/helm-generic-chart-details.md
-
- k8s/helm-create-better-chart.md
- k8s/helm-dependencies.md
- k8s/helm-values-schema-validation.md
- k8s/helm-secrets.md
- exercises/helm-umbrella-chart-details.md
-
- k8s/ytt.md
- shared/thankyou.md

slides/4.yml (new file, 70 lines)
View File

@@ -0,0 +1,70 @@
title: |
Kubernetes Avancé
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- k8s/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-zoom.md
- k8s/prereqs-advanced.md
- shared/handson.md
- shared/webssh.md
- shared/connecting.md
- shared/toc.md
- exercises/netpol-brief.md
- exercises/sealed-secrets-brief.md
- exercises/kyverno-ingress-domain-name-brief.md
- #1
- k8s/demo-apps.md
- k8s/netpol.md
- k8s/authn-authz.md
- k8s/sealed-secrets.md
- k8s/cert-manager.md
- k8s/cainjector.md
- k8s/ingress-tls.md
- exercises/netpol-details.md
- exercises/sealed-secrets-details.md
- #2
- k8s/extending-api.md
- k8s/crd.md
- k8s/operators.md
- k8s/admission.md
- k8s/cainjector.md
- k8s/kyverno.md
- exercises/kyverno-ingress-domain-name-details.md
- #3
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/horizontal-pod-autoscaler.md
- k8s/apiserver-deepdive.md
- k8s/aggregation-layer.md
- k8s/hpa-v2.md
- #4
- k8s/statefulsets.md
- k8s/consul.md
- k8s/pv-pvc-sc.md
- k8s/volume-claim-templates.md
#- k8s/eck.md
#- k8s/portworx.md
- k8s/openebs.md
- k8s/stateful-failover.md
- k8s/operators-design.md
- k8s/operators-example.md
- k8s/owners-and-dependents.md
- k8s/events.md
- k8s/finalizers.md
- shared/thankyou.md

slides/5.yml (new file, 59 lines)
View File

@@ -0,0 +1,59 @@
title: |
Opérer Kubernetes
chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics-ludovic.md
- k8s/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
# DAY 1
-
- k8s/prereqs-advanced.md
- shared/handson.md
- k8s/architecture.md
- k8s/deploymentslideshow.md
- k8s/dmuc.md
-
- k8s/multinode.md
- k8s/cni.md
- k8s/interco.md
-
- k8s/cni-internals.md
- k8s/apilb.md
- k8s/internal-apis.md
- k8s/staticpods.md
- k8s/cluster-upgrade.md
- k8s/cluster-backup.md
#- k8s/cloud-controller-manager.md
-
- k8s/control-plane-auth.md
- k8s/user-cert.md
- k8s/csr-api.md
- k8s/openid-connect.md
- k8s/pod-security-intro.md
- k8s/pod-security-policies.md
- k8s/pod-security-admission.md
- shared/thankyou.md
#-
# |
# # (Extra content)
# - k8s/apiserver-deepdive.md
# - k8s/setup-overview.md
# - k8s/setup-devel.md
# - k8s/setup-managed.md
# - k8s/setup-selfhosted.md

View File

@@ -2,7 +2,6 @@
#/ /kube-halfday.yml.html 200!
#/ /kube-fullday.yml.html 200!
#/ /kube-twodays.yml.html 200!
/ /kube.yml.html 200!
# And this allows to do "git clone https://container.training".
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack
@@ -17,10 +16,12 @@
# Shortlinks for next training in English and French
#/next https://www.eventbrite.com/e/livestream-intensive-kubernetes-bootcamp-tickets-103262336428
/next https://qconsf.com/training/nov2024/asynchronous-architecture-patterns-scale-ml-and-other-high-latency-workloads
/next https://skillsmatter.com/courses/700-advanced-kubernetes-concepts-workshop-jerome-petazzoni
/hi5 https://enix.io/fr/services/formation/online/
/us https://www.ardanlabs.com/live-training-events/deploying-microservices-and-traditional-applications-with-kubernetes-march-28-2022.html
/uk https://skillsmatter.com/workshops/827-deploying-microservices-and-traditional-applications-with-kubernetes-with-jerome-petazzoni
# Survey form
/please https://docs.google.com/forms/d/e/1FAIpQLSfIYSgrV7tpfBNm1hOaprjnBHgWKn5n-k5vtNXYJkOX1sRxng/viewform
/ /highfive.html 200!

File diff suppressed because it is too large Load Diff

View File

@@ -2,8 +2,8 @@
"name": "container-training-pub-sub-server",
"version": "0.0.1",
"dependencies": {
"express": "^4.21.1",
"socket.io": "^4.8.0",
"socket.io-client": "^4.7.5"
"express": "^4.16.2",
"socket.io": "^4.6.1",
"socket.io-client": "^4.5.1"
}
}

View File

@@ -40,7 +40,7 @@
- In multi-stage builds, all stages can be built in parallel
(example: https://github.com/jpetazzo/shpod; [before][shpod-before-parallel] and [after][shpod-after-parallel])
(example: https://github.com/jpetazzo/shpod; [before] and [after])
- Stages are built only when they are necessary
@@ -50,8 +50,8 @@
- Files are cached in the builder
[shpod-before-parallel]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
[shpod-after-parallel]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[before]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
[after]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
---
@@ -121,10 +121,10 @@ docker buildx build … \
- Must not use binary downloads with hard-coded architectures!
(streamlining a Dockerfile for multi-arch: [before][shpod-before-multiarch], [after][shpod-after-multiarch])
(streamlining a Dockerfile for multi-arch: [before], [after])
[shpod-before-multiarch]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[shpod-after-multiarch]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile
[before]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[after]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile
---

View File

@@ -120,11 +120,11 @@ class: extra-details
(and won't end up in the resulting image)
- See the [documentation][dockerignore] for the little details
- See the [documentation] for the little details
(exceptions can be made with `!`, multiple directory levels with `**`...)
[dockerignore]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
[documentation]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
???

View File

@@ -113,16 +113,22 @@ class: pic
## Results
* [Dev-to-prod reduced from 9 months to 15 minutes (ING)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_ING_01.25.2015_1.pdf)
https://www.docker.com/sites/default/files/CS_ING_01.25.2015_1.pdf)
* [Continuous integration job time reduced by more than 60% (BBC)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_BBCNews_01.25.2015_1.pdf)
https://www.docker.com/sites/default/files/CS_BBCNews_01.25.2015_1.pdf)
* [Deploy 100 times a day instead of once a week (GILT)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_Gilt_Groupe_03.18.2015_0.pdf)
https://www.docker.com/sites/default/files/CS_Gilt%20Groupe_03.18.2015_0.pdf)
* [70% infrastructure consolidation (MetLife)](
https://www.youtube.com/watch?v=Bwt3xigvlj0)
https://www.docker.com/customers/metlife-transforms-customer-experience-legacy-and-microservices-mashup)
* [60% infrastructure consolidation (Intesa Sanpaolo)](
https://blog.docker.com/2017/11/intesa-sanpaolo-builds-resilient-foundation-banking-docker-enterprise-edition/)
* [14x application density; 60% of legacy datacenter migrated in 4 months (GE Appliances)](
https://www.docker.com/customers/ge-uses-docker-enable-self-service-their-developers)
* etc.

View File

@@ -1,4 +1,4 @@
## Exercise — Ingress Controller
## Exercise — Ingress
- Add an ingress controller to a Kubernetes cluster

View File

@@ -1,4 +1,4 @@
# Exercise — Ingress Controller
# Exercise — Ingress
- We want to expose a couple of web apps through an ingress controller
@@ -128,4 +128,4 @@ This is similar to the previous scenario, but with two significant changes:
1. We only want to run the ingress controller on nodes that have the role `ingress`.
2. We want to either use `hostPort`, or a list of `externalIPs` (not `hostNetwork`).
2. We don't want to use `hostNetwork`, but a list of `externalIPs` instead.

View File

@@ -1,6 +1,6 @@
# Exercise — Network Policies
We want to implement a generic network security mechanism.
Instead of creating one policy per service, we want to
create a fixed number of policies, and use a single label

View File

@@ -1,11 +0,0 @@
## Exercise — Enable RBAC
- Enable RBAC on a manually-deployed control plane
- This involves:
- generating different certificates
- distributing the certificates to the controllers
- enabling the proper authorizers in API server

View File

@@ -1,117 +0,0 @@
# Exercise — Enable RBAC
- We want to enable RBAC on the "polykube" cluster
(it doesn't matter whether we have 1 or multiple nodes)
- Ideally, we want to have, for instance:
- one key, certificate, and kubeconfig for a cluster admin
- one key, certificate, and kubeconfig for a user
<br/>
(with permissions in a single namespace)
- Bonus points: enable the NodeAuthorizer too!
- Check the following slides for hints
---
## Step 1
- Enable RBAC itself!
--
- This is done with an API server command-line flag
--
- Check [the documentation][kube-apiserver-doc] to see the flag
--
- For now, only enable `--authorization-mode=RBAC`
[kube-apiserver-doc]: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
---
## Step 2
- Our certificate doesn't work anymore, we need to generate a new one
--
- We need a certificate that will have *some* (ideally *all*) permissions
--
- Two options:
- use the equivalent of "root" (identity that completely skips permission checks)
- a "non-root" identity but which is granted permissions with RBAC
--
- The "non-root" option looks nice, but to grant permissions, we need permissions
- So let's start with the equivalent of "root"!
--
- The Kubernetes equivalent of `root` is the group `system:masters`
---
## Step 2, continued
- We need to generate a certificate for a user belonging to group `system:masters`
--
- In Kubernetes certificates, groups are encoded with the "organization" field
--
- That corresponds to `O=system:masters`
--
- In other words we need to generate a new certificate, but with a subject of:
`/CN=admin/O=system:masters/` (the `CN` doesn't matter)
- That certificate should be able to interact with the API server, like before
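A minimal sketch with `openssl`, assuming the cluster CA key pair is available locally as `ca.crt` / `ca.key` (paths and validity are placeholders):

```bash
# Generate a key and a CSR with the group encoded in the O field
openssl genrsa -out admin.key 4096
openssl req -new -key admin.key -subj "/CN=admin/O=system:masters" -out admin.csr
# Sign it with the cluster CA (the one trusted by the API server)
openssl x509 -req -in admin.csr -CA ca.crt -CAkey ca.key -CAcreateserial \
  -out admin.crt -days 365
```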
---
## Step 3
- Now, all our controllers have permissions issues
- We need to either:
- use that `system:masters` cert everywhere
- generate different certs for every controller, with the proper identities
- Suggestion: use `system:masters` everywhere to begin with
(and make sure the cluster is back on its feet)
---
## Step 4
At this point, there are two possible forks in the road:
1. Generate certs for the control plane controllers
(`kube-controller-manager`, `kube-scheduler`)
2. Generate cert(s) for the node(s) and enable `NodeAuthorizer`
Good luck!

View File

@@ -1,7 +0,0 @@
## Exercise — Requests and Limits
- Check current resource allocation and utilization
- Make sure that all workloads have requests (and perhaps limits)
- Make sure that all *future* workloads too!

View File

@@ -1,55 +0,0 @@
# Exercise — Requests and Limits
By default, if we don't specify *resource requests*,
our workloads will run in `BestEffort` quality of service.
`BestEffort` is very bad for production workloads,
because the scheduler has no idea of the actual resource
requirements of our apps, and won't be able to make
smart decisions about workload placement.
As a result, when the cluster gets overloaded,
containers will be killed, pods will be evicted,
and service disruptions will happen.
Let's solve this!
---
## Check current state
- Check *allocations*
(i.e. which pods have requests and limits for CPU and memory)
- Then check *utilization*
(i.e. actual resource usage)
- Possible tools: `kubectl`, plugins like `view-allocations`, Prometheus...
---
## Follow best practices
- We want to make sure that *all* workloads have requests
(and perhaps limits, too!)
- Depending on the workload:
- edit its YAML manifest
- adjust its Helm values
  - add a LimitRange in its Namespace (see the sketch below)
- Then check again to confirm that the job has been done properly!
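For the LimitRange route, a minimal sketch (the values are placeholders to adapt):

```yaml
apiVersion: v1
kind: LimitRange
metadata:
  name: default-resources
spec:
  limits:
  - type: Container
    defaultRequest:   # applied to containers that don't specify requests
      cpu: 100m
      memory: 128Mi
    default:          # applied to containers that don't specify limits
      cpu: 500m
      memory: 512Mi
```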
---
## Be future-proof!
- We want to make sure that *future* workloads will have requests, too
- How can that be implemented?

View File

@@ -1,5 +0,0 @@
#!/bin/sh
# List link labels that are defined more than once across the slide decks.
for LINK in $(cat */*.md | sed -n 's/^\[\(.*\)\]:.*/\1/p' | sort | uniq -d); do
  grep '^\['"$LINK"'\]:' */*.md
done

117
slides/highfive.html Normal file
View File

@@ -0,0 +1,117 @@
<?xml version="1.0"?>
<html>
<head>
<style>
td {
background: #ccc;
padding: 1em;
}
</style>
</head>
<body>
<table>
<tr>
<td>Mardi 9 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Mercredi 10 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Jeudi 11 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Vendredi 12 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Lundi 15 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 16 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 17 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Lundi 22 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 23 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mercredi 24 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Jeudi 25 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Vendredi 26 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mardi 30 mai 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 31 mai 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 1er juin 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 6 juin 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 7 juin 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
</table>
</body>
</html>

Binary file not shown.


Binary file not shown.


View File

@@ -981,6 +981,10 @@
# event: LISA
# title: Deploying and Scaling Applications with Docker Swarm
#2015-09-24-strangeloop
- title: Introduction to Docker and Containers
slides: intro-selfpaced.yml.html

View File

@@ -1,16 +1,16 @@
https://prettypictures.container.training/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
https://prettypictures.container.training/containers/ShippingContainerSFBay.jpg
https://prettypictures.container.training/containers/aerial-view-of-containers.jpg
https://prettypictures.container.training/containers/blue-containers.jpg
https://prettypictures.container.training/containers/chinook-helicopter-container.jpg
https://prettypictures.container.training/containers/container-cranes.jpg
https://prettypictures.container.training/containers/container-housing.jpg
https://prettypictures.container.training/containers/containers-by-the-water.jpg
https://prettypictures.container.training/containers/distillery-containers.jpg
https://prettypictures.container.training/containers/lots-of-containers.jpg
https://prettypictures.container.training/containers/plastic-containers.JPG
https://prettypictures.container.training/containers/train-of-containers-1.jpg
https://prettypictures.container.training/containers/train-of-containers-2.jpg
https://prettypictures.container.training/containers/two-containers-on-a-truck.jpg
https://prettypictures.container.training/containers/wall-of-containers.jpeg
https://prettypictures.container.training/containers/catene-de-conteneurs.jpg
https://gallant-turing-d0d520.netlify.com/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
https://gallant-turing-d0d520.netlify.com/containers/ShippingContainerSFBay.jpg
https://gallant-turing-d0d520.netlify.com/containers/aerial-view-of-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/blue-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/chinook-helicopter-container.jpg
https://gallant-turing-d0d520.netlify.com/containers/container-cranes.jpg
https://gallant-turing-d0d520.netlify.com/containers/container-housing.jpg
https://gallant-turing-d0d520.netlify.com/containers/containers-by-the-water.jpg
https://gallant-turing-d0d520.netlify.com/containers/distillery-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/lots-of-containers.jpg
https://gallant-turing-d0d520.netlify.com/containers/plastic-containers.JPG
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-1.jpg
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-2.jpg
https://gallant-turing-d0d520.netlify.com/containers/two-containers-on-a-truck.jpg
https://gallant-turing-d0d520.netlify.com/containers/wall-of-containers.jpeg
https://gallant-turing-d0d520.netlify.com/containers/catene-de-conteneurs.jpg

72
slides/intro-fullday.yml Normal file
View File

@@ -0,0 +1,72 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
-
#- containers/Docker_Overview.md
#- containers/Docker_History.md
- containers/Training_Environment.md
#- containers/Installing_Docker.md
- containers/First_Containers.md
- containers/Background_Containers.md
#- containers/Start_And_Attach.md
- containers/Naming_And_Inspecting.md
#- containers/Labels.md
- containers/Getting_Inside.md
- containers/Initial_Images.md
-
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
-
- containers/Container_Networking_Basics.md
#- containers/Network_Drivers.md
- containers/Local_Development_Workflow.md
- containers/Container_Network_Model.md
- shared/yaml.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
-
- containers/Multi_Stage_Builds.md
#- containers/Publishing_To_Docker_Hub.md
- containers/Dockerfile_Tips.md
- containers/Exercise_Dockerfile_Advanced.md
#- containers/Docker_Machine.md
#- containers/Advanced_Dockerfiles.md
#- containers/Buildkit.md
#- containers/Init_Systems.md
#- containers/Application_Configuration.md
#- containers/Logging.md
#- containers/Namespaces_Cgroups.md
#- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
#- containers/Container_Engines.md
#- containers/Pods_Anatomy.md
#- containers/Ecosystem.md
#- containers/Orchestration_Overview.md
- shared/thankyou.md
- containers/links.md

View File

@@ -0,0 +1,73 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- in-person
content:
- shared/title.md
# - shared/logistics.md
- containers/intro.md
- shared/about-slides.md
#- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
- - containers/Docker_Overview.md
- containers/Docker_History.md
- containers/Training_Environment.md
- containers/Installing_Docker.md
- containers/First_Containers.md
- containers/Background_Containers.md
- containers/Start_And_Attach.md
- - containers/Initial_Images.md
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
- - containers/Multi_Stage_Builds.md
- containers/Publishing_To_Docker_Hub.md
- containers/Dockerfile_Tips.md
- containers/Exercise_Dockerfile_Advanced.md
- - containers/Naming_And_Inspecting.md
- containers/Labels.md
- containers/Getting_Inside.md
- - containers/Container_Networking_Basics.md
- containers/Network_Drivers.md
- containers/Container_Network_Model.md
#- containers/Connecting_Containers_With_Links.md
- containers/Ambassadors.md
- - containers/Local_Development_Workflow.md
- containers/Windows_Containers.md
- containers/Working_With_Volumes.md
- shared/yaml.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
- containers/Docker_Machine.md
- - containers/Advanced_Dockerfiles.md
- containers/Buildkit.md
- containers/Init_Systems.md
- containers/Application_Configuration.md
- containers/Logging.md
- containers/Resource_Limits.md
- - containers/Namespaces_Cgroups.md
- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
- - containers/Container_Engines.md
- containers/Pods_Anatomy.md
- containers/Ecosystem.md
- containers/Orchestration_Overview.md
- shared/thankyou.md
- containers/links.md

81
slides/intro-twodays.yml Normal file
View File

@@ -0,0 +1,81 @@
title: |
Introduction
to Containers
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
gitrepo: github.com/jpetazzo/container.training
slides: https://container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- containers/intro.md
- shared/about-slides.md
- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/toc.md
- # DAY 1
- containers/Docker_Overview.md
#- containers/Docker_History.md
- containers/Training_Environment.md
- containers/First_Containers.md
- containers/Background_Containers.md
- containers/Initial_Images.md
-
- containers/Building_Images_Interactively.md
- containers/Building_Images_With_Dockerfiles.md
- containers/Cmd_And_Entrypoint.md
- containers/Copying_Files_During_Build.md
- containers/Exercise_Dockerfile_Basic.md
-
- containers/Dockerfile_Tips.md
- containers/Multi_Stage_Builds.md
- containers/Publishing_To_Docker_Hub.md
- containers/Exercise_Dockerfile_Advanced.md
-
- containers/Naming_And_Inspecting.md
- containers/Labels.md
- containers/Start_And_Attach.md
- containers/Getting_Inside.md
- containers/Resource_Limits.md
- # DAY 2
- containers/Container_Networking_Basics.md
- containers/Network_Drivers.md
- containers/Container_Network_Model.md
-
- containers/Local_Development_Workflow.md
- containers/Working_With_Volumes.md
- shared/yaml.md
- containers/Compose_For_Dev_Stacks.md
- containers/Exercise_Composefile.md
-
- containers/Installing_Docker.md
- containers/Container_Engines.md
- containers/Init_Systems.md
- containers/Advanced_Dockerfiles.md
- containers/Buildkit.md
-
- containers/Application_Configuration.md
- containers/Logging.md
- containers/Orchestration_Overview.md
-
- shared/thankyou.md
- containers/links.md
#-
#- containers/Docker_Machine.md
#- containers/Ambassadors.md
#- containers/Namespaces_Cgroups.md
#- containers/Copy_On_Write.md
#- containers/Containers_From_Scratch.md
#- containers/Pods_Anatomy.md
#- containers/Ecosystem.md

View File

@@ -20,21 +20,19 @@
## Use cases
- Defaulting
Some examples ...
*injecting image pull secrets, sidecars, environment variables...*
- Stand-alone admission controllers
- Policy enforcement and best practices
*validating:* policy enforcement (e.g. quotas, naming conventions ...)
*prevent: `latest` images, deprecated APIs...*
*mutating:* inject or provide default values (e.g. pod presets)
*require: PDBs, resource requests/limits, labels/annotations, local registry...*
- Admission controllers part of a greater system
- Problem mitigation
*validating:* advanced typing for operators
*block nodes with vulnerable kernels, inject log4j mitigations...*
- Extended validation for operators
*mutating:* inject sidecars for service meshes
---
@@ -200,64 +198,6 @@
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
- We will need an ngrok account for the tunnels
(a free account is fine)
---
class: extra-details
## What's ngrok?
- Ngrok provides secure tunnels to access local services
- Example: run `ngrok http 1234`
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
- Perfect to develop our webhook!
---
class: extra-details
## Ngrok in production
- Ngrok was initially known for its local webhook development features
- It now supports production scenarios as well
(load balancing, WAF, authentication, circuit-breaking...)
- Including some that are very relevant to Kubernetes
(e.g. the [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
---
## Ngrok tokens
- If you're attending a live training, you might have an ngrok token
- Look in `~/ngrok.env` and if that file exists, copy it to the stack:
.lab[
```bash
cp ~/ngrok.env ~/container.training/webhooks/admission/.env
```
]
---
## Starting the whole stack
.lab[
- Go to the webhook directory:
@@ -276,6 +216,28 @@ cp ~/ngrok.env ~/container.training/webhooks/admission/.env
---
class: extra-details
## What's ngrok?
- Ngrok provides secure tunnels to access local services
- Example: run `ngrok http 1234`
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
- Perfect to develop our webhook!
- Probably not for production, though
(webhook requests and responses now pass through the ngrok platform)
---
## Update the webhook configuration
- We have a webhook configuration in `k8s/webhook-configuration.yaml`
@@ -581,23 +543,6 @@ Shell to the rescue!
(it should only allow values of `red`, `green`, `blue`)
---
## Coming soon...
- Kubernetes Validating Admission Policies
- Integrated with the Kubernetes API server
- Lets us define policies using [CEL (Common Expression Language)][cel-spec]
- Available in beta in Kubernetes 1.28 <!-- ##VERSION## -->
- Check this [CNCF Blog Post][cncf-blog-vap] for more details
[cncf-blog-vap]: https://www.cncf.io/blog/2023/09/14/policy-management-in-kubernetes-is-changing/
[cel-spec]: https://github.com/google/cel-spec
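A minimal sketch of what such a policy could look like (illustrative names and rule; a `ValidatingAdmissionPolicyBinding` is also needed to put it in effect):

```yaml
apiVersion: admissionregistration.k8s.io/v1beta1
kind: ValidatingAdmissionPolicy
metadata:
  name: demo-replica-limit
spec:
  failurePolicy: Fail
  matchConstraints:
    resourceRules:
    - apiGroups: ["apps"]
      apiVersions: ["v1"]
      operations: ["CREATE", "UPDATE"]
      resources: ["deployments"]
  validations:
  - expression: "object.spec.replicas <= 5"
    message: "this demo policy caps replicas at 5"
```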
???
:EN:- Dynamic admission control with webhooks

View File

@@ -141,6 +141,12 @@ class: pic
class: pic
![](images/control-planes/non-dedicated-stacked-nodes.svg)
---
class: pic
![](images/control-planes/advanced-control-plane.svg)
---
@@ -151,12 +157,6 @@ class: pic
---
class: pic
![](images/control-planes/non-dedicated-stacked-nodes.svg)
---
# The Kubernetes API
[

View File

@@ -1,592 +0,0 @@
# ArgoCD
- We're going to implement a basic GitOps workflow with ArgoCD
- Pushing to the default branch will automatically deploy to our clusters
- There will be two clusters (`dev` and `prod`)
- The two clusters will have similar (but slightly different) workloads
![ArgoCD Logo](images/argocdlogo.png)
---
## ArgoCD concepts
ArgoCD manages **applications** by **syncing** their **live state** with their **target state**.
- **Application**: a group of Kubernetes resources managed by ArgoCD.
<br/>
Also a custom resource (`kind: Application`) managing that group of resources.
- **Application source type**: the **Tool** used to build the application (Kustomize, Helm...)
- **Target state**: the desired state of an **application**, as represented by the git repository.
- **Live state**: the current state of the application on the cluster.
- **Sync status**: whether or not the live state matches the target state.
- **Sync**: the process of making an application move to its target state.
<br/>
(e.g. by applying changes to a Kubernetes cluster)
(Check [ArgoCD core concepts](https://argo-cd.readthedocs.io/en/stable/core_concepts/) for more definitions!)
---
## Getting ready
- Let's make sure we have two clusters
- It's OK to use local clusters (kind, minikube...)
- We need to install the ArgoCD CLI ([argocd-packages], [argocd-binaries])
- **Highly recommended:** set up CLI completion!
- Of course we'll need a Git service, too
---
## Setting up ArgoCD
- The easiest way is to use upstream YAML manifests
- There is also a [Helm chart][argocd-helmchart] if we need more customization
.lab[
- Create a namespace for ArgoCD and install it there:
```bash
kubectl create namespace argocd
kubectl apply --namespace argocd -f \
https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
```
]
---
## Logging in with the ArgoCD CLI
- The CLI can talk to the ArgoCD API server or to the Kubernetes API server
- For simplicity, we're going to authenticate and communicate with the Kubernetes API
.lab[
- Authenticate with the ArgoCD API (that's what the `--core` flag does):
```bash
argocd login --core
```
- Check that everything is fine:
```bash
argocd version
```
]
--
🤔 `FATA[0000] error retrieving argocd-cm: configmap "argocd-cm" not found`
---
## ArgoCD CLI shortcomings
- When using "core" authentication, the ArgoCD CLI uses our current Kubernetes context
(as defined in our kubeconfig file)
- That context needs to point to the correct namespace
(the namespace where we installed ArgoCD)
- In fact, `argocd login --core` doesn't communicate at all with ArgoCD!
(it only updates a local ArgoCD configuration file)
---
## Trying again in the right namespace
- We will need to run all `argocd` commands in the `argocd` namespace
(this limitation only applies to "core" authentication; see [issue 14167][issue14167])
.lab[
- Switch to the `argocd` namespace:
```bash
kubectl config set-context --current --namespace argocd
```
- Check that we can communicate with the ArgoCD API now:
```bash
argocd version
```
]
- Let's have a look at ArgoCD architecture!
---
class: pic
![ArgoCD Architecture](images/argocd_architecture.png)
---
## ArgoCD API Server
The API server is a gRPC/REST server which exposes the API consumed by the Web UI, CLI, and CI/CD systems. It has the following responsibilities:
- application management and status reporting
- invoking of application operations (e.g. sync, rollback, user-defined actions)
- repository and cluster credential management (stored as K8s secrets)
- authentication and auth delegation to external identity providers
- RBAC enforcement
- listener/forwarder for Git webhook events
---
## ArgoCD Repository Server
The repository server is an internal service which maintains a local cache of the Git repositories holding the application manifests. It is responsible for generating and returning the Kubernetes manifests when provided the following inputs:
- repository URL
- revision (commit, tag, branch)
- application path
- template specific settings: parameters, helm values...
---
## ArgoCD Application Controller
The application controller is a Kubernetes controller which continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo).
It detects *OutOfSync* application state and optionally takes corrective action.
It is responsible for invoking any user-defined hooks for lifecycle events (*PreSync, Sync, PostSync*).
---
## Preparing a repository for ArgoCD
- We need a repository with Kubernetes YAML manifests
- You can fork [kubercoins] or create a new, empty repository
- If you create a new, empty repository, add some manifests to it
---
## Add an Application
- An Application can be added to ArgoCD via the web UI or the CLI
(either way, this will create a custom resource of `kind: Application`)
- The Application should then automatically be deployed to our cluster
(the application manifests will be "applied" to the cluster)
.lab[
- Let's use the CLI to add an Application:
```bash
argocd app create kubercoins \
--repo https://github.com/`<your_user>/<your_repo>`.git \
--path . --revision `<branch>` \
--dest-server https://kubernetes.default.svc \
--dest-namespace kubercoins-prod
```
]
---
## Checking progress
- We can see sync status in the web UI or with the CLI
.lab[
- Let's check app status with the CLI:
```bash
argocd app list
```
- We can also check directly with the Kubernetes CLI:
```bash
kubectl get applications
```
]
- The app is there and it is `OutOfSync`!
---
## Manual sync with the CLI
- By default the "sync policy" is `manual`
- It can also be set to `auto`, which would check the git repository every 3 minutes
(this interval can be [configured globally][pollinginterval])
- Manual sync can be triggered with the CLI
.lab[
- Let's force an immediate sync of our app:
```bash
argocd app sync kubercoins
```
]
🤔 We're getting errors!
---
## Sync failed
We should receive a failure:
`FATA[0000] Operation has completed with phase: Failed`
And in the output, we see more details:
`Message: one or more objects failed to apply,`
<br/>
`reason: namespaces "kubercoins-prod" not found`
---
## Creating the namespace
- There are multiple ways to achieve that
- We could generate a YAML manifest for the namespace and add it to the git repository
- Or we could use "Sync Options" so that ArgoCD creates it automatically!
- ArgoCD provides many "Sync Options" to handle various edge cases
- Some [others](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/) are: `FailOnSharedResource`, `PruneLast`, `PrunePropagationPolicy`...
---
## Editing the app's sync options
- This can be done through the web UI or the CLI
.lab[
- Let's use the CLI once again:
```bash
argocd app edit kubercoins
```
- Add the following to the YAML manifest, at the root level:
```yaml
syncPolicy:
syncOptions:
- CreateNamespace=true
```
]
---
## Sync again
.lab[
- Let's retry the sync operation:
```bash
argocd app sync kubercoins
```
- And check the application status:
```bash
argocd app list
kubectl get applications
```
]
- It should show `Synced` and `Progressing`
- After a while (when all pods are running correctly) it should be `Healthy`
---
## Managing Applications via the Web UI
- ArgoCD is popular in large part due to its browser-based UI
- Let's see how to manage Applications in the web UI
.lab[
- Expose the web dashboard on a local port:
```bash
argocd admin dashboard
```
- This command will show the dashboard URL; open it in a browser
- Authentication should be automatic
]
Note: `argocd admin dashboard` is similar to `kubectl port-forward` or `kubectl proxy`.
(The dashboard remains available as long as `argocd admin dashboard` is running.)
---
## Adding a staging Application
- Let's add another Application for a staging environment
- First, create a new branch (e.g. `staging`) in our kubercoins fork
- Then, in the ArgoCD web UI, click on the "+ NEW APP" button
(on a narrow display, it might just be "+", right next to buttons looking like 🔄 and ↩️)
- See next slides for details about that form!
---
## Defining the Application
| Field | Value |
|------------------|--------------------------------------------|
| Application Name | `kubercoins-stg` |
| Project Name | `default` |
| Sync policy | `Manual` |
| Sync options | check `auto-create namespace` |
| Repository URL | `https://github.com/<username>/<reponame>` |
| Revision | `<branchname>` |
| Path | `.` |
| Cluster URL | `https://kubernetes.default.svc` |
| Namespace | `kubercoins-stg` |
Then click on the "CREATE" button (top left).
---
## Synchronizing the Application
- After creating the app, it should now show up in the app tiles
(with a yellow outline to indicate that it's out of sync)
- Click on the "SYNC" button on the app tile to show the sync panel
- In the sync panel, click on "SYNCHRONIZE"
- The app will start to synchronize, and should become healthy after a little while
---
## Making changes
- Let's make changes to our application manifests and see what happens
.lab[
- Make a change to a manifest
(for instance, change the number of replicas of a Deployment)
- Commit that change and push it to the staging branch
- Check the application sync status:
```bash
argocd app list
```
]
- After a short period of time (a few minutes max) the app should show up "out of sync"
---
## Automated synchronization
- We don't want to manually sync after every change
(that wouldn't be true continuous deployment!)
- We're going to enable "auto sync"
- Note that this requires much more rigorous testing and observability!
(we need to be sure that our changes won't crash our app or even our cluster)
- Argo project also provides [Argo Rollouts][rollouts]
(a controller and CRDs to provide blue-green, canary deployments...)
- Today we'll just turn on automated sync for the staging namespace
---
## Enabling auto-sync
- In the web UI, go to *Applications* and click on *kubercoins-stg*
- Click on the "DETAILS" button (top left, might be just an "i" sign on narrow displays)
- Click on "ENABLE AUTO-SYNC" (under "SYNC POLICY")
- After a few minutes the changes should show up!
---
## Rolling back
- If we deploy a broken version, how do we recover?
- "The GitOps way": revert the changes in source control
(see next slide)
- Emergency rollback:
- disable auto-sync (if it was enabled)
- on the app page, click on "HISTORY AND ROLLBACK"
<br/>
(with the clock-with-backward-arrow icon)
- click on the "..." button next to the button we want to roll back to
- click "Rollback" and confirm
---
## Rolling back with GitOps
- The correct way to roll back is rolling back the code in source control
```bash
git checkout staging
git revert HEAD
git push origin staging
```
---
## Working with Helm
- ArgoCD supports different tools to process Kubernetes manifests:
Kustomize, Helm, Jsonnet, and [Config Management Plugins][cmp]
- Let's see how to deploy Helm charts with ArgoCD!
- In the [kubercoins] repository, there is a branch called [helm-branch]
- It provides a generic Helm chart, in the [generic-service] directory
- There are service-specific values YAML files in the [values] directory
- Let's create one application for each of the 5 components of our app!
---
## Creating a Helm Application
- The example below uses "upstream" kubercoins
- Feel free to use your own fork instead!
.lab[
- Create an Application for `hasher`:
```bash
argocd app create hasher \
--repo https://github.com/jpetazzo/kubercoins.git \
--path generic-service --revision helm \
--dest-server https://kubernetes.default.svc \
--dest-namespace kubercoins-helm \
--sync-option CreateNamespace=true \
--values ../values/hasher.yaml \
--sync-policy=auto
```
]
---
## Deploying the rest of the application
- Option 1: repeat the previous command (updating app name and values)
- Option 2: author YAML manifests and apply them
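For option 2, a sketch of an equivalent Application manifest, mirroring the CLI flags above (using `rng` as the example component):

```yaml
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: rng
  namespace: argocd
spec:
  project: default
  source:
    repoURL: https://github.com/jpetazzo/kubercoins.git
    targetRevision: helm
    path: generic-service
    helm:
      valueFiles:
      - ../values/rng.yaml
  destination:
    server: https://kubernetes.default.svc
    namespace: kubercoins-helm
  syncPolicy:
    automated: {}
    syncOptions:
    - CreateNamespace=true
```

Applying this manifest in the `argocd` namespace has the same effect as the `argocd app create` command.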
---
## Additional considerations
- When running in production, ArgoCD can be integrated with an [SSO provider][sso]
- ArgoCD embeds and bundles [Dex] to delegate authentication
- it can also use an existing OIDC provider (Okta, Keycloak...)
- A single ArgoCD instance can manage multiple clusters
(but it's also fine to have one ArgoCD per cluster)
- ArgoCD can be complemented with [Argo Rollouts][rollouts] for advanced rollout control
(blue/green, canary...)
---
## Acknowledgements
Many thanks to
Anton (Ant) Weiss ([antweiss.com](https://antweiss.com), [@antweiss](https://twitter.com/antweiss))
and
Guilhem Lettron
for contributing an initial version and suggestions to this ArgoCD chapter.
All remaining typos, mistakes, or approximations are mine (Jérôme Petazzoni).
[argocd-binaries]: https://github.com/argoproj/argo-cd/releases/latest
[argocd-helmchart]: https://artifacthub.io/packages/helm/argo/argocd-apps
[argocd-packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
[Dex]: https://github.com/dexidp/dex
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
[helm-branch]: https://github.com/jpetazzo/kubercoins/tree/helm
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
[kubercoins]: https://github.com/jpetazzo/kubercoins
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
[rollouts]: https://argoproj.github.io/rollouts/
[sso]: https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/#sso
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
???
:EN:- Implementing gitops with ArgoCD
:FR:- Workflow gitops avec ArgoCD

View File

@@ -856,7 +856,7 @@ class: extra-details
- To learn more about Kubernetes attacks and threat models around RBAC:
📽️ [Hacking into Kubernetes Security for Beginners](https://www.youtube.com/watch?v=mLsCm9GVIQg)
by [V Körbes](https://twitter.com/veekorbes)
by [Ellen Körbes](https://twitter.com/ellenkorbes)
and [Tabitha Sable](https://twitter.com/TabbySable)
---

View File

@@ -1,173 +0,0 @@
# Bento & PostgreSQL
- Bento can also use SQL databases for input/output
- We're going to demonstrate that by writing to a PostgreSQL database
- That database will be deployed with the Cloud Native PostGres operator
(https://cloudnative-pg.io/)
---
## CNPG in a nutshell
- Free, open source
- Originally created by [EDB] (EnterpriseDB, well-known PgSQL experts)
- Non-exhaustive list of features:
- provisioning of Postgres servers, replicas, bouncers
- automatic failover
- backups (full backups and WAL shipping)
- provisioning from scratch, from backups, PITR
- manual and automated switchover (e.g. for node maintenance)
- and many more!
[EDB]: https://www.enterprisedb.com/workload/kubernetes
---
## What we're going to do
1. Install CNPG.
2. Provision a Postgres cluster.
3. Configure Bento to write to that cluster.
4. Set up a Grafana dashboard to see the data.
---
## 1⃣ Installing CNPG
Many options available, see the [documentation][cnpg-install]:
- raw YAML manifests
- kubectl CNPG plugin (`kubectl cnpg install generate`)
- Helm chart
- OLM
[cnpg-install]: https://cloudnative-pg.io/documentation/1.24/installation_upgrade/
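For instance, with the Helm chart (command as documented by the CNPG project):

```bash
helm repo add cnpg https://cloudnative-pg.github.io/charts
helm upgrade --install cnpg \
  --namespace cnpg-system --create-namespace \
  cnpg/cloudnative-pg
```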
---
## 2⃣ Provisioning a Postgres cluster
Minimal manifest:
```yaml
apiVersion: postgresql.cnpg.io/v1
kind: Cluster
metadata:
name: db
spec:
storage:
size: 1Gi
```
---
class: extra-details
## For production...
We might also add:
- `spec.monitoring.enablePodMonitor: true`
- `spec.instances: 2`
- `resources.{requests,limits}.{cpu,memory}`
- `walStorage.size`
- `backup`
- `postgresql.parameters`
See [this manifest][cluster-maximal] for a detailed example.
[cluster-maximal]: https://github.com/jpetazzo/pozok/blob/main/cluster-maximal.yaml
---
## 3⃣ Configuring Bento to write to SQL
- We'll use the [`sql_insert`][sql-insert] output
- If our cluster is named `mydb`, there will be a Secret `mydb-app`
- This Secret will contain a `uri` field
- That field can be used as the `dsn` in the Bento configuration
- We will also need to create the table that we want to use
(see next slide for instructions)
[sql-insert]: https://warpstreamlabs.github.io/bento/docs/components/outputs/sql_insert
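Putting it together, a sketch of the output section (assuming the Secret's `uri` field is exposed to Bento as a `PGURI` environment variable):

```yaml
output:
  sql_insert:
    driver: postgres
    dsn: "${PGURI}"  # assumption: env var populated from the mydb-app Secret's uri field
    table: cities
    columns: [ city, population ]
    args_mapping: root = [ this.city, this.population ]
```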
---
## Creating a table
- If we just want to store the city name and its population:
```sql
CREATE TABLE IF NOT EXISTS cities (
city varchar(100) NOT NULL,
population integer
);
```
- This statement can be executed:
- manually, by getting a `psql` shell with `kubectl cnpg psql mydb app`
  - automatically, with Bento's `init_statement`
---
## 4⃣ Viewing the table in Grafana
- In Grafana, in the home menu on the left, click "Connections"
- Add a PostgreSQL data source
- Enter the host:port, database, user, password
- Then add a visualization using that data source
(it should be relatively self-explanatory!)
---
class: extra-details
## Automating it all
- Expose PostgreSQL credentials through environment variables
(in the Bento container)
- Use the `${...}` syntax in Bento to use these environment variables
- Export the Grafana dashboard to a JSON file
- Store the JSON file in a ConfigMap, with label `grafana_dashboard=1`
- Create that ConfigMap in the namespace where Grafana is running
- Similarly, data sources (like the Redis and the PostgreSQL one) can be defined in YAML
- And that YAML can be put in a ConfigMap with label `grafana_datasource=1`
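For instance, for the dashboard part (names and namespace are assumptions; this relies on a Grafana sidecar watching for labeled ConfigMaps):

```bash
kubectl create configmap cities-dashboard \
  --namespace monitoring \
  --from-file=cities-dashboard.json
kubectl label configmap cities-dashboard \
  --namespace monitoring \
  grafana_dashboard=1
```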

View File

@@ -1,450 +0,0 @@
# Autoscaling with KEDA
- Cluster autoscaling = automatically add nodes *when needed*
- *When needed* = when Pods are `Pending`
- How do these pods get created?
- When the Ollama Deployment is scaled up
- ... manually (e.g. `kubectl scale`)
- ... automatically (that's what we want to investigate now!)
---
## Ways to implement autoscaling
- Custom code
(e.g. crontab checking some value every few minutes and scaling accordingly)
- Kubernetes Horizontal Pod Autoscaler v1
(aka `kubectl autoscale`)
- Kubernetes Horizontal Pod Autoscaler v2 with custom metrics
(e.g. with Prometheus Adapter)
- Kubernetes Horizontal Pod Autoscaler v2 with external metrics
(e.g. with KEDA)
---
## Custom code
- No, we're not going to do that!
- But this would be an interesting exercise in RBAC
(setting minimal amount of permissions for the pod running our custom code)
---
## HPAv1
Pros: very straightforward
Cons: can only scale on CPU utilization
How it works:
- periodically measures average CPU *utilization* across pods
- if utilization is above/below a target (default: 80%), scale up/down
---
## HPAv1 in practice
- Create the autoscaling policy:
```bash
kubectl autoscale deployment ollama --max=1000
```
(The `--max` is required; it's a safety limit.)
- Check it:
```bash
kubectl describe hpa
```
- Send traffic, wait a bit: pods should be created automatically
---
## HPAv2 custom vs external
- Custom metrics = arbitrary metrics attached to Kubernetes objects
- External metrics = arbitrary metrics not related to Kubernetes objects
--
🤔
---
## HPAv2 custom metrics
- Examples:
- on Pods: CPU, RAM, network traffic...
- on Ingress: requests per second, HTTP status codes, request duration...
- on some worker Deployment: number of tasks processed, task duration...
- Requires an *adapter* to:
- expose the metrics through the Kubernetes *aggregation layer*
- map the actual metrics source to Kubernetes objects
Example: the [Prometheus adapter][prometheus-adapter]
[prometheus-adapter]: https://github.com/kubernetes-sigs/prometheus-adapter
---
## HPAv2 custom metrics in practice
- We're not going to cover this here
(too complex / not enough time!)
- If you want more details, check [my other course material][hpav2slides]
[hpav2slides]: https://2024-10-enix.container.training/4.yml.html#toc-scaling-with-custom-metrics
---
## HPAv2 external metrics
- Examples:
- arbitrary Prometheus query
- arbitrary SQL query
- number of messages in a queue
- and [many, many more][keda-scalers]
- Also requires an extra component to expose the metrics
Example: [KEDA (https://keda.sh/)](https://keda.sh)
[keda-scalers]: https://keda.sh/docs/latest/scalers/
---
## HPAv2 external metrics in practice
- We're going to install KEDA
- And set it up to autoscale depending on the number of messages in Redis
---
## Installing KEDA
Multiple options (details in the [documentation][keda-deploy]):
- YAML
- Operator Hub
- Helm chart 💡
```bash
helm upgrade --install --repo https://kedacore.github.io/charts \
--namespace keda-system --create-namespace keda keda
```
[keda-deploy]: https://keda.sh/docs/latest/deploy/
---
## Scaling according to Redis
- We need to create a KEDA Scaler
- This is done with a "ScaledObject" manifest
- [Here is the documentation][keda-redis-lists] for the Redis Lists Scaler
- Let's write that manifest!
[keda-redis-lists]: https://keda.sh/docs/latest/scalers/redis-lists/
---
## `keda-redis-scaler.yaml`
```yaml
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
name: ollama
spec:
scaleTargetRef:
name: ollama
triggers:
- type: redis
metadata:
address: redis.`default`.svc:6379
listName: cities
listLength: "10"
```
---
## Notes
- We need to update the `address` field with our namespace
(unless we are running in the `default` namespace)
- Alternative: use `addressFromEnv` and set an env var in the Ollama pods
- `listLength` gives the target ratio of `messages / replicas`
- In our example, KEDA will scale the Deployment to `messages / 10`
(rounded up!)
---
## Trying it out
- Apply the ScaledObject manifest
- Start a Bento pipeline loading e.g. 100-1000 cities in Redis
(100 on smaller clusters / slower CPUs, 1000 on bigger / faster ones)
- Check pod and node resource usage
- What do we see?
--
🤩 The Deployment scaled up automatically!
--
🤔 But Pod resource usage remains very low (a few busy pods, many idle ones)
--
💡 Bento doesn't submit enough requests in parallel!
---
## Improving throughput
We're going to review multiple techniques:
1. Increase parallelism inside the Bento pipeline.
2. Run multiple Bento consumers.
3. Couple consumers and processors more tightly.
---
## 1⃣ Increase pipeline parallelism
- Set `parallel` to `true` in the `http` processor
- Wrap the input around a `batched` input
(otherwise, we don't have enough messages in flight)
- Increase `http` timeout significantly (e.g. to 5 minutes)
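A sketch of these three changes applied to the enrichment pipeline from the Bento chapter (batch size and timeout values are arbitrary):

```yaml
input:
  batched:
    policy:
      count: 50      # arbitrary batch size, to keep enough messages in flight
      period: 1s
    child:
      redis_list:
        url: redis://redis:6379
        key: cities
pipeline:
  processors:
    - branch:
        request_map: |
          root.model = "qwen2:1.5b"
          root.stream = false
          root.prompt = "Who is the mayor of %s?".format(this.city)
        processors:
          - http:
              url: http://ollama:11434/api/generate
              verb: POST
              parallel: true   # send the requests of each batch in parallel
              timeout: 5m
        result_map: |
          root.mayor = this.response
```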
---
## Results
🎉 More messages flow through the pipeline
🎉 Many requests happen in parallel
🤔 Average Pod and Node CPU utilization is higher, but not maxed out
🤔 HTTP queue size (measured with HAProxy metrics) is relatively high
🤔 Latency is higher too
Why?
---
## Too many requests in parallel
- Earlier, we didn't have enough...
- ...Now, we have too many!
- However, for a very big request queue, it still wouldn't be enough
💡 We currently have a fixed parallelism. We need to make it dynamic!
---
## 2⃣ Run multiple Bento consumers
- Restore the original Bento configuration
(flip `parallel` back to `false`; remove the `batched` input)
- Run Bento in a Deployment
(e.g. with the [Bento Helm chart][bento-helm-chart])
- Autoscale that Deployment like we autoscaled the Ollama Deployment
[bento-helm-chart]: https://github.com/warpstreamlabs/bento-helm-chart
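For instance (repo URL and chart name as published in the chart's README; release and values file names are placeholders):

```bash
helm repo add bento https://warpstreamlabs.github.io/bento-helm-chart
helm install bento-consumer bento/bento -f bento-values.yaml
```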
---
## Results
🤔🤔🤔 Pretty much the same as before!
(High throughput, high utilization but not maxed out, high latency...)
--
🤔🤔🤔 Why?
---
## Unbalanced load balancing
- All our requests go through the `ollama` Service
- We're still using the default Kubernetes service proxy!
- It doesn't spread the requests properly across all the backends
---
## 3⃣ Couple consumers and processors
What if:
--
instead of sending requests to a load balancer,
--
each queue consumer had its own Ollama instance?
---
## Current architecture
<pre class="mermaid">
flowchart LR
subgraph P1["Pod"]
H1["HAProxy"] --> O1["Ollama"]
end
subgraph P2["Pod"]
H2["HAProxy"] --> O2["Ollama"]
end
subgraph P3["Pod"]
H3["HAProxy"] --> O3["Ollama"]
end
Q["Queue<br/>(Redis)"] <--> C["Consumer<br/>(Bento)"] --> LB["Load Balancer<br/>(kube-proxy)"]
LB --> H1 & H2 & H3
</pre>
---
## Proposed architecture
<pre class="mermaid">
flowchart LR
subgraph P1["Consumer Pod"]
C1["Bento"] --> H1["HAProxy"] --> O1["Ollama"]
end
subgraph P2["Consumer Pod"]
C2["Bento"] --> H2["HAProxy"] --> O2["Ollama"]
end
subgraph P3["Consumer Pod"]
C3["Bento"] --> H3["HAProxy"] --> O3["Ollama"]
end
Queue["Queue"] <--> C1 & C2 & C3
</pre>
---
## 🏗️ Let's build something!
- Let's implement that architecture!
- See next slides for hints / getting started
---
## Hints
We need to:
- Update the Bento consumer configuration to talk to localhost
- Store that configuration in a ConfigMap
- Add a Bento container to the Ollama Deployment
- Profit!
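A sketch of the last step, as a strategic merge patch (image and file names are assumptions):

```yaml
# bento-sidecar.yaml — apply with:
#   kubectl patch deployment ollama --patch-file bento-sidecar.yaml
spec:
  template:
    spec:
      containers:
      - name: bento
        image: ghcr.io/warpstreamlabs/bento
        args: [ "-c", "/etc/bento/consumer.yaml" ]
        volumeMounts:
        - name: bento-config
          mountPath: /etc/bento
      volumes:
      - name: bento-config
        configMap:
          name: bento-consumer
```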
---
## Results
🎉 Node and Pod utilization is maximized
🎉 HTTP queue size is bounded
🎉 Deployment autoscales up and down
---
## ⚠️ Scaling down
- Eventually, there are fewer messages in the queue
- The HPA scales down the Ollama Deployment
- This terminates some Ollama Pods
🤔 What happens if these Pods were processing requests?
--
- The requests might be lost!
---
## Avoiding lost messages
Option 1:
- cleanly shutdown the consumer
- make sure that Ollama can complete in-flight requests
(by extending its grace period)
- find a way to terminate Ollama when no more requests are in flight
Option 2:
- use *message acknowledgement*

View File

@@ -1,623 +0,0 @@
# Getting started with Bento
How can we move to a message queue architecture...
*...without rewriting a bunch of code?*
🤔
---
## Bento
https://bento.dev/
"Fancy stream processing made operationally mundane"
"Written in Go, deployed as a static binary, declarative configuration. Open source and cloud native as utter heck."
With ✨ amazing ✨ documentation 😍
---
class: extra-details
## Tiny bit of history
- Original project: Benthos
- May 30, 2024: [Redpanda acquires Benthos][redpanda-acquires-benthos]
- Benthos is now Redpanda Connect
- some parts have been relicensed as commercial products
- May 31, 2024: [Warpstream forks Benthos][warpstream-forks-benthos]
- that fork is named "Bento"
- it's fully open source
- We're going to use Bento here, but Redpanda Connect should work fine too!
---
## Bento concepts
- Message stream processor
- Each pipeline is configured by a YAML configuration that defines:
- input (where do we get the messages?)
- pipeline (optional: how do we transform the messages?)
- output (where do we put the messages afterwards?)
- Once Bento is started, it runs the pipelines forever
(except for pipelines that have a logical end, e.g. reading from a file)
- Embedded language (Bloblang) to manipulate/transform messages
---
## Messages
- Typically JSON objects
(but raw strings are also possible)
- Nesting, arrays, etc. are OK
---
## Getting started with Bento
We're going to:
1. Import a bunch of cities from a CSV file into a Redis queue.
2. Read back these cities using a web server.
3. Use an "enrichment workflow" to query our LLM for each city.
---
## 1⃣ Importing cities
Let's break down the work:
- download the data set
- create the Bento configuration
- deploy Redis
- start Bento
---
## Downloading the data set
- Example database:
https://www.kaggle.com/datasets/juanmah/world-cities
- Let's download and uncompress the data set:
```bash
curl -fsSL https://www.kaggle.com/api/v1/datasets/download/juanmah/world-cities |
funzip > cities.csv
```
(Ignore the "length error", it's harmless!)
- Check the structure of the data set:
```bash
head cities.csv
```
---
## Creating the Bento configuration
- We need to find which `input` and `output` to use
- Check the list with `bento list` or the [documentation][bento-inputs]
- Then run `bento create INPUTNAME/PIPELINENAME/OUTPUTNAME`
- Generate a configuration file:
```bash
bento create csv//redis_list > csv2redis.yaml
```
- Edit that configuration file; look for the `(required)` parameters
(Everything else can go away!)
---
## Resulting configuration
If we trim all the default values, here is the result:
```yaml
input:
csv:
paths: ["cities.csv"]
output:
redis_list:
url: redis://redis:6379 # No default (required)
key: cities
```
We'll call that file `csv2redis.yaml`.
---
## Deploying Redis
- Create a Deployment:
```bash
kubectl create deployment redis --image redis
```
- Expose it:
```bash
kubectl expose deployment redis --port 6379
```
---
## Starting Bento
Option 1: run it manually in a pod, to see what's going on.
```bash
bento --config csv2redis.yaml
```
Option 2: run it with e.g. the Bento Helm chart.
*We're not going to do that yet, since this particular pipeline has a logical end.*
*(The Helm chart is best suited to pipelines that run forever.)*
---
## Expected output
.small[
```
INFO Running main config from specified file @service=bento bento_version="" path=csv2redis.yaml
INFO Launching a Bento instance, use CTRL+C to close @service=bento
INFO Listening for HTTP requests at: http://0.0.0.0:4195 @service=bento
INFO Input type csv is now active @service=bento label="" path=root.input
INFO Output type redis_list is now active @service=bento label="" path=root.output
INFO Pipeline has terminated. Shutting down the service @service=bento
```
]
The pipeline should complete in just a few seconds.
---
## Checking what's in Redis
- Connect to our Redis instance:
```bash
redis-cli -h redis
```
- List keys:
```redis
KEYS *
```
- Check that the `cities` list has approx. 47000 elements:
```redis
LLEN cities
```
- Get the first element of the list:
```redis
LINDEX cities 0
```
---
## Fun with Bloblang
- Let's add a filter to keep only cities with a population above 10,000,000
- Add the following block to the Bento configuration:
```yaml
pipeline:
processors:
- switch:
- check: this.population == ""
processors:
- mapping: root = deleted()
- check: this.population.int64() < 10000000
processors:
- mapping: root = deleted()
```
(See the [docs][bento-switch] for details about the `switch` processor.)
---
## Testing our processor
- First, delete the existing `cities` list:
```bash
redis-cli -h redis DEL cities
```
- Then, run the Bento pipeline again:
```bash
bento --config csv2redis.yaml
```
(It should complain about a few cities where the population has a decimal point.)
- Check how many cities were loaded:
```bash
redis-cli -h redis LLEN cities
```
(There should be 47.)
---
## 2⃣ Consume the queue over HTTP
- We want to "get the next city" in the queue with a simple `curl`
- Our input will be `redis_list`
- Our output will be `http_server`
---
## Generate the Bento configuration
Option 1: `bento create redis_list//http_server`
Option 2: [read the docs][output-http-server]
---
## 🙋 Choose your own adventure
Do you want to try to write that configuration?
Or shall we see it right away?
--
⚠️ Spoilers on next slide!
---
## `redis2http.yaml`
```yaml
input:
redis_list:
url: redis://redis:`6379`
key: cities
output:
http_server:
path: /nextcity
```
This will set up an HTTP route to fetch *one* city.
It's also possible to batch, stream...
⚠️ As of November 2024, `bento create` uses port 6397 instead of 6379 for Redis!
---
## Trying it out
- Run Bento with this configuration:
```bash
bento --config redis2http.yaml &
```
- Retrieve one city:
```bash
curl http://localhost:4195/nextcity
```
- Check what happens after we retrieve *all* the cities!
---
## 3⃣ Query our LLM for each city
- We want to ask our LLM who's the mayor of each of these cities
- We'll use a prompt that will usually ensure a short answer
(so that it's faster; we don't want to wait 30 seconds per city!)
- We'll test the prompt with the Ollama CLI
- Then we'll craft a proper HTTP API query
- Finally, we'll configure an [enrichment workflow][enrichment] in Bento
---
## Test our prompt
Assuming that our earlier Ollama Deployment is still running:
```bash
kubectl exec deployment/ollama -- \
ollama run qwen2:1.5b "
Who is the mayor of San Francisco?
Just give the name by itself on a single line.
If you don't know, don't say anything.
"
```
---
## Turn the prompt into an HTTP API query
Note: to install `http` in an Alpine container, run `apk add httpie`.
```bash
http http://ollama.default:11434/api/generate \
model=qwen2:1.5b stream:=false prompt="
Who is the mayor of Paris?
Just give the name by itself on a single line.
If you don't know, don't say anything.
"
```
We get a JSON payload, and we want to use the `response` field.
---
## Configure an enrichment workflow
The [Bento documentation][enrichment] is really good!
We need to set up:
- a `branch` processor
- a `request_map` to transform the city into an Ollama request
- an `http` processor to submit the request to Ollama
- a `result_map` to transform the Ollama response
---
## Without the `branch` processor
<pre class="mermaid">
flowchart LR
CITY["
city: Paris
country: France
population: 1106000
iso2: FR
...
"]
REQ["
model: qwen2:1.5b
stream: false
prompt: Who is the mayor of Paris?
"]
REP["
response: Anne Hidalgo
eval_count: ...
prompt_eval_count: ...
(other ollama fields)
"]
CITY@{ shape: card}
REQ@{ shape: card}
REP@{ shape: card}
style CITY text-align: left
style REQ text-align: left
style REP text-align: left
mapping@{ shape: diam }
http["http processor"]@{ shape: diam }
CITY --> mapping --> REQ --> http --> REP
</pre>
- We transform the `city` into an Ollama request
- The `http` processor submits the request to Ollama
- The final output is the Ollama response
---
## With the `branch` processor
<pre class="mermaid">
flowchart LR
CITY["
city: Paris
country: France
population: 1106000
iso2: FR
...
"]
REQ["
model: qwen2:1.5b
stream: false
prompt: Who is the mayor of Paris?
"]
REP["
response: Anne Hidalgo
eval_count: ...
prompt_eval_count: ...
(other ollama fields)
"]
OUT["
city: Paris
country: France
population: 1106000
iso2: FR
...
mayor: Anne Hidalgo
"]
CITY@{ shape: card}
REQ@{ shape: card}
REP@{ shape: card}
OUT@{ shape: card}
style CITY text-align: left
style REQ text-align: left
style REP text-align: left
style OUT text-align: left
branch@{ shape: diam }
request_map@{ shape: diam }
result_map@{ shape: diam }
http["http processor"]@{ shape: diam }
CITY --> branch
branch --> result_map
branch --> request_map
request_map --> REQ
REQ --> http
http --> REP
REP --> result_map
result_map --> OUT
</pre>
- The `branch` processor allows doing the processing "on the side"
- `request_map` and `result_map` transform the message before/after processing
- Then, the result is combined with the original message (the `city`)
---
```yaml
input:
csv:
paths: ["cities.csv"]
pipeline:
processors:
- branch:
request_map: |
root.model = "qwen2:1.5b"
root.stream = false
root.prompt = (
"Who is the mayor of %s? ".format(this.city) +
"Just give the name by itself on a single line. " +
"If you don't know, don't say anything."
)
processors:
- http:
url: http://ollama:11434/api/generate
verb: POST
result_map: |
root.mayor = this.response
```
---
## Trying it out
- Save the YAML on the previous page into a configuration file
- Run Bento with that configuration file
- What happens?
--
🤔 We're seeing errors due to timeouts
```
ERRO HTTP request to 'http://ollama...' failed: http://ollama...:
Post "http://ollama...": context deadline exceeded
(Client.Timeout exceeded while awaiting headers)
```
---
## 🙋 Choose your own adventure
How should we address errors?
- Option 1: increase the timeout in the [http][bento-http] processor
- Option 2: use a [retry][bento-retry] processor in the pipeline
- Option 3: use a [reject_errored][bento-reject] output
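For instance, options 1 and 2 could be combined like this (a sketch; the timeout value is arbitrary, and the `retry` processor is shown with its default backoff settings):
```yaml
pipeline:
  processors:
    - branch:
        # request_map: (same mapping as before)
        processors:
          - retry:               # option 2: retry failed requests
              processors:
                - http:
                    url: http://ollama:11434/api/generate
                    verb: POST
                    timeout: 1m  # option 1: raise the default timeout
        # result_map: (same mapping as before)
```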
---
## 🏗️ Let's build something!
- We want to process 1000 cities with our LLM
(guessing who the mayor is, or something similar)
- Store the output wherever we want
(Redis, CSV file, JSONL files...)
- Deal correctly with errors
(we'll check that there are, indeed, 1000 cities in the output)
- Scale out to process faster
(scale ollama to e.g. 10 replicas, enable parallelism in Bento)
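For the "scale out" part, a possible starting point (a sketch; the numbers are arbitrary) is to scale Ollama with `kubectl scale deployment ollama --replicas=10`, then raise Bento's concurrency:
```yaml
pipeline:
  threads: 10    # process up to 10 messages in parallel
  processors:
    - branch:
        # ...same branch processor as before...
```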
---
class: title
🍱 Lunch time! 🍱
---
## What happened?
- If your Ollama pods have *resource requests*:
→ your cluster may have auto-scaled
- If your Ollama pods don't have *resource requests*:
→ you probably have a bunch of container restarts, due to out-of-memory errors
🤔 What's that about?
[bento-http]: https://warpstreamlabs.github.io/bento/docs/components/processors/http/
[bento-inputs]: https://warpstreamlabs.github.io/bento/docs/components/inputs/about/
[bento-reject]: https://warpstreamlabs.github.io/bento/docs/components/outputs/reject_errored
[bento-retry]: https://warpstreamlabs.github.io/bento/docs/components/processors/retry
[bento-switch]: https://warpstreamlabs.github.io/bento/docs/components/processors/switch/
[enrichment]: https://warpstreamlabs.github.io/bento/cookbooks/enrichments/
[output-http-server]: https://warpstreamlabs.github.io/bento/docs/components/outputs/http_server
[redpanda-acquires-benthos]: https://www.redpanda.com/press/redpanda-acquires-benthos
[warpstream-forks-benthos]: https://www.warpstream.com/blog/announcing-bento-the-open-source-fork-of-the-project-formerly-known-as-benthos

View File

@@ -1,250 +0,0 @@
# Bento & RabbitMQ
- In some of the previous runs, messages were dropped
(we start with 1000 messages in `cities` and have e.g. 955 in `mayors`)
- This is caused by various errors during processing
(e.g. too many timeouts; Bento being shutdown halfway through...)
- ...And by the fact that we are using a Redis queue
(which doesn't offer delivery guarantees or acknowledgements)
- Can we get something better?
---
## The problem
- Some inputs (like `redis_list`) don't support *acknowledgements*
- When a message is pulled from the queue, it is deleted immediately
- If the message is lost for any reason, it is lost permanently
---
## The solution
- Some inputs (like `amqp_0_9`) support acknowledgements
- When a message is pulled from the queue:
- it is not visible anymore to other consumers
- it needs to be explicitly acknowledged
- The acknowledgement is done by Bento when the message reaches the output
- The acknowledgement deletes the message
- No acknowledgement after a while? Consumer crashes/disconnects?
Message gets requeued automatically!
---
## `amqp_0_9`
- Protocol used by RabbitMQ
- Very simplified behavior:
- messages are published to an [*exchange*][amqp-exchanges]
- messages have a *routing key*
- the exchange routes the message to one (or zero or more) queues
<br/>(possibly using the routing key or message headers to decide which queue(s))
- [*consumers*][amqp-consumers] subscribe to queues to receive messages
[amqp-exchanges]: https://www.rabbitmq.com/tutorials/amqp-concepts#exchanges
[amqp-consumers]: https://www.rabbitmq.com/tutorials/amqp-concepts#consumers
---
## Using the default exchange
- There is a default exchange (called `""` - empty string)
- The routing key indicates the name of the queue to deliver to
- The queue needs to exist (we need to create it beforehand)
---
class: extra-details
## Defining custom exchanges
- Create an exchange
- exchange types: direct, fanout, topic, headers
- durability: persisted to disk to survive server restart or not?
- Create a binding
- which exchange?
- which routing key? (for direct exchanges)
- which queue?
---
## RabbitMQ on Kubernetes
- RabbitMQ can be deployed on Kubernetes:
- directly (creating e.g. a StatefulSet)
- with the RabbitMQ operator
- We're going to do the latter!
- The operator includes the "topology operator"
(to configure queues, exchanges, and bindings through custom resources)
---
## Installing the RabbitMQ operator
- Let's install it with this Helm chart:
```bash
helm upgrade --install --repo https://charts.bitnami.com/bitnami \
--namespace rabbitmq-system --create-namespace \
rabbitmq-cluster-operator rabbitmq-cluster-operator
```
---
## Deploying a simple RabbitMQ cluster
- Let's use the YAML manifests in that directory:
https://github.com/jpetazzo/beyond-load-balancers/tree/main/rabbitmq
- This creates:
- a `RabbitmqCluster` called `mq`
- a `Secret` called `mq-default-user` containing access credentials
- a durable `Queue` named `q1`
(We can ignore the `Exchange` and the `Binding`, we won't use them.)
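For reference, the durable queue in those manifests is declared roughly like this (a sketch of a topology operator `Queue` resource; the exact fields may differ from the repository):
```yaml
apiVersion: rabbitmq.com/v1beta1
kind: Queue
metadata:
  name: q1
spec:
  name: q1                  # name of the queue in RabbitMQ
  durable: true             # persist the queue across broker restarts
  rabbitmqClusterReference:
    name: mq                # the RabbitmqCluster defined above
```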
---
## 🏗️ Let's build something!
Let's replace the `cities` Redis list with our RabbitMQ queue.
(See next slide for steps and hints!)
---
## Steps
1. Edit the Bento configuration for our "CSV importer".
(replace the `redis_list` output with `amqp_0_9`)
2. Run that pipeline and confirm that messages show up in RabbitMQ.
3. Edit the Bento configuration for the Ollama consumer.
(replace the `redis_list` input with `amqp_0_9`)
4. Trigger a scale up of the Ollama consumer.
5. Update the KEDA Scaler to use RabbitMQ instead of Redis.
---
## 1⃣ Sending messages to RabbitMQ
- Edit our Bento configuration (the one feeding the CSV file to Redis)
- We want the following `output` section:
```yaml
output:
amqp_0_9:
exchange: ""
key: q1
mandatory: true
urls:
- "${AMQP_URL}"
```
- Then export the AMQP_URL environment variable using `connection_string` from Secret `mq-default-user`
💡 Yes, we can directly use environment variables in Bento configuration!
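One way to set that variable (a sketch; it assumes the operator stored a ready-to-use `connection_string` key in the Secret):
```bash
export AMQP_URL=$(kubectl get secret mq-default-user \
       -o jsonpath='{.data.connection_string}' | base64 -d)
```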
---
## 2⃣ Testing our AMQP output
- Run the Bento pipeline
- To check that our messages made it:
```bash
kubectl exec mq-server-0 -- rabbitmqctl list_queues
```
- We can also use Prometheus metrics, e.g. `rabbitmq_queue_messages`
---
## 3⃣ Receiving messages from RabbitMQ
- Edit our other Bento configuration (the one in the Ollama consumer Pod)
- We want the following `input` section:
```yaml
input:
amqp_0_9:
urls:
- `amqp://...:5672/`
queue: q1
```
---
## 4⃣ Triggering Ollama scale up
- If the autoscaler is configured to scale to zero, disable it
(easiest solution: delete the ScaledObject)
- Then manually scale the Deployment to e.g. 4 Pods
- Check that messages are processed and show up in the output
(it should still be a Redis list at this point)
---
## 5⃣ Autoscaling on RabbitMQ
- We need to update our ScaledObject
- Check the [RabbitMQ Queue Scaler][keda-rabbitmq]
- Multiple ways to pass the AMQP URL:
- hardcode it (easier solution for testing!)
- use `...fromEnv` and set environment variables in target pod
- create and use a TriggerAuthentication
💡 Since we have the AMQP URL in a Secret, TriggerAuthentication works great!
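A sketch of that last approach (resource names and the queue length target are assumptions). First, the TriggerAuthentication pointing at the Secret:
```yaml
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  name: mq-auth
spec:
  secretTargetRef:
    - parameter: host          # the scaler's "host" parameter...
      name: mq-default-user    # ...comes from this Secret...
      key: connection_string   # ...and this key
```
...then the ScaledObject referencing it:
```yaml
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: ollama
spec:
  scaleTargetRef:
    name: ollama
  triggers:
    - type: rabbitmq
      metadata:
        queueName: q1
        mode: QueueLength      # scale on queue depth
        value: "5"             # target messages per replica
      authenticationRef:
        name: mq-auth
```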
[keda-rabbitmq]: https://keda.sh/docs/latest/scalers/rabbitmq-queue/

View File

@@ -55,7 +55,6 @@
`cert-manager.io/allow-direct-injection: "true"`
- See [cert-manager documentation] for details
[cert-manager documentation]: https://cert-manager.io/docs/concepts/ca-injector/
- See [cert-manager documentation][docs] for details
[docs]: https://cert-manager.io/docs/concepts/ca-injector/

View File

@@ -272,9 +272,9 @@ This can be overridden by setting the annotation:
- Can express `minAvailable` or `maxUnavailable`
- See [documentation][doc-pdb] for details and examples
- See [documentation] for details and examples
[doc-pdb]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
---

View File

@@ -81,7 +81,7 @@
## What version are we running anyway?
- When I say, "I'm running Kubernetes 1.28", is that the version of:
- When I say, "I'm running Kubernetes 1.22", is that the version of:
- kubectl
@@ -111,73 +111,6 @@
---
## Important questions
- Should we upgrade the control plane before or after the kubelets?
- Within the control plane, should we upgrade the API server first or last?
- How often should we upgrade?
- How long are versions maintained?
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
- Let's review the key elements together ...
---
## Kubernetes uses semantic versioning
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.28.9:
- MAJOR = 1
- MINOR = 28
- PATCH = 9
- It's always possible to mix and match different PATCH releases
(e.g. 1.28.9 and 1.28.13 are compatible)
- It is recommended to run the latest PATCH release
(but it's mandatory only when there is a security advisory)
---
## Version skew
- API server must be more recent than its clients (kubelet and control plane)
- ... Which means it must always be upgraded first
- All components support a difference of one¹ MINOR version
- This allows live upgrades (since we can mix e.g. 1.28 and 1.29)
- It also means that going from 1.28 to 1.30 requires going through 1.29
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
and kubectl, which can be one MINOR ahead or behind API server.]
---
## Release cycle
- There is a new PATCH release whenever necessary
(every few weeks, or "ASAP" when there is a security vulnerability)
- There is a new MINOR release every 3 months (approximately)
- At any given time, three MINOR releases are maintained
- ... Which means that MINOR releases are maintained approximately 9 months
- We should expect to upgrade at least every 3 months (on average)
---
## General guidelines
- To update a component, use whatever was used to install it
@@ -206,6 +139,73 @@ and kubectl, which can be one MINOR ahead or behind API server.]
---
## Important questions
- Should we upgrade the control plane before or after the kubelets?
- Within the control plane, should we upgrade the API server first or last?
- How often should we upgrade?
- How long are versions maintained?
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
- Let's review the key elements together ...
---
## Kubernetes uses semantic versioning
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.22.17:
- MAJOR = 1
- MINOR = 22
- PATCH = 17
- It's always possible to mix and match different PATCH releases
(e.g. 1.22.17 and 1.22.5 are compatible)
- It is recommended to run the latest PATCH release
(but it's mandatory only when there is a security advisory)
---
## Version skew
- API server must be more recent than its clients (kubelet and control plane)
- ... Which means it must always be upgraded first
- All components support a difference of one¹ MINOR version
- This allows live upgrades (since we can mix e.g. 1.22 and 1.23)
- It also means that going from 1.22 to 1.24 requires going through 1.23
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
and kubectl, which can be one MINOR ahead or behind API server.]
---
## Release cycle
- There is a new PATCH release whenever necessary
(every few weeks, or "ASAP" when there is a security vulnerability)
- There is a new MINOR release every 3 months (approximately)
- At any given time, three MINOR releases are maintained
- ... Which means that MINOR releases are maintained approximately 9 months
- We should expect to upgrade at least every 3 months (on average)
---
## In practice
- We are going to update a few cluster components
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
```
- Look for the `image:` line, and update it to e.g. `v1.30.1`
- Look for the `image:` line, and update it to e.g. `v1.24.1`
]
@@ -320,29 +320,53 @@ Note 2: kubeadm itself is still version 1.22.1..
- First things first: we need to upgrade kubeadm
- The Kubernetes package repositories are now split by minor versions
.lab[
(i.e. there is one repository for 1.28, another for 1.29, etc.)
- Upgrade kubeadm:
```
sudo apt install kubeadm=1.27.0-00
```
- This avoids accidentally upgrading from one minor version to another
- Check what kubeadm tells us:
```
sudo kubeadm upgrade plan
```
(e.g. with unattended upgrades or if packages haven't been held/pinned)
]
- We'll need to add the new package repository and unpin packages!
Problem: kubeadm doesn't know how to handle
upgrades from version 1.22.
This is because we installed version 1.27.
We need to install kubeadm version 1.23.X.
---
## Installing the new packages
## Downgrading kubeadm
- Edit `/etc/apt/sources.list.d/kubernetes.list`
- We need to go back to kubeadm version 1.23.X.
(or copy it to e.g. `kubernetes-1.29.list` and edit that)
.lab[
- `apt-get update`
- View available versions for package `kubeadm`:
```bash
apt show kubeadm -a | grep ^Version | grep 1.23
```
- Now edit (or remove) `/etc/apt/preferences.d/kubernetes`
- Downgrade kubeadm:
```
sudo apt install kubeadm=1.23.0-00
```
- `apt-get install kubeadm` should now upgrade `kubeadm` correctly! 🎉
- Check what kubeadm tells us:
```
sudo kubeadm upgrade plan
```
]
kubeadm should now agree to upgrade to 1.23.X.
---
@@ -361,7 +385,7 @@ Note 2: kubeadm itself is still version 1.22.1..
- Look for the `image:` line, and restore it to the original value
(e.g. `v1.28.9`)
(e.g. `v1.22.17`)
- Wait for the control plane to come back up
@@ -375,14 +399,9 @@ Note 2: kubeadm itself is still version 1.22.1..
.lab[
- Check the upgrade plan:
```bash
sudo kubeadm upgrade plan
```
- Perform the upgrade:
```bash
sudo kubeadm upgrade apply v1.29.0
sudo kubeadm upgrade apply v1.23.0
```
]
@@ -399,9 +418,15 @@ Note 2: kubeadm itself is still version 1.22.1..
- Log into node `oldversion2`
- Update package lists and APT pins like we did before
- View available versions for package `kubelet`:
```bash
apt show kubelet -a | grep ^Version
```
- Then upgrade kubelet
- Upgrade kubelet:
```bash
sudo apt install kubelet=1.23.0-00
```
]
@@ -454,16 +479,13 @@ Note 2: kubeadm itself is still version 1.22.1..
.lab[
- Execute the whole upgrade procedure on each node:
- Download the configuration on each node, and upgrade kubelet:
```bash
for N in 1 2 3; do
ssh oldversion$N "
sudo sed -i s/1.28/1.29/ /etc/apt/sources.list.d/kubernetes.list &&
sudo rm /etc/apt/preferences.d/kubernetes &&
sudo apt update &&
sudo apt install kubeadm -y &&
sudo apt install kubeadm=1.23.0-00 &&
sudo kubeadm upgrade node &&
sudo apt install kubelet -y"
sudo apt install kubelet=1.23.0-00"
done
```
]
@@ -472,7 +494,7 @@ Note 2: kubeadm itself is still version 1.22.1..
## Checking what we've done
- All our nodes should now be updated to version 1.29
- All our nodes should now be updated to version 1.23.0
.lab[
@@ -485,115 +507,17 @@ Note 2: kubeadm itself is still version 1.22.1..
---
## And now, was that a good idea?
--
**Almost!**
--
- The official recommendation is to *drain* a node before performing node maintenance
(migrate all workloads off the node before upgrading it)
- How do we do that?
- Is it really necessary?
- Let's see!
---
## Draining a node
- This can be achieved with the `kubectl drain` command, which will:
- *cordon* the node (prevent new pods from being scheduled there)
- *evict* all the pods running on the node (delete them gracefully)
- the evicted pods will automatically be recreated somewhere else
- evictions might be blocked in some cases (Pod Disruption Budgets, `emptyDir` volumes)
- Once the node is drained, it can safely be upgraded, restarted...
- Once it's ready, it can be put back in commission with `kubectl uncordon`
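In practice, that sequence looks like this (a sketch; the node name is a placeholder):
```bash
# Cordon the node and evict its pods (DaemonSet pods are left alone)
kubectl drain node-2 --ignore-daemonsets --delete-emptydir-data

# ...perform the maintenance, then allow scheduling again:
kubectl uncordon node-2
```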
---
## Is it necessary?
- When upgrading kubelet from one patch-level version to another:
- it's *probably fine*
- When upgrading system packages:
- it's *probably fine*
- except [when it's not][datadog-systemd-outage]
- When upgrading the kernel:
- it's *probably fine*
- ...as long as we can tolerate a restart of the containers on the node
- ...and that they will be unavailable for a few minutes (during the reboot)
[datadog-systemd-outage]: https://www.datadoghq.com/blog/engineering/2023-03-08-deep-dive-into-platform-level-impact/
---
## Is it necessary?
- When upgrading kubelet from one minor version to another:
- it *may or may not be fine*
- in some cases (e.g. migrating from Docker to containerd) it *will not*
- Here's what [the documentation][node-upgrade-docs] says:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- Do it at your own risk, and if you do, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
## Database operators to the rescue
- Moving stateful pods (e.g.: database server) can cause downtime
- Database replication can help:
- if a node contains database servers, we make sure these servers aren't primaries
- if they are primaries, we execute a *switch over*
- Some database operators (e.g. [CNPG]) will do that switch over automatically
(when they detect that a node has been *cordoned*)
[CNPG]: https://cloudnative-pg.io/
---
class: extra-details
## Skipping versions
- This example worked because we went from 1.28 to 1.29
- This example worked because we went from 1.22 to 1.23
- If you are upgrading from e.g. 1.26, you will have to go through 1.27 first
- If you are upgrading from e.g. 1.21, you will have to go through 1.22 first
- This means upgrading kubeadm to 1.27.X, then using it to upgrade the cluster
- This means upgrading kubeadm to 1.22.X, then using it to upgrade the cluster
- Then upgrading kubeadm to 1.28.X, etc.
- Then upgrading kubeadm to 1.23.X, etc.
- **Make sure to read the release notes before upgrading!**

View File

@@ -225,4 +225,4 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
:EN:- Scheduling pods together or separately
:EN:- Example: deploying a Consul cluster
:FR:- Lancer des pods ensemble ou séparément
:FR:- Exemple : lancer un cluster Consul
:FR:- Example : lancer un cluster Consul

View File

@@ -24,32 +24,6 @@
---
## A bit of history
Things related to Custom Resource Definitions:
- Kubernetes 1.??: `apiextensions.k8s.io/v1beta1` introduced
- Kubernetes 1.16: `apiextensions.k8s.io/v1` introduced
- Kubernetes 1.22: `apiextensions.k8s.io/v1beta1` [removed][changes-in-122]
- Kubernetes 1.25: [CEL validation rules available in beta][crd-validation-rules-beta]
- Kubernetes 1.28: [validation ratcheting][validation-ratcheting] in [alpha][feature-gates]
- Kubernetes 1.29: [CEL validation rules available in GA][cel-validation-rules]
- Kubernetes 1.30: [validation ratcheting][validation-ratcheting] in [beta][feature-gates]; enabled by default
[crd-validation-rules-beta]: https://kubernetes.io/blog/2022/09/23/crd-validation-rules-beta/
[cel-validation-rules]: https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules
[validation-ratcheting]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/4008-crd-ratcheting
[feature-gates]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features
[changes-in-122]: https://kubernetes.io/blog/2021/07/14/upcoming-changes-in-kubernetes-1-22/
---
## First slice of pizza
```yaml
@@ -68,6 +42,8 @@ Things related to Custom Resource Definitions:
(a few optional things become mandatory, see [this guide](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#customresourcedefinition-v122) for details)
- `apiextensions.k8s.io/v1beta1` is available since Kubernetes 1.16
---
## Second slice of pizza
@@ -120,9 +96,9 @@ The YAML below defines a resource using the CRD that we just created:
kind: Pizza
apiVersion: container.training/v1alpha1
metadata:
name: hawaiian
name: napolitana
spec:
toppings: [ cheese, ham, pineapple ]
toppings: [ mozzarella ]
```
.lab[
@@ -138,7 +114,11 @@ spec:
## Type validation
- Recent versions of Kubernetes will issue errors about unknown fields
- Older versions of Kubernetes will accept our pizza definition as is
- Newer versions, however, will issue warnings about unknown fields
(and if we use `--validate=false`, these fields will simply be dropped)
- We need to improve our OpenAPI schema
@@ -146,28 +126,6 @@ spec:
---
## Creating a bland pizza
- Let's try to create a pizza anyway!
.lab[
- Only provide the most basic YAML manifest:
```bash
kubectl create -f- <<EOF
kind: Pizza
apiVersion: container.training/v1alpha1
metadata:
name: hawaiian
EOF
```
]
- That should work! (As long as we don't try to add pineapple😁)
---
## Third slice of pizza
- Let's add a full OpenAPI v3 schema to our Pizza CRD
@@ -250,42 +208,24 @@ Note: we can update a CRD without having to re-create the corresponding resource
---
## Validation woes
## Better data validation
- Let's check what happens if we try to update our pizzas
- Let's change the data schema so that the sauce can only be `red` or `white`
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
.lab[
- Try to add a label:
- Update the Pizza CRD:
```bash
kubectl label pizza --all deliciousness=9001
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
```
]
--
- It works for the pizzas that have `sauce` and `toppings`, but not the other one!
- The other one doesn't pass validation, and *can't be modified*
---
## First, let's fix this!
- Option 1: delete the pizza
*(deletion isn't subject to validation)*
- Option 2: update the pizza to add `sauce` and `toppings`
*(writing a pizza that passes validation is fine)*
- Option 3: relax the validation rules
---
## Next, explain what's happening
## Validation *a posteriori*
- Some of the pizzas that we defined earlier *do not* pass validation
@@ -341,8 +281,6 @@ Note: we can update a CRD without having to re-create the corresponding resource
---
class: extra-details
## Migrating database content
- We need to *serve* a version as long as we *store* objects in that version
@@ -357,58 +295,6 @@ class: extra-details
---
## Validation ratcheting
- Good news: it's not always necessary to introduce new versions
(and to write the associated conversion webhooks)
- *Validation ratcheting allows updates to custom resources that fail validation to succeed if the validation errors were on unchanged keypaths*
- In other words: allow changes that don't introduce further validation errors
- This was introduced in Kubernetes 1.28 (alpha), enabled by default in 1.30 (beta)
- The rules are actually a bit more complex
- Another (maybe more accurate) explanation: it allows tightening or loosening some field definitions
---
## Validation ratcheting example
- Let's change the data schema so that the sauce can only be `red` or `white`
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
.lab[
- Update the Pizza CRD:
```bash
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
```
]
---
## Testing validation ratcheting
- This should work with Kubernetes 1.30 and above
(but give an error for the `brownie` pizza with previous versions of K8S)
.lab[
- Add another label:
```bash
kubectl label pizzas --all food=definitely
```
]
---
## Even better data validation
- If we need more complex data validation, we can use a validating webhook

View File

@@ -1,513 +0,0 @@
# Disruptions
In a perfect world...
- hardware never fails
- software never has bugs
- ...and never needs to be updated
- ...and uses a predictable amount of resources
- ...and these resources are infinite anyways
- network latency and packet loss are zero
- humans never make mistakes
--
😬
---
## Disruptions
In the real world...
- hardware will fail randomly (without advance notice)
- software has bugs
- ...and we constantly add new features
- ...and will sometimes use more resources than expected
- ...and these resources are limited
- network latency and packet loss are NOT zero
- humans make mistakes (shutting down the wrong machine, the wrong app...)
---
## Disruptions
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
- There are **voluntary** and **involuntary** disruptions
- voluntary = directly initiated by humans (including by mistake!)
- involuntary = everything else
- In this section, we're going to see what they are and how to prevent them
(or at least, mitigate their effects)
---
## Node outage
- Example: hardware failure (server or network), low-level error
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
- **Involuntary** disruption (even if it results from human error!)
- Consequence: all workloads on that node become unresponsive
- Mitigations:
- scale workloads to at least 2 replicas (or more if quorum is needed)
- add anti-affinity scheduling constraints (to avoid having all pods on the same node)
---
## Node outage play-by-play
- Node goes down (or disconnected from network)
- Its lease (in Namespace `kube-node-lease`) doesn't get renewed
- Controller manager detects that and marks the node as "unreachable"
(this adds both `NoSchedule` and `NoExecute` taints to the node)
- Eventually, the `NoExecute` taint will evict the pods running on that node
- This will trigger creation of replacement pods by owner controllers
(except for pods with a stable network identity, e.g. in a Stateful Set!)
---
## Node outage notes
- By default, pods will tolerate the `unreachable:NoExecute` taint for 5 minutes
(toleration automatically added by Admission controller `DefaultTolerationSeconds`)
- Pods of a Stateful Set don't recover automatically:
- as long as the Pod exists, a replacement Pod can't be created
- the Pod will exist as long as its Node exists
- deleting the Node (manually or automatically) will recover the Pod
---
## Memory/disk pressure
- Example: available memory on a node goes below a specific threshold
(because a pod is using too much memory and no limit was set)
- **Involuntary** disruption
- Consequence: kubelet starts to *evict* some pods
- Mitigations:
- set *resource limits* on containers to prevent them from using too many resources
- set *resource requests* on containers to make sure they don't get evicted
<br/>
(as long as they use less than what they requested)
- make sure that apps don't use more resources than what they've requested
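In a pod spec, the first two mitigations boil down to something like this (a sketch; the numbers are arbitrary):
```yaml
spec:
  containers:
    - name: web
      image: nginx
      resources:
        requests:
          memory: 100Mi    # not evicted while using less than this
        limits:
          memory: 200Mi    # OOM-killed when going above this
```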
---
## Memory/disk pressure play-by-play
- Memory leak in an application container, slowly causing very high memory usage
- Overall free memory on the node goes below the *soft* or the *hard* threshold
(default hard threshold = 100Mi; default soft threshold = none)
- When reaching the *soft* threshold:
- kubelet waits until the "eviction soft grace period" expires
- then (if resource usage is still above the threshold) it gracefully evicts pods
- When reaching the *hard* threshold:
- kubelet immediately and forcefully evicts pods
---
## Which pods are evicted?
- Kubelet only considers pods that are using *more* than what they requested
(and only for the resource that is under pressure, e.g. RAM or disk usage)
- First, it sorts pods by *priority¹* (as set with the `priorityClassName` in the pod spec)
- Then, by how much their resource usage exceeds their request
(again, for the resource that is under pressure)
- It evicts pods until enough resources have been freed up
---
## Soft (graceful) vs hard (forceful) eviction
- Soft eviction = graceful shutdown of the pod
(honors the pod's `terminationGracePeriodSeconds` timeout)
- Hard eviction = immediate shutdown of the pod
(kills all containers immediately)
---
## Memory/disk pressure notes
- If resource usage increases *very fast*, kubelet might not catch it fast enough
- For memory: this will trigger the kernel out-of-memory killer
- containers killed by OOM are automatically restarted (no eviction)
- eviction might happen at a later point though (if memory usage stays high)
- For disk: there is no "out-of-disk" killer, but writes will fail
- the `write` system call fails with `errno = ENOSPC` / `No space left on device`
- eviction typically happens shortly after (when kubelet catches up)
- When relying on disk/memory bursts a lot, using `priorityClasses` might help
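For example, we could give important pods a higher priority (a sketch; the class name and value are arbitrary, and pods would reference it with `priorityClassName: keep-me-last`):
```yaml
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: keep-me-last
value: 100000          # pods with higher values get evicted later
description: For pods that should survive resource pressure longer.
```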
---
## Memory/disk pressure delays
- By default, no soft threshold is defined
- Defining it requires setting both the threshold and the grace period
- Grace periods can be different for the different types of resources
- When a node is under pressure, kubelet places a `NoSchedule` taint
(to avoid adding more pods while the node is under pressure)
- Once the node is no longer under pressure, kubelet clears the taint
(after waiting an extra timeout, `evictionPressureTransitionPeriod`, 5 min by default)
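Defining these thresholds happens in the kubelet configuration, along these lines (a sketch; the values are examples, not recommendations):
```yaml
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
evictionSoft:
  memory.available: "500Mi"      # soft threshold
evictionSoftGracePeriod:
  memory.available: "1m30s"      # grace period for the soft threshold
evictionHard:
  memory.available: "100Mi"      # hard threshold (this is the default)
evictionPressureTransitionPeriod: "5m"
```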
---
## Accidental deletion
- Example: developer deletes the wrong Deployment, the wrong Namespace...
- **Voluntary** disruption
(from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- only deploy to production systems through e.g. gitops workflows
- enforce peer review of changes
- only give users limited (e.g. read-only) access to production systems
- use canary deployments (might not catch all mistakes though!)
---
## Bad code deployment
- Example: critical bug introduced, application crashes immediately or is non-functional
- **Voluntary** disruption
(again, from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- readiness probes can mitigate immediate crashes
<br/>
(rolling update continues only when enough pods are ready)
- delayed crashes will require a rollback
<br/>
(manual intervention, or automated by a canary system)
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- terminate workloads one at a time, coordinating with users
--
🤔
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- ~~terminate workloads one at a time, coordinating with users~~
- use Pod Disruption Budgets
---
## Pod Disruption Budgets
- A PDB is a kind of *contract* between:
- "admins" = folks maintaining the cluster (e.g. adding/removing/updating nodes)
- "users" = folks deploying apps and workloads on the cluster
- A PDB expresses something like:
*in that particular set of pods, do not "disrupt" more than X at a time*
- Examples:
- in that set of frontend pods, do not disrupt more than 1 at a time
- in that set of worker pods, always have at least 10 ready
<br/>
(do not disrupt them if it would bring down the number of ready pods below 10)
---
## PDB - user side
- Cluster users create a PDB with a manifest like this one:
```yaml
@@INCLUDE[k8s/pod-disruption-budget.yaml]
```
- The PDB must indicate either `minAvailable` or `maxUnavailable`
---
## Rounding logic
- Percentages are rounded **up**
- When specifying `maxUnavailable` as a percentage, rounding up can result in a higher effective percentage
(e.g. `maxUnavailable: 50%` with 3 pods can result in 2 pods being unavailable!)
---
## Unmanaged pods
- Specifying `minAvailable: X` works all the time
- Specifying `minAvailable: X%` or `maxUnavailable` requires *managed pods*
(pods that belong to a controller, e.g. Replica Set, Stateful Set...)
- This is because the PDB controller needs to know the total number of pods
(given by the `replicas` field, not merely by counting pod objects)
- The PDB controller will try to resolve the controller using the pod selector
- If that fails, the PDB controller will emit warning events
(visible with `kubectl describe pdb ...`)
---
## Zero
- `maxUnavailable: 0` means "do not disrupt my pods"
- Same thing if `minAvailable` is greater than or equal to the number of pods
- In that case, cluster admins are supposed to get in touch with cluster users
- This will prevent fully automated operation
(and some cluster admins' automated systems might not honor that request)
---
## PDB - admin side
- As a cluster admin, we need to follow certain rules
- Only shut down (or restart) a node when no pods are running on that node
(except system pods belonging to Daemon Sets)
- To remove pods running on a node, we should use the *eviction API*
(which will check PDB constraints and honor them)
- To prevent new pods from being scheduled on a node, we can use a *taint*
- These operations are streamlined by `kubectl drain`, which will:
- *cordon* the node (add a `NoSchedule` taint)
- invoke the *eviction API* to remove pods while respecting their PDBs
---
## Theory vs practice
- `kubectl drain` won't evict pods using `emptyDir` volumes
(unless the `--delete-emptydir-data` flag is passed as well)
- Make sure that `emptyDir` volumes don't hold anything important
(they shouldn't, but... who knows!)
- Kubernetes lacks a standard way for users to express:
*this `emptyDir` volume can/cannot be safely deleted*
- If a PDB forbids an eviction, this requires manual coordination
---
class: extra-details
## Unhealthy pod eviction policy
- By default, unhealthy pods can only be evicted if PDB allows it
(unhealthy = running, but not ready)
- In many cases, unhealthy pods aren't doing useful work anyway, and can be removed
- This behavior is enabled by setting the appropriate field in the PDB manifest:
```yaml
spec:
unhealthyPodEvictionPolicy: AlwaysAllow
```
---
## Node upgrade
- Example: upgrading kubelet or the Linux kernel on a node
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are temporarily interrupted, and restarted
- this might disrupt these workloads
- Mitigations:
- migrate workloads off the node first (as if we were shutting it down)
---
## Node upgrade notes
- Is it necessary to drain a node before doing an upgrade?
- From [the documentation][node-upgrade-docs]:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- It's *probably* safe to upgrade in-place for:
- kernel upgrades
- kubelet patch-level upgrades (1.X.Y → 1.X.Z)
- It's *probably* better to drain the node for minor-revision kubelet upgrades (1.X → 1.Y)
- In doubt, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
## Manual rescheduling
- Example: moving workloads around to accommodate noisy neighbors or other issues
(e.g. pod X is doing a lot of disk I/O and this is starving other pods)
- **Voluntary** disruption
- Consequence:
- the moved workloads are temporarily interrupted
- Mitigations:
- define an appropriate number of replicas, declare PDBs
- use the [eviction API][eviction-API] to move workloads
[eviction-API]: https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/
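Under the hood, an eviction is a POST of an `Eviction` object to the pod's `eviction` subresource; here is a sketch using `kubectl`'s raw API access (pod name and namespace are placeholders):
```bash
# Returns an error (429) if a Pod Disruption Budget forbids the eviction
kubectl create --raw /api/v1/namespaces/default/pods/web-0/eviction -f - <<EOF
{
  "apiVersion": "policy/v1",
  "kind": "Eviction",
  "metadata": { "name": "web-0", "namespace": "default" }
}
EOF
```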
???
:EN:- Voluntary and involuntary disruptions
:EN:- Pod Disruption Budgets
:FR:- "Disruptions" volontaires et involontaires
:FR:- Pod Disruption Budgets

Some files were not shown because too many files have changed in this diff