Mirror of https://github.com/jpetazzo/container.training.git — synced 2026-03-02 09:20:19 +00:00

Compare commits: 2025-01-en ... 2024-05-en (1 commit, SHA1 719debd824)

.gitignore (vendored) — 1 change
@@ -9,7 +9,6 @@ prepare-labs/terraform/many-kubernetes/one-kubernetes-config/config.tf
prepare-labs/terraform/many-kubernetes/one-kubernetes-module/*.tf
prepare-labs/terraform/tags
prepare-labs/terraform/virtual-machines/openstack/*.tfvars
prepare-labs/terraform/virtual-machines/proxmox/*.tfvars
prepare-labs/www

slides/*.yml.html
@@ -1,9 +1,7 @@
#!/bin/sh

set -eu

# https://open-api.netlify.com/#tag/dnsZone
[ "${1-}" ] || {
[ "$1" ] || {
echo ""
echo "Add a record in Netlify DNS."
echo "This script is hardcoded to add a record to container.training."
@@ -20,7 +18,7 @@ set -eu
}

NETLIFY_CONFIG_FILE=~/.config/netlify/config.json
if ! [ "${DOMAIN-}" ]; then
if ! [ "$DOMAIN" ]; then
DOMAIN=container.training
fi
@@ -1,15 +1,5 @@
#!/bin/sh
#
# Baseline resource usage per vcluster in our use case:
# 500 MB RAM
# 10% CPU
# (See https://docs.google.com/document/d/1n0lwp6rQKQUIuo_A5LQ1dgCzrmjkDjmDtNj1Jn92UrI)
# PRO2-XS = 4 cores, 16 GB
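(Sanity check on those baselines: 30 vclusters × 500 MB ≈ 15 GB of RAM and 30 × 10% ≈ 3 CPU cores, so a single 4-core/16 GB PRO2-XS instance is just about enough for the 30 students configured below.)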
set -e

PROVIDER=scaleway
STUDENTS=30

case "$PROVIDER" in
linode)
@@ -22,26 +12,20 @@ scaleway)
;;
esac

./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk

# set kubeconfig file
export KUBECONFIG=~/kubeconfig

if [ "$PROVIDER" = "kind" ]; then
kind create cluster --name konk
ADDRTYPE=InternalIP
else
./labctl create --mode mk8s --settings settings/konk.env --provider $PROVIDER --tag konk
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG
ADDRTYPE=ExternalIP
fi
cp tags/konk/stage2/kubeconfig.101 $KUBECONFIG

# set external_ip labels
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="'$ADDRTYPE'")].address}{"\n"}{end}' |
kubectl get nodes -o=jsonpath='{range .items[*]}{.metadata.name} {.status.addresses[?(@.type=="ExternalIP")].address}{"\n"}{end}' |
while read node address; do
kubectl label node $node external_ip=$address
done

# vcluster all the things
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students $STUDENTS
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 30

# install prometheus stack because that's cool
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
@@ -57,7 +57,7 @@ need_tag() {
if [ ! -d "tags/$TAG" ]; then
die "Tag $TAG not found (directory tags/$TAG does not exist)."
fi
for FILE in mode provider settings.env status; do
for FILE in settings.env ips.txt; do
if [ ! -f "tags/$TAG/$FILE" ]; then
warning "File tags/$TAG/$FILE not found."
fi
@@ -19,22 +19,20 @@ _cmd_cards() {
TAG=$1
need_tag

OPTIONS_FILE=$2
[ -f "$OPTIONS_FILE" ] || die "Please specify a YAML options file as 2nd argument."
OPTIONS_FILE_PATH="$(readlink -f "$OPTIONS_FILE")"
die FIXME

# This will process logins.jsonl to generate two files: cards.pdf and cards.html
# This will process ips.txt to generate two files: ips.pdf and ips.html
(
cd tags/$TAG
../../../lib/make-login-cards.py "$OPTIONS_FILE_PATH"
../../../lib/ips-txt-to-html.py settings.yaml
)

ln -sf ../tags/$TAG/cards.html www/$TAG.html
ln -sf ../tags/$TAG/cards.pdf www/$TAG.pdf
ln -sf ../tags/$TAG/ips.html www/$TAG.html
ln -sf ../tags/$TAG/ips.pdf www/$TAG.pdf

info "Cards created. You can view them with:"
info "xdg-open tags/$TAG/cards.html tags/$TAG/cards.pdf (on Linux)"
info "open tags/$TAG/cards.html (on macOS)"
info "xdg-open tags/$TAG/ips.html tags/$TAG/ips.pdf (on Linux)"
info "open tags/$TAG/ips.html (on macOS)"
info "Or you can start a web server with:"
info "$0 www"
}
@@ -49,41 +47,6 @@ _cmd_clean() {
done
}

_cmd codeserver "Install code-server on the clusters"
_cmd_codeserver() {
TAG=$1
need_tag

ARCH=${ARCHITECTURE-amd64}
CODESERVER_VERSION=4.96.2
CODESERVER_URL=https://github.com/coder/code-server/releases/download/v${CODESERVER_VERSION}/code-server-${CODESERVER_VERSION}-linux-${ARCH}.tar.gz
pssh "
set -e
i_am_first_node || exit 0
if ! [ -x /usr/local/bin/code-server ]; then
curl -fsSL $CODESERVER_URL | sudo tar zx -C /opt
sudo ln -s /opt/code-server-${CODESERVER_VERSION}-linux-${ARCH}/bin/code-server /usr/local/bin/code-server
sudo -u $USER_LOGIN -H code-server --install-extension ms-azuretools.vscode-docker
sudo -u $USER_LOGIN -H code-server --install-extension ms-kubernetes-tools.vscode-kubernetes-tools
sudo -u $USER_LOGIN -H mkdir -p /home/$USER_LOGIN/.local/share/code-server/User
echo '{\"workbench.startupEditor\": \"terminal\"}' | sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.local/share/code-server/User/settings.json
sudo -u $USER_LOGIN mkdir -p /home/$USER_LOGIN/.config/systemd/user
sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.config/systemd/user/code-server.service <<EOF
[Unit]
Description=code-server

[Install]
WantedBy=default.target

[Service]
ExecStart=/usr/local/bin/code-server --bind-addr 0:1789
Restart=always
EOF
sudo systemctl --user -M $USER_LOGIN@ enable code-server.service --now
sudo loginctl enable-linger $USER_LOGIN
fi"
}

_cmd createuser "Create the user that students will use"
_cmd_createuser() {
TAG=$1
@@ -294,12 +257,21 @@ _cmd_create() {
terraform init
echo tag = \"$TAG\" >> terraform.tfvars
echo how_many_clusters = $STUDENTS >> terraform.tfvars
if [ "$CLUSTERSIZE" ]; then
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
for RETRY in 1 2 3; do
if terraform apply -auto-approve; then
touch terraform.ok
break
fi
done
if ! [ -f terraform.ok ]; then
die "Terraform failed."
fi
)

sep
info "Successfully created $COUNT instances with tag $TAG"
echo create_ok > tags/$TAG/status

# If the settings.env file has a "STEPS" field,
# automatically execute all the actions listed in that field.
@@ -352,8 +324,8 @@ _cmd_clusterize() {
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
sudo systemctl restart ssh.service"

pssh -I < tags/$TAG/clusters.tsv "
grep -w \$PSSH_HOST | tr '\t' '\n' > /tmp/cluster"
pssh -I < tags/$TAG/clusters.txt "
grep -w \$PSSH_HOST | tr ' ' '\n' > /tmp/cluster"
pssh "
echo \$PSSH_HOST > /tmp/ipv4
head -n 1 /tmp/cluster | sudo tee /etc/ipv4_of_first_node
@@ -374,10 +346,6 @@ _cmd_clusterize() {
done < /tmp/cluster
"

while read line; do
printf '{"login": "%s", "password": "%s", "ipaddrs": "%s"}\n' "$USER_LOGIN" "$USER_PASSWORD" "$line"
done < tags/$TAG/clusters.tsv > tags/$TAG/logins.jsonl

echo cluster_ok > tags/$TAG/status
}
@@ -549,7 +517,6 @@ _cmd_kubeadm() {
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
IGNORE_SWAP="- Swap"
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
fi

# Install a valid configuration for containerd
@@ -573,7 +540,6 @@ nodeRegistration:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -587,7 +553,6 @@ nodeRegistration:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
$IGNORE_IPTABLES
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1
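For readers unfamiliar with kubeadm configuration files: the `$IGNORE_*` variables above expand into the `ignorePreflightErrors` list of the generated configuration. A hedged sketch of what the resulting JoinConfiguration section might look like once all of them are set:

```yaml
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
nodeRegistration:
  ignorePreflightErrors:
  # values taken from the IGNORE_* variables above
  - NumCPU
  - SystemVerification
  - Swap
  - FileContent--proc-sys-net-bridge-bridge-nf-call-iptables
```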
@@ -616,9 +581,7 @@ EOF
# Install weave as the pod network
pssh "
if i_am_first_node; then
curl -fsSL https://github.com/weaveworks/weave/releases/download/v2.8.1/weave-daemonset-k8s-1.11.yaml |
sed s,weaveworks/weave,quay.io/rackspace/weave, |
kubectl apply -f-
kubectl apply -f https://github.com/weaveworks/weave/releases/download/v2.8.1/weave-daemonset-k8s-1.11.yaml
fi"

# FIXME this is a gross hack to add the deployment key to our SSH agent,
@@ -781,16 +744,6 @@ EOF
aws-iam-authenticator version
fi"

# Install jless (jless.io)
pssh "
if [ ! -x /usr/local/bin/jless ]; then
##VERSION##
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
sudo mv jless /usr/local/bin
fi"

# Install the krew package manager
pssh "
if [ ! -d /home/$USER_LOGIN/.krew ]; then
@@ -803,7 +756,7 @@ EOF
fi"

# Install kubecolor
KUBECOLOR_VERSION=0.4.0
KUBECOLOR_VERSION=0.3.2
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
pssh "
if [ ! -x /usr/local/bin/kubecolor ]; then
@@ -968,15 +921,6 @@ _cmd_inventory() {
FIXME
}

_cmd logins "Show login information for a group of instances"
_cmd_logins() {
TAG=$1
need_tag $TAG

cat tags/$TAG/logins.jsonl \
| jq -r '"\(if .codeServerPort then "\(.codeServerPort)\t" else "" end )\(.password)\tssh -l \(.login)\(if .port then " -p \(.port)" else "" end)\t\(.ipaddrs)"'
}

_cmd maketag "Generate a quasi-unique tag for a group of instances"
_cmd_maketag() {
if [ -z $USER ]; then
@@ -1027,9 +971,6 @@ _cmd_stage2() {
cd tags/$TAG/stage2
terraform init -upgrade
terraform apply -auto-approve
terraform output -raw logins_jsonl > ../logins.jsonl
terraform output -raw ips_txt > ../ips.txt
echo "stage2_ok" > status
}

_cmd standardize "Deal with non-standard Ubuntu cloud images"
@@ -1114,7 +1055,6 @@ _cmd_tailhist () {
# halfway through and we're actually trying to download it again.
pssh "
set -e
sudo apt-get install unzip -y
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
unzip websocketd-0.3.0-linux_$ARCH.zip websocketd
sudo mv websocketd /usr/local/bin/websocketd
@@ -1139,35 +1079,14 @@ EOF
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
}
_cmd terraform "Apply Terraform configuration to provision resources."
_cmd_terraform() {
TAG=$1
need_tag
echo terraforming > tags/$TAG/status
(
cd tags/$TAG
terraform apply -auto-approve
# The Terraform provider for Proxmox has a bug; sometimes it fails
# to obtain VM address from the QEMU agent. In that case, we put
# ERROR in the ips.txt file (instead of the VM IP address). Detect
# that so that we run Terraform again (this typically solves the issue).
if grep -q ERROR ips.txt; then
die "Couldn't obtain IP address of some machines. Try to re-run terraform."
fi
)
echo terraformed > tags/$TAG/status

}

_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
_cmd_tools() {
TAG=$1
need_tag

pssh "
set -e
sudo apt-get -q update
sudo apt-get -qy install apache2-utils argon2 emacs-nox git httping htop jid joe jq mosh tree unzip
sudo apt-get -qy install apache2-utils emacs-nox git httping htop jid joe jq mosh python-setuptools tree unzip
# This is for VMs with broken PRNG (symptom: running docker-compose randomly hangs)
sudo apt-get -qy install haveged
"
@@ -1230,8 +1149,8 @@ _cmd_tags() {
cd tags
echo "[#] [Status] [Tag] [Mode] [Provider]"
for tag in *; do
if [ -f $tag/logins.jsonl ]; then
count="$(wc -l < $tag/logins.jsonl)"
if [ -f $tag/ips.txt ]; then
count="$(wc -l < $tag/ips.txt)"
else
count="?"
fi
@@ -1307,13 +1226,7 @@ _cmd_passwords() {
$0 ips "$TAG" | paste "$PASSWORDS_FILE" - | while read password nodes; do
info "Setting password for $nodes..."
for node in $nodes; do
echo $USER_LOGIN $password | ssh $SSHOPTS -i tags/$TAG/id_rsa ubuntu@$node '
read login password
echo $login:$password | sudo chpasswd
hashedpassword=$(echo -n $password | argon2 saltysalt$RANDOM -e)
sudo -u $login mkdir -p /home/$login/.config/code-server
echo "hashed-password: \"$hashedpassword\"" | sudo -u $login tee /home/$login/.config/code-server/config.yaml >/dev/null
'
echo $USER_LOGIN:$password | ssh $SSHOPTS -i tags/$TAG/id_rsa ubuntu@$node sudo chpasswd
done
done
info "Done."
@@ -1345,11 +1258,6 @@ _cmd_wait() {
pssh -l $SSH_USER "
if [ -d /var/lib/cloud ]; then
cloud-init status --wait
case $? in
0) exit 0;; # all is good
2) exit 0;; # recoverable error (happens with proxmox deprecated cloud-init payloads)
*) exit 1;; # all other problems
esac
fi"
}
@@ -1405,7 +1313,7 @@ EOF"
_cmd www "Run a web server to access card HTML and PDF"
_cmd_www() {
cd www
IPADDR=$(curl -fsSL canihazip.com/s || echo localhost)
IPADDR=$(curl -sL canihazip.com/s)
info "The following files are available:"
for F in *; do
echo "http://$IPADDR:8000/$F"
@@ -1,22 +1,32 @@
#!/usr/bin/env python3
import json
import os
import sys
import yaml
import jinja2


# Read settings from user-provided settings file
context = yaml.safe_load(open(sys.argv[1]))

context["logins"] = []
for line in open("logins.jsonl"):
    if line.strip():
        context["logins"].append(json.loads(line))
ips = list(open("ips.txt"))
clustersize = context["clustersize"]

print("---------------------------------------------")
print(" Number of cards: {}".format(len(context["logins"])))
print(" Number of IPs: {}".format(len(ips)))
print(" VMs per cluster: {}".format(clustersize))
print("---------------------------------------------")

assert len(ips) % clustersize == 0

clusters = []

while ips:
    cluster = ips[:clustersize]
    ips = ips[clustersize:]
    clusters.append(cluster)

context["clusters"] = clusters

template_file_name = context["cards_template"]
template_file_path = os.path.join(
    os.path.dirname(__file__),
@@ -25,23 +35,23 @@ template_file_path = os.path.join(
    template_file_name
)
template = jinja2.Template(open(template_file_path).read())
with open("cards.html", "w") as f:
    f.write(template.render(**context))
print("Generated cards.html")
with open("ips.html", "w") as f:
    f.write(template.render(**context))
print("Generated ips.html")


try:
    import pdfkit
    paper_size = context["paper_size"]
    margin = {"A4": "0.5cm", "Letter": "0.2in"}[paper_size]
    with open("cards.html") as f:
        pdfkit.from_file(f, "cards.pdf", options={
    with open("ips.html") as f:
        pdfkit.from_file(f, "ips.pdf", options={
            "page-size": paper_size,
            "margin-top": margin,
            "margin-bottom": margin,
            "margin-left": margin,
            "margin-right": margin,
        })
    print("Generated cards.pdf")
    print("Generated ips.pdf")
except ImportError:
    print("WARNING: could not import pdfkit; did not generate cards.pdf")
    print("WARNING: could not import pdfkit; did not generate ips.pdf")
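For context, a hedged sketch of the settings file this script expects as its first argument — the keys match the `context[...]` lookups above (`clustersize`, `cards_template`, `paper_size`); the values here are hypothetical:

```yaml
clustersize: 3            # VMs per cluster; len(ips.txt) must be a multiple of this
cards_template: cards.html
paper_size: A4            # A4 -> 0.5cm margins, Letter -> 0.2in margins
```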
@@ -7,7 +7,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -7,7 +7,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -11,7 +11,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -7,10 +7,9 @@ USER_PASSWORD=training

# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
KUBEVERSION=1.28.9
KUBEVERSION=1.24.14

STEPS="
terraform
wait
standardize
clusterize

@@ -6,7 +6,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -6,7 +6,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -6,7 +6,6 @@ USER_LOGIN=docker
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize
@@ -15,5 +14,6 @@ STEPS="
createuser
webssh
tailhist
cards
ips
"
"
@@ -3,4 +3,4 @@ CLUSTERSIZE=5
USER_LOGIN=k8s
USER_PASSWORD=

STEPS="terraform stage2"
STEPS="stage2"

@@ -6,7 +6,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -7,7 +7,6 @@ USER_LOGIN=k8s
USER_PASSWORD=training

STEPS="
terraform
wait
standardize
clusterize

@@ -1,4 +1,6 @@
CLUSTERSIZE=2

USER_LOGIN=k8s
USER_PASSWORD=

STEPS="terraform stage2"
STEPS="stage2"

@@ -11,7 +11,6 @@ USER_LOGIN=portal
USER_PASSWORD=CHANGEME

STEPS="
terraform
wait
standardize
clusterize
@@ -7,7 +7,7 @@
{%- set url = url
| default("http://FIXME.container.training/") -%}
{%- set pagesize = pagesize
| default(10) -%}
| default(9) -%}
{%- set lang = lang
| default("en") -%}
{%- set event = event
@@ -15,36 +15,79 @@
{%- set backside = backside
| default(False) -%}
{%- set image = image
| default(False) -%}
| default("kube") -%}
{%- set clusternumber = clusternumber
| default(None) -%}
{%- set thing = thing
| default("lab environment") -%}

{%- if lang == "en" -%}
{%- set intro -%}
Here is the connection information to your very own
{{ thing }} for this {{ event }}.
You can connect to it with any SSH client.
{%- endset -%}
{%- if qrcode == True -%}
{%- set qrcode = "https://container.training/q" -%}
{%- elif qrcode -%}
{%- set qrcode = qrcode -%}
{%- endif -%}
{%- if lang == "fr" -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
{{ thing }} pour cette formation.
Vous pouvez vous y connecter
avec n'importe quel client SSH.
{%- endset -%}

{# You can also set img_bottom_src instead. #}
{%- set img_logo_src = {
"docker": "https://s3-us-west-2.amazonaws.com/www.breadware.com/integrations/docker.png",
"swarm": "https://cdn.wp.nginx.com/wp-content/uploads/2016/07/docker-swarm-hero2.png",
"kube": "https://avatars1.githubusercontent.com/u/13629408",
"enix": "https://enix.io/static/img/logos/logo-domain-cropped.png",
}[image] -%}
{%- if lang == "en" and clustersize == 1 -%}
{%- set intro -%}
Here is the connection information to your very own
machine for this {{ event }}.
You can connect to this VM with any SSH client.
{%- endset -%}
{%- set listhead -%}
Your machine is:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" and clustersize != 1 -%}
{%- set intro -%}
Here is the connection information to your very own
cluster for this {{ event }}.
You can connect to each VM with any SSH client.
{%- endset -%}
{%- set listhead -%}
Your machines are:
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" and clustersize == 1 -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
machine pour cette formation.
Vous pouvez vous connecter à cette machine virtuelle
avec n'importe quel client SSH.
{%- endset -%}
{%- set listhead -%}
Adresse IP:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" and clusterprefix != "node" -%}
{%- set intro -%}
Here is the connection information for the
<strong>{{ clusterprefix }}</strong> environment.
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" and clustersize != 1 -%}
{%- set intro -%}
Voici les informations permettant de se connecter à votre
cluster pour cette formation.
Vous pouvez vous connecter à chaque machine virtuelle
avec n'importe quel client SSH.
{%- endset -%}
{%- set listhead -%}
Adresses IP:
{%- endset -%}
{%- endif -%}
{%- if lang == "en" -%}
{%- set slides_are_at -%}
You can find the slides at:
{%- endset -%}
{%- set slides_are_at -%}
You can find the slides at:
{%- endset -%}
{%- endif -%}
{%- if lang == "fr" -%}
{%- set slides_are_at -%}
Le support de formation est à l'adresse suivante :
{%- endset -%}
{%- set slides_are_at -%}
Le support de formation est à l'adresse suivante :
{%- endset -%}
{%- endif -%}
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
@@ -59,21 +102,25 @@
}
body {
  /* this is A4 minus 0.5cm margins */
  width: 20cm;
  height: 28.7cm;
  width: 20cm;
  height: 28.7cm;
}
{% elif paper_size == "Letter" %}
@page {
  size: Letter; /* 8.5in x 11in */
  size: Letter;
  margin: 0.2in;
}
body {
  width: 6.75in; /* two cards wide */
  margin-left: 0.875in; /* (8.5in - 6.75in)/2 */
  margin-top: 0.1875in; /* (11in - 5 cards)/2 */
  /* this is Letter minus 0.2in margins */
  width: 8.6in;
  height: 10.6in;
}
{% endif %}

body, table {
  margin: 0;
  padding: 0;
  line-height: 1em;
  font-size: 15px;
  font-family: 'Slabo 27px';
@@ -87,45 +134,47 @@ table {
  padding-left: 0.4em;
}

td:first-child {
  width: 10.5em;
}

div.card {
div {
  float: left;
  border: 0.01in dotted black;
  border: 1px dotted black;
  {% if backside %}
  height: 33%;
  {% endif %}
  /* columns * (width+left+right) < 100% */
  /*
  columns * (width+left+right) < 100%
  height: 33%;
  width: 24.8%;
  width: 33%;
  width: 24.8%;
  */
  width: 3.355in; /* 3.375in minus two 0.01in borders */
  height: 2.105in; /* 2.125in minus two 0.01in borders */
  /**/
  width: 33%;
  /**/
}

p {
  margin: 0.8em;
}

div.front {
  {% if image %}
  background-image: url("{{ image }}");
  background-repeat: no-repeat;
  background-size: 1in;
  background-position-x: 2.8in;
  background-position-y: center;
  {% endif %}
div.back {
  border: 1px dotted grey;
}

span.scale {
  white-space: nowrap;
  white-space: nowrap;
}

img.logo {
  height: 4.5em;
  float: right;
}

img.bottom {
  height: 2.5em;
  display: block;
  margin: 0.5em auto;
}

.qrcode img {
  height: 5.8em;
  padding: 1em 1em 0.5em 1em;
  float: left;
  width: 40%;
  margin: 1em;
}

.logpass {
@@ -140,97 +189,101 @@ span.scale {
  height: 0;
}
</style>
<script type="text/javascript" src="qrcode.min.js"></script>
<script type="text/javascript" src="https://cdn.rawgit.com/davidshimjs/qrcodejs/gh-pages/qrcode.min.js"></script>
<script type="text/javascript">
function qrcodes() {
  [].forEach.call(
    document.getElementsByClassName("qrcode"),
    (e, index) => {
      new QRCode(e, {
        text: "{{ qrcode }}",
        correctLevel: QRCode.CorrectLevel.L
      });
    }
  );
  [].forEach.call(
    document.getElementsByClassName("qrcode"),
    (e, index) => {
      new QRCode(e, {
        text: "{{ qrcode }}",
        correctLevel: QRCode.CorrectLevel.L
      });
    }
  );
}

function scale() {
  [].forEach.call(
    document.getElementsByClassName("scale"),
    (e, index) => {
      var text_width = e.getBoundingClientRect().width;
      var box_width = e.parentElement.getBoundingClientRect().width;
      var percent = 100 * box_width / text_width + "%";
      e.style.fontSize = percent;
    }
  );
  [].forEach.call(
    document.getElementsByClassName("scale"),
    (e, index) => {
      var text_width = e.getBoundingClientRect().width;
      var box_width = e.parentElement.getBoundingClientRect().width;
      var percent = 100 * box_width / text_width + "%";
      e.style.fontSize = percent;
    }
  );
}
</script>
</head>
<body onload="qrcodes(); scale();">
{% for login in logins %}
<div class="card front">
{% for cluster in clusters %}
<div>
<p>{{ intro }}</p>
<p>
{% if img_logo_src %}
<img class="logo" src="{{ img_logo_src }}" />
{% endif %}
<table>
<tr>
<td>login:</td>
<td>password:</td>
</tr>
<tr>
<td class="logpass">{{ login.login }}</td>
<td class="logpass">{{ login.password }}</td>
</tr>
<tr>
<td>IP address:</td>
{% if login.port %}
<td>port:</td>
{% endif %}
</tr>
<tr>
<td class="logpass">{{ login.ipaddrs.split("\t")[0] }}</td>
{% if login.port %}
<td class="logpass">{{ login.port }}</td>
{% endif %}
</tr>
{% if clusternumber != None %}
<tr><td>cluster:</td></tr>
<tr><td class="logpass">{{ clusternumber + loop.index }}</td></tr>
{% endif %}
<tr><td>login:</td></tr>
<tr><td class="logpass">{{ user_login }}</td></tr>
<tr><td>password:</td></tr>
<tr><td class="logpass">{{ user_password }}</td></tr>
</table>

</p>
<p>
{{ listhead }}
<table>
{% for node in cluster %}
<tr>
<td>{{ clusterprefix }}{{ loop.index }}:</td>
<td>{{ node }}</td>
</tr>
{% endfor %}
</table>
</p>

<p>
{% if url %}
{{ slides_are_at }}
{{ slides_are_at }}
<p>
<span class="scale">{{ url }}</span>
</p>
{% endif %}
{% if img_bottom_src %}
<img class="bottom" src="{{ img_bottom_src }}" />
{% endif %}
</p>
</div>
{% if loop.index%pagesize==0 or loop.last %}
<span class="pagebreak"></span>
{% if backside %}
{% for x in range(pagesize) %}
<div class="card back">
{{ backside }}
{#
<p>Thanks for attending
"Getting Started With Kubernetes and Container Orchestration"
during CONFERENCE in Month YYYY!</p>
<p>If you liked that workshop,
I can train your team, in person or
online, with custom courses of
any length and any level.
</p>
{% if qrcode %}
<p>If you're interested, please scan that QR code to contact me:</p>
<span class="qrcode"></span>
{% for x in range(pagesize) %}
<div class="back">
<p>Thanks for attending
"Getting Started With Kubernetes and Container Orchestration"
during CONFERENCE in Month YYYY!</p>
<p>If you liked that workshop,
I can train your team, in person or
online, with custom courses of
any length and any level.
</p>
{% if qrcode %}
<p>If you're interested, please scan that QR code to contact me:</p>
<span class="qrcode"></span>
{% else %}
<p>If you're interested, you can contact me at:</p>
{% endif %}
<p>jerome.petazzoni@gmail.com</p>
#}
</div>
{% endfor %}
<span class="pagebreak"></span>
{% endif %}
<p>If you're interested, you can contact me at:</p>
{% endif %}
<p>jerome.petazzoni@gmail.com</p>
</div>
{% endfor %}
<span class="pagebreak"></span>
{% endif %}
{% endif %}
{% endfor %}
</body>
@@ -1,19 +0,0 @@
cards_template: cards.html
paper_size: Letter
url: https://2024-11-qconsf.container.training
event: workshop
backside: |
  <div class="qrcode"></div>
  <p>
  Thanks for attending the Asynchronous Architecture Patterns workshop at QCON!
  </p>
  <p>
  <b>This QR code will give you my contact info</b> as well as a link to a feedback form.
  </p>
  <p>
  If you liked this workshop, I can train your team, in person or online, with custom
  courses of any length and any level, on Docker, Kubernetes, and MLops.
  </p>
qrcode: https://2024-11-qconsf.container.training/#contact
thing: Kubernetes cluster
image: logo-kubernetes.png
@@ -8,8 +8,8 @@ resource "random_string" "_" {
resource "time_static" "_" {}

locals {
  min_nodes_per_pool = var.min_nodes_per_cluster
  max_nodes_per_pool = var.max_nodes_per_cluster
  min_nodes_per_pool = var.nodes_per_cluster
  max_nodes_per_pool = var.nodes_per_cluster * 2
  timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
  tag = random_string._.result
  # Common tags to be assigned to all resources
@@ -14,20 +14,6 @@ provider "kubernetes" {
  config_path = "./kubeconfig.${index}"
}

provider "helm" {
  alias = "cluster_${index}"
  kubernetes {
    config_path = "./kubeconfig.${index}"
  }
}

# Password used for SSH and code-server access
resource "random_string" "shpod_${index}" {
  length = 6
  special = false
  upper = false
}

resource "kubernetes_namespace" "shpod_${index}" {
  provider = kubernetes.cluster_${index}
  metadata {
@@ -35,57 +21,120 @@ resource "kubernetes_namespace" "shpod_${index}" {
  }
}

data "kubernetes_service" "shpod_${index}" {
  depends_on = [ helm_release.shpod_${index} ]
resource "kubernetes_deployment" "shpod_${index}" {
  provider = kubernetes.cluster_${index}
  metadata {
    name = "shpod"
    namespace = "shpod"
    namespace = kubernetes_namespace.shpod_${index}.metadata.0.name
  }
  spec {
    selector {
      match_labels = {
        app = "shpod"
      }
    }
    template {
      metadata {
        labels = {
          app = "shpod"
        }
      }
      spec {
        service_account_name = "shpod"
        container {
          image = "jpetazzo/shpod"
          name = "shpod"
          env {
            name = "PASSWORD"
            value = random_string.shpod_${index}.result
          }
          lifecycle {
            post_start {
              exec {
                command = [ "sh", "-c", "curl http://myip.enix.org/REMOTE_ADDR > /etc/HOSTIP || true" ]
              }
            }
          }
          resources {
            limits = {
              cpu = "2"
              memory = "500M"
            }
            requests = {
              cpu = "100m"
              memory = "250M"
            }
          }
        }
      }
    }
  }
}

resource "helm_release" "shpod_${index}" {
  provider = helm.cluster_${index}
  repository = "https://shpod.in"
  chart = "shpod"
  name = "shpod"
  namespace = "shpod"
  create_namespace = false
  set {
    name = "service.type"
    value = "NodePort"
resource "kubernetes_service" "shpod_${index}" {
  provider = kubernetes.cluster_${index}
  lifecycle {
    # Folks might alter their shpod Service to expose extra ports.
    # Don't reset their changes.
    ignore_changes = [ spec ]
  }
  set {
    name = "resources.requests.cpu"
    value = "100m"
  metadata {
    name = "shpod"
    namespace = kubernetes_namespace.shpod_${index}.metadata.0.name
  }
  set {
    name = "resources.requests.memory"
    value = "500M"
  spec {
    selector = {
      app = "shpod"
    }
    port {
      name = "ssh"
      port = 22
      target_port = 22
    }
    type = "NodePort"
  }
  set {
    name = "resources.limits.cpu"
    value = "1"
  }
}

resource "kubernetes_service_account" "shpod_${index}" {
  provider = kubernetes.cluster_${index}
  metadata {
    name = "shpod"
    namespace = kubernetes_namespace.shpod_${index}.metadata.0.name
  }
  set {
    name = "resources.limits.memory"
    value = "1000M"
  }

resource "kubernetes_cluster_role_binding" "shpod_${index}" {
  provider = kubernetes.cluster_${index}
  metadata {
    name = "shpod"
  }
  set {
    name = "persistentVolume.enabled"
    value = "true"
  role_ref {
    api_group = "rbac.authorization.k8s.io"
    kind = "ClusterRole"
    name = "cluster-admin"
  }
  set {
    name = "ssh.password"
    value = random_string.shpod_${index}.result
  subject {
    kind = "ServiceAccount"
    name = "shpod"
    namespace = "shpod"
  }
  set {
    name = "rbac.cluster.clusterRoles"
    value = "{cluster-admin}"
  subject {
    api_group = "rbac.authorization.k8s.io"
    kind = "Group"
    name = "shpod-cluster-admins"
  }
  set {
    name = "codeServer.enabled"
    value = "true"
  }

resource "random_string" "shpod_${index}" {
  length = 6
  special = false
  upper = false
}

provider "helm" {
  alias = "cluster_${index}"
  kubernetes {
    config_path = "./kubeconfig.${index}"
  }
}
@@ -168,28 +217,16 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {

%{ endfor ~}

output "ips_txt" {
output "ip_addresses_of_nodes" {
  value = join("\n", [
%{ for index, cluster in clusters ~}
    join("\n", concat(
    join("\t", concat(
      [
        random_string.shpod_${index}.result,
        "ssh -l k8s -p $${kubernetes_service.shpod_${index}.spec[0].port[0].node_port}"
      ],
      split(" ", file("./externalips.${index}"))
    )),
%{ endfor ~}
    ""
  ])
}

output "logins_jsonl" {
  value = join("\n", [
%{ for index, cluster in clusters ~}
    jsonencode({
      login = "k8s",
      password = random_string.shpod_${index}.result,
      port = data.kubernetes_service.shpod_${index}.spec[0].port[0].node_port,
      codeServerPort = data.kubernetes_service.shpod_${index}.spec[0].port[1].node_port,
      ipaddrs = replace(file("./externalips.${index}"), " ", "\t"),
    }),
%{ endfor ~}
    ""
  ])
}
@@ -7,16 +7,11 @@ variable "how_many_clusters" {
  default = 2
}

variable "min_nodes_per_cluster" {
variable "nodes_per_cluster" {
  type = number
  default = 2
}

variable "max_nodes_per_cluster" {
  type = number
  default = 4
}

variable "node_size" {
  type = string
  default = "M"
@@ -4,7 +4,6 @@ resource "helm_release" "_" {
  create_namespace = true
  repository = "https://charts.loft.sh"
  chart = "vcluster"
  version = "0.19.7"
  set {
    name = "service.type"
    value = "NodePort"
@@ -14,9 +14,9 @@ $ hcloud server-type list | grep shared
variable "node_sizes" {
  type = map(any)
  default = {
    S = "cpx11"
    M = "cpx21"
    L = "cpx31"
    S = "cx11"
    M = "cx21"
    L = "cx31"
  }
}
@@ -1,25 +0,0 @@
variable "proxmox_endpoint" {
  type = string
  default = "https://localhost:8006/"
}

variable "proxmox_username" {
  type = string
  default = null
}

variable "proxmox_password" {
  type = string
  default = null
}

variable "proxmox_template_node_name" {
  type = string
  default = null
}

variable "proxmox_template_vm_id" {
  type = number
  default = null
}
@@ -1,11 +0,0 @@
# Since node size needs to be a string...
# To indicate number of CPUs + RAM, just pass it as a string with a space between them.
# RAM is in megabytes.
variable "node_sizes" {
  type = map(any)
  default = {
    S = "1 2048"
    M = "2 4096"
    L = "3 8192"
  }
}
@@ -56,7 +56,6 @@ locals {
      cluster_name = format("%s-%03d", var.tag, cn[0])
      node_name = format("%s-%03d-%03d", var.tag, cn[0], cn[1])
      node_size = lookup(var.node_sizes, var.node_size, var.node_size)
      node_index = cn[0] * var.nodes_per_cluster + cn[1]
    }
  }
}
@@ -72,10 +71,10 @@ resource "local_file" "ip_addresses" {
resource "local_file" "clusters" {
  content = join("", formatlist("%s\n", [
    for cid in range(1, 1 + var.how_many_clusters) :
    join("\t",
    join(" ",
      [for nid in range(1, 1 + var.nodes_per_cluster) :
        local.ip_addresses[format("c%03dn%03d", cid, nid)]
      ])]))
  filename = "clusters.tsv"
  filename = "clusters.txt"
  file_permission = "0600"
}
@@ -13,7 +13,7 @@ data "openstack_images_image_v2" "_" {
  most_recent = true
  properties = {
    os = "ubuntu"
    version = "24.04"
    version = "22.04"
  }
}
@@ -1 +0,0 @@
../common.tf
@@ -1 +0,0 @@
../../providers/proxmox/config.tf
@@ -1,77 +0,0 @@
data "proxmox_virtual_environment_nodes" "_" {}

locals {
  pve_nodes = data.proxmox_virtual_environment_nodes._.names
}

resource "proxmox_virtual_environment_vm" "_" {
  node_name = local.pve_nodes[each.value.node_index % length(local.pve_nodes)]
  for_each = local.nodes
  name = each.value.node_name
  stop_on_destroy = true
  cpu {
    cores = split(" ", each.value.node_size)[0]
    type = "x86-64-v2-AES" # recommended for modern CPUs
  }
  memory {
    dedicated = split(" ", each.value.node_size)[1]
  }
  #disk {
  #  datastore_id = "ceph"
  #  file_id = proxmox_virtual_environment_file._.id
  #  interface = "scsi0"
  #  size = 30
  #  discard = "on"
  #}
  clone {
    vm_id = var.proxmox_template_vm_id
    node_name = var.proxmox_template_node_name
  }
  agent {
    enabled = true
  }
  initialization {
    datastore_id = "ceph"
    user_account {
      username = "ubuntu"
      keys = [trimspace(tls_private_key.ssh.public_key_openssh)]
    }
    ip_config {
      ipv4 {
        address = "dhcp"
        #gateway =
      }
    }
  }
  network_device {
    bridge = "vmbr0"
  }
  operating_system {
    type = "l26"
  }
}

#resource "proxmox_virtual_environment_download_file" "ubuntu_2404_20250115" {
#  content_type = "iso"
#  datastore_id = "cephfs"
#  node_name = "pve-lsd-1"
#  url = "https://cloud-images.ubuntu.com/releases/24.04/release-20250115/ubuntu-24.04-server-cloudimg-amd64.img"
#  file_name = "ubuntu_2404_20250115.img"
#}
#
#resource "proxmox_virtual_environment_file" "_" {
#  datastore_id = "cephfs"
#  node_name = "pve-lsd-1"
#  source_file {
#    path = "/root/noble-server-cloudimg-amd64.img"
#  }
#}

locals {
  ip_addresses = {
    for key, value in local.nodes :
    key => [for addr in flatten(concat(proxmox_virtual_environment_vm._[key].ipv4_addresses, ["ERROR"])) :
      addr if addr != "127.0.0.1"][0]
  }
}
@@ -1,15 +0,0 @@
terraform {
  required_providers {
    proxmox = {
      source = "bpg/proxmox"
      version = "~> 0.70.1"
    }
  }
}

provider "proxmox" {
  endpoint = var.proxmox_endpoint
  username = var.proxmox_username
  password = var.proxmox_password
  insecure = true
}
@@ -1,14 +0,0 @@
# If you want to deploy to Proxmox, you need to:
# 1) copy that file to e.g. myproxmoxcluster.tfvars
# 2) make sure you have a VM template with QEMU agent pre-installed
# 3) customize the copy (you need to replace all the CHANGEME values)
# 4) deploy with "labctl create --provider proxmox/myproxmoxcluster ..."

proxmox_endpoint = "https://localhost:8006/"
proxmox_username = "terraform@pve"
proxmox_password = "CHANGEME"

proxmox_template_node_name = "CHANGEME"
proxmox_template_vm_id = CHANGEME

@@ -1 +0,0 @@
../../providers/proxmox/variables.tf
(File diff suppressed because one or more lines are too long.)
(Binary image files changed, not shown — before: 81 KiB and 31 KiB.)

prepare-labs/www/qrcode.min.js (vendored) — 1 change
(File diff suppressed because one or more lines are too long.)
@@ -5,7 +5,7 @@ chat: "[Mattermost](https://training.enix.io/mattermost)"

gitrepo: github.com/jpetazzo/container.training

slides: https://2025-01-enix.container.training/
slides: https://2024-05-enix.container.training/

#slidenumberprefix: "#SomeHashTag — "

@@ -5,7 +5,7 @@ chat: "[Mattermost](https://training.enix.io/mattermost)"

gitrepo: github.com/jpetazzo/container.training

slides: https://2025-01-enix.container.training/
slides: https://2024-05-enix.container.training/

#slidenumberprefix: "#SomeHashTag — "

@@ -6,7 +6,7 @@ chat: "[Mattermost](https://training.enix.io/mattermost)"

gitrepo: github.com/jpetazzo/container.training

slides: https://2025-01-enix.container.training/
slides: https://2024-05-enix.container.training/

#slidenumberprefix: "#SomeHashTag — "
@@ -39,7 +39,6 @@ content:
- k8s/helm-secrets.md
- exercises/helm-umbrella-chart-details.md
-
- k8s/helmfile.md
- k8s/ytt.md
- k8s/gitworkflows.md
- k8s/flux.md
@@ -5,7 +5,7 @@ chat: "[Mattermost](https://training.enix.io/mattermost)"

gitrepo: github.com/jpetazzo/container.training

slides: https://2025-01-enix.container.training/
slides: https://2024-05-enix.container.training/

#slidenumberprefix: "#SomeHashTag — "
@@ -26,9 +26,7 @@ content:
- shared/toc.md
- exercises/netpol-brief.md
- exercises/sealed-secrets-brief.md
- exercices/rbac-brief.md
- exercises/kyverno-ingress-domain-name-brief.md
- exercises/reqlim-brief.md
- #1
- k8s/demo-apps.md
- k8s/netpol.md
@@ -39,7 +37,6 @@ content:
- k8s/ingress-tls.md
- exercises/netpol-details.md
- exercises/sealed-secrets-details.md
- exercises/rbac-details.md
- #2
- k8s/extending-api.md
- k8s/crd.md
@@ -56,7 +53,6 @@ content:
- k8s/apiserver-deepdive.md
- k8s/aggregation-layer.md
- k8s/hpa-v2.md
- exercises/reqlim-details.md
- #4
- k8s/statefulsets.md
- k8s/consul.md
@@ -5,7 +5,7 @@ chat: "[Mattermost](https://training.enix.io/mattermost)"

gitrepo: github.com/jpetazzo/container.training

slides: https://2025-01-enix.container.training/
slides: https://2024-05-enix.container.training/

#slidenumberprefix: "#SomeHashTag — "
@@ -16,7 +16,7 @@

# Shortlinks for next training in English and French
#/next https://www.eventbrite.com/e/livestream-intensive-kubernetes-bootcamp-tickets-103262336428
/next https://qconsf.com/training/nov2024/asynchronous-architecture-patterns-scale-ml-and-other-high-latency-workloads
/next https://skillsmatter.com/courses/700-advanced-kubernetes-concepts-workshop-jerome-petazzoni
/hi5 https://enix.io/fr/services/formation/online/
/us https://www.ardanlabs.com/live-training-events/deploying-microservices-and-traditional-applications-with-kubernetes-march-28-2022.html
/uk https://skillsmatter.com/workshops/827-deploying-microservices-and-traditional-applications-with-kubernetes-with-jerome-petazzoni
slides/autopilot/package-lock.json (generated) — 676 changes; file diff suppressed because it is too large.
@@ -2,8 +2,8 @@
"name": "container-training-pub-sub-server",
"version": "0.0.1",
"dependencies": {
  "express": "^4.21.1",
  "socket.io": "^4.8.0",
  "express": "^4.16.2",
  "socket.io": "^4.7.5",
  "socket.io-client": "^4.7.5"
}
}
@@ -40,7 +40,7 @@

- In multi-stage builds, all stages can be built in parallel

(example: https://github.com/jpetazzo/shpod; [before][shpod-before-parallel] and [after][shpod-after-parallel])
(example: https://github.com/jpetazzo/shpod; [before] and [after])

- Stages are built only when they are necessary

@@ -50,8 +50,8 @@

- Files are cached in the builder

[shpod-before-parallel]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
[shpod-after-parallel]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[before]: https://github.com/jpetazzo/shpod/blob/c6efedad6d6c3dc3120dbc0ae0a6915f85862474/Dockerfile
[after]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile

---
@@ -121,10 +121,10 @@ docker buildx build … \

- Must not use binary downloads with hard-coded architectures!

(streamlining a Dockerfile for multi-arch: [before][shpod-before-multiarch], [after][shpod-after-multiarch])
(streamlining a Dockerfile for multi-arch: [before], [after])

[shpod-before-multiarch]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[shpod-after-multiarch]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile
[before]: https://github.com/jpetazzo/shpod/blob/d20887bbd56b5fcae2d5d9b0ce06cae8887caabf/Dockerfile
[after]: https://github.com/jpetazzo/shpod/blob/c50789e662417b34fea6f5e1d893721d66d265b7/Dockerfile

---
@@ -32,7 +32,7 @@ Compose enables a simple, powerful onboarding workflow:

1. Checkout our code.

2. Run `docker compose up`.
2. Run `docker-compose up`.

3. Our app is up and running!

@@ -66,19 +66,19 @@ class: pic

1. Write Dockerfiles

2. Describe our stack of containers in a YAML file (the "Compose file")
2. Describe our stack of containers in a YAML file called `docker-compose.yml`

3. `docker compose up` (or `docker compose up -d` to run in the background)
3. `docker-compose up` (or `docker-compose up -d` to run in the background)

4. Compose pulls and builds the required images, and starts the containers

5. Compose shows the combined logs of all the containers

(if running in the background, use `docker compose logs`)
(if running in the background, use `docker-compose logs`)

6. Hit Ctrl-C to stop the whole stack

(if running in the background, use `docker compose stop`)
(if running in the background, use `docker-compose stop`)

---

@@ -86,11 +86,11 @@ class: pic

After making changes to our source code, we can:

1. `docker compose build` to rebuild container images
1. `docker-compose build` to rebuild container images

2. `docker compose up` to restart the stack with the new images
2. `docker-compose up` to restart the stack with the new images

We can also combine both with `docker compose up --build`
We can also combine both with `docker-compose up --build`

Compose will be smart, and only recreate the containers that have changed.
|
||||
Second step: start the app.
|
||||
|
||||
```bash
|
||||
docker compose up
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
Watch Compose build and run the app.
|
||||
@@ -141,17 +141,7 @@ After ten seconds (or if we press `^C` again) it will forcibly kill them.
|
||||
|
||||
---
|
||||
|
||||
## The Compose file
|
||||
|
||||
* Historically: docker-compose.yml or .yaml
|
||||
|
||||
* Recently (kind of): can also be named compose.yml or .yaml
|
||||
|
||||
(Since [version 1.28.6, March 2021](https://docs.docker.com/compose/releases/release-notes/#1286))
|
||||
|
||||
---
|
||||
|
||||
## Example
|
||||
## The `docker-compose.yml` file
|
||||
|
||||
Here is the file used in the demo:
|
||||
|
||||
@@ -182,9 +172,9 @@ services:
|
||||
|
||||
A Compose file has multiple sections:
|
||||
|
||||
* `services` is mandatory. Each service corresponds to a container.
|
||||
* `version` is mandatory. (Typically use "3".)
|
||||
|
||||
* `version` is optional (it used to be mandatory). It can be ignored.
|
||||
* `services` is mandatory. Each service corresponds to a container.
|
||||
|
||||
* `networks` is optional and indicates to which networks containers should be connected.
|
||||
<br/>(By default, containers will be connected on a private, per-compose-file network.)
|
||||
@@ -193,24 +183,24 @@ A Compose file has multiple sections:
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Compose file versions
|
||||
|
||||
* Version 1 is legacy and shouldn't be used.
|
||||
|
||||
(If you see a Compose file without a `services` block, it's a legacy v1 file.)
|
||||
(If you see a Compose file without `version` and `services`, it's a legacy v1 file.)
|
||||
|
||||
* Version 2 added support for networks and volumes.
|
||||
|
||||
* Version 3 added support for deployment options (scaling, rolling updates, etc).
|
||||
|
||||
* Typically use `version: "3"`.
|
||||
|
||||
The [Docker documentation](https://docs.docker.com/compose/compose-file/)
|
||||
has excellent information about the Compose file format if you need to know more about versions.
|
||||
|
||||
---
|
||||
|
||||
## Containers in Compose file
|
||||
## Containers in `docker-compose.yml`
|
||||
|
||||
Each service in the YAML file must contain either `build`, or `image`.
|
||||
|
||||
@@ -288,7 +278,7 @@ For the full list, check: https://docs.docker.com/compose/compose-file/
|
||||
|
||||
`frontcopy_www`, `frontcopy_www_1`, `frontcopy_db_1`
|
||||
|
||||
- Alternatively, use `docker compose -p frontcopy`
|
||||
- Alternatively, use `docker-compose -p frontcopy`
|
||||
|
||||
(to set the `--project-name` of a stack, which default to the dir name)
|
||||
|
||||
@@ -298,10 +288,10 @@ For the full list, check: https://docs.docker.com/compose/compose-file/
|
||||
|
||||
## Checking stack status
|
||||
|
||||
We have `ps`, `docker ps`, and similarly, `docker compose ps`:
|
||||
We have `ps`, `docker ps`, and similarly, `docker-compose ps`:
|
||||
|
||||
```bash
|
||||
$ docker compose ps
|
||||
$ docker-compose ps
|
||||
Name Command State Ports
|
||||
----------------------------------------------------------------------------
|
||||
trainingwheels_redis_1 /entrypoint.sh red Up 6379/tcp
|
||||
@@ -320,13 +310,13 @@ If you have started your application in the background with Compose and
|
||||
want to stop it easily, you can use the `kill` command:
|
||||
|
||||
```bash
|
||||
$ docker compose kill
|
||||
$ docker-compose kill
|
||||
```
|
||||
|
||||
Likewise, `docker compose rm` will let you remove containers (after confirmation):
|
||||
Likewise, `docker-compose rm` will let you remove containers (after confirmation):
|
||||
|
||||
```bash
|
||||
$ docker compose rm
|
||||
$ docker-compose rm
|
||||
Going to remove trainingwheels_redis_1, trainingwheels_www_1
|
||||
Are you sure? [yN] y
|
||||
Removing trainingwheels_redis_1...
|
||||
@@ -337,19 +327,19 @@ Removing trainingwheels_www_1...
|
||||
|
||||
## Cleaning up (2)
|
||||
|
||||
Alternatively, `docker compose down` will stop and remove containers.
|
||||
Alternatively, `docker-compose down` will stop and remove containers.
|
||||
|
||||
It will also remove other resources, like networks that were created for the application.
|
||||
|
||||
```bash
|
||||
$ docker compose down
|
||||
$ docker-compose down
|
||||
Stopping trainingwheels_www_1 ... done
|
||||
Stopping trainingwheels_redis_1 ... done
|
||||
Removing trainingwheels_www_1 ... done
|
||||
Removing trainingwheels_redis_1 ... done
|
||||
```
|
||||
|
||||
Use `docker compose down -v` to remove everything including volumes.
|
||||
Use `docker-compose down -v` to remove everything including volumes.
|
||||
|
||||
---
|
||||
|
||||
@@ -379,15 +369,15 @@ Use `docker compose down -v` to remove everything including volumes.
|
||||
|
||||
- If the container is deleted, the volume gets orphaned
|
||||
|
||||
- Example: `docker compose down && docker compose up`
|
||||
- Example: `docker-compose down && docker-compose up`
|
||||
|
||||
- the old volume still exists, detached from its container
|
||||
|
||||
- a new volume gets created
|
||||
|
||||
- `docker compose down -v`/`--volumes` deletes volumes
|
||||
- `docker-compose down -v`/`--volumes` deletes volumes
|
||||
|
||||
(but **not** `docker compose down && docker compose down -v`!)
|
||||
(but **not** `docker-compose down && docker-compose down -v`!)
|
||||
|
||||
---
|
||||
|
||||
@@ -406,9 +396,9 @@ volumes:
|
||||
|
||||
- Volume will be named `<project>_data`
|
||||
|
||||
- It won't be orphaned with `docker compose down`
|
||||
- It won't be orphaned with `docker-compose down`
|
||||
|
||||
- It will correctly be removed with `docker compose down -v`
|
||||
- It will correctly be removed with `docker-compose down -v`
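
As a sketch, this is what declaring such a named volume could look like (names are illustrative):

```yaml
services:
  db:
    image: postgres
    volumes:
      # Refers to the named volume declared below;
      # Compose will create it as <project>_data.
      - data:/var/lib/postgresql/data
volumes:
  data:
```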
---
|
||||
|
||||
@@ -427,7 +417,7 @@ services:
|
||||
|
||||
(for migration, backups, disk usage accounting...)
|
||||
|
||||
- Won't be removed by `docker compose down -v`
|
||||
- Won't be removed by `docker-compose down -v`
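
A minimal sketch, assuming the volume `somedata` was created beforehand (e.g. with `docker volume create somedata`):

```yaml
services:
  db:
    image: postgres
    volumes:
      - somedata:/var/lib/postgresql/data
volumes:
  somedata:
    # The volume is managed outside of Compose;
    # `docker compose down -v` will leave it alone.
    external: true
```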
---
|
||||
|
||||
@@ -461,7 +451,7 @@ services:
|
||||
|
||||
- This is used when bringing up individual services
|
||||
|
||||
(e.g. `docker compose up blah` or `docker compose run foo`)
|
||||
(e.g. `docker-compose up blah` or `docker-compose run foo`)
|
||||
|
||||
⚠️ It doesn't make a service "wait" for another one to be up!
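
A minimal sketch (service names are illustrative): `db` gets started when bringing up `www`, but Compose won't wait for it to be *ready*:

```yaml
services:
  www:
    build: www
    depends_on:
      - db
  db:
    image: postgres
```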
@@ -481,9 +471,7 @@ class: extra-details
|
||||
|
||||
- `docker compose` command to deploy Compose stacks to some clouds
|
||||
|
||||
- in Go instead of Python
|
||||
|
||||
- progressively getting feature parity with `docker compose`
|
||||
- progressively getting feature parity with `docker-compose`
|
||||
|
||||
- also provides numerous improvements (e.g. leverages BuildKit by default)
|
||||
|
||||
|
||||
@@ -120,11 +120,11 @@ class: extra-details
|
||||
|
||||
(and won't end up in the resulting image)
|
||||
|
||||
- See the [documentation][dockerignore] for the little details
|
||||
- See the [documentation] for the little details
|
||||
|
||||
(exceptions can be made with `!`, multiple directory levels with `**`...)
|
||||
|
||||
[dockerignore]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
|
||||
[documentation]: https://docs.docker.com/engine/reference/builder/#dockerignore-file
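
A sketch of a `.dockerignore` illustrating these rules (the paths are made up):

```
# Don't send the Git history or local artifacts to the builder
.git
*.log
# Ignore node_modules at any directory level
**/node_modules
# Exception: keep this one file even though *.log is ignored
!important.log
```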
???
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
version: "2"
|
||||
|
||||
services:
|
||||
www:
|
||||
image: nginx
|
||||
@@ -1,4 +1,4 @@
|
||||
## Exercise — Ingress Controller
|
||||
## Exercise — Ingress
|
||||
|
||||
- Add an ingress controller to a Kubernetes cluster
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Exercise — Ingress Controller
|
||||
# Exercise — Ingress
|
||||
|
||||
- We want to expose a couple of web apps through an ingress controller
|
||||
|
||||
@@ -128,4 +128,4 @@ This is similar to the previous scenario, but with two significant changes:
|
||||
|
||||
1. We only want to run the ingress controller on nodes that have the role `ingress`.
|
||||
|
||||
2. We want to either use `hostPort`, or a list of `externalIPs` (not `hostNetwork`).
|
||||
2. We don't want to use `hostNetwork`, but a list of `externalIPs` instead.
|
||||
@@ -1,7 +0,0 @@
|
||||
## Exercise — Requests and Limits
|
||||
|
||||
- Check current resource allocation and utilization
|
||||
|
||||
- Make sure that all workloads have requests (and perhaps limits)
|
||||
|
||||
- Make sure that all *future* workloads have them, too!
|
||||
@@ -1,55 +0,0 @@
|
||||
# Exercise — Requests and Limits
|
||||
|
||||
By default, if we don't specify *resource requests*,
|
||||
our workloads will run in `BestEffort` quality of service.
|
||||
|
||||
`BestEffort` is very bad for production workloads,
|
||||
because the scheduler has no idea of the actual resource
|
||||
requirements of our apps, and won't be able to make
|
||||
smart decisions about workload placement.
|
||||
|
||||
As a result, when the cluster gets overloaded,
|
||||
containers will be killed, pods will be evicted,
|
||||
and service disruptions will happen.
|
||||
|
||||
Let's solve this!
|
||||
|
||||
---
|
||||
|
||||
## Check current state
|
||||
|
||||
- Check *allocations*
|
||||
|
||||
(i.e. which pods have requests and limits for CPU and memory)
|
||||
|
||||
- Then check *utilization*
|
||||
|
||||
(i.e. actual resource usage)
|
||||
|
||||
- Possible tools: `kubectl`, plugins like `view-allocations`, Prometheus...
|
||||
|
||||
---
|
||||
|
||||
## Follow best practices
|
||||
|
||||
- We want to make sure that *all* workloads have requests
|
||||
|
||||
(and perhaps limits, too!)
|
||||
|
||||
- Depending on the workload:
|
||||
|
||||
- edit its YAML manifest
|
||||
|
||||
- adjust its Helm values
|
||||
|
||||
- add a LimitRange in its Namespace (see the sketch after this list)
|
||||
|
||||
- Then check again to confirm that the job has been done properly!
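
A minimal sketch of such a LimitRange (name and values are illustrative); containers created in that Namespace without explicit requests or limits would get these defaults:

```yaml
apiVersion: v1
kind: LimitRange
metadata:
  name: default-requests
spec:
  limits:
    - type: Container
      # Applied when a container doesn't specify its own requests
      defaultRequest:
        cpu: 100m
        memory: 128Mi
      # Applied when a container doesn't specify its own limits
      default:
        cpu: "1"
        memory: 512Mi
```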
---
|
||||
|
||||
## Be future-proof!
|
||||
|
||||
- We want to make sure that *future* workloads will have requests, too
|
||||
|
||||
- How can that be implemented?
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/sh
# Find link labels that are defined more than once across the Markdown
# sources, then print every definition of each duplicated label.
for LINK in $(cat */*.md | sed -n 's/^\[\(.*\)\]:.*/\1/p' | sort | uniq -d); do
    grep '^\['"$LINK"'\]:' */*.md
done
|
||||
|
||||
@@ -12,113 +12,113 @@
|
||||
<table>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 21 janvier 2025</td>
|
||||
<td>Mardi 14 mai 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 22 janvier 2025</td>
|
||||
<td>Mercredi 15 mai 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 23 janvier 2025</td>
|
||||
<td>Jeudi 16 mai 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 24 janvier 2025</td>
|
||||
<td>Vendredi 17 mai 2024</td>
|
||||
<td>
|
||||
<a href="1.yml.html">Docker Intensif</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 28 janvier 2025</td>
|
||||
<td>Mardi 21 mai 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 29 janvier 2025</td>
|
||||
<td>Mercredi 22 mai 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 30 janvier 2025</td>
|
||||
<td>Jeudi 23 mai 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 31 janvier 2025</td>
|
||||
<td>Vendredi 24 mai 2024</td>
|
||||
<td>
|
||||
<a href="2.yml.html">Fondamentaux Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Lundi 3 février 2025</td>
|
||||
<td>Mardi 28 mai 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 29 mai 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 30 mai 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 31 mai 2024</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mardi 4 juin 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 4 février 2025</td>
|
||||
<td>Mercredi 5 juin 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mercredi 5 février 2025</td>
|
||||
<td>Jeudi 6 juin 2024</td>
|
||||
<td>
|
||||
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Jeudi 7 février 2025</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 7 février 2025</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Lundi 10 février 2025</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mardi 11 février 2025</td>
|
||||
<td>
|
||||
<a href="4.yml.html">Kubernetes Avancé</a>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
<tr>
|
||||
<td>Mercredi 12 février 2025</td>
|
||||
<td>Lundi 10 juin 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Jeudi 13 février 2025</td>
|
||||
<td>Mardi 11 juin 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Vendredi 14 février 2025</td>
|
||||
<td>Mercredi 12 juin 2024</td>
|
||||
<td>
|
||||
<a href="5.yml.html">Opérer Kubernetes</a>
|
||||
</td>
|
||||
|
||||
@@ -141,6 +141,12 @@ class: pic
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
@@ -151,12 +157,6 @@ class: pic
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
# The Kubernetes API
|
||||
|
||||
[
|
||||
|
||||
@@ -42,19 +42,22 @@ ArgoCD manages **applications** by **syncing** their **live state** with their *
|
||||
|
||||
- It's OK to use local clusters (kind, minikube...)
|
||||
|
||||
- We need to install the ArgoCD CLI ([argocd-packages], [argocd-binaries])
|
||||
- We need to install the ArgoCD CLI ([packages], [binaries])
|
||||
|
||||
- **Highly recommended:** set up CLI completion!
|
||||
|
||||
- Of course we'll need a Git service, too
|
||||
|
||||
[packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
|
||||
[binaries]: https://github.com/argoproj/argo-cd/releases/latest
|
||||
|
||||
---
|
||||
|
||||
## Setting up ArgoCD
|
||||
|
||||
- The easiest way is to use upstream YAML manifests
|
||||
|
||||
- There is also a [Helm chart][argocd-helmchart] if we need more customization
|
||||
- There is also a [Helm chart][argohelmchart] if we need more customization
|
||||
|
||||
.lab[
|
||||
|
||||
@@ -67,6 +70,8 @@ ArgoCD manages **applications** by **syncing** their **live state** with their *
|
||||
|
||||
]
|
||||
|
||||
[argohelmchart]: https://artifacthub.io/packages/helm/argo/argocd-apps
|
||||
|
||||
---
|
||||
|
||||
## Logging in with the ArgoCD CLI
|
||||
@@ -132,6 +137,8 @@ ArgoCD manages **applications** by **syncing** their **live state** with their *
|
||||
|
||||
- Let's have a look at ArgoCD architecture!
|
||||
|
||||
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
@@ -190,6 +197,8 @@ It is responsible for invoking any user-defined hooks for lifecycle events (*Pre
|
||||
|
||||
- If you create a new, empty repository, add some manifests to it
|
||||
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
|
||||
---
|
||||
|
||||
## Add an Application
|
||||
@@ -259,6 +268,8 @@ It is responsible for invoking any user-defined hooks for lifecycle events (*Pre
|
||||
|
||||
🤔 We're getting errors!
|
||||
|
||||
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
|
||||
|
||||
---
|
||||
|
||||
## Sync failed
|
||||
@@ -446,6 +457,8 @@ Then click on the "CREATE" button (top left).
|
||||
|
||||
- Today we'll just turn on automated sync for the staging namespace
|
||||
|
||||
[rollouts]: https://argoproj.github.io/rollouts/
|
||||
|
||||
---
|
||||
|
||||
## Enabling auto-sync
|
||||
@@ -502,7 +515,7 @@ git push origin staging
|
||||
|
||||
- Let's see how to deploy Helm charts with ArgoCD!
|
||||
|
||||
- In the [kubercoins] repository, there is a branch called [helm-branch]
|
||||
- In the [kubercoins] repository, there is a branch called [helm]
|
||||
|
||||
- It provides a generic Helm chart, in the [generic-service] directory
|
||||
|
||||
@@ -510,6 +523,12 @@ git push origin staging
|
||||
|
||||
- Let's create one application for each of the 5 components of our app!
|
||||
|
||||
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
[helm]: https://github.com/jpetazzo/kubercoins/tree/helm
|
||||
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
|
||||
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
|
||||
|
||||
---
|
||||
|
||||
## Creating a Helm Application
|
||||
@@ -560,31 +579,9 @@ git push origin staging
|
||||
|
||||
(blue/green, canary...)
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
Many thanks to
|
||||
Anton (Ant) Weiss ([antweiss.com](https://antweiss.com), [@antweiss](https://twitter.com/antweiss))
|
||||
and
|
||||
Guilhem Lettron
|
||||
for contributing an initial version and suggestions to this ArgoCD chapter.
|
||||
|
||||
All remaining typos, mistakes, or approximations are mine (Jérôme Petazzoni).
|
||||
|
||||
[argocd-binaries]: https://github.com/argoproj/argo-cd/releases/latest
|
||||
[argocd-helmchart]: https://artifacthub.io/packages/helm/argo/argocd-apps
|
||||
[argocd-packages]: https://argo-cd.readthedocs.io/en/stable/cli_installation/
|
||||
[cmp]: https://argo-cd.readthedocs.io/en/stable/operator-manual/config-management-plugins/
|
||||
[Dex]: https://github.com/dexidp/dex
|
||||
[generic-service]: https://github.com/jpetazzo/kubercoins/tree/helm/generic-service
|
||||
[helm-branch]: https://github.com/jpetazzo/kubercoins/tree/helm
|
||||
[issue14167]: https://github.com/argoproj/argo-cd/issues/14167
|
||||
[kubercoins]: https://github.com/jpetazzo/kubercoins
|
||||
[pollinginterval]: https://argo-cd.readthedocs.io/en/stable/faq/#how-often-does-argo-cd-check-for-changes-to-my-git-or-helm-repository
|
||||
[rollouts]: https://argoproj.github.io/rollouts/
|
||||
[sso]: https://argo-cd.readthedocs.io/en/stable/operator-manual/user-management/#sso
|
||||
[values]: https://github.com/jpetazzo/kubercoins/tree/helm/values
|
||||
[Dex]: https://github.com/dexidp/dex
|
||||
[rollouts]: https://argoproj.github.io/argo-rollouts/
|
||||
|
||||
???
|
||||
|
||||
|
||||
@@ -1,173 +0,0 @@
|
||||
# Bento & PostgreSQL
|
||||
|
||||
- Bento can also use SQL databases for input/output
|
||||
|
||||
- We're going to demonstrate that by writing to a PostgreSQL database
|
||||
|
||||
- That database will be deployed with the CloudNativePG operator
|
||||
|
||||
(https://cloudnative-pg.io/)
|
||||
|
||||
---
|
||||
|
||||
## CNPG in a nutshell
|
||||
|
||||
- Free, open source
|
||||
|
||||
- Originally created by [EDB] (EnterpriseDB, well-known PgSQL experts)
|
||||
|
||||
- Non-exhaustive list of features:
|
||||
|
||||
- provisioning of Postgres servers, replicas, bouncers
|
||||
|
||||
- automatic failover
|
||||
|
||||
- backups (full backups and WAL shipping)
|
||||
|
||||
- provisioning from scratch, from backups, PITR
|
||||
|
||||
- manual and automated switchover (e.g. for node maintenance)
|
||||
|
||||
- and many more!
|
||||
|
||||
[EDB]: https://www.enterprisedb.com/workload/kubernetes
|
||||
|
||||
---
|
||||
|
||||
## What we're going to do
|
||||
|
||||
1. Install CNPG.
|
||||
|
||||
2. Provision a Postgres cluster.
|
||||
|
||||
3. Configure Bento to write to that cluster.
|
||||
|
||||
4. Set up a Grafana dashboard to see the data.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Installing CNPG
|
||||
|
||||
Many options available, see the [documentation][cnpg-install]:
|
||||
|
||||
- raw YAML manifests
|
||||
|
||||
- kubectl CNPG plugin (`kubectl cnpg install generate`)
|
||||
|
||||
- Helm chart
|
||||
|
||||
- OLM
|
||||
|
||||
[cnpg-install]: https://cloudnative-pg.io/documentation/1.24/installation_upgrade/
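
For instance, the Helm route might look like this (namespace and release name are our choice; see the CNPG docs for the authoritative chart location):

```bash
helm upgrade --install --repo https://cloudnative-pg.github.io/charts \
  --namespace cnpg-system --create-namespace \
  cnpg cloudnative-pg
```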
---
|
||||
|
||||
## 2️⃣ Provisioning a Postgres cluster
|
||||
|
||||
Minimal manifest:
|
||||
|
||||
```yaml
|
||||
apiVersion: postgresql.cnpg.io/v1
|
||||
kind: Cluster
|
||||
metadata:
|
||||
name: db
|
||||
spec:
|
||||
storage:
|
||||
size: 1Gi
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## For production...
|
||||
|
||||
We might also add:
|
||||
|
||||
- `spec.monitoring.enablePodMonitor: true`
|
||||
|
||||
- `spec.instances: 2`
|
||||
|
||||
- `resources.{requests,limits}.{cpu,memory}`
|
||||
|
||||
- `walStorage.size`
|
||||
|
||||
- `backup`
|
||||
|
||||
- `postgresql.parameters`
|
||||
|
||||
See [this manifest][cluster-maximal] for a detailed example.
|
||||
|
||||
[cluster-maximal]: https://github.com/jpetazzo/pozok/blob/main/cluster-maximal.yaml
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Configuring Bento to write to SQL
|
||||
|
||||
- We'll use the [`sql_insert`][sql-insert] output
|
||||
|
||||
- If our cluster is named `mydb`, there will be a Secret `mydb-app`
|
||||
|
||||
- This Secret will contain a `uri` field
|
||||
|
||||
- That field can be used as the `dsn` in the Bento configuration
|
||||
|
||||
- We will also need to create the table that we want to use
|
||||
|
||||
(see next slide for instructions)
|
||||
|
||||
[sql-insert]: https://warpstreamlabs.github.io/bento/docs/components/outputs/sql_insert
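
As a sketch, the output section could look like this, assuming a cluster named `mydb` and the `cities` table created on the next slide (the `PG_URI` environment variable is an assumption, to be populated from the `uri` field of the `mydb-app` Secret):

```yaml
output:
  sql_insert:
    driver: postgres
    # Assumed to carry the uri field of the mydb-app Secret
    dsn: "${PG_URI}"
    table: cities
    columns: [city, population]
    args_mapping: root = [this.city, this.population.int64()]
```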
---
|
||||
|
||||
## Creating a table
|
||||
|
||||
- If we just want to store the city name and its population:
|
||||
```sql
|
||||
CREATE TABLE IF NOT EXISTS cities (
|
||||
city varchar(100) NOT NULL,
|
||||
population integer
|
||||
);
|
||||
```
|
||||
|
||||
- This statement can be executed:
|
||||
|
||||
- manually, by getting a `psql` shell with `kubectl cnpg psql mydb app`
|
||||
|
||||
- automatically, with Bento's `init_statement`
|
||||
|
||||
---
|
||||
|
||||
## 4️⃣ Viewing the table in Grafana
|
||||
|
||||
- In Grafana, in the home menu on the left, click "Connections"
|
||||
|
||||
- Add a PostgreSQL data source
|
||||
|
||||
- Enter the host:port, database, user, password
|
||||
|
||||
- Then add a visualization using that data source
|
||||
|
||||
(it should be relatively self-explanatory!)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Automating it all
|
||||
|
||||
- Expose PostgreSQL credentials through environment variables
|
||||
|
||||
(in the Bento container)
|
||||
|
||||
- Use the `${...}` syntax in Bento to use these environment variables
|
||||
|
||||
- Export the Grafana dashboard to a JSON file
|
||||
|
||||
- Store the JSON file in a ConfigMap, with label `grafana_dashboard=1` (see the sketch below)
|
||||
|
||||
- Create that ConfigMap in the namespace where Grafana is running
|
||||
|
||||
- Similarly, data sources (like the Redis and the PostgreSQL one) can be defined in YAML
|
||||
|
||||
- And that YAML can be put in a ConfigMap with label `grafana_datasource=1`
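
A sketch of the dashboard ConfigMap (name and JSON content are illustrative; this relies on the Grafana sidecar that watches for that label):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: cities-dashboard
  labels:
    # Picked up by the Grafana dashboard sidecar
    grafana_dashboard: "1"
data:
  cities-dashboard.json: |
    {"title": "Cities", "panels": []}
```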
@@ -1,450 +0,0 @@
|
||||
# Autoscaling with KEDA
|
||||
|
||||
- Cluster autoscaling = automatically add nodes *when needed*
|
||||
|
||||
- *When needed* = when Pods are `Pending`
|
||||
|
||||
- How do these pods get created?
|
||||
|
||||
- When the Ollama Deployment is scaled up
|
||||
|
||||
- ... manually (e.g. `kubectl scale`)
|
||||
|
||||
- ... automatically (that's what we want to investigate now!)
|
||||
|
||||
---
|
||||
|
||||
## Ways to implement autoscaling
|
||||
|
||||
- Custom code
|
||||
|
||||
(e.g. crontab checking some value every few minutes and scaling accordingly)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v1
|
||||
|
||||
(aka `kubectl autoscale`)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v2 with custom metrics
|
||||
|
||||
(e.g. with Prometheus Adapter)
|
||||
|
||||
- Kubernetes Horizontal Pod Autoscaler v2 with external metrics
|
||||
|
||||
(e.g. with KEDA)
|
||||
|
||||
---
|
||||
|
||||
## Custom code
|
||||
|
||||
- No, we're not going to do that!
|
||||
|
||||
- But this would be an interesting exercise in RBAC
|
||||
|
||||
(setting minimal amount of permissions for the pod running our custom code)
|
||||
|
||||
---
|
||||
|
||||
## HPAv1
|
||||
|
||||
Pros: very straightforward
|
||||
|
||||
Cons: can only scale on CPU utilization
|
||||
|
||||
How it works:
|
||||
|
||||
- periodically measures average CPU *utilization* across pods
|
||||
|
||||
- if utilization is above/below a target (default: 80%), scale up/down
|
||||
|
||||
---
|
||||
|
||||
## HPAv1 in practice
|
||||
|
||||
- Create the autoscaling policy:
|
||||
```bash
|
||||
kubectl autoscale deployment ollama --max=1000
|
||||
```
|
||||
(The `--max` is required; it's a safety limit.)
|
||||
|
||||
- Check it:
|
||||
```bash
|
||||
kubectl describe hpa
|
||||
```
|
||||
|
||||
- Send traffic, wait a bit: pods should be created automatically
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom vs external
|
||||
|
||||
- Custom metrics = arbitrary metrics attached to Kubernetes objects
|
||||
|
||||
- External metrics = arbitrary metrics not related to Kubernetes objects
|
||||
|
||||
--
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom metrics
|
||||
|
||||
- Examples:
|
||||
|
||||
- on Pods: CPU, RAM, network traffic...
|
||||
|
||||
- on Ingress: requests per second, HTTP status codes, request duration...
|
||||
|
||||
- on some worker Deployment: number of tasks processed, task duration...
|
||||
|
||||
- Requires an *adapter* to:
|
||||
|
||||
- expose the metrics through the Kubernetes *aggregation layer*
|
||||
|
||||
- map the actual metrics source to Kubernetes objects
|
||||
|
||||
Example: the [Prometheus adapter][prometheus-adapter]
|
||||
|
||||
[prometheus-adapter]: https://github.com/kubernetes-sigs/prometheus-adapter
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 custom metrics in practice
|
||||
|
||||
- We're not going to cover this here
|
||||
|
||||
(too complex / not enough time!)
|
||||
|
||||
- If you want more details, check [my other course material][hpav2slides]
|
||||
|
||||
[hpav2slides]: https://2024-10-enix.container.training/4.yml.html#toc-scaling-with-custom-metrics
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 external metrics
|
||||
|
||||
- Examples:
|
||||
|
||||
- arbitrary Prometheus query
|
||||
|
||||
- arbitrary SQL query
|
||||
|
||||
- number of messages in a queue
|
||||
|
||||
- and [many, many more][keda-scalers]
|
||||
|
||||
- Also requires an extra component to expose the metrics
|
||||
|
||||
Example: [KEDA (https://keda.sh/)](https://keda.sh)
|
||||
|
||||
[keda-scalers]: https://keda.sh/docs/latest/scalers/
|
||||
|
||||
---
|
||||
|
||||
## HPAv2 external metrics in practice
|
||||
|
||||
- We're going to install KEDA
|
||||
|
||||
- And set it up to autoscale depending on the number of messages in Redis
|
||||
|
||||
---
|
||||
|
||||
## Installing KEDA
|
||||
|
||||
Multiple options (details in the [documentation][keda-deploy]):
|
||||
|
||||
- YAML
|
||||
|
||||
- Operator Hub
|
||||
|
||||
- Helm chart 💡
|
||||
|
||||
```bash
|
||||
helm upgrade --install --repo https://kedacore.github.io/charts \
|
||||
--namespace keda-system --create-namespace keda keda
|
||||
```
|
||||
|
||||
[keda-deploy]: https://keda.sh/docs/latest/deploy/
|
||||
|
||||
---
|
||||
|
||||
## Scaling according to Redis
|
||||
|
||||
- We need to create a KEDA Scaler
|
||||
|
||||
- This is done with a "ScaledObject" manifest
|
||||
|
||||
- [Here is the documentation][keda-redis-lists] for the Redis Lists Scaler
|
||||
|
||||
- Let's write that manifest!
|
||||
|
||||
[keda-redis-lists]: https://keda.sh/docs/latest/scalers/redis-lists/
|
||||
|
||||
---
|
||||
|
||||
## `keda-redis-scaler.yaml`
|
||||
|
||||
```yaml
|
||||
apiVersion: keda.sh/v1alpha1
|
||||
kind: ScaledObject
|
||||
metadata:
|
||||
name: ollama
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
name: ollama
|
||||
triggers:
|
||||
- type: redis
|
||||
metadata:
|
||||
address: redis.`default`.svc:6379
|
||||
listName: cities
|
||||
listLength: "10"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Notes
|
||||
|
||||
- We need to update the `address` field with our namespace
|
||||
|
||||
(unless we are running in the `default` namespace)
|
||||
|
||||
- Alternative: use `addressFromEnv` and set an env var in the Ollama pods
|
||||
|
||||
- `listLength` gives the target ratio of `messages / replicas`
|
||||
|
||||
- In our example, KEDA will scale the Deployment to `messages / 10`
|
||||
|
||||
(rounded up!)
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Apply the ScaledObject manifest
|
||||
|
||||
- Start a Bento pipeline loading e.g. 100-1000 cities in Redis
|
||||
|
||||
(100 on smaller clusters / slower CPUs, 1000 on bigger / faster ones)
|
||||
|
||||
- Check pod and node resource usage
|
||||
|
||||
- What do we see?
|
||||
|
||||
--
|
||||
|
||||
🤩 The Deployment scaled up automatically!
|
||||
|
||||
--
|
||||
|
||||
🤔 But Pod resource usage remains very low (a few busy pods, many idle ones)
|
||||
|
||||
--
|
||||
|
||||
💡 Bento doesn't submit enough requests in parallel!
|
||||
|
||||
---
|
||||
|
||||
## Improving throughput
|
||||
|
||||
We're going to review multiple techniques:
|
||||
|
||||
1. Increase parallelism inside the Bento pipeline.
|
||||
|
||||
2. Run multiple Bento consumers.
|
||||
|
||||
3. Couple consumers and processors more tightly.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Increase pipeline parallelism
|
||||
|
||||
- Set `parallel` to `true` in the `http` processor
|
||||
|
||||
- Wrap the input around a `batched` input
|
||||
|
||||
(otherwise, we don't have enough messages in flight)
|
||||
|
||||
- Increase `http` timeout significantly (e.g. to 5 minutes)
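
A sketch of these changes (URL and batch size are illustrative):

```yaml
input:
  batched:
    child:
      redis_list:
        url: redis://redis:6379
        key: cities
    policy:
      # Keep enough messages in flight to feed parallel requests
      count: 50
pipeline:
  processors:
    - http:
        url: http://ollama:11434/api/generate
        verb: POST
        # Send the requests of a batch in parallel
        parallel: true
        # LLM answers can be slow; don't give up too early
        timeout: 5m
```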
---
|
||||
|
||||
## Results
|
||||
|
||||
🎉 More messages flow through the pipeline
|
||||
|
||||
🎉 Many requests happen in parallel
|
||||
|
||||
🤔 Average Pod and Node CPU utilization is higher, but not maxed out
|
||||
|
||||
🤔 HTTP queue size (measured with HAProxy metrics) is relatively high
|
||||
|
||||
🤔 Latency is higher too
|
||||
|
||||
Why?
|
||||
|
||||
---
|
||||
|
||||
## Too many requests in parallel
|
||||
|
||||
- Earlier, we didn't have enough...

- ...Now, we have too many!
|
||||
|
||||
- However, for a very big request queue, it still wouldn't be enough
|
||||
|
||||
💡 We currently have a fixed parallelism. We need to make it dynamic!
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Run multiple Bento consumers
|
||||
|
||||
- Restore the original Bento configuration
|
||||
|
||||
(flip `parallel` back to `false`; remove the `batched` input)
|
||||
|
||||
- Run Bento in a Deployment
|
||||
|
||||
(e.g. with the [Bento Helm chart][bento-helm-chart])
|
||||
|
||||
- Autoscale that Deployment like we autoscaled the Ollama Deployment
|
||||
|
||||
[bento-helm-chart]: https://github.com/warpstreamlabs/bento-helm-chart
|
||||
|
||||
---
|
||||
|
||||
## Results
|
||||
|
||||
🤔🤔🤔 Pretty much the same as before!
|
||||
|
||||
(High throughput, high utilization but not maxed out, high latency...)
|
||||
|
||||
--
|
||||
|
||||
🤔🤔🤔 Why?
|
||||
|
||||
---
|
||||
|
||||
## Unbalanced load balancing
|
||||
|
||||
- All our requests go through the `ollama` Service
|
||||
|
||||
- We're still using the default Kubernetes service proxy!
|
||||
|
||||
- It doesn't spread the requests properly across all the backends
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Couple consumers and processors
|
||||
|
||||
What if:
|
||||
|
||||
--
|
||||
|
||||
instead of sending requests to a load balancer,
|
||||
|
||||
--
|
||||
|
||||
each queue consumer had its own Ollama instance?
|
||||
|
||||
---
|
||||
|
||||
## Current architecture
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
subgraph P1["Pod"]
|
||||
H1["HAProxy"] --> O1["Ollama"]
|
||||
end
|
||||
subgraph P2["Pod"]
|
||||
H2["HAProxy"] --> O2["Ollama"]
|
||||
end
|
||||
subgraph P3["Pod"]
|
||||
H3["HAProxy"] --> O3["Ollama"]
|
||||
end
|
||||
Q["Queue<br/>(Redis)"] <--> C["Consumer<br/>(Bento)"] --> LB["Load Balancer<br/>(kube-proxy)"]
|
||||
LB --> H1 & H2 & H3
|
||||
</pre>
|
||||
|
||||
---
|
||||
|
||||
## Proposed architecture
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
subgraph P1["Consumer Pod"]
|
||||
C1["Bento"] --> H1["HAProxy"] --> O1["Ollama"]
|
||||
end
|
||||
subgraph P2["Consumer Pod"]
|
||||
C2["Bento"] --> H2["HAProxy"] --> O2["Ollama"]
|
||||
end
|
||||
subgraph P3["Consumer Pod"]
|
||||
C3["Bento"] --> H3["HAProxy"] --> O3["Ollama"]
|
||||
end
|
||||
Queue["Queue"] <--> C1 & C2 & C3
|
||||
</pre>
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
- Let's implement that architecture!
|
||||
|
||||
- See next slides for hints / getting started
|
||||
|
||||
---
|
||||
|
||||
## Hints
|
||||
|
||||
We need to:
|
||||
|
||||
- Update the Bento consumer configuration to talk to localhost
|
||||
|
||||
- Store that configuration in a ConfigMap
|
||||
|
||||
- Add a Bento container to the Ollama Deployment (see the sketch below)
|
||||
|
||||
- Profit!
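
A sketch of what the amended Deployment could look like (image tags, file names, and the ConfigMap name are assumptions):

```yaml
spec:
  template:
    spec:
      containers:
        - name: ollama
          image: ollama/ollama
        - name: bento
          image: ghcr.io/warpstreamlabs/bento
          # Consumer configuration pointing at localhost instead of a Service
          args: ["-c", "/config/consumer.yaml"]
          volumeMounts:
            - name: bento-config
              mountPath: /config
      volumes:
        - name: bento-config
          configMap:
            name: bento-consumer
```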
---
|
||||
|
||||
## Results
|
||||
|
||||
🎉 Node and Pod utilization is maximized
|
||||
|
||||
🎉 HTTP queue size is bounded
|
||||
|
||||
🎉 Deployment autoscales up and down
|
||||
|
||||
---
|
||||
|
||||
## ⚠️ Scaling down
|
||||
|
||||
- Eventually, there are fewer messages in the queue
|
||||
|
||||
- The HPA scales down the Ollama Deployment
|
||||
|
||||
- This terminates some Ollama Pods
|
||||
|
||||
🤔 What happens if these Pods were processing requests?
|
||||
|
||||
--
|
||||
|
||||
- The requests might be lost!
|
||||
|
||||
---
|
||||
|
||||
## Avoiding lost messages
|
||||
|
||||
Option 1:
|
||||
|
||||
- cleanly shutdown the consumer
|
||||
|
||||
- make sure that Ollama can complete in-flight requests
|
||||
|
||||
(by extending its grace period)
|
||||
|
||||
- find a way to terminate Ollama when no more requests are in flight
|
||||
|
||||
Option 2:
|
||||
|
||||
- use *message acknowledgement*
|
||||
@@ -1,623 +0,0 @@
|
||||
# Getting started with Bento
|
||||
|
||||
How can we move to a message queue architecture...
|
||||
|
||||
*...without rewriting a bunch of code?*
|
||||
|
||||
🤔
|
||||
|
||||
---
|
||||
|
||||
## Bento
|
||||
|
||||
https://bento.dev/
|
||||
|
||||
"Fancy stream processing made operationally mundane"
|
||||
|
||||
"Written in Go, deployed as a static binary, declarative configuration. Open source and cloud native as utter heck."
|
||||
|
||||
With ✨ amazing ✨ documentation 😍
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Tiny bit of history
|
||||
|
||||
- Original project: Benthos
|
||||
|
||||
- May 30, 2024: [Redpanda acquires Benthos][redpanda-acquires-benthos]
|
||||
|
||||
- Benthos is now Redpanda Connect
|
||||
|
||||
- some parts have been relicensed as commercial products
|
||||
|
||||
- May 31, 2024: [Warpstream forks Benthos][warpstream-forks-benthos]
|
||||
|
||||
- that fork is named "Bento"
|
||||
|
||||
- it's fully open source
|
||||
|
||||
- We're going to use Bento here, but Redpanda Connect should work fine too!
|
||||
|
||||
---
|
||||
|
||||
## Bento concepts
|
||||
|
||||
- Message stream processor
|
||||
|
||||
- Each pipeline is configured by a YAML configuration that defines:
|
||||
|
||||
- input (where do we get the messages?)
|
||||
|
||||
- pipeline (optional: how do we transform the messages?)
|
||||
|
||||
- output (where do we put the messages afterwards?)
|
||||
|
||||
- Once Bento is started, it runs the pipelines forever
|
||||
|
||||
(except for pipelines that have a logical end, e.g. reading from a file)
|
||||
|
||||
- Embedded language (Bloblang) to manipulate/transform messages
|
||||
|
||||
---
|
||||
|
||||
## Messages
|
||||
|
||||
- Typically JSON objects
|
||||
|
||||
(but raw strings are also possible)
|
||||
|
||||
- Nesting, arrays, etc. are OK
|
||||
|
||||
---
|
||||
|
||||
## Getting started with Bento
|
||||
|
||||
We're going to:
|
||||
|
||||
1. Import a bunch of cities from a CSV file into a Redis queue.
|
||||
|
||||
2. Read back these cities using a web server.
|
||||
|
||||
3. Use an "enrichment workflow" to query our LLM for each city.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Importing cities
|
||||
|
||||
Let's break down the work:
|
||||
|
||||
- download the data set
|
||||
|
||||
- create the Bento configuration
|
||||
|
||||
- deploy Redis
|
||||
|
||||
- start Bento
|
||||
|
||||
---
|
||||
|
||||
## Downloading the data set
|
||||
|
||||
- Example database:
|
||||
|
||||
https://www.kaggle.com/datasets/juanmah/world-cities
|
||||
|
||||
- Let's download and uncompress the data set:
|
||||
```bash
|
||||
curl -fsSL https://www.kaggle.com/api/v1/datasets/download/juanmah/world-cities |
|
||||
funzip > cities.csv
|
||||
```
|
||||
|
||||
(Ignore the "length error", it's harmless!)
|
||||
|
||||
- Check the structure of the data set:
|
||||
```bash
|
||||
head cities.csv
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Creating the Bento configuration
|
||||
|
||||
- We need to find which `input` and `output` to use
|
||||
|
||||
- Check the list with `bento list` or the [documentation][bento-inputs]
|
||||
|
||||
- Then run `bento create INPUTNAME/PIPELINENAME/OUTPUTNAME`
|
||||
|
||||
- Generate a configuration file:
|
||||
```bash
|
||||
bento create csv//redis_list > csv2redis.yaml
|
||||
```
|
||||
|
||||
- Edit that configuration file; look for the `(required)` parameters
|
||||
|
||||
(Everything else can go away!)
|
||||
|
||||
---
|
||||
|
||||
## Resulting configuration
|
||||
|
||||
If we trim all the default values, here is the result:
|
||||
|
||||
```yaml
|
||||
input:
|
||||
csv:
|
||||
paths: ["cities.csv"]
|
||||
output:
|
||||
redis_list:
|
||||
url: redis://redis:6379 # No default (required)
|
||||
key: cities
|
||||
```
|
||||
|
||||
We'll save that trimmed configuration as `csv2redis.yaml`.
|
||||
|
||||
---
|
||||
|
||||
## Deploying Redis
|
||||
|
||||
- Create a Deployment:
|
||||
```bash
|
||||
kubectl create deployment redis --image redis
|
||||
```
|
||||
|
||||
- Expose it:
|
||||
```bash
|
||||
kubectl expose deployment redis --port 6379
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Starting Bento
|
||||
|
||||
Option 1: run it manually in a pod, to see what's going on.
|
||||
|
||||
```bash
|
||||
bento --config csv2redis.yaml
|
||||
```
|
||||
|
||||
Option 2: run it with e.g. the Bento Helm chart.
|
||||
|
||||
*We're not going to do that yet, since this particular pipeline has a logical end.*
|
||||
|
||||
*(The Helm chart is best suited to pipelines that run forever.)*
|
||||
|
||||
---
|
||||
|
||||
## Expected output
|
||||
|
||||
.small[
|
||||
```
|
||||
INFO Running main config from specified file @service=bento bento_version="" path=csv2redis.yaml
|
||||
INFO Launching a Bento instance, use CTRL+C to close @service=bento
|
||||
INFO Listening for HTTP requests at: http://0.0.0.0:4195 @service=bento
|
||||
INFO Input type csv is now active @service=bento label="" path=root.input
|
||||
INFO Output type redis_list is now active @service=bento label="" path=root.output
|
||||
INFO Pipeline has terminated. Shutting down the service @service=bento
|
||||
```
|
||||
]
|
||||
|
||||
The pipeline should complete in just a few seconds.
|
||||
|
||||
---
|
||||
|
||||
## Checking what's in Redis
|
||||
|
||||
- Connect to our Redis instance:
|
||||
```bash
|
||||
redis-cli -h redis
|
||||
```
|
||||
|
||||
- List keys:
|
||||
```redis
|
||||
KEYS *
|
||||
```
|
||||
|
||||
- Check that the `cities` list has approx. 47000 elements:
|
||||
```redis
|
||||
LLEN cities
|
||||
```
|
||||
|
||||
- Get the first element of the list:
|
||||
```redis
|
||||
LINDEX cities 0
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Fun with Bloblang
|
||||
|
||||
- Let's add a filter to keep only cities with a population above 10,000,000
|
||||
|
||||
- Add the following block to the Bento configuration:
|
||||
|
||||
```yaml
|
||||
pipeline:
|
||||
processors:
|
||||
- switch:
|
||||
- check: this.population == ""
|
||||
processors:
|
||||
- mapping: root = deleted()
|
||||
- check: this.population.int64() < 10000000
|
||||
processors:
|
||||
- mapping: root = deleted()
|
||||
```
|
||||
|
||||
(See the [docs][bento-switch] for details about the `switch` processor.)
|
||||
|
||||
---
|
||||
|
||||
## Testing our processor
|
||||
|
||||
- First, delete the existing `cities` list:
|
||||
```bash
|
||||
redis-cli -h redis DEL cities
|
||||
```
|
||||
|
||||
- Then, run the Bento pipeline again:
|
||||
```bash
|
||||
bento --config csv2redis.yaml
|
||||
```
|
||||
(It should complain about a few cities where the population has a decimal point.)
|
||||
|
||||
- Check how many cities were loaded:
|
||||
```bash
|
||||
redis-cli -h redis LLEN cities
|
||||
```
|
||||
(There should be 47.)
|
||||
|
||||
---
|
||||
|
||||
## 2️⃣ Consume the queue over HTTP
|
||||
|
||||
- We want to "get the next city" in the queue with a simple `curl`
|
||||
|
||||
- Our input will be `redis_list`
|
||||
|
||||
- Our output will be `http_server`
|
||||
|
||||
---
|
||||
|
||||
## Generate the Bento configuration
|
||||
|
||||
Option 1: `bento create redis_list//http_server`
|
||||
|
||||
Option 2: [read the docs][output-http-server]
|
||||
|
||||
---
|
||||
|
||||
## 🙋 Choose your own adventure
|
||||
|
||||
Do you want to try to write that configuration?
|
||||
|
||||
Or shall we see it right away?
|
||||
|
||||
--
|
||||
|
||||
⚠️ Spoilers on next slide!
|
||||
|
||||
---
|
||||
|
||||
## `redis2http.yaml`
|
||||
|
||||
```yaml
|
||||
input:
|
||||
redis_list:
|
||||
url: redis://redis:`6379`
|
||||
key: cities
|
||||
output:
|
||||
http_server:
|
||||
path: /nextcity
|
||||
```
|
||||
|
||||
This will set up an HTTP route to fetch *one* city.
|
||||
|
||||
It's also possible to batch, stream...
|
||||
|
||||
⚠️ As of November 2024, `bento create` uses port 6397 instead of 6379 for Redis!
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Run Bento with this configuration:
|
||||
```bash
|
||||
bento --config redis2http.yaml &
|
||||
```
|
||||
|
||||
- Retrieve one city:
|
||||
```bash
|
||||
curl http://localhost:4195/nextcity
|
||||
```
|
||||
|
||||
- Check what happens after we retrieve *all* the cities!
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Query our LLM for each city
|
||||
|
||||
- We want to ask our LLM who's the mayor of each of these cities
|
||||
|
||||
- We'll use a prompt that will usually ensure a short answer
|
||||
|
||||
(so that it's faster; we don't want to wait 30 seconds per city!)
|
||||
|
||||
- We'll test the prompt with the Ollama CLI
|
||||
|
||||
- Then we'll craft a proper HTTP API query
|
||||
|
||||
- Finally, we'll configure an [enrichment workflow][enrichment] in Bento
|
||||
|
||||
---
|
||||
|
||||
## Test our prompt
|
||||
|
||||
Assuming that our earlier Ollama Deployment is still running:
|
||||
|
||||
```bash
|
||||
kubectl exec deployment/ollama -- \
|
||||
ollama run qwen2:1.5b "
|
||||
Who is the mayor of San Francisco?
|
||||
Just give the name by itself on a single line.
|
||||
If you don't know, don't say anything.
|
||||
"
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Turn the prompt into an HTTP API query
|
||||
|
||||
Note: to install `http` in an Alpine container, run `apk add httpie`.
|
||||
|
||||
```bash
|
||||
http http://ollama.default:11434/api/generate \
|
||||
model=qwen2:1.5b stream:=false prompt="
|
||||
Who is the mayor of Paris?
|
||||
Just give the name by itself on a single line.
|
||||
If you don't know, don't say anything.
|
||||
"
|
||||
```
|
||||
|
||||
We get a JSON payload, and we want to use the `response` field.
|
||||
|
||||
---
|
||||
|
||||
## Configure an enrichment workflow
|
||||
|
||||
The [Bento documentation][enrichment] is really good!
|
||||
|
||||
We need to set up:
|
||||
|
||||
- a `branch` processor
|
||||
|
||||
- a `request_map` to transform the city into an Ollama request
|
||||
|
||||
- an `http` processor to submit the request to Ollama
|
||||
|
||||
- a `result_map` to transform the Ollama response
|
||||
|
||||
---
|
||||
|
||||
## Without the `branch` processor
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
|
||||
CITY["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
"]
|
||||
|
||||
REQ["
|
||||
model: qwen2:1.5b
|
||||
stream: false
|
||||
prompt: Who is the mayor of Paris?
|
||||
"]
|
||||
|
||||
REP["
|
||||
response: Anne Hidalgo
|
||||
eval_count: ...
|
||||
prompt_eval_count: ...
|
||||
(other ollama fields)
|
||||
"]
|
||||
|
||||
CITY@{ shape: card}
|
||||
REQ@{ shape: card}
|
||||
REP@{ shape: card}
|
||||
|
||||
style CITY text-align: left
|
||||
style REQ text-align: left
|
||||
style REP text-align: left
|
||||
|
||||
mapping@{ shape: diam }
|
||||
http["http processor"]@{ shape: diam }
|
||||
|
||||
CITY --> mapping --> REQ --> http --> REP
|
||||
</pre>
|
||||
|
||||
- We transform the `city` into an Ollama request
|
||||
|
||||
- The `http` processor submits the request to Ollama
|
||||
|
||||
- The final output is the Ollama response
|
||||
|
||||
---
|
||||
|
||||
## With the `branch` processor
|
||||
|
||||
<pre class="mermaid">
|
||||
flowchart LR
|
||||
|
||||
CITY["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
"]
|
||||
|
||||
REQ["
|
||||
model: qwen2:1.5b
|
||||
stream: false
|
||||
prompt: Who is the mayor of Paris?
|
||||
"]
|
||||
|
||||
REP["
|
||||
response: Anne Hidalgo
|
||||
eval_count: ...
|
||||
prompt_eval_count: ...
|
||||
(other ollama fields)
|
||||
"]
|
||||
|
||||
OUT["
|
||||
city: Paris
|
||||
country: France
|
||||
population: 1106000
|
||||
iso2: FR
|
||||
...
|
||||
mayor: Anne Hidalgo
|
||||
"]
|
||||
|
||||
CITY@{ shape: card}
|
||||
REQ@{ shape: card}
|
||||
REP@{ shape: card}
|
||||
OUT@{ shape: card}
|
||||
|
||||
style CITY text-align: left
|
||||
style REQ text-align: left
|
||||
style REP text-align: left
|
||||
style OUT text-align: left
|
||||
|
||||
branch@{ shape: diam }
|
||||
request_map@{ shape: diam }
|
||||
result_map@{ shape: diam }
|
||||
http["http processor"]@{ shape: diam }
|
||||
|
||||
CITY --> branch
|
||||
branch --> result_map
|
||||
branch --> request_map
|
||||
request_map --> REQ
|
||||
REQ --> http
|
||||
http --> REP
|
||||
REP --> result_map
|
||||
result_map --> OUT
|
||||
</pre>
|
||||
|
||||
- The `branch` processor allows doing the processing "on the side"
|
||||
|
||||
- `request_map` and `result_map` transform the message before/after processing
|
||||
|
||||
- Then, the result is combined with the original message (the `city`)
|
||||
|
||||
---
|
||||
|
||||
```yaml
|
||||
input:
|
||||
csv:
|
||||
paths: ["cities.csv"]
|
||||
pipeline:
|
||||
processors:
|
||||
- branch:
|
||||
request_map: |
|
||||
root.model = "qwen2:1.5b"
|
||||
root.stream = false
|
||||
root.prompt = (
|
||||
"Who is the mayor of %s? ".format(this.city) +
|
||||
"Just give the name by itself on a single line. " +
|
||||
"If you don't know, don't say anything."
|
||||
)
|
||||
processors:
|
||||
- http:
|
||||
url: http://ollama:11434/api/generate
|
||||
verb: POST
|
||||
result_map: |
|
||||
root.mayor = this.response
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- Save the YAML on the previous page into a configuration file
|
||||
|
||||
- Run Bento with that configuration file
|
||||
|
||||
- What happens?
|
||||
|
||||
--
|
||||
|
||||
🤔 We're seeing errors due to timeouts
|
||||
|
||||
```
|
||||
ERRO HTTP request to 'http://ollama...' failed: http://ollama...:
|
||||
Post "http://ollama...": context deadline exceeded
|
||||
(Client.Timeout exceeded while awaiting headers)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 🙋 Choose your own adventure
|
||||
|
||||
How should we address errors?
|
||||
|
||||
- Option 1: increase the timeout in the [http][bento-http] processor
|
||||
|
||||
- Option 2: use a [retry][bento-retry] processor in the pipeline (see the sketch below)
|
||||
|
||||
- Option 3: use a [reject_errored][bento-reject] output
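
For instance, option 2 could be sketched like this (backoff values are illustrative), wrapping the existing `http` processor:

```yaml
pipeline:
  processors:
    - retry:
        backoff:
          initial_interval: 1s
          max_interval: 30s
        processors:
          - http:
              url: http://ollama:11434/api/generate
              verb: POST
```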
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
- We want to process 1000 cities with our LLM
|
||||
|
||||
(guessing who the mayor is, or something similar)
|
||||
|
||||
- Store the output wherever we want
|
||||
|
||||
(Redis, CSV file, JSONL files...)
|
||||
|
||||
- Deal correctly with errors
|
||||
|
||||
(we'll check that there are, indeed, 1000 cities in the output)
|
||||
|
||||
- Scale out to process faster
|
||||
|
||||
(scale ollama to e.g. 10 replicas, enable parallelism in Bento)
|
||||
|
||||
---
|
||||
|
||||
class: title
|
||||
|
||||
🍱 Lunch time! 🍱
|
||||
|
||||
---
|
||||
|
||||
## What happened?
|
||||
|
||||
- If your Ollama pods have *resource requests*:
|
||||
|
||||
→ your cluster may have auto-scaled
|
||||
|
||||
- If your Ollama pods don't have *resource requests*:
|
||||
|
||||
→ you probably have a bunch of container restarts, due to out-of-memory errors
|
||||
|
||||
🤔 What's that about?
|
||||
|
||||
[bento-http]: https://warpstreamlabs.github.io/bento/docs/components/processors/http/
|
||||
[bento-inputs]: https://warpstreamlabs.github.io/bento/docs/components/inputs/about/
|
||||
[bento-reject]: https://warpstreamlabs.github.io/bento/docs/components/outputs/reject_errored
|
||||
[bento-retry]: https://warpstreamlabs.github.io/bento/docs/components/processors/retry
|
||||
[bento-switch]: https://warpstreamlabs.github.io/bento/docs/components/processors/switch/
|
||||
[enrichment]: https://warpstreamlabs.github.io/bento/cookbooks/enrichments/
|
||||
[output-http-server]: https://warpstreamlabs.github.io/bento/docs/components/outputs/http_server
|
||||
[redpanda-acquires-benthos]: https://www.redpanda.com/press/redpanda-acquires-benthos
|
||||
[warpstream-forks-benthos]: https://www.warpstream.com/blog/announcing-bento-the-open-source-fork-of-the-project-formerly-known-as-benthos
|
||||
|
||||
@@ -1,250 +0,0 @@
|
||||
# Bento & RabbitMQ
|
||||
|
||||
- In some of the previous runs, messages were dropped
|
||||
|
||||
(we start with 1000 messages in `cities` and have e.g. 955 in `mayors`)
|
||||
|
||||
- This is caused by various errors during processing
|
||||
|
||||
(e.g. too many timeouts; Bento being shut down halfway through...)
|
||||
|
||||
- ...And by the fact that we are using a Redis queue
|
||||
|
||||
(which doesn't offer delivery guarantees or acknowledgements)
|
||||
|
||||
- Can we get something better?
|
||||
|
||||
---
|
||||
|
||||
## The problem
|
||||
|
||||
- Some inputs (like `redis_list`) don't support *acknowledgements*
|
||||
|
||||
- When a message is pulled from the queue, it is deleted immediately
|
||||
|
||||
- If the message is lost for any reason, it is lost permanently
|
||||
|
||||
---
|
||||
|
||||
## The solution
|
||||
|
||||
- Some inputs (like `amqp_0_9`) support acknowledgements
|
||||
|
||||
- When a message is pulled from the queue:
|
||||
|
||||
- it is not visible anymore to other consumers
|
||||
|
||||
- it needs to be explicitly acknowledged
|
||||
|
||||
- The acknowledgement is done by Bento when the message reaches the output
|
||||
|
||||
- The acknowledgement deletes the message
|
||||
|
||||
- No acknowledgement after a while? Consumer crashes/disconnects?
|
||||
|
||||
Message gets requeued automatically!
|
||||
|
||||
---
|
||||
|
||||
## `amqp_0_9`
|
||||
|
||||
- Protocol used by RabbitMQ
|
||||
|
||||
- Very simplified behavior:
|
||||
|
||||
- messages are published to an [*exchange*][amqp-exchanges]
|
||||
|
||||
- messages have a *routing key*
|
||||
|
||||
- the exchange routes the message to one (or zero or more) queues
|
||||
<br/>(possibly using the routing key or message headers to decide which queue(s))
|
||||
|
||||
- [*consumers*][amqp-consumers] subscribe to queues to receive messages
|
||||
|
||||
[amqp-exchanges]: https://www.rabbitmq.com/tutorials/amqp-concepts#exchanges
|
||||
[amqp-consumers]: https://www.rabbitmq.com/tutorials/amqp-concepts#consumers
|
||||
|
||||
---
|
||||
|
||||
## Using the default exchange
|
||||
|
||||
- There is a default exchange (called `""` - empty string)
|
||||
|
||||
- The routing key indicates the name of the queue to deliver to
|
||||
|
||||
- The queue needs to exist (we need to create it beforehand)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Defining custom exchanges
|
||||
|
||||
- Create an exchange
|
||||
|
||||
- exchange types: direct, fanout, topic, headers
|
||||
|
||||
- durability: persisted to disk to survive server restart or not?
|
||||
|
||||
- Create a binding
|
||||
|
||||
- which exchange?
|
||||
|
||||
- which routing key? (for direct exchanges)
|
||||
|
||||
- which queue?
|
||||
|
||||
---
|
||||
|
||||
## RabbitMQ on Kubernetes
|
||||
|
||||
- RabbitMQ can be deployed on Kubernetes:
|
||||
|
||||
- directly (creating e.g. a StatefulSet)
|
||||
|
||||
- with the RabbitMQ operator
|
||||
|
||||
- We're going to do the latter!
|
||||
|
||||
- The operator includes the "topology operator"
|
||||
|
||||
(to configure queues, exchanges, and bindings through custom resources)
|
||||
|
||||
---
|
||||
|
||||
## Installing the RabbitMQ operator
|
||||
|
||||
- Let's install it with this Helm chart:
|
||||
|
||||
```bash
|
||||
helm upgrade --install --repo https://charts.bitnami.com/bitnami \
|
||||
--namespace rabbitmq-system --create-namespace \
|
||||
rabbitmq-cluster-operator rabbitmq-cluster-operator
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Deploying a simple RabbitMQ cluster
|
||||
|
||||
- Let's use the YAML manifests in that directory:
|
||||
|
||||
https://github.com/jpetazzo/beyond-load-balancers/tree/main/rabbitmq
|
||||
|
||||
- This creates:
|
||||
|
||||
- a `RabbitmqCluster` called `mq`
|
||||
|
||||
- a `Secret` called `mq-default-user` containing access credentials
|
||||
|
||||
- a durable `Queue` named `q1`
|
||||
|
||||
(We can ignore the `Exchange` and the `Binding`, we won't use them.)
|
||||
|
||||
---
|
||||
|
||||
## 🏗️ Let's build something!
|
||||
|
||||
Let's replace the `cities` Redis list with our RabbitMQ queue.
|
||||
|
||||
(See next slide for steps and hints!)
|
||||
|
||||
---
|
||||
|
||||
## Steps
|
||||
|
||||
1. Edit the Bento configuration for our "CSV importer".
|
||||
|
||||
(replace the `redis_list` output with `amqp_0_9`)
|
||||
|
||||
2. Run that pipeline and confirm that messages show up in RabbitMQ.
|
||||
|
||||
3. Edit the Bento configuration for the Ollama consumer.
|
||||
|
||||
(replace the `redis_list` input with `amqp_0_9`)
|
||||
|
||||
4. Trigger a scale up of the Ollama consumer.
|
||||
|
||||
5. Update the KEDA Scaler to use RabbitMQ instead of Redis.
|
||||
|
||||
---
|
||||
|
||||
## 1️⃣ Sending messages to RabbitMQ
|
||||
|
||||
- Edit our Bento configuration (the one feeding the CSV file to Redis)
|
||||
|
||||
- We want the following `output` section:
|
||||
```yaml
|
||||
output:
|
||||
amqp_0_9:
|
||||
exchange: ""
|
||||
key: q1
|
||||
mandatory: true
|
||||
urls:
|
||||
- "${AMQP_URL}"
|
||||
```
|
||||
|
||||
- Then export the `AMQP_URL` environment variable using the `connection_string` field of the Secret `mq-default-user`
|
||||
|
||||
💡 Yes, we can directly use environment variables in Bento configuration!
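
A sketch of how that variable could be populated before running the pipeline (the configuration file name is an assumption):

```bash
export AMQP_URL=$(kubectl get secret mq-default-user \
        -o jsonpath={.data.connection_string} | base64 -d)
bento --config csv2amqp.yaml
```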
---
|
||||
|
||||
## 2️⃣ Testing our AMQP output
|
||||
|
||||
- Run the Bento pipeline
|
||||
|
||||
- To check that our messages made it:
|
||||
```bash
|
||||
kubectl exec mq-server-0 -- rabbitmqctl list_queues
|
||||
```
|
||||
|
||||
- We can also use Prometheus metrics, e.g. `rabbitmq_queue_messages`
|
||||
|
||||
---
|
||||
|
||||
## 3️⃣ Receiving messages from RabbitMQ
|
||||
|
||||
- Edit our other Bento configuration (the one in the Ollama consumer Pod)
|
||||
|
||||
- We want the following `input` section:
|
||||
```yaml
|
||||
input:
|
||||
amqp_0_9:
|
||||
urls:
|
||||
- `amqp://...:5672/`
|
||||
queue: q1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4️⃣ Triggering Ollama scale up
|
||||
|
||||
- If the autoscaler is configured to scale to zero, disable it
|
||||
|
||||
(easiest solution: delete the ScaledObject)
|
||||
|
||||
- Then manually scale the Deployment to e.g. 4 Pods
|
||||
|
||||
- Check that messages are processed and show up in the output
|
||||
|
||||
(it should still be a Redis list at this point)
|
||||
|
||||
---
|
||||
|
||||
## 5️⃣ Autoscaling on RabbitMQ
|
||||
|
||||
- We need to update our ScaledObject
|
||||
|
||||
- Check the [RabbitMQ Queue Scaler][keda-rabbitmq]
|
||||
|
||||
- Multiple ways to pass the AMQP URL:
|
||||
|
||||
- hardcode it (easier solution for testing!)
|
||||
|
||||
- use `...fromEnv` and set environment variables in target pod
|
||||
|
||||
- create and use a TriggerAuthentication
|
||||
|
||||
💡 Since we have the AMQP URL in a Secret, TriggerAuthentication works great!
|
||||
|
||||
[keda-rabbitmq]: https://keda.sh/docs/latest/scalers/rabbitmq-queue/
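
A sketch of the TriggerAuthentication approach (names and threshold are illustrative; the `host` parameter is read from the `connection_string` field of the Secret):

```yaml
apiVersion: keda.sh/v1alpha1
kind: TriggerAuthentication
metadata:
  name: mq-auth
spec:
  secretTargetRef:
    - parameter: host
      name: mq-default-user
      key: connection_string
---
apiVersion: keda.sh/v1alpha1
kind: ScaledObject
metadata:
  name: ollama
spec:
  scaleTargetRef:
    name: ollama
  triggers:
    - type: rabbitmq
      metadata:
        queueName: q1
        mode: QueueLength
        value: "10"
      authenticationRef:
        name: mq-auth
```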
@@ -55,7 +55,6 @@
|
||||
|
||||
`cert-manager.io/allow-direct-injection: "true"`
|
||||
|
||||
- See [cert-manager documentation] for details
|
||||
|
||||
[cert-manager documentation]: https://cert-manager.io/docs/concepts/ca-injector/
|
||||
- See [cert-manager documentation][docs] for details
|
||||
|
||||
[docs]: https://cert-manager.io/docs/concepts/ca-injector/
|
||||
|
||||
@@ -272,9 +272,9 @@ This can be overridden by setting the annotation:
|
||||
|
||||
- Can express `minAvailable` or `maxUnavailable`
|
||||
|
||||
- See [documentation][doc-pdb] for details and examples
|
||||
- See [documentation] for details and examples
|
||||
|
||||
[doc-pdb]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -81,7 +81,7 @@
|
||||
|
||||
## What version are we running anyway?
|
||||
|
||||
- When I say, "I'm running Kubernetes 1.28", is that the version of:
|
||||
- When I say, "I'm running Kubernetes 1.22", is that the version of:
|
||||
|
||||
- kubectl
|
||||
|
||||
@@ -129,15 +129,15 @@
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.28.9:
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.22.17:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 28
|
||||
- PATCH = 9
|
||||
- MINOR = 22
|
||||
- PATCH = 17
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.28.9 and 1.28.13 are compatible)
|
||||
(e.g. 1.22.17 and 1.22.5 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
@@ -153,9 +153,9 @@
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.28 and 1.29)
|
||||
- This allows live upgrades (since we can mix e.g. 1.22 and 1.23)
|
||||
|
||||
- It also means that going from 1.28 to 1.30 requires going through 1.29
|
||||
- It also means that going from 1.22 to 1.24 requires going through 1.23
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
|
||||
```
|
||||
|
||||
- Look for the `image:` line, and update it to e.g. `v1.30.1`
|
||||
- Look for the `image:` line, and update it to e.g. `v1.24.1`
|
||||
|
||||
]
|
||||
|
||||
@@ -320,29 +320,53 @@ Note 2: kubeadm itself is still version 1.22.1..
|
||||
|
||||
- First things first: we need to upgrade kubeadm
|
||||
|
||||
- The Kubernetes package repositories are now split by minor versions
|
||||
.lab[
|
||||
|
||||
(i.e. there is one repository for 1.28, another for 1.29, etc.)
|
||||
- Upgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.27.0-00
|
||||
```
|
||||
|
||||
- This avoids accidentally upgrading from one minor version to another
|
||||
- Check what kubeadm tells us:
|
||||
```
|
||||
sudo kubeadm upgrade plan
|
||||
```
|
||||
|
||||
(e.g. with unattended upgrades or if packages haven't been held/pinned)
|
||||
]
|
||||
|
||||
- We'll need to add the new package repository and unpin packages!
|
||||
Problem: kubeadm doesn't know how to handle
upgrades from version 1.22.
|
||||
|
||||
This is because we installed version 1.27.
|
||||
|
||||
We need to install kubeadm version 1.23.X.
|
||||
|
||||
---

## Installing the new packages
## Downgrading kubeadm

- Edit `/etc/apt/sources.list.d/kubernetes.list`
- We need to go back to kubeadm version 1.23.X.

  (or copy it to e.g. `kubernetes-1.29.list` and edit that)
.lab[

- `apt-get update`
- View available versions for package `kubeadm`:
  ```bash
  apt show kubeadm -a | grep ^Version | grep 1.23
  ```

- Now edit (or remove) `/etc/apt/preferences.d/kubernetes`
- Downgrade kubeadm:
  ```
  sudo apt install kubeadm=1.23.0-00
  ```

- `apt-get install kubeadm` should now upgrade `kubeadm` correctly! 🎉
- Check what kubeadm tells us:
  ```
  sudo kubeadm upgrade plan
  ```

]

kubeadm should now agree to upgrade to 1.23.X.

---

@@ -361,7 +385,7 @@ Note 2: kubeadm itself is still version 1.22.1..

- Look for the `image:` line, and restore it to the original value

  (e.g. `v1.28.9`)
  (e.g. `v1.22.17`)

- Wait for the control plane to come back up

@@ -375,14 +399,9 @@ Note 2: kubeadm itself is still version 1.22.1..

.lab[

- Check the upgrade plan:
  ```bash
  sudo kubeadm upgrade plan
  ```

- Perform the upgrade:
  ```bash
  sudo kubeadm upgrade apply v1.29.0
  sudo kubeadm upgrade apply v1.23.0
  ```

]

@@ -399,9 +418,15 @@ Note 2: kubeadm itself is still version 1.22.1..

- Log into node `oldversion2`

- Update package lists and APT pins like we did before
- View available versions for package `kubelet`:
  ```bash
  apt show kubelet -a | grep ^Version
  ```

- Then upgrade kubelet
- Upgrade kubelet:
  ```bash
  sudo apt install kubelet=1.23.0-00
  ```

]

@@ -454,16 +479,13 @@ Note 2: kubeadm itself is still version 1.22.1..

.lab[

- Execute the whole upgrade procedure on each node:
- Download the configuration on each node, and upgrade kubelet:
  ```bash
  for N in 1 2 3; do
    ssh oldversion$N "
      sudo sed -i s/1.28/1.29/ /etc/apt/sources.list.d/kubernetes.list &&
      sudo rm /etc/apt/preferences.d/kubernetes &&
      sudo apt update &&
      sudo apt install kubeadm -y &&
      sudo apt install kubeadm=1.23.0-00 &&
      sudo kubeadm upgrade node &&
      sudo apt install kubelet -y"
      sudo apt install kubelet=1.23.0-00"
  done
  ```
]

@@ -472,7 +494,7 @@ Note 2: kubeadm itself is still version 1.22.1..

## Checking what we've done

- All our nodes should now be updated to version 1.29
- All our nodes should now be updated to version 1.23.0

.lab[
@@ -565,35 +587,17 @@ Note 2: kubeadm itself is still version 1.22.1..

---

## Database operators to the rescue

- Moving stateful pods (e.g.: database server) can cause downtime

- Database replication can help:

  - if a node contains database servers, we make sure these servers aren't primaries

  - if they are primaries, we execute a *switch over*

- Some database operators (e.g. [CNPG]) will do that switch over automatically

  (when they detect that a node has been *cordoned*)

[CNPG]: https://cloudnative-pg.io/
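
With CNPG, the switch over can also be triggered manually before maintenance; a minimal sketch, assuming the `kubectl cnpg` plugin is installed and an illustrative cluster `mydb` whose primary instance `mydb-1` runs on `node2`:

```bash
# Prevent new pods from landing on the node
kubectl cordon node2

# Promote a replica running elsewhere (mydb-2), demoting the current primary
kubectl cnpg promote mydb mydb-2

# Then drain the node and proceed with maintenance
kubectl drain node2 --ignore-daemonsets --delete-emptydir-data
```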
---

class: extra-details

## Skipping versions

- This example worked because we went from 1.28 to 1.29
- This example worked because we went from 1.22 to 1.23

- If you are upgrading from e.g. 1.26, you will have to go through 1.27 first
- If you are upgrading from e.g. 1.21, you will have to go through 1.22 first

- This means upgrading kubeadm to 1.27.X, then using it to upgrade the cluster
- This means upgrading kubeadm to 1.22.X, then using it to upgrade the cluster

- Then upgrading kubeadm to 1.28.X, etc.
- Then upgrading kubeadm to 1.23.X, etc.

- **Make sure to read the release notes before upgrading!**

@@ -225,4 +225,4 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \

:EN:- Scheduling pods together or separately
:EN:- Example: deploying a Consul cluster
:FR:- Lancer des pods ensemble ou séparément
:FR:- Exemple : lancer un cluster Consul
:FR:- Example : lancer un cluster Consul
@@ -20,45 +20,6 @@ This is (approximately) what we're going to do:

---

## Resource graph

<pre class="mermaid">
flowchart TD
H/D["charts/dockercoins<br/>(Helm chart)"]
H/C["charts/color<br/>(Helm chart)"]

A/D["apps/dockercoins/flux.yaml<br/>(HelmRelease)"]
A/B["apps/blue/flux.yaml<br/>(HelmRelease)"]
A/G["apps/green/flux.yaml<br/>(HelmRelease)"]
A/CM["apps/cert-manager/flux.yaml<br/>(HelmRelease)"]
A/P["apps/kube-prometheus-stack/flux.yaml<br/>(HelmRelease + Kustomization)"]
A/T["traefik/flux.yaml<br/>(HelmRelease)"]

C/D["clusters/dev/kustomization.yaml<br/>(Kustomization)"]
C/P["clusters/prod/kustomization.yaml<br/>(Kustomization)"]

C/D --> A/B
C/D --> A/D
C/D --> A/G

C/P --> A/D
C/P --> A/G
C/P --> A/T
C/P --> A/CM
C/P --> A/P

A/D --> H/D
A/B --> H/C
A/G --> H/C
A/P --> CHARTS & PV["apps/kube-prometheus-stack/manifests/configmap.yaml<br/>(Helm values)"]
A/CM --> CHARTS
A/T --> CHARTS

CHARTS["Charts on external repos"]
</pre>

---

## Getting ready

- Let's make sure we have two clusters

@@ -114,7 +75,7 @@ CHARTS["Charts on external repos"]

  --repository=$GITHUB_REPO \
  --branch=main \
  --path=./clusters/$FLUX_CLUSTER \
  --personal --private=false
  --personal --public
  ```

]

@@ -298,8 +259,8 @@ class: extra-details

- Put application manifests in their directory:
  ```bash
  mkdir -p apps/dockercoins/manifests
  cp ~/container.training/k8s/dockercoins.yaml apps/dockercoins/manifests
  mkdir -p apps/dockercoins
  cp ~/container.training/k8s/dockercoins.yaml apps/dockercoins/
  ```

- Create kustomization manifest:
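
The hunk stops at this point; for illustration, a minimal kustomization manifest matching the layout above could be created like this (the file contents are a sketch, not the repository's exact manifest):

```bash
cat > apps/dockercoins/kustomization.yaml <<EOF
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- dockercoins.yaml
EOF
```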
@@ -466,10 +427,6 @@ class: extra-details

  `flux create helmrelease ... --values-from=ConfigMap/myapp`

- The ConfigMap or Secret must be in the same Namespace as the HelmRelease

  (not the target namespace of that HelmRelease!)

---

## Gotchas
@@ -1,132 +0,0 @@
class: title

*Tell me and I forget.*
<br/>
*Teach me and I remember.*
<br/>
*Involve me and I learn.*

Misattributed to Benjamin Franklin

[(Probably inspired by Chinese Confucian philosopher Xunzi)](https://www.barrypopik.com/index.php/new_york_city/entry/tell_me_and_i_forget_teach_me_and_i_may_remember_involve_me_and_i_will_lear/)

---

## Hands-on sections

- There will be *a lot* of examples and demos

- If you are attending a live workshop:

  - follow along with the demos, ask questions at any time

  - if you can, try to run some of the examples and demos in your environment

  - if things are going too fast, ask the trainer to slow down :)

- If you are watching a recording or only reading the slides:

  - it is **strongly** recommended to run **all** the examples and demos

  - take advantage of the fact that you can pause at any time

---

class: in-person

## Where are we going to run our containers?

---

class: in-person, pic

![value of time](images/value-of-time.jpg)

---

## If you're attending a live training or workshop

- Each person gets a private lab environment

- Your lab environments will be available for the duration of the workshop

  (check with your instructor to know exactly when they'll be shut down)

- Note that for budget reasons¹, your environment will be fairly modest

  - scenario 1: 4 nodes with 2 cores and 4 GB RAM; no cluster autoscaling

  - scenario 2: 1 node with 4 cores and 8 GB RAM; cluster autoscaling

.footnote[¹That cloud thing is mighty expensive, yo]

---

## Running your own lab environment

- If you are following a self-paced course...

- Or watching a replay of a recorded course...

- ...You will need to set up a local environment for the labs

*or*

- If you want to use a specific cloud provider...

- Or want to see these concepts "at scale"...

- ...You can set up your own clusters with whatever capacity suits you

---

## Deploying your own Kubernetes cluster

- You need cloud provider credentials for this

- Option 1: use the cloud provider CLI, web UI, ...

- Option 2: use [one of these Terraform configurations][one-kubernetes]

  (set `cluster_name`, `node_size`, `max_nodes_per_pool`, `location`, and GO!)

[one-kubernetes]: https://github.com/jpetazzo/container.training/tree/main/prepare-labs/terraform/one-kubernetes
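
For option 2, the workflow is standard Terraform; a sketch, where the provider subdirectory and the variable values are illustrative and should be adapted to the chosen platform:

```bash
cd prepare-labs/terraform/one-kubernetes   # then pick a provider subdirectory
terraform init
terraform apply \
    -var cluster_name=mycluster \
    -var node_size=medium \
    -var max_nodes_per_pool=4 \
    -var location=eu-west
```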
---

## Deploying your own Kubernetes cluster.red[**s**]

- If you want to deliver your own training or workshop:

  - deployment scripts are available in the [prepare-labs] directory

  - you can use them to automatically deploy many lab environments

  - they support many different infrastructure providers

  - they can deploy dozens (even hundreds) of clusters at a time

[prepare-labs]: https://github.com/jpetazzo/container.training/tree/main/prepare-labs

---

class: in-person

## Why don't we run containers locally?

- Installing this stuff can be hard on some machines

  (32-bit CPU or OS... Laptops without administrator access... etc.)

- *"The whole team downloaded all these container images from the WiFi!
  <br/>... and it went great!"* (Literally no-one ever)

- All you need is a computer (or even a phone or tablet!), with:

  - an Internet connection

  - a web browser

  - an SSH client

- Some of the demos require multiple nodes to demonstrate scaling

@@ -158,6 +158,8 @@

- Let's see the specific details for each of them!

[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html

---

## `httpGet`
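
The details for `httpGet` follow in the full deck; as a reminder of the general shape, here is a minimal sketch of a readiness probe using `httpGet` (image, path, and port are illustrative):

```bash
kubectl apply -f- <<EOF
apiVersion: v1
kind: Pod
metadata:
  name: web-with-probe
spec:
  containers:
  - name: web
    image: nginx
    readinessProbe:
      httpGet:
        path: /
        port: 80
      periodSeconds: 5
EOF
```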
@@ -294,6 +296,8 @@ class: extra-details

- Leverages standard [GRPC Health Checking Protocol][grpc]

[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html

---

## Timing and thresholds

@@ -509,10 +513,7 @@ class: extra-details

- Sometimes it can also make sense to embed a web server in the worker

[grpc]: https://grpc.github.io/grpc/core/md_doc_health-checking.html

???

:EN:- Using healthchecks to improve availability
:FR:- Utiliser des *healthchecks* pour améliorer la disponibilité

@@ -1,166 +0,0 @@
# Managing our stack with `helmfile`

- We've installed a few things with Helm

- And others with raw YAML manifests

- Perhaps you've used Kustomize sometimes

- How can we automate all this? Make it reproducible?

---

## Requirements

- We want something that is *idempotent*

  = running it 1, 2, 3 times should only install the stack once

- We want something that handles updates

  = modifying / reconfiguring without restarting from scratch

- We want something that is configurable

  = with e.g. configuration files, environment variables...

- We want something that can handle *partial removals*

  = ability to remove one element without affecting the rest

- Inspiration: Terraform, Docker Compose...

---

## Shell scripts?

✅ Idempotent, thanks to `kubectl apply -f`, `helm upgrade --install`

✅ Handles updates (edit script, re-run)

✅ Configurable

❌ Partial removals

If we remove an element from our script, it won't be uninstalled automatically.

---

## Umbrella chart?

Helm chart with dependencies on other charts.

✅ Idempotent

✅ Handles updates

✅ Configurable (with Helm values: YAML files and `--set`)

✅ Partial removals

❌ Complex (requires learning advanced Helm features)

❌ Requires everything to be a Helm chart (adds (lots of) boilerplate)

---

## Helmfile

https://github.com/helmfile/helmfile

✅ Idempotent

✅ Handles updates

✅ Configurable (with values files, environment variables, and more)

✅ Partial removals

✅ Fairly easy to get started

🐙 Sometimes feels like summoning unspeakable powers / staring down the abyss

---

## What `helmfile` can install

- Helm charts from remote Helm repositories

- Helm charts from remote git repositories

- Helm charts from local directories

- Kustomizations

- Directories with raw YAML manifests

---

## How `helmfile` works

- Everything is defined in a main `helmfile.yaml`

- That file defines:

  - `repositories` (remote Helm repositories)

  - `releases` (things to install: Charts, YAML...)

  - `environments` (optional: to specialize prod vs staging vs ...)

- Helm-style values files can be loaded in `environments`

- These values can then be used in the rest of the Helmfile

- Examples: [install essentials on a cluster][helmfile-ex-1], [run a Bento stack][helmfile-ex-2]

[helmfile-ex-1]: https://github.com/jpetazzo/beyond-load-balancers/blob/main/helmfile.yaml
[helmfile-ex-2]: https://github.com/jpetazzo/beyond-load-balancers/blob/main/bento/helmfile.yaml
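
To make this concrete, here is a minimal sketch of a `helmfile.yaml` (the release name, namespace, and inline values are illustrative):

```bash
cat > helmfile.yaml <<EOF
repositories:
  - name: prometheus-community
    url: https://prometheus-community.github.io/helm-charts

releases:
  - name: promstack
    namespace: prom-system
    chart: prometheus-community/kube-prometheus-stack
    values:
      - grafana:
          service:
            type: NodePort
EOF

helmfile apply
```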
---

## `helmfile` commands

- `helmfile init` (optional; downloads plugins if needed)

- `helmfile apply` (updates all releases that have changed)

- `helmfile sync` (updates all releases even if they haven't changed)

- `helmfile destroy` (guess!)

---

## Helmfile tips

As seen in [this example](https://github.com/jpetazzo/beyond-load-balancers/blob/main/bento/helmfile.yaml#L21):

- variables can be used to simplify the file

- configuration values and secrets can be loaded from external sources

  (Kubernetes Secrets, Vault... See [vals] for details)

- current namespace isn't exposed by default

- there's often more than one way to do it!

  (this particular section could be improved by using Bento `${...}`)

[vals]: https://github.com/helmfile/vals

???

## 🏗️ Let's build something!

- Write a helmfile (or two) to set up today's entire stack on a brand new cluster!

- Suggestion:

  - one helmfile for singleton, cluster-wide components
  <br/>
  (All our operators: Prometheus, Grafana, KEDA, CNPG, RabbitMQ Operator)

  - one helmfile for the application stack
  <br/>
  (Bento, PostgreSQL cluster, RabbitMQ)
@@ -96,7 +96,7 @@ class: extra-details

---

## Choose your own adventure!
## Choose your adventure!

- We present 3 methods to obtain a certificate

@@ -195,4 +195,4 @@ class: extra-details

:EN:- Installing metrics-server
:EN:- Le *resource metrics pipeline*
:FR:- Installation de metrics-server
:FR:- Installtion de metrics-server
@@ -1,53 +0,0 @@
## What we will / won't cover

- Kubernetes provides low-level building blocks (pods, deployments, services...)

- There are many high-level frameworks out there for serverless, AI...:

  [Knative](https://knative.dev/docs/),
  [KubeAI](https://www.kubeai.org/),
  [Kueue](https://kueue.sigs.k8s.io/)...

- We're going to sit somewhere in the middle:

  reimplement some of the features of these high-level frameworks, in a flexible way

- This workshop will (hopefully!) give you a better eye to evaluate these frameworks, too

- We won't showcase GPUs today for budget reasons

  (giving everyone a few GPU nodes would be prohibitive, sorry!)

---

## A word about our demo app

- We'll use Ollama with a relatively small LLM

  (qwen2:1.5b)

- We'll use it to generate very short completions

  (a few seconds of CPU)

- All the challenges that we will address are also visible on longer requests

  (in fact, they are even more visible on longer requests!)

- We're sticking to short requests to save time and cover a lot of ground today

  (but feel free to use more expensive prompts if you'd like!)

---

## Tiny bit of backstory...

The original prompt that we used when building the first version of this content was:

```
If you go to {city}, I suggest that you
```

This would typically take 10-30 seconds, and that was with much bigger Kubernetes nodes.

Today, we suggest using a prompt that generates shorter answers!
@@ -1,343 +0,0 @@
# Ollama in a nutshell

https://ollama.dev

"Get up and running with large language models"

"Docker, but for LLMs"

- Server to host (run) LLMs

- Controlled with CLI or API

- Download a model with `ollama pull`

- Run inference with `ollama run`

---

## Quick demo

⚠️ **Important note 1:** the commands in this section aren't meant
to be executed on your Kubernetes clusters. They are meant to
be executed on a local machine, and they assume that Ollama is
installed and running. If you don't have Ollama on your local
machine, it's OK to skip these demos!

⚠️ **Important note 2:** the models used by Ollama are fairly big
(1.5 GB for the one used here; up to 10s or 100s of GB for bigger
models). We do not recommend downloading them on conference WiFi.

Assuming Ollama is installed and running:

```
ollama run qwen2:1.5b "What's the solution to global warming?"
```

We're going to use this model because it's relatively small.

Many others are available (see https://ollama.dev/search).

---
## Other useful commands

- Start an interactive chat session:
  ```bash
  ollama run qwen2:1.5b
  ```

- Pull a model (or check for updates):
  ```bash
  ollama pull qwen2:1.5b
  ```

- See information on a model:
  ```bash
  ollama show qwen2:1.5b
  ```

---

## Models on disk, in memory

- See models available on disk:
  ```bash
  ollama list
  ```

- See models loaded in memory:
  ```bash
  ollama ps
  ```

- Unload a model:
  ```bash
  ollama stop qwen2:1.5b
  ```

Models are automatically unloaded after 5 minutes (by default).

Ollama loads models in RAM, and in VRAM if it detects a supported GPU.

---
# Ollama on Kubernetes

Let's run Ollama on our Kubernetes cluster!

- Option 1: `kubectl run`

- Option 2: create a Deployment and a Service

- Option 3: use a Helm chart

---

## 1️⃣ `kubectl run`

Note: the `ollama/ollama` image is quite big (~2 GB transfer, ~4 GB on disk).

```bash
kubectl run ollama --image ollama/ollama
```

Wait for the pod to be up and running:
```bash
kubectl wait pod ollama --for=condition=Ready
```

(If that command times out, try again and/or specify a higher timeout.)

```bash
kubectl exec ollama -- ollama run qwen2:1.5b "What's Bach's best piece?"
```

Shut down the pod:
```bash
kubectl delete pod ollama
```

---
## 2️⃣ Deployment + Service

Create the Deployment:
```bash
kubectl create deployment ollama --image ollama/ollama
```

Create the Service:
```bash
kubectl create service clusterip ollama --tcp 11434
```

Wait for the Service Endpoints to be available:
```bash
kubectl wait endpoints ollama --for=jsonpath={..ip}
```

---

## By the way... Why port 11434?

| 1 | 1 | 4 | 3 | 4 |
|---|---|---|---|---|
| L | L | A | M | A |

---
## Connecting to the Service

Let's use the `/api/generate` endpoint:

```bash
kubectl run httpclient --rm -it --image alpine/httpie -- --ignore-stdin \
       http://ollama:11434/api/generate \
       model=qwen2:1.5b prompt="Write a limerick about Kubernetes"
```

(See [Ollama API docs](https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-completion) for details.)

--

🤔 We get an error: the model needs to be downloaded first.

💡 When we used the `ollama run` CLI command earlier, it did it automatically for us.

---

## Pulling the model

Method 1:
```bash
kubectl exec deployment/ollama -- ollama pull qwen2:1.5b
```

Method 2:
```bash
kubectl run httpclient --rm -it --image alpine/httpie -- --ignore-stdin \
       http://ollama:11434/api/pull \
       name=qwen2:1.5b
```

---
## Houston, we (are going to) have a problem...

- This works when there is only one pod

- What happens if we scale up the Deployment?

- We need to pull the model on every pod

- How should we do that?

---

## Potential solutions

- Bake the model into the image

  🙅 Personal opinion: this is a bad idea (image size, maintenance...)

- Directly send a "pull" command to each pod, individually

  🙁 Hackish, not great

- Use a Kubernetes lifecycle hook (see the sketch after this list)

  💡 That works!

- Use a sidecar container to pull the model

  🤔 Doable, but more work than the lifecycle hook

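Here is a minimal sketch of that lifecycle hook, patched into the Deployment created earlier (the container created by `kubectl create deployment` is named `ollama`; the model name is the one used above):

```bash
kubectl patch deployment ollama --patch '
spec:
  template:
    spec:
      containers:
      - name: ollama
        lifecycle:
          postStart:
            exec:
              # Pull the model right after each pod starts
              command: ["ollama", "pull", "qwen2:1.5b"]
'
```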
---

## 🙋 Choose your own adventure

Should we add that lifecycle hook?

---

## 3️⃣ Helm chart

- Let's check the [ArtifactHub] for an Ollama Helm chart

- The most popular (as of November 2024) is [this one, by OTWLD][ollama-chart]

- ~~It has pockets~~

- It can pre-pull models! 🎉

[ArtifactHub]: https://artifacthub.io
[ollama-chart]: https://artifacthub.io/packages/helm/ollama-helm/ollama

---

## Installing the Helm chart

Traditional method:
```bash
helm repo add ollama https://otwld.github.io/ollama-helm/
helm install ollama ollama/ollama --set ollama.models={qwen2:1.5b}
```

Idempotent¹, single-command method:
```bash
helm upgrade --install --repo https://otwld.github.io/ollama-helm/ \
     ollama ollama --set ollama.models={qwen2:1.5b}
```

.footnote[¹Idempotent: that can be executed multiple times without adverse effect.]
## Testing the Helm installation

Just like before:
```bash
kubectl run httpclient --rm -it --image alpine/httpie -- --ignore-stdin \
       http://ollama:11434/api/generate \
       model=qwen2:1.5b prompt="Write a limerick about YAML" stream:=false
```

And while we're here, check resource usage:
```bash
kubectl exec deployment/ollama -ti -- top
```

There should be two processes:

- `ollama` itself, relatively small (~100 MB)

- the LLM subprocess, relatively big (~1.4 GB for qwen2:1.5b)

---

class: extra-details

## HTTPie

https://httpie.io/

- CLI client to send requests to web services

- Similar to curl, but made specifically to talk to API backends

```bash
http <URL> [key=value] [key=value] [key:=value]
```

- The `key=value` pairs get turned into a JSON object

- `key:=value` indicates a parameter to be sent "as-is"

  (ideal for e.g. booleans or numbers)

---
## Sending some load

We're going to use `hey`:

```bash
kubectl run hey --rm -it --image nixery.dev/hey -- \
    hey -c 10 -n 10 -t 60 -m POST \
    -d '{"model": "qwen2:1.5b", "prompt": "vi or emacs?"}' \
    http://ollama:11434/api/generate
```

Some explanations:

- `nixery.dev` = automatically generates images with [Nixery]
- `-c` = concurrent requests
- `-n` = total number of requests
- `-t` = timeout in seconds

This is probably going to take (literally) a minute.

[Nixery]: https://nixery.dev/

---

## Performance analysis

- Let's start an interactive container with `hey`

  (e.g., use the `alpine` image, then `apk add hey`)

- Try 10 requests, with a concurrency of 1/2/4 (see the sketch after this list)

- Meanwhile, check the logs of the `ollama` pod

- Some results (your results may vary depending on CPU, random seed...):

  - 1 = 0.08 reqs/s, average latency: 12s
  - 2 = 0.10 reqs/s, average latency: 18s
  - 4 = 0.12 reqs/s, average latency: 28s

- Higher concurrency = slightly higher throughput, much higher latency

🤔 We need metrics!
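
One way to run that concurrency sweep from the interactive container, as a sketch (assuming `hey` is installed as described above):

```bash
# Try concurrency 1, 2, and 4 (10 requests each) and keep the summary lines
for c in 1 2 4; do
  echo "=== concurrency: $c ==="
  hey -c $c -n 10 -t 60 -m POST \
      -d '{"model": "qwen2:1.5b", "prompt": "vi or emacs?"}' \
      http://ollama:11434/api/generate | grep -E 'Requests/sec|Average'
done
```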
@@ -1,273 +0,0 @@
# Adding metrics

We want multiple kinds of metrics:

- instantaneous pod and node resource usage

- historical resource usage (=graphs)

- request duration

---

## 1️⃣ Instantaneous resource usage

- We're going to use metrics-server

- Check if it's already installed:
  ```bash
  kubectl top nodes
  ```

- If we see a list of nodes, with CPU and RAM usage:

  *great, metrics-server is installed!*

- If we see `error: Metrics API not available`:

  *metrics-server isn't installed, so we'll install it!*

---

## Installing metrics-server

- In a lot of places, this is done with a little bit of custom YAML

  (derived from the [official installation instructions](https://github.com/kubernetes-sigs/metrics-server#installation))

- We can also use a Helm chart:
  ```bash
  helm upgrade --install metrics-server metrics-server \
      --create-namespace --namespace metrics-server \
      --repo https://kubernetes-sigs.github.io/metrics-server/ \
      --set args={--kubelet-insecure-tls=true}
  ```

- The `args` flag specified above should be sufficient on most clusters

- After a minute, `kubectl top nodes` should show resource usage

---

## 2️⃣ Historical resource usage

- We're going to use Prometheus (specifically: kube-prometheus-stack)

- This is a Helm chart bundling:

  - Prometheus

  - multiple exporters (node, kube-state-metrics...)

  - Grafana

  - a handful of Grafana dashboards

- Open Source

- Commercial alternatives: Datadog, New Relic...

---

## Installing kube-prometheus-stack

We're going to expose both Prometheus and Grafana with a NodePort:

```bash
helm upgrade --install --repo https://prometheus-community.github.io/helm-charts \
    promstack kube-prometheus-stack \
    --namespace prom-system --create-namespace \
    --set prometheus.service.type=NodePort \
    --set grafana.service.type=NodePort \
    --set prometheus.prometheusSpec.podMonitorSelectorNilUsesHelmValues=false \
    --set prometheus.prometheusSpec.serviceMonitorSelectorNilUsesHelmValues=false \
    #
```

This chart installation can take a while (up to a couple of minutes).

---

class: extra-details

## `...NilUsesHelmValues=false` ???

- kube-prometheus-stack uses the "Prometheus Operator"

- To configure "scrape targets", we create PodMonitor or ServiceMonitor resources

- By default, the Prometheus Operator will only look at \*Monitors with the right labels

- Our extra options mean "use all the Monitors that you find!"

---
## Connecting to Grafana

Check the NodePort allocated to Grafana:

```bash
kubectl get service promstack-grafana --namespace prom-system
```

Get the public address of one of our nodes:

```bash
kubectl get nodes -o wide
```

In a browser, connect to the public address of any node, on the node port.

The default login and password are `admin` / `prom-operator`.

Check the dashboard "Kubernetes / Compute Resources / Namespace (Pods)".

Select a namespace and see the CPU and RAM usage for the pods in that namespace.

---

## 3️⃣ Request duration

- Unfortunately, as of November 2024, ollama doesn't expose metrics

  (there is ongoing discussion about it: [issue 3144][3144], [PR 6537][6537])

- There are some [garbage AI-generated blog posts claiming otherwise][garbage]

  (but it's AI-generated, so it bears no connection to truth whatsoever)

- So, what can we do?

[3144]: https://github.com/ollama/ollama/issues/3144#issuecomment-2153184254
[6537]: https://github.com/ollama/ollama/pull/6537
[garbage]: https://www.arsturn.com/blog/setting-up-ollama-prometheus-metrics

---

## HAProxy to the rescue

- HAProxy is a proxy that can handle TCP, HTTP, and more

- It can expose detailed Prometheus metrics about HTTP requests

- The plan: add a sidecar HAProxy to each Ollama pod

- For that, we need to give up on the Ollama Helm chart

  (and go back to basic manifests)

---
## 🙋 Choose your own adventure

Do we want to...

- write all the corresponding manifests?

- look at pre-written manifests and explain how they work?

- apply the manifests and carry on?

---

## 🏗️ Let's build something!

- If you have created Deployments / Services: clean them up first!

- Deploy Ollama with a sidecar HAProxy (sample configuration on next slide)

- Run a short benchmark campaign

  (e.g. scale to 4 pods, try 4/8/16 parallel requests, 2 minutes each)

- Check live resource usage with `kubectl top nodes` / `kubectl top pods`

- Check historical usage with the Grafana dashboards

  (for HAProxy metrics, you can use [Grafana dashboard 12693, HAProxy 2 Full][grafana-12693])

- If you don't want to write the manifests, you can use [these][ollama-yaml]

[grafana-12693]: https://grafana.com/grafana/dashboards/12693-haproxy-2-full/
[ollama-yaml]: https://github.com/jpetazzo/beyond-load-balancers/tree/main/ollama

---

```
global
    #log stdout format raw local0
    #daemon
    maxconn 32
defaults
    #log global
    timeout client 1h
    timeout connect 1h
    timeout server 1h
    mode http
    `option abortonclose`
frontend metrics
    bind :9000
    http-request use-service prometheus-exporter
frontend ollama_frontend
    bind :8000
    default_backend ollama_backend
    `maxconn 16`
backend ollama_backend
    server ollama_server localhost:11434 check
```

---
class: extra-details

## ⚠️ Connection queues

- HAProxy will happily queue *many* connections

- If a client sends a request, then disconnects:

  - the request stays in the queue

  - the request gets processed by the backend

  - eventually, when the backend starts sending the reply, the connection is closed

- This can result in a backlog of queries that takes a long time to drain

- To avoid that: `option abortonclose` (see [HAProxy docs for details][abortonclose])

- Note that the issue is less severe when replies are streamed

[abortonclose]: https://www.haproxy.com/documentation/haproxy-configuration-manual/latest/#4-option%20abortonclose

---

class: extra-details

## Ad-hoc HAProxy dashboard

- To consolidate all frontend and backend queues on a single graph:

  - query: `haproxy_frontend_current_sessions`

  - legend: `{{namespace}}/{{pod}}/{{proxy}}`

  - options, "Color scheme", select "Classic palette (by series name)"

---

## What do we see?

- Imperfect load balancing

- Some backends receive more requests than others

- Sometimes, some backends are idle while others are busy

- However, CPU utilization on the node is maxed out

- This is because our node is oversubscribed

- This is because we haven't specified resource requests/limits (yet)

  (we'll do that later!)
@@ -1,155 +0,0 @@
## Setting resource requests and limits

- Thanks to *requests*:

  - our pods will have resources *reserved* for them

  - we won't pack too many pods on a single node

  - cluster autoscaling will trigger when needed (if possible!)

- Thanks to *limits*:

  - our pods won't use more than a given amount of resources

  - they won't use up all the available resources on the node

  - behavior will be more consistent between loaded and unloaded state

---

## Memory

- Personal advice: set request and limit to the same value

- Check current or historical usage and add a bit of padding

  (the more historical data we have, the less padding we need)

- Consider 10% padding for "dataless" pods, more for pods with data

  (so that the pod has "reserves" for page cache usage)

⚠️ Pods hitting their memory limit will be **killed!**

---
## CPU

- It's not necessary to set requests and limits to the same value

  (this would cause a lot of waste for idle workloads)

- Let's see a few possible strategies!

---

## CPU for mostly idle pods

E.g.: web services, workers handling very few requests...

- Set the limit to at least one whole core

  (to avoid throttling, especially on bursty workloads)

- Requests can be very low (e.g. 0.1 core)

⚠️ If requests are too low and the node is very loaded,
the pod will slow down significantly!

(Because CPU cycles are allocated proportionally to CPU requests.)

---

## Inelastic CPU-hungry pods

- Pods with a fixed number of threads:

  *set requests and limits to that number of threads*

- Pods where a specific level of performance needs to be guaranteed:

  *set requests and limits to the number of cores providing that performance*

⚠️ If you set limits to higher levels, performance will be unpredictable!

(You'll get good performance when the node has extra cycles.)

---

## Elastic CPU-hungry pods

- Pods that could potentially use all the cores

  (e.g. machine learning training and inference, depending on the models)

- Decide how many pods per node you want to pack

- Set CPU requests as a fraction of the number of cores of the nodes

  (minus some padding)

- Example:

  - nodes with 32 cores
  - we want 4 pods per node
  - CPU request: 7.5 cores

- Set limits to a higher level (up to node size)

---
## In practice

- Check memory usage of our Ollama pods:
  ```bash
  kubectl top pods
  ```
  (Or even better, look at historical usage in Prometheus or Grafana!)

- Check how many cores we have on our nodes:
  ```bash
  kubectl get nodes -o json | jq .items[].status.capacity.cpu
  kubectl get nodes -o custom-columns=NAME:metadata.name,CPU:status.capacity.cpu
  ```

- Let's decide that we want two Ollama pods per node

- What requests/limits should we set?

---

## Setting resources for Ollama

- Assumptions:

  - we want two pods per node
  - each pod uses ~1500 MiB RAM
  - nodes have 4 cores

- We'll set memory requests and limits to 2G

- We'll set CPU requests to 1.5 (4 cores / 2 pods, minus padding)

- We'll set CPU limits to twice the requests

```bash
kubectl set resources deployment ollama \
    --requests=cpu=1.5,memory=2G \
    --limits=cpu=3,memory=2G
```

⚠️ If you have an HAProxy sidecar, this will set its resources too! (A workaround is sketched below.)
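
To leave the sidecar untouched, we can restrict the change to a single container; a sketch using the `--containers` flag of `kubectl set resources`:

```bash
# Only update the "ollama" container, not the HAProxy sidecar
kubectl set resources deployment ollama --containers=ollama \
    --requests=cpu=1.5,memory=2G \
    --limits=cpu=3,memory=2G
```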
---

## Results

- After setting these resource requests, we should see cluster autoscaling

- If not: scale up the Ollama Deployment to at least 3 replicas

- Check cluster autoscaler status with:
  ```bash
  kubectl describe configmap --namespace kube-system cluster-autoscaler-status
  ```

@@ -40,7 +40,7 @@ using Kubernetes manifests and tooling.*

- etc.

[ArgoCD]: https://argoproj.github.io/cd/
[ArgoCD]: https://github.com/argoproj/argo-cd
[AWS]: https://aws-controllers-k8s.github.io/community/docs/community/services/
[cert-manager]: https://cert-manager.io/
[External Secrets Operator]: https://external-secrets.io/
@@ -1,210 +0,0 @@
# Message Queue Architecture

There are (at least) three ways to distribute load:

- load balancers

- batch jobs

- message queues

Let's do a quick review of their pros/cons!

---

## 1️⃣ Load balancers

<pre class="mermaid">
flowchart TD
Client["Client"] ---> LB["Load balancer"]
LB ---> B1["Backend"] & B2["Backend"] & B3["Backend"]
</pre>

---

## Load balancers

- Latency: ~milliseconds (network latency)

- Overhead: very low (one extra network hop, one log message?)

- Great for short requests (a few milliseconds to a minute)

- Supported out of the box by the Kubernetes Service Proxy

  (by default, this is `kube-proxy`)

- Suboptimal resource utilization due to imperfect balancing

  (especially when there are multiple load balancers)

---

## 2️⃣ Batch jobs

<pre class="mermaid">
flowchart TD
subgraph K["Kubernetes Control Plane"]
J1["Job"]@{ shape: card}
J2["Job"]@{ shape: card}
J3["..."]@{ shape: text}
J4["Job"]@{ shape: card}
end
C["Client"] ---> K
K <---> N1["Node"] & N2["Node"] & N3["Node"]
</pre>

---

## Batch jobs

- Latency: a few seconds (many Kubernetes controllers involved)

- Overhead: significant due to all the moving pieces involved

  (job controller, scheduler, kubelet; many writes to etcd and logs)

- Great for long requests (a few minutes to a few days)

- Supported out of the box by Kubernetes

  (`kubectl create job hello --image alpine -- sleep 60`)

- Asynchronous processing requires some refactoring

  (we don't get the response immediately)

---

## 3️⃣ Message queues

<pre class="mermaid">
flowchart TD
subgraph Q["Message queue"]
M1["Message"]@{ shape: card}
M2["Message"]@{ shape: card}
M3["..."]@{ shape: text}
M4["Message"]@{ shape: card}
end
C["Client"] ---> Q
Q <---> W1["Worker"] & W2["Worker"] & W3["Worker"]
</pre>

---
## Message queues

- Latency: a few milliseconds to a few seconds

- Overhead: intermediate

  (very low with e.g. Redis, higher with e.g. Kafka)

- Great for all except very short requests

- Requires additional setup

- Asynchronous processing requires some refactoring

---

## Dealing with errors

- Load balancers

  - errors reported immediately (client must retry)
  - some load balancers can retry automatically

- Batch jobs

  - Kubernetes retries automatically
  - after `backoffLimit` retries, Job is marked as failed

- Message queues

  - some queues have a concept of "acknowledgement"
  - some queues have a concept of "dead letter queue"
  - some extra work is required

---

## Some queue brokers

- Redis (with e.g. RPUSH, BLPOP — see the sketch after this list)

  *light, fast, easy to set up... no durability guarantee, no acknowledgement, no dead letter queue*

- Kafka

  *heavy, complex to set up... strong deliverability guarantee, full featured*

- RabbitMQ

  *somewhat in between Redis and Kafka*

- SQL databases

  *often requires polling, which adds extra latency; not as scalable as a "true" broker*
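
A minimal sketch of the Redis pattern mentioned in the list above, using `redis-cli` and an illustrative list name `jobs`:

```bash
# Producer: append a job to the tail of the "jobs" list
redis-cli RPUSH jobs '{"model": "qwen2:1.5b", "prompt": "vi or emacs?"}'

# Worker: block until a job is available, then pop it from the head
# (the trailing 0 means "wait forever")
redis-cli BLPOP jobs 0
```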
---

## More queue brokers

Many cloud providers offer hosted message queues (e.g.: Amazon SQS).

These are usually great options, with some drawbacks:

- vendor lock-in

- setting up extra environments (testing, staging...) can be more complex

  (Setting up a singleton environment is usually very easy, thanks to web UI, CLI, etc.; setting up extra environments and assigning the right permissions with e.g. IaC is usually significantly more complex.)

---
## Implementing a message queue

1. Pick a broker

2. Deploy the broker

3. Set up the queue

4. Refactor our code

---

## Code refactoring (client)

Before:
```python
response = http.POST("http://api", payload=Request(...))
```

After:
```python
client = queue.connect(...)
client.publish(message=Request(...))
```

Note: we don't get the response right away (if at all)!

---

## Code refactoring (server)

Before:
```python
server = http.server(request_handler=handler)
server.listen("80")
server.run()
```

After:
```python
client = queue.connect(...)
while True:
    message = client.consume()
    response = handler(message)
    # Write the response somewhere
```
@@ -194,7 +194,7 @@ class: extra-details

- use [static CPU manager policy](https://kubernetes.io/docs/tasks/administer-cluster/cpu-management-policies/#static-policy)

For more details, check [this blog post](https://erickhun.com/posts/kubernetes-faster-services-no-cpu-limits/) or these: ([part 1](https://engineering.indeedblog.com/blog/2019/12/unthrottled-fixing-cpu-limits-in-the-cloud/), [part 2](https://engineering.indeedblog.com/blog/2019/12/cpu-throttling-regression-fix/)).
For more details, check [this blog post](https://erickhun.com/posts/kubernetes-faster-services-no-cpu-limits/) or these ones ([part 1](https://engineering.indeedblog.com/blog/2019/12/unthrottled-fixing-cpu-limits-in-the-cloud/), [part 2](https://engineering.indeedblog.com/blog/2019/12/cpu-throttling-regression-fix/)).

---

@@ -352,87 +352,6 @@ class: pic

class: pic
![kube-proxy in IPVS mode](images/kubernetes-services/46-ipvs-kube-proxy.png)

---

class: extra-details

## Traffic engineering

- By default, connections to a ClusterIP or a NodePort are load balanced
  across all the backends of their Service

- This can incur extra network hops (which add latency)

- To remove that extra hop, multiple mechanisms are available:

  - `spec.externalTrafficPolicy`

  - `spec.internalTrafficPolicy`

  - [Topology aware routing](https://kubernetes.io/docs/concepts/services-networking/topology-aware-routing/) annotation (beta)

  - `spec.trafficDistribution` (alpha in 1.30, beta in 1.31)

---

## `internal / externalTrafficPolicy`

- Applies respectively to `ClusterIP` and `NodePort` connections

- Can be set to `Cluster` or `Local`

- `Cluster`: load balance connections across all backends (default)

- `Local`: load balance connections to local backends (on the same node)

- With `Local`, if there is no local backend, the connection will fail!

  (the parameter expresses a "hard rule", not a preference)

- Example: `externalTrafficPolicy: Local` for Ingress controllers

  (as shown on earlier diagrams, and in the sketch below)
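
Setting that policy on an existing Service is a one-liner; a sketch, with an illustrative Service name:

```bash
# Keep external traffic on the node where it arrived (no extra hop)
kubectl patch service my-ingress-controller \
    --patch '{"spec": {"externalTrafficPolicy": "Local"}}'
```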
---

class: extra-details

## Topology aware routing

- In beta since Kubernetes 1.23

- Enabled with annotation `service.kubernetes.io/topology-mode=Auto`

- Relies on node label `topology.kubernetes.io/zone`

- Kubernetes service proxy will try to keep connections within a zone

  (connections made by a pod in zone `a` will be sent to pods in zone `a`)

- ...Except if there are no pods in the zone (then fall back to all zones)

- This can mess up autoscaling!

---

class: extra-details

## `spec.trafficDistribution`

- [KEP4444, Traffic Distribution for Services][kep4444]

- In alpha since Kubernetes 1.30, beta since Kubernetes 1.31

- Should eventually supersede topology aware routing

- Can be set to `PreferClose` (more values might be supported later)

- The meaning of `PreferClose` is implementation dependent

  (with kube-proxy, it should work like topology aware routing: stay in a zone)

[kep4444]: https://github.com/kubernetes/enhancements/issues/4444

???

:EN:- Service types: ClusterIP, NodePort, LoadBalancer
@@ -144,30 +144,6 @@

---

## [Orbstack](https://orbstack.dev/)

- Mac only

- Runs Docker containers, Kubernetes, and Linux virtual machines

- Emphasis on speed and energy usage (battery life)

- Great support for `ClusterIP` and `LoadBalancer` services

- Free for personal use; paid product otherwise

---

## [Podman Desktop](https://podman-desktop.io/)

- Available on Linux, Mac, and Windows

- Free and open-source

- Doesn't support Kubernetes directly, but [supports KinD](https://podman-desktop.io/docs/kind)

---

## [Rancher Desktop](https://rancherdesktop.io/)

- Available on Linux, Mac, and Windows

@@ -182,6 +158,8 @@

- Emphasis on ease of use (like Docker Desktop)

- Relatively young product (first release in May 2021)

- Based on k3s and other proven components

---

@@ -77,7 +77,7 @@ This is the flag that we're looking for:

- We only need to transfer the CSR (Certificate Signing Request) to the CA

  (we never need to expose the private key)
  (we never need to expoes the private key)

.lab[
@@ -245,9 +245,9 @@

- command-line flags

- Precedence of the different methods is defined in the [docs][data-values-merge-order]
- Precedence of the different methods is defined in the [docs]

[data-values-merge-order]: https://carvel.dev/ytt/docs/v0.41.0/ytt-data-values/#data-values-merge-order
[docs]: https://carvel.dev/ytt/docs/v0.41.0/ytt-data-values/#data-values-merge-order

---

@@ -462,13 +462,13 @@ spec:

- By default, `#@overlay/match` must find *exactly* one match

  (that can be changed by specifying `expects=...`, `missing_ok=True`... see [docs][docs-ytt-overlaymatch])
  (that can be changed by specifying `expects=...`, `missing_ok=True`... see [docs])

- By default, the specified fields (here, `spec.replicas`) must exist

  (that can also be changed by annotating the optional fields)

[docs-ytt-overlaymatch]: https://carvel.dev/ytt/docs/v0.41.0/lang-ref-ytt-overlay/#overlaymatch
[docs]: https://carvel.dev/ytt/docs/v0.41.0/lang-ref-ytt-overlay/#overlaymatch

---

@@ -573,7 +573,7 @@ metadata:

## Overlays vs data values

- The documentation has a [detailed discussion][data-values-vs-overlays] about this question
- The documentation has a [detailed discussion][docs] about this question

- In short:

@@ -587,7 +587,7 @@ metadata:

  (keeping in mind that overlays are harder to write/understand/maintain)

[data-values-vs-overlays]: https://carvel.dev/ytt/docs/v0.41.0/data-values-vs-overlays/
[docs]: https://carvel.dev/ytt/docs/v0.41.0/data-values-vs-overlays/

---
@@ -4,7 +4,7 @@

- On stage: Jérôme ([@jpetazzo@hachyderm.io])

- Backstage: Alexandre, Antoine, Aurélien (x2), Benjamin, David, Kostas, Nicolas, Paul, Sébastien, Thibault...
- Backstage: Alexandre, Antoine, Aurélien (x2), Benjamin (x2), David, Kostas, Nicolas, Paul, Sébastien, Thibault...

- Schedule: every day from 9am to 1pm

@@ -57,22 +57,24 @@

## Allô Docker¹?

<!--

- Every afternoon: one hour of open Q&A!

  (except on the last day)

- One hour of open Q&A!
-->

- One hour of open Q&A!

- Wednesday: 3:00pm-4:00pm

- Thursday: 4:00pm-5:00pm

- Friday: 3:00pm-4:00pm

- Monday: 3:30pm-4:30pm

- On [Jitsi][jitsi] ("visioconf" link on the training portal)

.footnote[¹A nod to the excellent ["Quoi de neuf Docker?"][qdnd] by the excellent [Nicolas Deloof][ndeloof] 🙂]

[qdnd]: https://www.youtube.com/channel/UCOAhkxpryr_BKybt9wIw-NQ
[ndeloof]: https://github.com/ndeloof
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/Janvier2025
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerMai2024
@@ -1,44 +0,0 @@
title: |
  Asynchronous Architecture Patterns To Scale ML and Other High Latency Workloads on Kubernetes

#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
chat: "In person!"

gitrepo: github.com/jpetazzo/container.training

slides: https://FIXME.container.training/

#slidenumberprefix: "#SomeHashTag — "

exclude:
- self-paced

content:
- shared/title.md
- logistics.md
- shared/about-slides.md
#- shared/chat-room-im.md
#- shared/chat-room-slack.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- k8s/prereqs-advanced.md
- k8s/handson-mlops.md
- shared/connecting.md
- k8s/mlops-headsup.md
- shared/toc.md
-
- k8s/ollama-intro.md
- k8s/ollama-metrics.md
- k8s/queue-architecture.md
- k8s/bento-intro.md
-
- k8s/resource-limits.md
- k8s/cluster-autoscaler.md
- k8s/ollama-reqlim.md
- k8s/bento-hpa.md
- k8s/bento-rmq.md
- k8s/bento-cnpg.md
- k8s/helmfile.md
- shared/thankyou.md
- shared/contact.md

@@ -46,7 +46,7 @@

  (let's say we'll keep them online at least 1 year, how about that?)

- You can download the slides using this URL:
- You can download the slides using that URL:

@@ZIP@@
@@ -1,16 +1,15 @@
|
||||
class: in-person
|
||||
|
||||
## Testing the connection to our lab environment
|
||||
## Connecting to our lab environment
|
||||
|
||||
.lab[
|
||||
|
||||
- Connect to your lab environment with your SSH client:
|
||||
- Log into the first VM (`node1`) with your SSH client:
|
||||
```bash
|
||||
ssh `user`@`A.B.C.D`
|
||||
ssh -p `32323` `user`@`A.B.C.D`
|
||||
```
|
||||
|
||||
(Make sure to replace the highlighted values with the ones provided to you!)
|
||||
(Replace `user` and `A.B.C.D` with the user and IP address provided to you)
|
||||
|
||||
<!--
|
||||
```bash
|
||||
@@ -28,7 +27,7 @@ done
|
||||
|
||||
You should see a prompt looking like this:
|
||||
```
|
||||
[A.B.C.D] (...) user@machine ~
|
||||
[A.B.C.D] (...) user@node1 ~
|
||||
$
|
||||
```
|
||||
If anything goes wrong — ask for help!
|
||||
@@ -41,11 +40,9 @@ class: in-person
|
||||
|
||||
- The shell history of the instructor is available online in real time
|
||||
|
||||
- The instructor will provide you a "magic URL"
|
||||
- Note the IP address of the instructor's virtual machine (A.B.C.D)
|
||||
|
||||
(typically, the instructor's lab address on port 1088 or 30088)
|
||||
|
||||
- Open that URL in your browser and you should see the history
|
||||
- Open http://A.B.C.D:1088 in your browser and you should see the history
|
||||
|
||||
- The history is updated in real time
|
||||
|
||||
@@ -60,7 +57,7 @@ class: in-person
## Doing or re-doing the workshop on your own?

- Use something like
[Play-With-Docker](https://labs.play-with-docker.com/) or
[Play-With-Docker](http://play-with-docker.com/) or
[Play-With-Kubernetes](https://training.play-with-kubernetes.com/)

Zero setup effort; but environments are short-lived and
@@ -103,13 +100,13 @@ class: self-paced

.lab[

- Go to https://labs.play-with-docker.com/
- Go to http://www.play-with-docker.com/

- Log in

- Create your first node

<!-- ```open https://labs.play-with-docker.com/``` -->
<!-- ```open http://www.play-with-docker.com/``` -->

]

@@ -119,17 +116,21 @@ You will need a Docker ID to use Play-With-Docker.

---

## We don't need to connect to ALL the nodes
## We will (mostly) interact with node1 only

- If your cluster has multiple nodes (e.g. `node1`, `node2`, ...):
*These remarks apply only when using multiple nodes, of course.*

unless instructed, **all commands must be run from the first node**
- Unless instructed, **all commands must be run from the first VM, `node1`**

- We don't need to check out/copy code or manifests on other nodes
- We will only check out/copy the code on `node1`

- During normal operations, we do not need access to the other nodes

(but we could log into these nodes to troubleshoot or examine stuff)
- If we had to troubleshoot issues, we would use a combination of:

- SSH (to access system logs, daemon status...)

- Docker API (to check running containers and container engine status)

---
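
To make those troubleshooting bullets concrete, a hedged sketch; `node2` is a hypothetical node name, and this assumes SSH access to it plus a Docker CLI recent enough to support SSH endpoints:

```bash
# node2 is hypothetical: substitute an actual node of your cluster.
# Over SSH: system logs and container engine status on that node.
ssh node2 "journalctl -u docker --since '10 minutes ago'; systemctl status docker"

# Over the Docker API, tunneled through SSH:
docker -H ssh://node2 ps --all
```
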
@@ -1,45 +0,0 @@
name: contact

## Contact information

.column-half[
Instructor:

📛 Jérôme Petazzoni
<br/>
📩 jerome.petazzoni@gmail.com
<br/>
🔗 https://linkedin.com/in/jpetazzo
<br/>
🦣 https://hachyderm.io/@jpetazzo

I can teach custom courses:

- Docker, Kubernetes, MLOps
- from intro level to "black belt"
- on site or remotely

Reach out if you're interested!
]

.column-half[
Assistant:

📛 AJ Bowen
<br/>
📩 aj@soulshake.net
<br/>
🔗 https://linkedin.com/in/ajbowen
<br/>
📃 https://github.com/soulshake


I can consult on the following topics:

- Kubernetes
- CI/CD
- Terraform & Infra-as-code
- Docker
- AWS
]
@@ -1,6 +1,6 @@
## Using Play-With-Docker

- Open a new browser tab to [labs.play-with-docker.com](https://labs.play-with-docker.com/)
- Open a new browser tab to [www.play-with-docker.com](http://www.play-with-docker.com/)

- Confirm that you're not a robot
@@ -1,26 +0,0 @@
<!-- QRcode generated with "qrencode -t UTF-8" -->

.center[
<pre style="padding-top: 0em; font-size: 18px; line-height: 20px;">
█▀▀▀▀▀█ ▀▀▀█▄▀ ▀▄ ▀▄ ▀▄ ▄█▀ ▄ █▀▀▀▀▀█
█ ███ █ ▀▄█ ▀▀▄█ ▄▀▀ ██▄▄ █ ███ █
█ ▀▀▀ █ ▄▀█▀ █▀▀▀█ ▄█▀▄███ ▄ █ ▀▀▀ █
▀▀▀▀▀▀▀ █▄▀ █▄█ ▀ █ █ ▀▄█▄▀ █ ▀▀▀▀▀▀▀
▀▀ █▀▄▀ ▀▄ ▀▀█▄▄█▄▄ ▄▄▄ █▀ ▀▄▄ ▄▀
▄█▄▀▄▀▀██▀ ▀▀██▄█ ▀▀▄█ ██▀ █▄█▀█▀▀
▄ ▄▀▀ ▀ ▀█▀ ▄█▄▀▄▀ ▀ █ █ █▄▄▀▀▀▀▄█▄█▀
█ ▀▀█▄▀▀█▀█ ▄▀ ▀▀ █▀▄ ▀▄ ██▄▀ ▄█ ▄▀█
█▄▀▀▀ ▀▀ ███▀█▀▄ ▄▄█ ██ █▀▄▀▄ █▀▀▀
▄ █▀▄▀ ▄▀ ▄▀▄ ██ ▀▀█ ▄█ █▀▀▄█▀ ▄ █
█▀▀▄▄ ▀ ▀ ▀▀█ ▀▀▀ ▀▀ █▀██▄▀▀▀███▄█▀
█▀█▀▄█▀██ ██ ▀ █▄█▀ ▀ ██▀ ██▄ █▄█▄▄█
█▀█▀▄▄▀▀▀▄▀▄▀ ▄█ ▄▀█ ▄▀▄ █▄ ▀▀▄█▄▄▀
█▀█▄█ ▀ ▀▀▄█▀ █▄▀ █ ▄ ▄▀▄█ █▄▄█▄▄▀█
▀ ▀▀ ▀▀█▄ ▀ ▀ ▄▄███▄ ▄ █▀▀▀█▀██
█▀▀▀▀▀█ ▀██ █ █▀▀ ▀█▀██▄█▀▄█ ▀ █▄ ▄▀
█ ███ █ █▄██▀ ▀▄▀▀▄█▀ ▄▄▀██▀▀▀█▀▀ ▄ ▀
█ ▀▀▀ █ ▄█▀▀▀▀▄▀▄▄█ ▄▀█▀▄ ▀ ▀█ █▄█
▀▀▀▀▀▀▀ ▀▀ ▀▀ ▀ ▀ ▀ ▀ ▀ ▀ ▀ ▀
</pre>
]
@@ -221,10 +221,3 @@ td {
  padding: 0.1em 0.5em;
  background: #eee;
}

/* Use this to layout a slide in two columns */
.column-half {
  float: left;
  width: 50%;
}
@@ -32,19 +32,7 @@
  excludedClasses: [@@EXCLUDE@@]
});
</script>
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
mermaid.initialize({ startOnLoad: false });
slideshow.on('afterShowSlide', function (slide) {
  mermaid.run({
    nodes: document.querySelectorAll('div.remark-visible .mermaid'),
  });
});
// Reminder, if you want to tinker with mermaid,
// you need to export it, for instance like this:
// window.mermaid = mermaid;
</script>

<!--
These two scripts will be available only when loading the
content using the pub/sub server. Otherwise, they'll just