Compare commits


2 Commits

Author SHA1 Message Date
Julien Girardin
6a8e00fc7d Change last day schedule of Allo Docker for Julien 2023-05-30 15:44:33 +02:00
Jérôme Petazzoni
e8c2b29c8f ⚛️ HighFive 2023Q2 content update 2023-05-29 14:54:07 +02:00
75 changed files with 291 additions and 2864 deletions

View File

@@ -1,13 +0,0 @@
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: my-pdb
spec:
#minAvailable: 2
#minAvailable: 90%
maxUnavailable: 1
#maxUnavailable: 10%
selector:
matchLabels:
app: my-app

View File

@@ -59,27 +59,6 @@ You don't **have to** install the CLI tools of the cloud provider(s) that you wa
If you want to provide your cloud credentials through other means, you will have to adjust the Terraform configuration files in `terraform/provider-config` accordingly.
Here is where we look for credentials for each provider:
- AWS: Terraform defaults; see [AWS provider documentation][creds-aws] (for instance, you can use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` environment variables, or AWS config and profile files)
- Azure: Terraform defaults; see [AzureRM provider documentation][creds-azure] (typically, you can authenticate with the `az` CLI and Terraform will pick it up automatically)
- Civo: CLI configuration file (`~/.civo.json`)
- Digital Ocean: CLI configuration file (`~/.config/doctl/config.yaml`)
- Exoscale: CLI configuration file (`~/.config/exoscale/exoscale.toml`)
- Google Cloud: FIXME, note that the project name is currently hard-coded to `prepare-tf`
- Hetzner: CLI configuration file (`~/.config/hcloud/cli.toml`)
- Linode: CLI configuration file (`~/.config/linode-cli`)
- OpenStack: you will need to write a tfvars file (check [this example](terraform/virtual-machines/openstack/tfvars.example))
- Oracle: Terraform defaults; see [OCI provider documentation][creds-oci] (for instance, you can set up API keys; or you can use a short-lived token generated by the OCI CLI with `oci session authenticate`)
- OVH: Terraform defaults; see [OVH provider documentation][creds-ovh] (this typically involves setting up 5 `OVH_...` environment variables)
- Scaleway: Terraform defaults; see [Scaleway provider documentation][creds-scw] (for instance, you can set environment variables, but it will also automatically pick up CLI authentication from `~/.config/scw/config.yaml`)
[creds-aws]: https://registry.terraform.io/providers/hashicorp/aws/latest/docs#authentication-and-configuration
[creds-azure]: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs#authenticating-to-azure
[creds-oci]: https://docs.oracle.com/en-us/iaas/Content/API/SDKDocs/terraformproviderconfiguration.htm#authentication
[creds-ovh]: https://registry.terraform.io/providers/ovh/ovh/latest/docs#provider-configuration
[creds-scw]: https://registry.terraform.io/providers/scaleway/scaleway/latest/docs#authentication
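For example, to provide AWS credentials through environment variables as mentioned above (the values below are placeholders):

```bash
export AWS_ACCESS_KEY_ID=<your-access-key-id>
export AWS_SECRET_ACCESS_KEY=<your-secret-access-key>
```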
## General Workflow
- fork/clone repo

View File

@@ -21,11 +21,6 @@ digitalocean-pvc)
jq '.[] | select(.name | startswith("pvc-")) | .id' |
xargs -n1 -P10 doctl compute volume delete --force
;;
scaleway-pvc)
scw instance volume list --output json |
jq '.[] | select(.name | contains("_pvc-")) | .id' |
xargs -n1 -P10 scw instance volume delete
;;
*)
echo "Unknown combination of provider ('$1') and resource ('$2')."
;;

View File

@@ -10,18 +10,9 @@ fi
. ~/creds/creds.cloudflare.dns
cloudflare() {
case "$1" in
GET|POST|DELETE)
METHOD="$1"
shift
;;
*)
METHOD=""
;;
esac
URI=$1
shift
http --ignore-stdin $METHOD https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
http https://api.cloudflare.com/client/v4/$URI "$@" "Authorization:Bearer $CLOUDFLARE_TOKEN"
}
_list_zones() {
@@ -41,15 +32,6 @@ _populate_zone() {
done
}
_clear_zone() {
ZONE_ID=$(_get_zone_id $1)
for RECORD_ID in $(
cloudflare zones/$ZONE_ID/dns_records | jq -r .result[].id
); do
cloudflare DELETE zones/$ZONE_ID/dns_records/$RECORD_ID
done
}
_add_zone() {
cloudflare zones "name=$1"
}

View File

@@ -12,7 +12,7 @@
echo "$0 del <recordid>"
echo ""
echo "Example to create a A record for eu.container.training:"
echo "$0 add eu A 185.145.250.0"
echo "$0 add eu 185.145.250.0"
echo ""
exit 1
}
@@ -49,29 +49,27 @@ ZONE_ID=$(netlify dns_zones |
_list() {
netlify dns_zones/$ZONE_ID/dns_records |
jq -r '.[] | select(.type=="A" or .type=="AAAA") | [.hostname, .type, .value, .id] | @tsv' |
sort |
column --table
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
}
_add() {
NAME=$1.$DOMAIN
TYPE=$2
VALUE=$3
ADDR=$2
# It looks like if we create two identical records, then delete one of them,
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
# though it's still visible through the API and the website?)
if netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'" and .type=="'$TYPE'" and .value=="'$VALUE'")' |
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
grep .
then
echo "It looks like that record already exists. Refusing to create it."
exit 1
fi
netlify dns_zones/$ZONE_ID/dns_records type=$TYPE hostname=$NAME value=$VALUE ttl=300
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
netlify dns_zones/$ZONE_ID/dns_records |
jq '.[] | select(.hostname=="'$NAME'")'
@@ -90,7 +88,7 @@ case "$1" in
_list
;;
add)
_add $2 $3 $4
_add $2 $3
;;
del)
_del $2

View File

@@ -1,14 +1,10 @@
#!/bin/sh
# deploy big cluster
#TF_VAR_node_size=g6-standard-6 \
#TF_VAR_nodes_per_cluster=5 \
#TF_VAR_location=eu-west \
TF_VAR_node_size=PRO2-XS \
TF_VAR_node_size=g6-standard-6 \
TF_VAR_nodes_per_cluster=5 \
TF_VAR_location=fr-par-2 \
./labctl create --mode mk8s --settings settings/mk8s.env --provider scaleway --tag konk
TF_VAR_location=eu-west \
./labctl create --mode mk8s --settings settings/mk8s.env --provider linode --tag konk
# set kubeconfig file
cp tags/konk/stage2/kubeconfig.101 ~/kubeconfig
@@ -20,4 +16,4 @@ while read node address; do
done
# vcluster all the things
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 50
./labctl create --settings settings/mk8s.env --provider vcluster --mode mk8s --students 27

View File

@@ -126,7 +126,6 @@ set number
set shiftwidth=2
set softtabstop=2
set nowrap
set laststatus=2
SQRL
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
@@ -421,23 +420,10 @@ _cmd_kubebins() {
TAG=$1
need_tag
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
##VERSION##
case "$KUBEVERSION" in
1.19.*)
ETCD_VERSION=v3.4.13
CNI_VERSION=v0.8.7
;;
*)
ETCD_VERSION=v3.5.10
CNI_VERSION=v1.3.0
;;
esac
K8SBIN_VERSION="v$KUBEVERSION"
ETCD_VERSION=v3.4.13
K8SBIN_VERSION=v1.19.11 # Can't go to 1.20 because it requires a serviceaccount signing key.
CNI_VERSION=v0.8.7
ARCH=${ARCHITECTURE-amd64}
pssh --timeout 300 "
set -e
@@ -461,41 +447,30 @@ _cmd_kubebins() {
"
}
_cmd kubepkgs "Install Kubernetes packages (kubectl, kubeadm, kubelet)"
_cmd_kubepkgs() {
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
_cmd_kube() {
TAG=$1
need_tag
# Prior to September 2023, there was a single Kubernetes package repo that
# contained packages for all versions, so we could just add that repo
# and install whatever was the latest version available there.
# Things have changed (versions after September 2023, e.g. 1.28.3 are
# not in the old repo) and now there is a different repo for each
# minor version, so we need to figure out what minor version we are
# installing to add the corresponding repo.
if [ "$KUBEVERSION" = "" ]; then
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
fi
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
# Since the new repo doesn't have older versions, add a safety check here.
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
if [ "$MINORVERSION" -lt 24 ]; then
die "Cannot install kubepkgs for versions before 1.24."
fi
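# For example: KUBEVERSION=1.28.3 gives KUBEREPOVERSION=1.28,
# i.e. the repo added below is https://pkgs.k8s.io/core:/stable:/v1.28/deb/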
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
if [ "$KUBEVERSION" ]; then
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
pssh "
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
Package: kubectl kubeadm kubelet
Pin: version $KUBEVERSION-*
Pin-Priority: 1000
EOF"
fi
# As of February 27th, 2023, packages.cloud.google.com seems broken
# (serves HTTP 500 errors for the GPG key), so let's pre-load that key.
pssh -I "sudo apt-key add -" < lib/kubernetes-apt-key.gpg
# Install packages
pssh --timeout 200 "
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
#curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
#sudo apt-key add - &&
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
@@ -505,18 +480,6 @@ EOF"
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
}
_cmd kubeadm "Setup kubernetes clusters with kubeadm"
_cmd_kubeadm() {
TAG=$1
need_tag
if [ "$KUBEVERSION" ]; then
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
IGNORE_SWAP="- Swap"
fi
# Install a valid configuration for containerd
# (first, the CRI interface needs to be re-enabled;
@@ -537,8 +500,6 @@ bootstrapTokens:
nodeRegistration:
ignorePreflightErrors:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
---
kind: JoinConfiguration
apiVersion: kubeadm.k8s.io/v1beta3
@@ -550,8 +511,6 @@ discovery:
nodeRegistration:
ignorePreflightErrors:
- NumCPU
$IGNORE_SYSTEMVERIFICATION
$IGNORE_SWAP
---
kind: KubeletConfiguration
apiVersion: kubelet.config.k8s.io/v1beta1

Binary file not shown.

View File

@@ -17,12 +17,6 @@ pssh() {
echo "[parallel-ssh] $@"
# There are some routers that really struggle with the number of TCP
# connections that we open when deploying large fleets of clusters.
# We're adding a 1 second delay here, but this can be cranked up if
# necessary - or down to zero, too.
sleep ${PSSH_DELAY_PRE-1}
$(which pssh || which parallel-ssh) -h $HOSTFILE -l ubuntu \
--par ${PSSH_PARALLEL_CONNECTIONS-100} \
--timeout 300 \

View File

@@ -1,6 +1,6 @@
CLUSTERSIZE=3
CLUSTERSIZE=1
CLUSTERPREFIX=polykube
CLUSTERPREFIX=dmuc
USER_LOGIN=k8s
USER_PASSWORD=training
@@ -10,11 +10,12 @@ STEPS="
standardize
clusterize
tools
kubepkgs
kubebins
docker
disabledocker
createuser
webssh
tailhist
kubebins
kubetools
ips
"

View File

@@ -1,26 +0,0 @@
CLUSTERSIZE=1
CLUSTERPREFIX=monokube
# We're sticking to this in the first DMUC lab,
# because it still works with Docker, and doesn't
# require a ServiceAccount signing key.
KUBEVERSION=1.19.11
USER_LOGIN=k8s
USER_PASSWORD=training
STEPS="
wait
standardize
clusterize
tools
docker
disabledocker
createuser
webssh
tailhist
kubebins
kubetools
ips
"

View File

@@ -7,7 +7,7 @@ USER_PASSWORD=training
# For a list of old versions, check:
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
KUBEVERSION=1.24.14
KUBEVERSION=1.22.5
STEPS="
wait
@@ -18,8 +18,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"

View File

@@ -14,8 +14,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"
"

View File

@@ -14,8 +14,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"
"

View File

@@ -15,8 +15,7 @@ STEPS="
createuser
webssh
tailhist
kubepkgs
kubeadm
kube
kubetools
kubetest
"

View File

@@ -1,8 +1,3 @@
#export TF_VAR_node_size=GP2.4
#export TF_VAR_node_size=g6-standard-6
#export TF_VAR_node_size=m7i.xlarge
CLUSTERSIZE=1
CLUSTERPREFIX=CHANGEME

View File

@@ -7,7 +7,7 @@ STUDENTS=2
#export TF_VAR_location=eu-north-1
export TF_VAR_node_size=S
SETTINGS=admin-monokube
SETTINGS=admin-dmuc
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \
@@ -15,7 +15,15 @@ TAG=$PREFIX-$SETTINGS
--settings settings/$SETTINGS.env \
--students $STUDENTS
SETTINGS=admin-polykube
SETTINGS=admin-kubenet
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \
--provider $PROVIDER \
--settings settings/$SETTINGS.env \
--students $STUDENTS
SETTINGS=admin-kuberouter
TAG=$PREFIX-$SETTINGS
./labctl create \
--tag $TAG \

View File

@@ -1,2 +0,0 @@
#!/bin/sh
exo zone

View File

@@ -1,8 +1,7 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = "~> 4.47.0"
source = "hashicorp/aws"
}
}
}

View File

@@ -1 +0,0 @@
../common.tf

View File

@@ -1 +0,0 @@
../../providers/azure/config.tf

View File

@@ -1,22 +0,0 @@
resource "azurerm_resource_group" "_" {
name = var.cluster_name
location = var.location
}
resource "azurerm_kubernetes_cluster" "_" {
name = var.cluster_name
location = var.location
dns_prefix = var.cluster_name
identity {
type = "SystemAssigned"
}
resource_group_name = azurerm_resource_group._.name
default_node_pool {
name = "x86"
node_count = var.min_nodes_per_pool
min_count = var.min_nodes_per_pool
max_count = var.max_nodes_per_pool
vm_size = local.node_size
enable_auto_scaling = true
}
}

View File

@@ -1,12 +0,0 @@
output "cluster_id" {
value = azurerm_kubernetes_cluster._.id
}
output "has_metrics_server" {
value = true
}
output "kubeconfig" {
value = azurerm_kubernetes_cluster._.kube_config_raw
sensitive = true
}

View File

@@ -1,7 +0,0 @@
terraform {
required_providers {
azurerm = {
source = "hashicorp/azurerm"
}
}
}

View File

@@ -1 +0,0 @@
../../providers/azure/variables.tf

View File

@@ -11,23 +11,17 @@ data "oci_containerengine_cluster_option" "_" {
locals {
compartment_id = oci_identity_compartment._.id
kubernetes_version = data.oci_containerengine_cluster_option._.kubernetes_versions[0]
images = [
for image in data.oci_containerengine_node_pool_option._.sources : image
if can(regex("OKE", image.source_name))
&& can(regex(substr(local.kubernetes_version, 1, -1), image.source_name))
&& !can(regex("GPU", image.source_name))
&& !can(regex("aarch64", image.source_name))
]
}
data "oci_identity_availability_domains" "_" {
compartment_id = local.compartment_id
}
data "oci_containerengine_node_pool_option" "_" {
compartment_id = local.compartment_id
node_pool_option_id = oci_containerengine_cluster._.id
data "oci_core_images" "_" {
compartment_id = local.compartment_id
operating_system = "Oracle Linux"
operating_system_version = "8"
shape = local.shape
}
resource "oci_containerengine_cluster" "_" {
@@ -62,7 +56,7 @@ resource "oci_containerengine_node_pool" "_" {
}
}
node_source_details {
image_id = local.images[0].image_id
image_id = data.oci_core_images._.images[0].id
source_type = "image"
}
}

View File

@@ -1 +0,0 @@
../common.tf

View File

@@ -1 +0,0 @@
../../providers/ovh/config.tf

View File

@@ -1,18 +0,0 @@
resource "ovh_cloud_project_kube" "_" {
name = var.cluster_name
region = var.location
version = local.k8s_version
}
resource "ovh_cloud_project_kube_nodepool" "_" {
kube_id = ovh_cloud_project_kube._.id
name = "x86"
flavor_name = local.node_size
desired_nodes = var.min_nodes_per_pool
min_nodes = var.min_nodes_per_pool
max_nodes = var.max_nodes_per_pool
}
locals {
k8s_version = "1.26"
}

View File

@@ -1,12 +0,0 @@
output "cluster_id" {
value = ovh_cloud_project_kube._.id
}
output "has_metrics_server" {
value = false
}
output "kubeconfig" {
sensitive = true
value = ovh_cloud_project_kube._.kubeconfig
}

View File

@@ -1,7 +0,0 @@
terraform {
required_providers {
ovh = {
source = "ovh/ovh"
}
}
}

View File

@@ -1 +0,0 @@
../../providers/ovh/variables.tf

View File

@@ -1,23 +1,10 @@
resource "scaleway_vpc_private_network" "_" {
}
# This is a kind of hack to use a custom security group with Kapsule.
# See https://www.scaleway.com/en/docs/containers/kubernetes/reference-content/secure-cluster-with-private-network/
resource "scaleway_instance_security_group" "_" {
name = "kubernetes ${split("/", scaleway_k8s_cluster._.id)[1]}"
inbound_default_policy = "accept"
outbound_default_policy = "accept"
}
resource "scaleway_k8s_cluster" "_" {
name = var.cluster_name
name = var.cluster_name
#region = var.location
tags = var.common_tags
version = local.k8s_version
type = "kapsule"
cni = "cilium"
delete_additional_resources = true
private_network_id = scaleway_vpc_private_network._.id
}
resource "scaleway_k8s_pool" "_" {
@@ -30,7 +17,6 @@ resource "scaleway_k8s_pool" "_" {
max_size = var.max_nodes_per_pool
autoscaling = var.max_nodes_per_pool > var.min_nodes_per_pool
autohealing = true
depends_on = [ scaleway_instance_security_group._ ]
}
data "scaleway_k8s_version" "_" {

View File

@@ -44,5 +44,5 @@ locals {
guest_api_server_port = local.node_port
guest_api_server_url_new = "https://${local.guest_api_server_host}:${local.guest_api_server_port}"
guest_api_server_url_old = yamldecode(local.kubeconfig_raw).clusters[0].cluster.server
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
kubeconfig = replace(local.kubeconfig_raw, local.guest_api_server_url_old, local.guest_api_server_url_new)
}

View File

@@ -1,13 +0,0 @@
variable "node_sizes" {
type = map(any)
default = {
S = "d2-4"
M = "d2-4"
L = "d2-8"
}
}
variable "location" {
type = string
default = "BHS5"
}

View File

@@ -1,5 +1,5 @@
variable "node_sizes" {
type = map(any)
type = map(any)
default = {}
}

View File

@@ -1,22 +1,14 @@
resource "openstack_compute_instance_v2" "_" {
for_each = local.nodes
name = each.value.node_name
image_name = data.openstack_images_image_v2._.name
image_name = var.image
flavor_name = each.value.node_size
key_pair = openstack_compute_keypair_v2._.name
key_pair = openstack_compute_keypair_v2._.name
network {
port = openstack_networking_port_v2._[each.key].id
}
}
data "openstack_images_image_v2" "_" {
most_recent = true
properties = {
os = "ubuntu"
version = "22.04"
}
}
resource "openstack_networking_port_v2" "_" {
for_each = local.nodes
network_id = openstack_networking_network_v2._.id

View File

@@ -31,6 +31,10 @@ variable "external_network_id" {
type = string
}
variable "image" {
type = string
}
variable "node_sizes" {
type = map(any)
default = {}

View File

@@ -4,11 +4,6 @@
# another set of clusters while a first one is still running)
# you should set the TF_VAR_cluster_name environment variable.
if ! [ "$TF_VAR_cluster_name" ]; then
echo "Please set TF_VAR_cluster_name. Thanks."
exit 1
fi
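# For example (the cluster name is arbitrary):
#   export TF_VAR_cluster_name=myworkshop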
cd terraform/one-kubernetes
case "$1" in

View File

@@ -5,7 +5,7 @@ chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -5,7 +5,7 @@ chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -1,91 +0,0 @@
title: |
Fondamentaux Kubernetes
chat: "`🔒kubernetes-training` (Slack)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
exclude:
- self-paced
content:
- shared/title.md
- logistics.md
- k8s/intro.md
- shared/about-slides.md
#- shared/chat-room-zoom-meeting.md
#- shared/chat-room-zoom-webinar.md
- shared/prereqs.md
- shared/handson.md
#- shared/webssh.md
- shared/connecting.md
- exercises/k8sfundamentals-brief.md
- exercises/yaml-brief.md
- exercises/localcluster-brief.md
- exercises/healthchecks-brief.md
- shared/toc.md
- # 1
#- k8s/versions-k8s.md
- shared/sampleapp.md
#- shared/composescale.md
#- shared/hastyconclusions.md
- shared/composedown.md
- k8s/concepts-k8s.md
- k8s/kubectlget.md
- k8s/kubectl-run.md
- k8s/kubectlexpose.md
- k8s/service-types.md
- k8s/kubenet.md
- k8s/shippingimages.md
#- k8s/buildshiprun-selfhosted.md
- k8s/buildshiprun-dockerhub.md
- exercises/k8sfundamentals-details.md
- k8s/ourapponkube.md
#- k8s/exercise-wordsmith.md
- # 2
- shared/yaml.md
- k8s/labels-annotations.md
- k8s/kubectl-logs.md
- k8s/logs-cli.md
- k8s/yamldeploy.md
- k8s/namespaces.md
- shared/declarative.md
- k8s/declarative.md
- k8s/deploymentslideshow.md
- k8s/setup-overview.md
- k8s/setup-devel.md
#- k8s/setup-managed.md
#- k8s/setup-selfhosted.md
- k8s/localkubeconfig.md
- k8s/accessinternal.md
- k8s/kubectlproxy.md
- exercises/yaml-details.md
- exercises/localcluster-details.md
- # 3
#- k8s/kubectlscale.md
- k8s/scalingdockercoins.md
- shared/hastyconclusions.md
- k8s/daemonset.md
- k8s/rollout.md
- k8s/healthchecks.md
#- k8s/healthchecks-more.md
- k8s/dashboard.md
- k8s/k9s.md
- k8s/tilt.md
- exercises/healthchecks-details.md
- # 4
- k8s/ingress.md
#- k8s/ingress-tls.md
#- k8s/ingress-advanced.md
- k8s/volumes.md
#- k8s/exercise-configmap.md
#- k8s/build-with-docker.md
#- k8s/build-with-kaniko.md
- k8s/configuration.md
- k8s/secrets.md
- k8s/batch-jobs.md
- shared/thankyou.md

View File

@@ -6,7 +6,7 @@ chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -5,7 +5,7 @@ chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "

View File

@@ -5,7 +5,7 @@ chat: "[Mattermost](https://highfive.container.training/mattermost)"
gitrepo: github.com/jpetazzo/container.training
slides: https://2024-01-enix.container.training/
slides: https://2023-05-enix.container.training/
#slidenumberprefix: "#SomeHashTag &mdash; "
@@ -27,14 +27,14 @@ content:
- shared/handson.md
- k8s/architecture.md
- k8s/deploymentslideshow.md
- k8s/dmuc-easy.md
- k8s/dmuc.md
-
- k8s/multinode.md
- k8s/cni.md
- k8s/interco.md
-
- k8s/dmuc-medium.md
- k8s/dmuc-hard.md
- k8s/cni-internals.md
#- k8s/interco.md
- k8s/apilb.md
-
- k8s/internal-apis.md
- k8s/staticpods.md
- k8s/cluster-upgrade.md

View File

@@ -113,16 +113,22 @@ class: pic
## Results
* [Dev-to-prod reduced from 9 months to 15 minutes (ING)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_ING_01.25.2015_1.pdf)
https://www.docker.com/sites/default/files/CS_ING_01.25.2015_1.pdf)
* [Continuous integration job time reduced by more than 60% (BBC)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_BBCNews_01.25.2015_1.pdf)
https://www.docker.com/sites/default/files/CS_BBCNews_01.25.2015_1.pdf)
* [Deploy 100 times a day instead of once a week (GILT)](
https://gallant-turing-d0d520.netlify.com/docker-case-studies/CS_Gilt_Groupe_03.18.2015_0.pdf)
https://www.docker.com/sites/default/files/CS_Gilt%20Groupe_03.18.2015_0.pdf)
* [70% infrastructure consolidation (MetLife)](
https://www.youtube.com/watch?v=Bwt3xigvlj0)
https://www.docker.com/customers/metlife-transforms-customer-experience-legacy-and-microservices-mashup)
* [60% infrastructure consolidation (Intesa Sanpaolo)](
https://blog.docker.com/2017/11/intesa-sanpaolo-builds-resilient-foundation-banking-docker-enterprise-edition/)
* [14x application density; 60% of legacy datacenter migrated in 4 months (GE Appliances)](
https://www.docker.com/customers/ge-uses-docker-enable-self-service-their-developers)
* etc.

View File

@@ -1,11 +0,0 @@
## Exercise — Enable RBAC
- Enable RBAC on a manually-deployed control plane
- This involves:
- generating different certificates
- distributing the certificates to the controllers
- enabling the proper authorizers in API server

View File

@@ -1,117 +0,0 @@
# Exercise — Enable RBAC
- We want to enable RBAC on the "polykube" cluster
(it doesn't matter whether we have 1 or multiple nodes)
- Ideally, we want to have, for instance:
- one key, certificate, and kubeconfig for a cluster admin
- one key, certificate, and kubeconfig for a user
<br/>
(with permissions in a single namespace)
- Bonus points: enable the NodeAuthorizer too!
- Check the following slides for hints
---
## Step 1
- Enable RBAC itself!
--
- This is done with an API server command-line flag
--
- Check [the documentation][kube-apiserver-doc] to see the flag
--
- For now, only enable `--authorization-mode=RBAC`
[kube-apiserver-doc]: https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/
---
## Step 2
- Our certificate doesn't work anymore, we need to generate a new one
--
- We need a certificate that will have *some* (ideally *all*) permissions
--
- Two options:
- use the equivalent of "root" (identity that completely skips permission checks)
- a "non-root" identity but which is granted permissions with RBAC
--
- The "non-root" option looks nice, but to grant permissions, we need permissions
- So let's start with the equivalent of "root"!
--
- The Kubernetes equivalent of `root` is the group `system:masters`
---
## Step 2, continued
- We need to generate a certificate for a user belonging to group `system:masters`
--
- In Kubernetes certificates, groups are encoded with the "organization" field
--
- That corresponds to `O=system:masters`
--
- In other words we need to generate a new certificate, but with a subject of:
`/CN=admin/O=system:masters/` (the `CN` doesn't matter)
- That certificate should be able to interact with the API server, like before
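For instance, with the same openssl approach as in the other labs (assuming the CA key pair generated earlier is in `ca.key` / `ca.cert`; file names are illustrative):

```bash
# Key and CSR for the admin user, with the group encoded in the O field
openssl genrsa -out admin.key 2048
openssl req -new -key admin.key -subj "/CN=admin/O=system:masters/" -out admin.csr
# Sign it with the cluster CA
openssl x509 -req -in admin.csr -CA ca.cert -CAkey ca.key -CAcreateserial -days 365 -out admin.cert
```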
---
## Step 3
- Now, all our controllers have permissions issues
- We need to either:
- use that `system:masters` cert everywhere
- generate different certs for every controller, with the proper identities
- Suggestion: use `system:masters` everywhere to begin with
(and make sure the cluster is back on its feet)
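If you need to wrap a certificate and key in a kubeconfig file (e.g. for the cluster admin mentioned at the beginning), here is a possible sketch (names and server address are illustrative):

```bash
kubectl config set-cluster polykube --server=https://localhost:6443 --insecure-skip-tls-verify=true
kubectl config set-credentials admin --client-certificate=admin.cert --client-key=admin.key --embed-certs=true
kubectl config set-context admin@polykube --cluster=polykube --user=admin
kubectl config use-context admin@polykube
```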
---
## Step 4
At this point, there are two possible forks in the road:
1. Generate certs for the control plane controllers
(`kube-controller-manager`, `kube-scheduler`)
2. Generate cert(s) for the node(s) and enable `NodeAuthorizer`
Good luck!

View File

@@ -11,137 +11,107 @@
<body>
<table>
<tr>
<td>Mardi 9 janvier 2024</td>
<td>
<a href="2s.yml.html">Fondamentaux Kubernetes (intra)</a>
</td>
</tr>
<tr>
<td>Mercredi 10 janvier 2024</td>
<td>
<a href="2s.yml.html">Fondamentaux Kubernetes (intra)</a>
</td>
</tr>
<tr>
<td>Jeudi 11 janvier 2024</td>
<td>
<a href="2s.yml.html">Fondamentaux Kubernetes (intra)</a>
</td>
</tr>
<tr>
<td>Vendredi 12 janvier 2024</td>
<td>
<a href="2s.yml.html">Fondamentaux Kubernetes (intra)</a>
</td>
</tr>
<tr>
<td>Mardi 16 janvier 2024</td>
<td>Mardi 9 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Mercredi 17 janvier 2024</td>
<td>Mercredi 10 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Jeudi 18 janvier 2024</td>
<td>Jeudi 11 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Vendredi 19 janvier 2024</td>
<td>Vendredi 12 mai 2023</td>
<td>
<a href="1.yml.html">Docker Intensif</a>
</td>
</tr>
<tr>
<td>Mardi 23 janvier 2024</td>
<td>Lundi 15 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 24 janvier 2024</td>
<td>Mardi 16 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 25 janvier 2024</td>
<td>Mercredi 17 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Vendredi 26 janvier 2024</td>
<td>Lundi 22 mai 2023</td>
<td>
<a href="2.yml.html">Fondamentaux Kubernetes</a>
</td>
</tr>
<tr>
<td>Lundi 29 janvier 2024</td>
<td>Mardi 23 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mardi 30 janvier 2024</td>
<td>Mercredi 24 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Mercredi 31 janvier 2024</td>
<td>Jeudi 25 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Jeudi 1er février 2024</td>
<td>Vendredi 26 mai 2023</td>
<td>
<a href="4.yml.html">Kubernetes Avancé</a>
</td>
</tr>
<tr>
<td>Vendredi 2 février 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Lundi 5 février 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 6 février 2024</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 7 février 2024</td>
<td>Mardi 30 mai 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Jeudi 8 février 2024</td>
<td>Mercredi 31 mai 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Vendredi 9 février 2024</td>
<td>Jeudi 1er juin 2023</td>
<td>
<a href="3.yml.html">Packaging d'applications pour Kubernetes</a>
</td>
</tr>
<tr>
<td>Mardi 6 juin 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
<tr>
<td>Mercredi 7 juin 2023</td>
<td>
<a href="5.yml.html">Opérer Kubernetes</a>
</td>
</tr>
</table>
</body>
</html>

View File

@@ -981,6 +981,10 @@
# event: LISA
# title: Deploying and Scaling Applications with Docker Swarm
#2015-09-24-strangeloop
- title: Introduction to Docker and Containers
slides: intro-selfpaced.yml.html

View File

@@ -224,29 +224,17 @@ class: extra-details
- Example: run `ngrok http 1234`
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.app)
- `ngrok` will display a publicly-available URL (e.g. https://xxxxyyyyzzzz.ngrok.io)
- Connections to https://xxxxyyyyzzzz.ngrok.app will terminate at `localhost:1234`
- Connections to https://xxxxyyyyzzzz.ngrok.io will terminate at `localhost:1234`
- Basic product is free; extra features (vanity domains, end-to-end TLS...) for $$$
- Perfect to develop our webhook!
---
- Probably not for production, though
class: extra-details
## Ngrok in production
- Ngrok was initially known for its local webhook development features
- It now supports production scenarios as well
(load balancing, WAF, authentication, circuit-breaking...)
- Including some that are very relevant to Kubernetes
(e.g. [ngrok Ingress Controller](https://github.com/ngrok/kubernetes-ingress-controller))
(webhook requests and responses now pass through the ngrok platform)
---
@@ -555,23 +543,6 @@ Shell to the rescue!
(it should only allow values of `red`, `green`, `blue`)
---
## Coming soon...
- Kubernetes Validating Admission Policies
- Integrated with the Kubernetes API server
- Lets us define policies using [CEL (Common Expression Language)][cel-spec]
- Available in beta in Kubernetes 1.28 <!-- ##VERSION## -->
- Check this [CNCF Blog Post][cncf-blog-vap] for more details
[cncf-blog-vap]: https://www.cncf.io/blog/2023/09/14/policy-management-in-kubernetes-is-changing/
[cel-spec]: https://github.com/google/cel-spec
???
:EN:- Dynamic admission control with webhooks

View File

@@ -856,7 +856,7 @@ class: extra-details
- To learn more about Kubernetes attacks and threat models around RBAC:
📽️ [Hacking into Kubernetes Security for Beginners](https://www.youtube.com/watch?v=mLsCm9GVIQg)
by [V Körbes](https://twitter.com/veekorbes)
by [Ellen Körbes](https://twitter.com/ellenkorbes)
and [Tabitha Sable](https://twitter.com/TabbySable)
---

View File

@@ -111,6 +111,34 @@
---
## General guidelines
- To update a component, use whatever was used to install it
- If it's a distro package, update that distro package
- If it's a container or pod, update that container or pod
- If you used configuration management, update with that
---
## Know where your binaries come from
- Sometimes, we need to upgrade *quickly*
(when a vulnerability is announced and patched)
- If we are using an installer, we should:
- make sure it's using upstream packages
- or make sure that whatever packages it uses are current
- make sure we can tell it to pin specific component versions
---
## Important questions
- Should we upgrade the control plane before or after the kubelets?
@@ -178,34 +206,6 @@ and kubectl, which can be one MINOR ahead or behind API server.]
---
## General guidelines
- To update a component, use whatever was used to install it
- If it's a distro package, update that distro package
- If it's a container or pod, update that container or pod
- If you used configuration management, update with that
---
## Know where your binaries come from
- Sometimes, we need to upgrade *quickly*
(when a vulnerability is announced and patched)
- If we are using an installer, we should:
- make sure it's using upstream packages
- or make sure that whatever packages it uses are current
- make sure we can tell it to pin specific component versions
---
## In practice
- We are going to update a few cluster components
@@ -507,86 +507,6 @@ kubeadm should now agree to upgrade to 1.23.X.
---
## And now, was that a good idea?
--
**Almost!**
--
- The official recommendation is to *drain* a node before performing node maintenance
(migrate all workloads off the node before upgrading it)
- How do we do that?
- Is it really necessary?
- Let's see!
---
## Draining a node
- This can be achieved with the `kubectl drain` command, which will:
- *cordon* the node (prevent new pods from being scheduled there)
- *evict* all the pods running on the node (delete them gracefully)
- the evicted pods will automatically be recreated somewhere else
- evictions might be blocked in some cases (Pod Disruption Budgets, `emptyDir` volumes)
- Once the node is drained, it can safely be upgraded, restarted...
- Once it's ready, it can be put back in commission with `kubectl uncordon`
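For example (the node name is illustrative):

```bash
# Cordon the node and evict everything (DaemonSet pods are ignored)
kubectl drain node-3 --ignore-daemonsets --delete-emptydir-data
# ...upgrade, reboot, etc., then put the node back in rotation:
kubectl uncordon node-3
```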
---
## Is it necessary?
- When upgrading kubelet from one patch-level version to another:
- it's *probably fine*
- When upgrading system packages:
- it's *probably fine*
- except [when it's not][datadog-systemd-outage]
- When upgrading the kernel:
- it's *probably fine*
- ...as long as we can tolerate a restart of the containers on the node
- ...and that they will be unavailable for a few minutes (during the reboot)
[datadog-systemd-outage]: https://www.datadoghq.com/blog/engineering/2023-03-08-deep-dive-into-platform-level-impact/
---
## Is it necessary?
- When upgrading kubelet from one minor version to another:
- it *may or may not be fine*
- in some cases (e.g. migrating from Docker to containerd) it *will not*
- Here's what [the documentation][node-upgrade-docs] says:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- Do it at your own risk, and if you do, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
class: extra-details
## Skipping versions

View File

@@ -1,513 +0,0 @@
# Disruptions
In a perfect world...
- hardware never fails
- software never has bugs
- ...and never needs to be updated
- ...and uses a predictable amount of resources
- ...and these resources are infinite anyways
- network latency and packet loss are zero
- humans never make mistakes
--
😬
---
## Disruptions
In the real world...
- hardware will fail randomly (without advance notice)
- software has bugs
- ...and we constantly add new features
- ...and will sometimes use more resources than expected
- ...and these resources are limited
- network latency and packet loss are NOT zero
- humans make mistakes (shutting down the wrong machine, the wrong app...)
---
## Disruptions
- In Kubernetes, a "disruption" is something that stops the execution of a Pod
- There are **voluntary** and **involuntary** disruptions
- voluntary = directly initiated by humans (including by mistake!)
- involuntary = everything else
- In this section, we're going to see what they are and how to prevent them
(or at least, mitigate their effects)
---
## Node outage
- Example: hardware failure (server or network), low-level error
(includes kernel bugs, issues affecting underlying hypervisors or infrastructure...)
- **Involuntary** disruption (even if it results from human error!)
- Consequence: all workloads on that node become unresponsive
- Mitigations:
- scale workloads to at least 2 replicas (or more if quorum is needed)
- add anti-affinity scheduling constraints (to avoid having all pods on the same node)
---
## Node outage play-by-play
- Node goes down (or disconnected from network)
- Its lease (in Namespace `kube-node-lease`) doesn't get renewed
- Controller manager detects that and marks the node as "unreachable"
(this adds both `NoSchedule` and `NoExecute` taints to the node)
- Eventually, the `NoExecute` taint will evict these pods
- This will trigger creation of replacement pods by owner controllers
(except for pods with a stable network identity, e.g. in a Stateful Set!)
---
## Node outage notes
- By default, pods will tolerate the `unreachable:NoExecute` taint for 5 minutes
(toleration automatically added by Admission controller `DefaultTolerationSeconds`)
- Pods of a Stateful Set don't recover automatically:
- as long as the Pod exists, a replacement Pod can't be created
- the Pod will exist as long as its Node exists
- deleting the Node (manually or automatically) will recover the Pod
---
## Memory/disk pressure
- Example: available memory on a node goes below a specific threshold
(because a pod is using too much memory and no limit was set)
- **Involuntary** disruption
- Consequence: kubelet starts to *evict* some pods
- Mitigations:
- set *resource limits* on containers to prevent them from using too many resources
- set *resource requests* on containers to make sure they don't get evicted
<br/>
(as long as they use less than what they requested)
- make sure that apps don't use more resources than what they've requested
---
## Memory/disk pressure play-by-play
- Memory leak in an application container, slowly causing very high memory usage
- Overall free memory on the node goes below the *soft* or the *hard* threshold
(default hard threshold = 100Mi; default soft threshold = none)
- When reaching the *soft* threshold:
- kubelet waits until the "eviction soft grace period" expires
- then (if resource usage is still above the threshold) it gracefully evicts pods
- When reaching the *hard* threshold:
- kubelet immediately and forcefully evicts pods
---
## Which pods are evicted?
- Kubelet only considers pods that are using *more* than what they requested
(and only for the resource that is under pressure, e.g. RAM or disk usage)
- First, it sorts pods by *priority¹* (as set with the `priorityClassName` in the pod spec)
- Then, by how much their resource usage exceeds their request
(again, for the resource that is under pressure)
- It evicts pods until enough resources have been freed up
---
## Soft (graceful) vs hard (forceful) eviction
- Soft eviction = graceful shutdown of the pod
(honors the pod's `terminationGracePeriodSeconds` timeout)
- Hard eviction = immediate shutdown of the pod
(kills all containers immediately)
---
## Memory/disk pressure notes
- If resource usage increases *very fast*, kubelet might not catch it fast enough
- For memory: this will trigger the kernel out-of-memory killer
- containers killed by OOM are automatically restarted (no eviction)
- eviction might happen at a later point though (if memory usage stays high)
- For disk: there is no "out-of-disk" killer, but writes will fail
- the `write` system call fails with `errno = ENOSPC` / `No space left on device`
- eviction typically happens shortly after (when kubelet catches up)
- When relying on disk/memory bursts a lot, using `priorityClasses` might help
---
## Memory/disk pressure delays
- By default, no soft threshold is defined
- Defining it requires setting both the threshold and the grace period
- Grace periods can be different for the different types of resources
- When a node is under pressure, kubelet places a `NoSchedule` taint
(to avoid adding more pods while the node is under pressure)
- Once the node is no longer under pressure, kubelet clears the taint
(after waiting an extra timeout, `evictionPressureTransitionPeriod`, 5 min by default)
---
## Accidental deletion
- Example: developer deletes the wrong Deployment, the wrong Namespace...
- **Voluntary** disruption
(from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- only deploy to production systems through e.g. gitops workflows
- enforce peer review of changes
- only give users limited (e.g. read-only) access to production systems
- use canary deployments (might not catch all mistakes though!)
---
## Bad code deployment
- Example: critical bug introduced, application crashes immediately or is non-functional
- **Voluntary** disruption
(again, from Kubernetes' perspective!)
- Consequence: application is down
- Mitigations:
- readiness probes can mitigate immediate crashes
<br/>
(rolling update continues only when enough pods are ready)
- delayed crashes will require a rollback
<br/>
(manual intervention, or automated by a canary system)
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- terminate workloads one at a time, coordinating with users
--
🤔
---
## Node shutdown
- Example: scaling down a cluster to save money
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are terminated
- this might disrupt workloads that have too many replicas on that node
- or workloads that should not be interrupted at all
- Mitigations:
- ~~terminate workloads one at a time, coordinating with users~~
- use Pod Disruption Budgets
---
## Pod Disruption Budgets
- A PDB is a kind of *contract* between:
- "admins" = folks maintaining the cluster (e.g. adding/removing/updating nodes)
- "users" = folks deploying apps and workloads on the cluster
- A PDB expresses something like:
*in that particular set of pods, do not "disrupt" more than X at a time*
- Examples:
- in that set of frontend pods, do not disrupt more than 1 at a time
- in that set of worker pods, always have at least 10 ready
<br/>
(do not disrupt them if it would bring down the number of ready pods below 10)
---
## PDB - user side
- Cluster users create a PDB with a manifest like this one:
```yaml
@@INCLUDE[k8s/pod-disruption-budget.yaml]
```
- The PDB must indicate either `minAvailable` or `maxUnavailable`
---
## Rounding logic
- Percentages are rounded **up**
- When specifying `maxUnavailable` as a percentage, this can result in a higher percentage
(e.g. `maxUnavailable: 50%` with 3 pods can result in 2 pods being unavailable!)
---
## Unmanaged pods
- Specifying `minAvailable: X` works all the time
- Specifying `minAvailable: X%` or `maxUnavailable` requires *managed pods*
(pods that belong to a controller, e.g. Replica Set, Stateful Set...)
- This is because the PDB controller needs to know the total number of pods
(given by the `replicas` field, not merely by counting pod objects)
- The PDB controller will try to resolve the controller using the pod selector
- If that fails, the PDB controller will emit warning events
(visible with `kubectl describe pdb ...`)
---
## Zero
- `maxUnavailable: 0` means "do not disrupt my pods"
- Same thing if `minAvailable` is greater than or equal to the number of pods
- In that case, cluster admins are supposed to get in touch with cluster users
- This will prevent fully automated operation
(and some cluster admins' automated systems might not honor that request)
---
## PDB - admin side
- As a cluster admin, we need to follow certain rules
- Only shut down (or restart) a node when no pods are running on that node
(except system pods belonging to Daemon Sets)
- To remove pods running on a node, we should use the *eviction API*
(which will check PDB constraints and honor them)
- To prevent new pods from being scheduled on a node, we can use a *taint*
- These operations are streamlined by `kubectl drain`, which will:
- *cordon* the node (add a `NoSchedule` taint)
- invoke the *eviction API* to remove pods while respecting their PDBs
---
## Theory vs practice
- `kubectl drain` won't evict pods using `emptyDir` volumes
(unless the `--delete-emptydir-data` flag is passed as well)
- Make sure that `emptyDir` volumes don't hold anything important
(they shouldn't, but... who knows!)
- Kubernetes lacks a standard way for users to express:
*this `emptyDir` volume can/cannot be safely deleted*
- If a PDB forbids an eviction, this requires manual coordination
---
class: extra-details
## Unhealthy pod eviction policy
- By default, unhealthy pods can only be evicted if PDB allows it
(unhealthy = running, but not ready)
- In many cases, unhealthy pods aren't healthy anyway, and can be removed
- This behavior is enabled by setting the appropriate field in the PDB manifest:
```yaml
spec:
unhealthyPodEvictionPolicy: AlwaysAllow
```
---
## Node upgrade
- Example: upgrading kubelet or the Linux kernel on a node
- **Voluntary** disruption
- Consequence:
- all workloads running on that node are temporarily interrupted, and restarted
- this might disrupt these workloads
- Mitigations:
- migrate workloads off the node first (as if we were shutting it down)
---
## Node upgrade notes
- Is it necessary to drain a node before doing an upgrade?
- From [the documentation][node-upgrade-docs]:
*Draining nodes before upgrading kubelet ensures that pods are re-admitted and containers are re-created, which may be necessary to resolve some security issues or other important bugs.*
- It's *probably* safe to upgrade in-place for:
- kernel upgrades
- kubelet patch-level upgrades (1.X.Y → 1.X.Z)
- It's *probably* better to drain the node for minor-revision kubelet upgrades (1.X → 1.Y)
- In doubt, test extensively in staging environments!
[node-upgrade-docs]: https://kubernetes.io/docs/tasks/administer-cluster/cluster-upgrade/#manual-deployments
---
## Manual rescheduling
- Example: moving workloads around to accommodate noisy neighbors or other issues
(e.g. pod X is doing a lot of disk I/O and this is starving other pods)
- **Voluntary** disruption
- Consequence:
- the moved workloads are temporarily interrupted
- Mitigations:
- define an appropriate number of replicas, declare PDBs
- use the [eviction API][eviction-API] to move workloads
[eviction-API]: https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/
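For the record, a low-tech way to call the eviction API directly (a sketch; pod name and namespace are illustrative):

```bash
# Write an Eviction object, then POST it to the pod's eviction subresource
cat > eviction.json <<EOF
{"apiVersion": "policy/v1", "kind": "Eviction",
 "metadata": {"name": "mypod", "namespace": "default"}}
EOF
kubectl create -f eviction.json --raw /api/v1/namespaces/default/pods/mypod/eviction
```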
???
:EN:- Voluntary and involuntary disruptions
:EN:- Pod Disruption Budgets
:FR:- "Disruptions" volontaires et involontaires
:FR:- Pod Disruption Budgets

View File

@@ -1,374 +0,0 @@
# Building our own cluster (hard)
- This section assumes that you already went through
*“Building our own cluster (medium)”*
- In that previous section, we built a cluster with a single node
- In this new section, we're going to add more nodes to the cluster
- Note: we will need the lab environment of that previous section
- If you haven't done it yet, you should go through that section first
---
## Our environment
- On `polykube1`, we should have our Kubernetes control plane
- We're also assuming that we have the kubeconfig file created earlier
(in `~/.kube/config`)
- We're going to work on `polykube2` and add it to the cluster
- This machine has exactly the same setup as `polykube1`
(Ubuntu LTS with CNI, etcd, and Kubernetes binaries installed)
- Note that we won't need the etcd binaries here
(the control plane will run solely on `polykube1`)
---
## Checklist
We need to:
- generate the kubeconfig file for `polykube2`
- install a container engine
- generate a CNI configuration file
- start kubelet
---
## Generating the kubeconfig file
- Ideally, we should generate a key pair and certificate for `polykube2`...
- ...and generate a kubeconfig file using these
- At the moment, for simplicity, we'll use the same key pair and certificate as earlier
- We have a couple of options:
- copy the required files (kubeconfig, key pair, certificate)
- "flatten" the kubeconfig file (embed the key and certificate within)
---
class: extra-details
## To flatten or not to flatten?
- "Flattening" the kubeconfig file can seem easier
(because it means we'll only have one file to move around)
- But it's easier to rotate the key or renew the certificate when they're in separate files
---
## Flatten and copy the kubeconfig file
- We'll flatten the file and copy it over
.lab[
- On `polykube1`, flatten the kubeconfig file:
```bash
kubectl config view --flatten > kubeconfig
```
- Then copy it to `polykube2`:
```bash
scp kubeconfig polykube2:
```
]
---
## Generate CNI configuration
Back on `polykube2`, put the following in `/etc/cni/net.d/kube.conf`:
```json
{
"cniVersion": "0.3.1",
"name": "kube",
"type": "bridge",
"bridge": "cni0",
"isDefaultGateway": true,
"ipMasq": true,
"hairpinMode": true,
"ipam": {
"type": "host-local",
"subnet": `"10.1.2.0/24"`
}
}
```
Note how we changed the subnet!
---
## Install container engine and start `kubelet`
.lab[
- Install `containerd`:
```bash
sudo apt-get install containerd -y
```
- Start `containerd`:
```bash
sudo systemctl start containerd
```
- Start `kubelet`:
```bash
sudo kubelet --kubeconfig kubeconfig
```
]
We're getting errors looking like:
```
"Post \"https://localhost:6443/api/v1/nodes\": ... connect: connection refused"
```
---
## Updating the kubeconfig file
- Our kubeconfig file still references `localhost:6443`
- This was fine on `polykube1`
(where `kubelet` was connecting to the control plane running locally)
- On `polykube2`, we need to change that and put the address of the API server
(i.e. the address of `polykube1`)
.lab[
- Update the `kubeconfig` file:
```bash
sed -i s/localhost:6443/polykube1:6443/ kubeconfig
```
]
---
## Starting `kubelet`
- `kubelet` should now start correctly (hopefully!)
.lab[
- On `polykube2`, start `kubelet`:
```bash
sudo kubelet --kubeconfig kubeconfig
```
- On `polykube1`, check that `polykube2` shows up and is `Ready`:
```bash
kubectl get nodes
```
]
---
## Testing connectivity
- From `polykube1`, can we connect to Pods running on `polykube2`? 🤔
.lab[
- Scale the test Deployment:
```bash
kubectl scale deployment blue --replicas=5
```
- Get the IP addresses of the Pods:
```bash
kubectl get pods -o wide
```
- Pick a Pod on `polykube2` and try to connect to it:
```bash
curl `10.1.2.2`
```
]
--
At that point, it doesn't work.
---
## Refresher on the *pod network*
- The *pod network* (or *pod-to-pod network*) has a few responsibilities:
- allocating and managing Pod IP addresses
- connecting Pods and Nodes
- connecting Pods together on a given node
- *connecting Pods together across nodes*
- That last part is the one that's not functioning in our cluster
- It typically requires some combination of routing, tunneling, bridging...
---
## Connecting networks together
- We can add manual routes between our nodes
- This requires adding `N x (N-1)` routes
(on each node, add a route to every other node)
- This will work on home labs where nodes are directly connected
(e.g. on an Ethernet switch, or same WiFi network, or a bridge between local VMs)
- ...Or on clouds where IP address filtering has been disabled
(by default, most cloud providers will discard packets going to unknown IP addresses)
- If IP address filtering is enabled, you'll have to use e.g. tunneling or overlay networks
---
## Important warning
- The technique that we are about to use doesn't work everywhere
- It only works if:
- all the nodes are directly connected to each other (at layer 2)
- the underlying network allows the IP addresses of our pods
- If we are on physical machines connected by a switch: OK
- If we are on virtual machines in a public cloud: NOT OK
- on AWS, we need to disable "source and destination checks" on our instances
- on OpenStack, we need to disable "port security" on our network ports
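For instance (a sketch; the instance and port IDs are illustrative):

```bash
# AWS: allow the instance to send/receive packets with pod IP addresses
aws ec2 modify-instance-attribute --instance-id i-0123456789abcdef0 --no-source-dest-check
# OpenStack: disable port security on the node's network port
openstack port set --no-security-group --disable-port-security 01234567-89ab-cdef-0123-456789abcdef
```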
---
## Routing basics
- We need to tell *each* node:
"The subnet 10.1.N.0/24 is located on node N" (for all values of N)
- This is how we add a route on Linux:
```bash
ip route add 10.1.N.0/24 via W.X.Y.Z
```
(where `W.X.Y.Z` is the internal IP address of node N)
- We can see the internal IP addresses of our nodes with:
```bash
kubectl get nodes -o wide
```
---
## Adding our route
- Let's add a route from `polykube1` to `polykube2`
.lab[
- Check the internal address of `polykube2`:
```bash
kubectl get node polykube2 -o wide
```
- Now, on `polykube1`, add the route to the Pods running on `polykube2`:
```bash
sudo ip route add 10.1.2.0/24 via `A.B.C.D`
```
- Finally, check that we can now connect to a Pod running on `polykube2`:
```bash
curl 10.1.2.2
```
]
---
## What's next?
- The network configuration feels very manual:
- we had to generate the CNI configuration file (in `/etc/cni/net.d`)
- we had to manually update the nodes' routing tables
- Can we automate that?
**YES!**
- We could install something like [kube-router](https://www.kube-router.io/)
(which specifically takes care of the CNI configuration file and populates routing tables)
- Or we could also go with e.g. [Cilium](https://cilium.io/)
---
class: extra-details
## If you want to try Cilium...
- Add the `--root-ca-file` flag to the controller manager:
- use the certificate automatically generated by the API server
<br/>
(it should be in `/var/run/kubernetes/apiserver.crt`)
- or generate a key pair and certificate for the API server and point to
that certificate
- without that, you'll get certificate validation errors
<br/>
(because in our Pods, the `ca.crt` file used to validate the API server will be empty)
- Check the Cilium [without kube-proxy][ciliumwithoutkubeproxy] instructions
(make sure to pass the API server IP address and port!)
- Other pod-to-pod network implementations might also require additional steps
[ciliumwithoutkubeproxy]: https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/#kubeproxy-free
???
:EN:- Connecting nodes and pods
:FR:- Interconnecter les nœuds et les pods

View File

@@ -1,891 +0,0 @@
# Building our own cluster (medium)
- This section assumes that you already went through
*“Building our own cluster (easy)”*
- In that section, we saw how to run each control plane component manually...
...but with an older version of Kubernetes (1.19)
- In this section, we're going to do something similar...
...but with recent versions of Kubernetes!
- Note: we won't need the lab environment of that previous section
(we're going to build a new cluster from scratch)
---
## What remains the same
- We'll use machines with Kubernetes binaries pre-downloaded
- We'll run individual components by hand
(etcd, API server, controller manager, scheduler, kubelet)
- We'll run on a single node
(but we'll be laying the groundwork to add more nodes)
- We'll get the cluster to the point where we can run and expose pods
---
## What's different
- We'll need to generate TLS keys and certificates
(because it's mandatory with recent versions of Kubernetes)
- Things will be *a little bit more* secure
(but still not 100% secure, far from it!)
- We'll use containerd instead of Docker
(you could probably try with CRI-O or another CRI engine, too)
- We'll need to set up CNI for networking
- *And we won't do everything as root this time (but we might use `sudo` a lot)*
---
## Our environment
- We will use the machine indicated as `polykube1`
- This machine:
- runs Ubuntu LTS
- has Kubernetes, etcd, and CNI binaries installed
- but nothing is running
---
## Checking our environment
- Let's make sure we have everything we need first
.lab[
- Log into the `polykube1` machine
- Check available versions:
```bash
etcd -version
kube-apiserver --version
```
]
---
## The plan
We'll follow the same methodology as for the "easy" section
1. Start API server
2. Interact with it (create Deployment and Service)
3. See what's broken
4. Fix it and go back to step 2 until it works!
---
## Dealing with multiple processes
- Again, we are going to start many processes
- Depending on what you're comfortable with, you can:
- open multiple windows and multiple SSH connections
- use a terminal multiplexer like screen or tmux
- put processes in the background with `&`
<br/>(warning: log output might get confusing to read!)
---
## Starting API server
.lab[
- Try to start the API server:
```bash
kube-apiserver
# It will complain about permission to /var/run/kubernetes
sudo kube-apiserver
# Now it will complain about a bunch of missing flags, including:
# --etcd-servers
# --service-account-issuer
# --service-account-signing-key-file
```
]
Just like before, we'll need to start etcd.
But we'll also need some TLS keys!
---
## Generating TLS keys
- There are many ways to generate TLS keys (and certificates)
- A very popular and modern tool to do that is [cfssl]
- We're going to use the old-fashioned [openssl] CLI
- Feel free to use cfssl or any other tool if you prefer!
[cfssl]: https://github.com/cloudflare/cfssl#using-the-command-line-tool
[openssl]: https://www.openssl.org/docs/man3.0/man1/
---
## How many keys do we need?
At the very least, we need the following two keys:
- ServiceAccount key pair
- API client key pair, aka "CA key"
(technically, we will need a *certificate* for that key pair)
But if we wanted to tighten the cluster security, we'd need many more...
---
## The other keys
These keys are not strictly necessary at this point:
- etcd key pair
*without that key, communication with etcd will be insecure*
- API server endpoint key pair
*the API server will generate this one automatically if we don't*
- kubelet key pair (used by API server to connect to kubelets)
*without that key, commands like kubectl logs/exec will be insecure*
---
## Would you like some auth with that?
If we want to enable authentication and authorization, we also need various API client key pairs signed by the "CA key" mentioned earlier. That would include (non-exhaustive list):
- controller manager key pair (see the sketch after this list)
- scheduler key pair
- in most cases: kube-proxy (or equivalent) key pair
- in most cases: key pairs for the nodes joining the cluster
(these might be generated through TLS bootstrap tokens)
- key pairs for users that will interact with the clusters
(unless another authentication mechanism like OIDC is used)
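
For instance, here is a sketch of how the controller manager's client key pair could be generated (using the CA key and certificate that we create on the next slide; the CN becomes the user name seen by RBAC):

```bash
# Sketch: client certificate for the controller manager, signed by the cluster CA
openssl genrsa -out controller-manager.key 2048
openssl req -new -key controller-manager.key \
        -subj /CN=system:kube-controller-manager/ -out controller-manager.csr
openssl x509 -req -in controller-manager.csr -CA ca.cert -CAkey ca.key \
        -CAcreateserial -out controller-manager.cert
```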
---
## Generating our keys and certificates
.lab[
- Generate the ServiceAccount key pair:
```bash
openssl genrsa -out sa.key 2048
```
- Generate the CA key pair:
```bash
openssl genrsa -out ca.key 2048
```
- Generate a self-signed certificate for the CA key:
```bash
openssl x509 -new -key ca.key -out ca.cert -subj /CN=kubernetes/
```
]
---
## Starting etcd
- This one is easy!
.lab[
- Start etcd:
```bash
etcd
```
]
Note: if you want a bit of extra challenge, you can try
to generate the etcd key pair and use it.
(You will need to pass it to etcd and to the API server.)
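
If you take up that challenge, the commands could look roughly like this (a sketch under a few assumptions: self-signed etcd certificate, everything on localhost, file names of our choosing):

```bash
# Sketch: self-signed certificate for etcd (a SAN is needed for Go clients)
openssl req -x509 -newkey rsa:2048 -nodes -keyout etcd.key -out etcd.cert \
        -subj /CN=etcd/ -addext "subjectAltName=IP:127.0.0.1,DNS:localhost"

# Serve the etcd client port over TLS
etcd --cert-file etcd.cert --key-file etcd.key \
     --advertise-client-urls https://localhost:2379 \
     --listen-client-urls https://localhost:2379

# Then start the API server with --etcd-servers=https://localhost:2379
# and --etcd-cafile etcd.cert (keeping the other flags from the next slide)
```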
---
## Starting API server
- We need to use the keys and certificate that we just generated
.lab[
- Start the API server:
```bash
sudo kube-apiserver \
--etcd-servers=http://localhost:2379 \
--service-account-signing-key-file=sa.key \
--service-account-issuer=https://kubernetes \
--service-account-key-file=sa.key \
--client-ca-file=ca.cert
```
]
The API server should now start.
But can we really use it? 🤔
---
## Trying `kubectl`
- Let's try some simple `kubectl` command
.lab[
- Try to list Namespaces:
```bash
kubectl get namespaces
```
]
We're getting an error message like this one:
```
The connection to the server localhost:8080 was refused -
did you specify the right host or port?
```
---
## What's going on?
- Recent versions of Kubernetes don't support unauthenticated API access
- The API server doesn't support listening on plain HTTP anymore
- `kubectl` still tries to connect to `localhost:8080` by default
- But there is nothing listening there
- Our API server listens on port 6443, using TLS
---
## Trying to access the API server
- Let's use `curl` first to confirm that everything works correctly
(and then we will move to `kubectl`)
.lab[
- Try to connect with `curl`:
```bash
curl https://localhost:6443
# This will fail because the API server certificate is unknown.
```
- Try again, skipping certificate verification:
```bash
curl --insecure https://localhost:6443
```
]
We should now see an `Unauthorized` Kubernetes API error message.
<br/>
We need to authenticate with our key and certificate.
---
## Authenticating with the API server
- For the time being, we can use the CA key and cert directly
- In a real world scenario, we would *never* do that!
(because we don't want the CA key to be out there in the wild)
.lab[
- Try again, skipping cert verification, and using the CA key and cert:
```bash
curl --insecure --key ca.key --cert ca.cert https://localhost:6443
```
]
We should see a list of API routes.
---
class: extra-details
## Doing it right
In the future, instead of using the CA key and certificate,
we should generate a new key, and a certificate for that key,
signed by the CA key.
Then we can use that new key and certificate to authenticate.
Example:
```
### Generate a key pair
openssl genrsa -out user.key
### Extract the public key
openssl pkey -in user.key -out user.pub -pubout
### Generate a certificate signed by the CA key
openssl x509 -new -key ca.key -force_pubkey user.pub -out user.cert \
-subj /CN=kubernetes-user/
```
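
For example, we could then authenticate with that key pair instead of the CA key pair (following the same pattern as the earlier `curl` command):

```bash
curl --insecure --key user.key --cert user.cert https://localhost:6443
```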
---
## Writing a kubeconfig file
- We now want to use `kubectl` instead of `curl`
- We'll need to write a kubeconfig file for `kubectl`
- There are many ways to do that; here, we're going to use `kubectl config`
- We'll need to:
- set the "cluster" (API server endpoint)
- set the "credentials" (the key and certficate)
- set the "context" (referencing the cluster and credentials)
- use that context (make it the default that `kubectl` will use)
---
## Set the cluster
The "cluster" section holds the API server endpoint.
.lab[
- Set the API server endpoint:
```bash
kubectl config set-cluster polykube --server=https://localhost:6443
```
- Don't verify the API server certificate:
```bash
kubectl config set-cluster polykube --insecure-skip-tls-verify
```
]
---
## Set the credentials
The "credentials" section can hold a TLS key and certificate, or a token, or configuration information for a plugin (for instance, when using AWS EKS or GCP GKE, they use a plugin).
.lab[
- Set the client key and certificate:
```bash
kubectl config set-credentials polykube \
--client-key ca.key \
--client-certificate ca.cert
```
]
---
## Set and use the context
The "context" section references the "cluster" and "credentials" that we defined earlier.
(It can also optionally reference a Namespace.)
.lab[
- Set the "context":
```bash
kubectl config set-context polykube --cluster polykube --user polykube
```
- Set that context to be the default context:
```bash
kubectl config use-context polykube
```
]
---
## Review the kubeconfig file
The kubeconfig file should look like this:
.small[
```yaml
apiVersion: v1
clusters:
- cluster:
insecure-skip-tls-verify: true
server: https://localhost:6443
name: polykube
contexts:
- context:
cluster: polykube
user: polykube
name: polykube
current-context: polykube
kind: Config
preferences: {}
users:
- name: polykube
user:
client-certificate: /root/ca.cert
client-key: /root/ca.key
```
]
---
## Trying the kubeconfig file
- We should now be able to access our cluster's API!
.lab[
- Try to list Namespaces:
```bash
kubectl get namespaces
```
]
This should show the classic `default`, `kube-system`, etc.
---
class: extra-details
## Do we need `--client-ca-file` ?
Technically, we didn't need to specify the `--client-ca-file` flag!
But without that flag, no client can be authenticated.
Which means that we wouldn't be able to issue any API request!
---
## Running pods
- We can now try to create a Deployment
.lab[
- Create a Deployment:
```bash
kubectl create deployment blue --image=jpetazzo/color
```
- Check the results:
```bash
kubectl get deployments,replicasets,pods
```
]
Our Deployment exists, but not the Replica Set or Pod.
We need to run the controller manager.
---
## Running the controller manager
- Previously, we used the `--master` flag to pass the API server address
- Now, we need to authenticate properly
- The simplest way at this point is probably to use the same kubeconfig file!
.lab[
- Start the controller manager:
```bash
kube-controller-manager --kubeconfig .kube/config
```
- Check the results:
```bash
kubectl get deployments,replicasets,pods
```
]
---
## What's next?
- Normally, the last commands showed us a Pod in `Pending` state
- We need two things to continue:
- the scheduler (to assign the Pod to a Node)
- a Node!
- We're going to run `kubelet` to register the Node with the cluster
---
## Running `kubelet`
- Let's try to run `kubelet` and see what happens!
.lab[
- Start `kubelet`:
```bash
sudo kubelet
```
]
We should see an error about connecting to `containerd.sock`.
We need to run a container engine!
(For instance, `containerd`.)
---
## Running `containerd`
- We need to install and start `containerd`
- You could try another engine if you wanted
(but there might be complications!)
.lab[
- Install `containerd`:
```bash
sudo apt-get install containerd
```
- Start `containerd`:
```bash
sudo containerd
```
]
---
class: extra-details
## Configuring `containerd`
Depending on how we install `containerd`, it might need a bit of extra configuration.
Watch for the following symptoms:
- `containerd` refuses to start
(rare, unless there is an *invalid* configuration)
- `containerd` starts but `kubelet` can't connect
(could be the case if the configuration disables the CRI socket)
- `containerd` starts and things work but Pods keep being killed
(may happen if there is a mismatch in the cgroups driver)
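
For that last symptom (cgroup driver mismatch), a common fix is to make `containerd` use the systemd cgroup driver; a sketch, assuming containerd 1.5 or later with the default runc runtime:

```bash
# Generate the default configuration file
containerd config default | sudo tee /etc/containerd/config.toml
# Enable the systemd cgroup driver (if the line isn't present, add
# "SystemdCgroup = true" under the runc options section)
sudo sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml
# Restart containerd, and make sure kubelet uses the same driver
# (e.g. with kubelet's --cgroup-driver=systemd)
```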
---
## Starting `kubelet` for good
- Now that `containerd` is running, `kubelet` should start!
.lab[
- Try to start `kubelet`:
```bash
sudo kubelet
```
- In another terminal, check if our Node is now visible:
```bash
sudo kubectl get nodes
```
]
`kubelet` should now start, but our Node doesn't show up in `kubectl get nodes`!
This is because without a kubeconfig file, `kubelet` runs in standalone mode:
<br/>
it will not connect to a Kubernetes API server, and will only start *static pods*.
---
## Passing the kubeconfig file
- Let's start `kubelet` again, with our kubeconfig file
.lab[
- Stop `kubelet` (e.g. with `Ctrl-C`)
- Restart it with the kubeconfig file:
```bash
sudo kubelet --kubeconfig .kube/config
```
- Check our list of Nodes:
```bash
kubectl get nodes
```
]
This time, our Node should show up!
---
## Node readiness
- However, our Node shows up as `NotReady`
- If we wait a few minutes, the `kubelet` logs will tell us why:
*we're missing a CNI configuration!*
- As a result, the containers can't be connected to the network
- `kubelet` detects that and doesn't become `Ready` until this is fixed
---
## CNI configuration
- We need to provide a CNI configuration
- This is a file in `/etc/cni/net.d`
(the name of the file doesn't matter; the first file in lexicographic order will be used)
- Usually, when installing a "CNI plugin¹", this file gets installed automatically
- Here, we are going to write that file manually
.footnote[¹Technically, a *pod network*; typically running as a DaemonSet, which will install the file with a `hostPath` volume.]
---
## Our CNI configuration
Create the following file in e.g. `/etc/cni/net.d/kube.conf`:
```json
{
"cniVersion": "0.3.1",
"name": "kube",
"type": "bridge",
"bridge": "cni0",
"isDefaultGateway": true,
"ipMasq": true,
"hairpinMode": true,
"ipam": {
"type": "host-local",
"subnet": "10.1.1.0/24"
}
}
```
That's all we need - `kubelet` will detect and validate the file automatically!
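
If in doubt, we can also check that the plugins referenced by this configuration are present (the path below is the usual default, but it can be changed in the kubelet/containerd configuration):

```bash
# The "bridge" and "host-local" plugins used above should be listed here
ls /opt/cni/bin
```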
---
## Checking our Node again
- After a short time (typically about 10 seconds), the Node should be `Ready`
.lab[
- Wait until the Node is `Ready`:
```bash
kubectl get nodes
```
]
If the Node doesn't show up as `Ready`, check the `kubelet` logs.
---
## What's next?
- At this point, we have a `Pending` Pod and a `Ready` Node
- All we need is the scheduler to bind the former to the latter
.lab[
- Run the scheduler:
```bash
kube-scheduler --kubeconfig .kube/config
```
- Check that the Pod gets assigned to the Node and becomes `Running`:
```bash
kubectl get pods
```
]
---
## Check network access
- Let's check that we can connect to our Pod, and that the Pod can connect outside
.lab[
- Get the Pod's IP address:
```bash
kubectl get pods -o wide
```
- Connect to the Pod (make sure to update the IP address):
```bash
curl `10.1.1.2`
```
- Check that the Pod has external connectivity too:
```bash
kubectl exec `blue-xxxxxxxxxx-yyyyy` -- ping -c3 1.1
```
]
---
## Expose our Deployment
- We can now try to expose the Deployment and connect to the ClusterIP
.lab[
- Expose the Deployment:
```bash
kubectl expose deployment blue --port=80
```
- Retrieve the ClusterIP:
```bash
kubectl get services
```
- Try to connect to the ClusterIP:
```bash
curl `10.0.0.42`
```
]
At this point, it won't work - we need to run `kube-proxy`!
---
## Running `kube-proxy`
- We need to run `kube-proxy`
(also passing it our kubeconfig file)
.lab[
- Run `kube-proxy`:
```bash
sudo kube-proxy --kubeconfig .kube/config
```
- Try again to connect to the ClusterIP:
```bash
curl `10.0.0.42`
```
]
This time, it should work.
---
## What's next?
- Scale up the Deployment, and check that load balancing works properly (see the sketch below)
- Enable RBAC, and generate individual certificates for each controller
(check the [certificate paths][certpath] section in the Kubernetes documentation
for a detailed list of all the certificates and keys that are used by the
control plane, and which flags are used by which components to configure them!)
- Add more nodes to the cluster
*Feel free to try these if you want to get additional hands-on experience!*
[certpath]: https://kubernetes.io/docs/setup/best-practices/certificates/#certificate-paths
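
Here is a sketch for the first suggestion (replace the ClusterIP placeholder with the one shown by `kubectl get services`):

```bash
kubectl scale deployment blue --replicas=3
kubectl get pods -o wide
# Send a few requests to the ClusterIP of the "blue" Service;
# the replies should come from different Pods
for i in $(seq 10); do curl 10.0.0.42; done
```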
???
:EN:- Setting up control plane certificates
:EN:- Implementing a basic CNI configuration
:FR:- Mettre en place les certificats du plan de contrôle
:FR:- Réaliser une configuration CNI basique

View File

@@ -1,4 +1,4 @@
# Building our own cluster (easy)
# Building our own cluster
- Let's build our own cluster!
@@ -33,7 +33,10 @@
## Our environment
- We will use the machine indicated as `monokube1`
- We will use the machine indicated as `dmuc1`
(this stands for "Dessine Moi Un Cluster" or "Draw Me A Sheep",
<br/>in homage to Saint-Exupery's "The Little Prince")
- This machine:
@@ -45,33 +48,13 @@
---
## The fine print
- We're going to use a *very old* version of Kubernetes
(specifically, 1.19)
- Why?
- It's much easier to set up than recent versions
- it's compatible with Docker (no need to set up CNI)
- it doesn't require a ServiceAccount keypair
- it can be exposed over plain HTTP (insecure but easier)
- We'll do that, and later, move to recent versions of Kubernetes!
---
## Checking our environment
- Let's make sure we have everything we need first
.lab[
- Log into the `monokube1` machine
- Log into the `dmuc1` machine
- Get root:
```bash
@@ -545,38 +528,7 @@ clusters:
]
If it works: great!
If it complains about a "cgroup driver", check the next slide.
---
## Cgroup drivers
- Cgroups ("control groups") are a Linux kernel feature
- They're used to account and limit resources
(e.g.: memory, CPU, block I/O...)
- There are multiple ways to manipulate cgroups, including:
- through a pseudo-filesystem (typically mounted in /sys/fs/cgroup)
- through systemd
- Kubelet and the container engine need to agree on which method to use
---
## Setting the cgroup driver
- If kubelet refused to start, mentioning a cgroup driver issue, try:
```bash
kubelet --kubeconfig ~/.kube/config --cgroup-driver=systemd
```
- That *should* do the trick!
Success!
---
@@ -595,7 +547,7 @@ If it complains about a "cgroup driver", check the next slide.
Our node should show up.
Its name will be its hostname (it should be `monokube1`).
Its name will be its hostname (it should be `dmuc1`).
---

View File

@@ -339,12 +339,34 @@ class: extra-details
---
## Service catalog
- *Service catalog* is another extension mechanism
- It's not extending the Kubernetes API strictly speaking
(but it still provides new features!)
- It doesn't create new types; it uses:
- ClusterServiceBroker
- ClusterServiceClass
- ClusterServicePlan
- ServiceInstance
- ServiceBinding
- It uses the Open service broker API
---
## Documentation
- [Custom Resource Definitions: when to use them](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/)
- [Custom Resources Definitions: how to use them](https://kubernetes.io/docs/tasks/access-kubernetes-api/custom-resources/custom-resource-definitions/)
- [Service Catalog](https://kubernetes.io/docs/concepts/extend-kubernetes/service-catalog/)
- [Built-in Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/admission-controllers/)
- [Dynamic Admission Controllers](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/)

View File

@@ -100,7 +100,7 @@ class: extra-details
- We present 3 methods to obtain a certificate
- We suggest that you use method 1 (self-signed certificate)
- We suggest that we use method 1 (self-signed certificate)
- it's the simplest and fastest method

View File

@@ -109,7 +109,7 @@ class: extra-details
- Install Go
(on our VMs: `sudo snap install go --classic` or `sudo apk add go`)
(on our VMs: `sudo snap install go --classic`)
- Install kubebuilder
@@ -250,7 +250,7 @@ spec:
## Loading an object
Open `internal/controllers/machine_controller.go`.
Open `controllers/machine_controller.go`.
Add that code in the `Reconcile` method, at the `TODO(user)` location:
@@ -505,7 +505,7 @@ if machine.Spec.SwitchPosition != "down" {
changeAt := machine.Status.SeenAt.Time.Add(5 * time.Second)
if now.Time.After(changeAt) {
machine.Spec.SwitchPosition = "down"
machine.Status.SeenAt = nil
machine.Status.SeenAt = nil
if err := r.Update(ctx, &machine); err != nil {
logger.Info("error updating switch position")
return ctrl.Result{}, client.IgnoreNotFound(err)
@@ -629,17 +629,17 @@ Note: this time, only create a new custom resource; not a new controller.
- We can retrieve associated switches like this:
```go
var switches uselessv1alpha1.SwitchList
var switches uselessv1alpha1.SwitchList
if err := r.List(ctx, &switches,
client.InNamespace(req.Namespace),
client.MatchingLabels{"machine": req.Name},
); err != nil {
logger.Error(err, "unable to list switches of the machine")
return ctrl.Result{}, client.IgnoreNotFound(err)
}
if err := r.List(ctx, &switches,
client.InNamespace(req.Namespace),
client.MatchingLabels{"machine": req.Name},
); err != nil {
logger.Error(err, "unable to list switches of the machine")
return ctrl.Result{}, client.IgnoreNotFound(err)
}
logger.Info("Found switches", "switches", switches)
logger.Info("Found switches", "switches", switches)
```
---
@@ -649,13 +649,13 @@ Note: this time, only create a new custom resource; not a new controller.
- Each time we reconcile a Machine, let's update its status:
```go
status := ""
for _, sw := range switches.Items {
status += string(sw.Spec.Position[0])
}
machine.Status.Positions = status
if err := r.Status().Update(ctx, &machine); err != nil {
...
status := ""
for _, sw := range switches.Items {
status += string(sw.Spec.Position[0])
}
machine.Status.Positions = status
if err := r.Status().Update(ctx, &machine); err != nil {
...
```
- Run the controller and check that POSITIONS gets updated
@@ -721,7 +721,7 @@ if err := r.Create(ctx, &sw); err != nil { ...
Define the following helper function:
```go
func (r *MachineReconciler) machineOfSwitch(ctx context.Context, obj client.Object) []ctrl.Request {
func (r *MachineReconciler) machineOfSwitch(obj client.Object) []ctrl.Request {
return []ctrl.Request{
ctrl.Request{
NamespacedName: types.NamespacedName{
@@ -746,7 +746,7 @@ func (r *MachineReconciler) SetupWithManager(mgr ctrl.Manager) error {
For(&uselessv1alpha1.Machine{}).
Owns(&uselessv1alpha1.Switch{}).
Watches(
&uselessv1alpha1.Switch{},
&source.Kind{Type: &uselessv1alpha1.Switch{}},
handler.EnqueueRequestsFromMapFunc(r.machineOfSwitch),
).
Complete(r)

View File

@@ -228,7 +228,7 @@ General workflow:
3. `kustomize edit add patch` to add patches to said resources
4. `kustomize edit add ...` or `kustomize edit set ...` (many options!)
4. `kustomized edit add ...` or `kustomize edit set ...` (many options!)
5. `kustomize build | kubectl apply -f-` or `kubectl apply -k .`
@@ -244,7 +244,7 @@ General workflow:
(just add `--help` after any command to see possible options!)
- Make sure to install the completion and try e.g. `kustomize edit add [TAB][TAB]`
- Make sure to install the completion and try e.g. `kustomize eidt add [TAB][TAB]`
---

View File

@@ -536,12 +536,12 @@ Note: the `apiVersion` field appears to be optional.
- Excerpt:
```yaml
generate:
kind: LimitRange
name: default-limitrange
namespace: "{{request.object.metadata.name}}"
data:
spec:
limits:
kind: LimitRange
name: default-limitrange
namespace: "{{request.object.metadata.name}}"
data:
spec:
limits:
```
- Note that we have to specify the `namespace`

View File

@@ -429,11 +429,11 @@ troubleshoot easily, without having to poke holes in our firewall.
- The API documentation has a lot of detail about the format of various objects: <!-- ##VERSION## -->
- [NetworkPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicy-v1-networking-k8s-io)
- [NetworkPolicy](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicy-v1-networking-k8s-io)
- [NetworkPolicySpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicyspec-v1-networking-k8s-io)
- [NetworkPolicySpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicyspec-v1-networking-k8s-io)
- [NetworkPolicyIngressRule](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.28/#networkpolicyingressrule-v1-networking-k8s-io)
- [NetworkPolicyIngressRule](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#networkpolicyingressrule-v1-networking-k8s-io)
- etc.

View File

@@ -114,7 +114,7 @@
- plugins (compiled in API server; enabled/disabled by reconfiguration)
- webhooks (registered dynamically)
- webhooks (registesred dynamically)
- Admission control has many other uses

View File

@@ -1,4 +1,4 @@
## Pre-requirements
# Pre-requirements
- Kubernetes concepts

View File

@@ -6,53 +6,11 @@
- We can specify *limits* and/or *requests*
- We can specify quantities of CPU and/or memory and/or ephemeral storage
- We can specify quantities of CPU and/or memory
---
## Requests vs limits
- *Requests* are *guaranteed reservations* of resources
- They are used for scheduling purposes
- Kubelet will use cgroups to e.g. guarantee a minimum amount of CPU time
- A container **can** use more than its requested resources
- A container using *less* than what it requested should never be killed or throttled
- A node **cannot** be overcommitted with requests
(the sum of all requests **cannot** be higher than resources available on the node)
- A small amount of resources is set aside for system components
(this explains why there is a difference between "capacity" and "allocatable")
---
## Requests vs limits
- *Limits* are "hard limits" (a container **cannot** exceed its limits)
- They aren't taken into account by the scheduler
- A container exceeding its memory limit is killed instantly
(by the kernel out-of-memory killer)
- A container exceeding its CPU limit is throttled
- A container exceeding its disk limit is killed
(usually with a small delay, since this is checked periodically by kubelet)
- On a given node, the sum of all limits **can** be higher than the node size
---
## Compressible vs incompressible resources
## CPU vs memory
- CPU is a *compressible resource*
@@ -66,29 +24,7 @@
- if we have N GB RAM and need 2N, we might run at... 0.1% speed!
- Disk is also an *incompressible resource*
- when the disk is full, writes will fail
- applications may or may not crash but persistent apps will be in trouble
---
## Running low on CPU
- Two ways for a container to "run low" on CPU:
- it's hitting its CPU limit
- all CPUs on the node are at 100% utilization
- The app in the container will run slower
(compared to running without a limit, or if CPU cycles were available)
- No other consequence
(but this could affect SLA/SLO for latency-sensitive applications!)
- As a result, exceeding limits will have different consequences for CPU and memory
---
@@ -200,7 +136,9 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
## Running low on memory
- When the kernel runs low on memory, it starts to reclaim used memory
- When the system runs low on memory, it starts to reclaim used memory
(we talk about "memory pressure")
- Option 1: free up some buffers and caches
@@ -224,91 +162,71 @@ For more details, check [this blog post](https://erickhun.com/posts/kubernetes-f
- If a container exceeds its memory *limit*, it gets killed immediately
- If a node memory usage gets too high, it will *evict* some pods
- If a node is overcommitted and under memory pressure, it will terminate some pods
(we say that the node is "under pressure", more on that in a bit!)
(see next slide for some details about what "overcommit" means here!)
[KEP 2400]: https://github.com/kubernetes/enhancements/blob/master/keps/sig-node/2400-node-swap/README.md#implementation-history
---
## Running low on disk
## Overcommitting resources
- When the kubelet runs low on disk, it starts to reclaim disk space
- *Limits* are "hard limits" (a container *cannot* exceed its limits)
(similarly to what the kernel does, but in different categories)
- a container exceeding its memory limit is killed
- Option 1: garbage collect dead pods and containers
- a container exceeding its CPU limit is throttled
(no consequence, but their logs will be deleted)
- On a given node, the sum of pod *limits* can be higher than the node size
- Option 2: remove unused images
- *Requests* are used for scheduling purposes
(no consequence, but these images will have to be repulled if we need them later)
- a container can use more than its requested CPU or RAM amounts
- Option 3: evict pods and remove them to reclaim their disk usage
- a container using *less* than what it requested should never be killed or throttled
- Note: this only applies to *ephemeral storage*, not to e.g. Persistent Volumes!
- On a given node, the sum of pod *requests* cannot be higher than the node size
---
## Ephemeral storage?
## Pod quality of service
- This includes:
Each pod is assigned a QoS class (visible in `status.qosClass`).
- the *read-write layer* of the container
<br/>
(any file creation/modification outside of its volumes)
- If limits = requests:
- `emptyDir` volumes mounted in the container
- as long as the container uses less than the limit, it won't be affected
- the container logs stored on the node
- if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
- This does not include:
- If requests &lt; limits:
- the container image
- as long as the container uses less than the request, it won't be affected
- other types of volumes (e.g. Persistent Volumes, `hostPath`, or `local` volumes)
- otherwise, it might be killed/evicted if the node gets overloaded
- if at least one container has *(requests&lt;limits)*, QoS is considered "Burstable"
- If a pod doesn't have any request nor limit, QoS is considered "BestEffort"
---
class: extra-details
## Quality of service impact
## Disk limit enforcement
- When a node is overloaded, BestEffort pods are killed first
- Disk usage is periodically measured by kubelet
- Then, Burstable pods that exceed their requests
(with something equivalent to `du`)
- Burstable and Guaranteed pods below their requests are never killed
- There can be a small delay before pod termination when disk limit is exceeded
(except if their node fails)
- It's also possible to enable filesystem *project quotas*
- If we only use Guaranteed pods, no pod should ever be killed
(e.g. with EXT4 or XFS)
(as long as they stay within their limits)
- Remember that container logs are also accounted for!
(container log rotation/retention is managed by kubelet)
---
class: extra-details
## `nodefs` and `imagefs`
- `nodefs` is the main filesystem of the node
(holding, notably, `emptyDir` volumes and container logs)
- Optionally, the container engine can be configured to use an `imagefs`
- `imagefs` will store container images and container writable layers
- When there is a separate `imagefs`, its disk usage is tracked independently
- If `imagefs` usage gets too high, kubelet will remove old images first
(conversely, if `nodefs` usage gets too high, kubelet won't remove old images)
(Pod QoS is also explained in [this page](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/) of the Kubernetes documentation and in [this blog post](https://medium.com/google-cloud/quality-of-service-class-qos-in-kubernetes-bb76a89eb2c6).)
---
@@ -386,46 +304,6 @@ class: extra-details
---
## Pod quality of service
Each pod is assigned a QoS class (visible in `status.qosClass`).
- If limits = requests:
- as long as the container uses less than the limit, it won't be affected
- if all containers in a pod have *(limits=requests)*, QoS is considered "Guaranteed"
- If requests &lt; limits:
- as long as the container uses less than the request, it won't be affected
- otherwise, it might be killed/evicted if the node gets overloaded
- if at least one container has *(requests&lt;limits)*, QoS is considered "Burstable"
- If a pod doesn't have any request nor limit, QoS is considered "BestEffort"
---
## Quality of service impact
- When a node is overloaded, BestEffort pods are killed first
- Then, Burstable pods that exceed their requests
- Burstable and Guaranteed pods below their requests are never killed
(except if their node fails)
- If we only use Guaranteed pods, no pod should ever be killed
(as long as they stay within their limits)
(Pod QoS is also explained in [this page](https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/) of the Kubernetes documentation and in [this blog post](https://medium.com/google-cloud/quality-of-service-class-qos-in-kubernetes-bb76a89eb2c6).)
---
## Specifying resources
- Resource requests are expressed at the *container* level
@@ -438,9 +316,9 @@ Each pod is assigned a QoS class (visible in `status.qosClass`).
(so 100m = 0.1)
- Memory and ephemeral disk storage are expressed in bytes
- Memory is expressed in bytes
- These can have k, M, G, T, ki, Mi, Gi, Ti suffixes
- Memory can be expressed with k, M, G, T, ki, Mi, Gi, Ti suffixes
(corresponding to 10^3, 10^6, 10^9, 10^12, 2^10, 2^20, 2^30, 2^40)
@@ -456,13 +334,11 @@ containers:
image: jpetazzo/color
resources:
limits:
memory: "100Mi"
cpu: "100m"
ephemeral-storage: 10M
memory: "100Mi"
requests:
cpu: "10m"
ephemeral-storage: 10M
memory: "100Mi"
cpu: "10m"
```
This set of resources makes sure that this service won't be killed (as long as it stays below 100 MB of RAM), but allows its CPU usage to be throttled if necessary.
@@ -489,7 +365,7 @@ This set of resources makes sure that this service won't be killed (as long as i
---
## We need to specify resource values
## We need default resource values
- If we do not set resource values at all:
@@ -503,33 +379,9 @@ This set of resources makes sure that this service won't be killed (as long as i
- if the request is zero, the scheduler can't make a smart placement decision
- This is fine when learning/testing, absolutely not in production!
- To address this, we can set default values for resources
---
## How should we set resources?
- Option 1: manually, for each container
- simple, effective, but tedious
- Option 2: automatically, with the [Vertical Pod Autoscaler (VPA)][vpa]
- relatively simple, very minimal involvement beyond initial setup
- not compatible with HPAv1, can disrupt long-running workloads (see [limitations][vpa-limitations])
- Option 3: semi-automatically, with tools like [Robusta KRR][robusta]
- good compromise between manual work and automation
- Option 4: by creating LimitRanges in our Namespaces
- relatively simple, but "one-size-fits-all" approach might not always work
[robusta]: https://github.com/robusta-dev/krr
[vpa]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
[vpa-limitations]: https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler#known-limitations
- This is done with a LimitRange object
---
@@ -784,7 +636,7 @@ class: extra-details
- ResourceQuota per namespace
- Let's see one possible strategy to get started with resource limits
- Let's see a simple recommendation to get started with resource limits
---

View File

@@ -166,15 +166,17 @@
- [Kubernetes The Hard Way](https://github.com/kelseyhightower/kubernetes-the-hard-way) by Kelsey Hightower
*step by step guide to install Kubernetes on GCP, with certificates, HA...*
- step by step guide to install Kubernetes on Google Cloud
- covers certificates, high availability ...
- *“Kubernetes The Hard Way is optimized for learning, which means taking the long route to ensure you understand each task required to bootstrap a Kubernetes cluster.”*
- [Deep Dive into Kubernetes Internals for Builders and Operators](https://www.youtube.com/watch?v=3KtEAa7_duA)
*conference talk setting up a simplified Kubernetes cluster - no security or HA*
- conference presentation showing step-by-step control plane setup
- 🇫🇷[Démystifions les composants internes de Kubernetes](https://www.youtube.com/watch?v=OCMNA0dSAzc)
*improved version of the previous one, with certs and recent k8s versions*
- emphasis on simplicity, not on security and availability
---

View File

@@ -32,14 +32,12 @@ content:
- k8s/architecture.md
#- k8s/internal-apis.md
- k8s/deploymentslideshow.md
- k8s/dmuc-easy.md
- k8s/dmuc.md
-
- k8s/dmuc-medium.md
- k8s/dmuc-hard.md
#- k8s/multinode.md
#- k8s/cni.md
- k8s/multinode.md
- k8s/cni.md
- k8s/cni-internals.md
#- k8s/interco.md
- k8s/interco.md
-
- k8s/apilb.md
#- k8s/setup-overview.md

View File

@@ -32,13 +32,11 @@ content:
- k8s/architecture.md
- k8s/internal-apis.md
- k8s/deploymentslideshow.md
- k8s/dmuc-easy.md
- - k8s/dmuc-medium.md
- k8s/dmuc-hard.md
#- k8s/multinode.md
#- k8s/cni.md
- k8s/dmuc.md
- - k8s/multinode.md
- k8s/cni.md
- k8s/cni-internals.md
#- k8s/interco.md
- k8s/interco.md
- - k8s/apilb.md
- k8s/setup-overview.md
#- k8s/setup-devel.md
@@ -67,7 +65,6 @@ content:
- - k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/disruptions.md
- k8s/horizontal-pod-autoscaler.md
- - k8s/prometheus.md
#- k8s/prometheus-stack.md

View File

@@ -30,15 +30,13 @@ content:
- k8s/architecture.md
- k8s/internal-apis.md
- k8s/deploymentslideshow.md
- k8s/dmuc-easy.md
- k8s/dmuc.md
- #2
- k8s/dmuc-medium.md
- k8s/dmuc-hard.md
#- k8s/multinode.md
#- k8s/cni.md
#- k8s/interco.md
- k8s/cni-internals.md
- k8s/multinode.md
- k8s/cni.md
- k8s/interco.md
- #3
- k8s/cni-internals.md
- k8s/apilb.md
- k8s/control-plane-auth.md
- |

View File

@@ -131,7 +131,6 @@ content:
- k8s/resource-limits.md
- k8s/metrics-server.md
- k8s/cluster-sizing.md
- k8s/disruptions.md
- k8s/cluster-autoscaler.md
- k8s/horizontal-pod-autoscaler.md
- k8s/hpa-v2.md
@@ -152,11 +151,9 @@ content:
- k8s/owners-and-dependents.md
- k8s/events.md
-
- k8s/dmuc-easy.md
- k8s/dmuc-medium.md
- k8s/dmuc-hard.md
#- k8s/multinode.md
#- k8s/cni.md
- k8s/dmuc.md
- k8s/multinode.md
- k8s/cni.md
- k8s/cni-internals.md
- k8s/apilb.md
- k8s/staticpods.md

View File

@@ -61,11 +61,12 @@
(sauf le dernier jour)
- Lundi: 14h00-15h00
- Mardi: 15h-16h
- Mardi: 14h30-15h30
- Mercredi: 16h-17h
- Jeudi: 17h-18h
- Mercredi: 15h00-16h00
- Sur [Jitsi][jitsi] (lien "visioconf" sur le portail de formation)
@@ -73,4 +74,4 @@
[qdnd]: https://www.youtube.com/channel/UCOAhkxpryr_BKybt9wIw-NQ
[ndeloof]: https://github.com/ndeloof
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerJanvier2024
[jitsi]: https://training.enix.io/jitsi-magic/jitsi.container.training/AlloDockerMai2023

View File

@@ -1019,7 +1019,7 @@ class: prom-manual
--mount type=bind,source=/,target=/rootfs \
prom/node-exporter \
--path.procfs /host/proc \
--path.sysfs /host/sys \
--path.sysfs /host/proc \
--collector.filesystem.ignored-mount-points "^/(sys|proc|dev|host|etc)($|/)"
```

View File

@@ -15,7 +15,6 @@ h1, h2, h3, h4, h5, h6 {
font-weight: bold;
font-size: 45px !important;
margin-top: 0.5em;
margin-bottom: 0.75em;
}
code {