mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-03-02 01:10:20 +00:00
Compare commits
1 Commits
2022-01-lu
...
2021-12-k8
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9bad3f26f3 |
@@ -3,12 +3,6 @@
|
||||
# - no actual persistence
|
||||
# - scaling down to 1 will break the cluster
|
||||
# - pods may be colocated
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
@@ -34,6 +28,11 @@ subjects:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: consul
|
||||
@@ -62,7 +61,7 @@ spec:
|
||||
serviceAccountName: consul
|
||||
containers:
|
||||
- name: consul
|
||||
image: "consul:1.11"
|
||||
image: "consul:1.8"
|
||||
env:
|
||||
- name: NAMESPACE
|
||||
valueFrom:
|
||||
|
||||
@@ -2,12 +2,6 @@
|
||||
# There is still no actual persistence, but:
|
||||
# - podAntiaffinity prevents pod colocation
|
||||
# - clusters works when scaling down to 1 (thanks to lifecycle hook)
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
@@ -33,6 +27,11 @@ subjects:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: consul
|
||||
@@ -69,7 +68,7 @@ spec:
|
||||
terminationGracePeriodSeconds: 10
|
||||
containers:
|
||||
- name: consul
|
||||
image: "consul:1.11"
|
||||
image: "consul:1.8"
|
||||
env:
|
||||
- name: NAMESPACE
|
||||
valueFrom:
|
||||
|
||||
@@ -1,11 +1,5 @@
|
||||
# Even better Consul cluster.
|
||||
# That one uses a volumeClaimTemplate to achieve true persistence.
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
@@ -31,6 +25,11 @@ subjects:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: consul
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: consul
|
||||
@@ -76,7 +75,7 @@ spec:
|
||||
terminationGracePeriodSeconds: 10
|
||||
containers:
|
||||
- name: consul
|
||||
image: "consul:1.11"
|
||||
image: "consul:1.8"
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /consul/data
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
apiVersion: kyverno.io/v1
|
||||
kind: ClusterPolicy
|
||||
metadata:
|
||||
name: ingress-domain-name
|
||||
spec:
|
||||
rules:
|
||||
- name: create-ingress
|
||||
match:
|
||||
resources:
|
||||
kinds:
|
||||
- Service
|
||||
generate:
|
||||
kind: Ingress
|
||||
name: "{{request.object.metadata.name}}"
|
||||
namespace: "{{request.object.metadata.namespace}}"
|
||||
data:
|
||||
spec:
|
||||
rules:
|
||||
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
service:
|
||||
name: "{{request.object.metadata.name}}"
|
||||
port:
|
||||
number: 80
|
||||
path: /
|
||||
pathType: Prefix
|
||||
@@ -1,32 +0,0 @@
|
||||
apiVersion: kyverno.io/v1
|
||||
kind: ClusterPolicy
|
||||
metadata:
|
||||
name: ingress-domain-name
|
||||
spec:
|
||||
rules:
|
||||
- name: create-ingress
|
||||
match:
|
||||
resources:
|
||||
kinds:
|
||||
- Service
|
||||
preconditions:
|
||||
- key: "{{request.object.spec.ports[0].name}}"
|
||||
operator: Equals
|
||||
value: http
|
||||
generate:
|
||||
kind: Ingress
|
||||
name: "{{request.object.metadata.name}}"
|
||||
namespace: "{{request.object.metadata.namespace}}"
|
||||
data:
|
||||
spec:
|
||||
rules:
|
||||
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.A.B.C.D.nip.io"
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
service:
|
||||
name: "{{request.object.metadata.name}}"
|
||||
port:
|
||||
name: http
|
||||
path: /
|
||||
pathType: Prefix
|
||||
@@ -1,37 +0,0 @@
|
||||
apiVersion: kyverno.io/v1
|
||||
kind: ClusterPolicy
|
||||
metadata:
|
||||
name: ingress-domain-name
|
||||
spec:
|
||||
rules:
|
||||
- name: create-ingress
|
||||
context:
|
||||
- name: configmap
|
||||
configMap:
|
||||
name: ingress-domain-name
|
||||
namespace: "{{request.object.metadata.namespace}}"
|
||||
match:
|
||||
resources:
|
||||
kinds:
|
||||
- Service
|
||||
preconditions:
|
||||
- key: "{{request.object.spec.ports[0].name}}"
|
||||
operator: Equals
|
||||
value: http
|
||||
generate:
|
||||
kind: Ingress
|
||||
name: "{{request.object.metadata.name}}"
|
||||
namespace: "{{request.object.metadata.namespace}}"
|
||||
data:
|
||||
spec:
|
||||
rules:
|
||||
- host: "{{request.object.metadata.name}}.{{request.object.metadata.namespace}}.{{configmap.data.domain}}"
|
||||
http:
|
||||
paths:
|
||||
- backend:
|
||||
service:
|
||||
name: "{{request.object.metadata.name}}"
|
||||
port:
|
||||
name: http
|
||||
path: /
|
||||
pathType: Prefix
|
||||
@@ -17,12 +17,12 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: blue
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: blue
|
||||
spec:
|
||||
containers:
|
||||
@@ -33,7 +33,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: blue
|
||||
name: color
|
||||
namespace: blue
|
||||
@@ -44,7 +44,7 @@ spec:
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: blue
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -66,12 +66,12 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: green
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: green
|
||||
spec:
|
||||
containers:
|
||||
@@ -82,7 +82,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: green
|
||||
name: color
|
||||
namespace: green
|
||||
@@ -93,7 +93,7 @@ spec:
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: green
|
||||
type: ClusterIP
|
||||
---
|
||||
@@ -115,12 +115,12 @@ metadata:
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: red
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: red
|
||||
spec:
|
||||
containers:
|
||||
@@ -131,7 +131,7 @@ apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: red
|
||||
name: color
|
||||
namespace: red
|
||||
@@ -142,6 +142,6 @@ spec:
|
||||
protocol: TCP
|
||||
targetPort: 80
|
||||
selector:
|
||||
app: rainbow
|
||||
app: color
|
||||
color: red
|
||||
type: ClusterIP
|
||||
|
||||
@@ -1,107 +1,17 @@
|
||||
⚠️ This is work in progress. The UX needs to be improved,
|
||||
and the docs could be better.
|
||||
|
||||
This directory contains a Terraform configuration to deploy
|
||||
a bunch of Kubernetes clusters on various cloud providers,
|
||||
using their respective managed Kubernetes products.
|
||||
a bunch of Kubernetes clusters on various cloud providers, using their respective managed Kubernetes products.
|
||||
|
||||
## With shell wrapper
|
||||
|
||||
This is the recommended use. It makes it easy to start N clusters
|
||||
on any provider. It will create a directory with a name like
|
||||
`tag-YYYY-MM-DD-HH-MM-SS-SEED-PROVIDER`, copy the Terraform configuration
|
||||
to that directory, then create the clusters using that configuration.
|
||||
|
||||
1. One-time setup: configure provider authentication for the provider(s) that you wish to use.
|
||||
|
||||
- Digital Ocean:
|
||||
```bash
|
||||
doctl auth init
|
||||
```
|
||||
|
||||
- Google Cloud Platform: you will need to create a project named `prepare-tf`
|
||||
and enable the relevant APIs for this project (sorry, if you're new to GCP,
|
||||
this sounds vague; but if you're familiar with it you know what to do; if you
|
||||
want to change the project name you can edit the Terraform configuration)
|
||||
|
||||
- Linode:
|
||||
```bash
|
||||
linode-cli configure
|
||||
```
|
||||
|
||||
- Oracle Cloud: FIXME
|
||||
(set up `oci` through the `oci-cli` Python package)
|
||||
|
||||
- Scaleway: run `scw init`
|
||||
|
||||
2. Optional: set number of clusters, cluster size, and region.
|
||||
|
||||
By default, 1 cluster will be configured, with 2 nodes, and auto-scaling up to 5 nodes.
|
||||
|
||||
If you want, you can override these parameters, with the following variables.
|
||||
|
||||
```bash
|
||||
export TF_VAR_how_many_clusters=5
|
||||
export TF_VAR_min_nodes_per_pool=2
|
||||
export TF_VAR_max_nodes_per_pool=4
|
||||
export TF_VAR_location=xxx
|
||||
```
|
||||
|
||||
The `location` variable is optional. Each provider should have a default value.
|
||||
The value of the `location` variable is provider-specific. Examples:
|
||||
|
||||
| Provider | Example value | How to see possible values
|
||||
|---------------|-------------------|---------------------------
|
||||
| Digital Ocean | `ams3` | `doctl compute region list`
|
||||
| Google Cloud | `europe-north1-a` | `gcloud compute zones list`
|
||||
| Linode | `eu-central` | `linode-cli regions list`
|
||||
| Oracle Cloud | `eu-stockholm-1` | `oci iam region list`
|
||||
|
||||
You can also specify multiple locations, and then they will be
|
||||
used in round-robin fashion.
|
||||
|
||||
For example, with Google Cloud, since the default quotas are very
|
||||
low (my account is limited to 8 public IP addresses per zone, and
|
||||
my requests to increase that quota were denied) you can do the
|
||||
following:
|
||||
|
||||
```bash
|
||||
export TF_VAR_location=$(gcloud compute zones list --format=json | jq -r .[].name | grep ^europe)
|
||||
```
|
||||
|
||||
Then when you apply, clusters will be created across all available
|
||||
zones in Europe. (When I write this, there are 20+ zones in Europe,
|
||||
so even with my quota, I can create 40 clusters.)
|
||||
|
||||
3. Run!
|
||||
|
||||
```bash
|
||||
./run.sh <providername>
|
||||
```
|
||||
|
||||
(If you don't specify a provider name, it will list available providers.)
|
||||
|
||||
4. Shutting down
|
||||
|
||||
Go to the directory that was created by the previous step (`tag-YYYY-MM...`)
|
||||
and run `terraform destroy`.
|
||||
|
||||
You can also run `./clean.sh` which will destroy ALL clusters deployed by the previous run script.
|
||||
|
||||
## Without shell wrapper
|
||||
|
||||
Expert mode.
|
||||
|
||||
Useful to run steps sperarately, and/or when working on the Terraform configurations.
|
||||
To use it:
|
||||
|
||||
1. Select the provider you wish to use.
|
||||
|
||||
Go to the `source` directory and edit `main.tf`.
|
||||
|
||||
Change the `source` attribute of the `module "clusters"` section.
|
||||
|
||||
Check the content of the `modules` directory to see available choices.
|
||||
|
||||
```bash
|
||||
vim main.tf
|
||||
```
|
||||
|
||||
2. Initialize the provider.
|
||||
|
||||
```bash
|
||||
@@ -110,20 +20,24 @@ terraform init
|
||||
|
||||
3. Configure provider authentication.
|
||||
|
||||
See steps above, and add the following extra steps:
|
||||
|
||||
- Digital Coean:
|
||||
```bash
|
||||
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
|
||||
```
|
||||
|
||||
- Linode:
|
||||
```bash
|
||||
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
|
||||
```
|
||||
- Digital Ocean: `export DIGITALOCEAN_ACCESS_TOKEN=...`
|
||||
(check `~/.config/doctl/config.yaml` for the token)
|
||||
- Linode: `export LINODE_TOKEN=...`
|
||||
(check `~/.config/linode-cli` for the token)
|
||||
- Oracle Cloud: it should use `~/.oci/config`
|
||||
- Scaleway: run `scw init`
|
||||
|
||||
4. Decide how many clusters and how many nodes per clusters you want.
|
||||
|
||||
```bash
|
||||
export TF_VAR_how_many_clusters=5
|
||||
export TF_VAR_min_nodes_per_pool=2
|
||||
# Optional (will enable autoscaler when available)
|
||||
export TF_VAR_max_nodes_per_pool=4
|
||||
# Optional (will only work on some providers)
|
||||
export TF_VAR_enable_arm_pool=true
|
||||
```
|
||||
|
||||
5. Provision clusters.
|
||||
|
||||
```bash
|
||||
@@ -132,7 +46,7 @@ terraform apply
|
||||
|
||||
6. Perform second stage provisioning.
|
||||
|
||||
This will install an SSH server on the clusters.
|
||||
This will install a SSH server on the clusters.
|
||||
|
||||
```bash
|
||||
cd stage2
|
||||
@@ -158,5 +72,5 @@ terraform destroy
|
||||
9. Clean up stage2.
|
||||
|
||||
```bash
|
||||
rm stage2/terraform.tfstate*
|
||||
rm stage/terraform.tfstate*
|
||||
```
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/sh
|
||||
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
|
||||
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
|
||||
for T in tag-*; do
|
||||
(
|
||||
cd $T
|
||||
terraform apply -destroy -auto-approve && mv ../$T ../deleted$T
|
||||
)
|
||||
done
|
||||
16
prepare-tf/locals.tf
Normal file
16
prepare-tf/locals.tf
Normal file
@@ -0,0 +1,16 @@
|
||||
resource "random_string" "_" {
|
||||
length = 5
|
||||
special = false
|
||||
upper = false
|
||||
}
|
||||
|
||||
resource "time_static" "_" {}
|
||||
|
||||
locals {
|
||||
tag = format("tf-%s-%s", formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339), random_string._.result)
|
||||
# Common tags to be assigned to all resources
|
||||
common_tags = [
|
||||
"created-by=terraform",
|
||||
"tag=${local.tag}"
|
||||
]
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
module "clusters" {
|
||||
source = "./modules/PROVIDER"
|
||||
source = "./modules/linode"
|
||||
for_each = local.clusters
|
||||
cluster_name = each.value.cluster_name
|
||||
min_nodes_per_pool = var.min_nodes_per_pool
|
||||
@@ -7,24 +7,22 @@ module "clusters" {
|
||||
enable_arm_pool = var.enable_arm_pool
|
||||
node_size = var.node_size
|
||||
common_tags = local.common_tags
|
||||
location = each.value.location
|
||||
}
|
||||
|
||||
locals {
|
||||
clusters = {
|
||||
for i in range(101, 101 + var.how_many_clusters) :
|
||||
i => {
|
||||
cluster_name = format("%s-%03d", local.tag, i)
|
||||
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
|
||||
cluster_name = format("%s-%03d", local.tag, i)
|
||||
kubeconfig_path = format("./stage2/kubeconfig.%03d", i)
|
||||
#dashdash_kubeconfig = format("--kubeconfig=./stage2/kubeconfig.%03d", i)
|
||||
externalips_path = format("./stage2/externalips.%03d", i)
|
||||
flags_path = format("./stage2/flags.%03d", i)
|
||||
location = local.locations[i % length(local.locations)]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "local_file" "stage2" {
|
||||
filename = "./stage2/main.tf"
|
||||
filename = "./stage2/main.tf"
|
||||
file_permission = "0644"
|
||||
content = templatefile(
|
||||
"./stage2.tmpl",
|
||||
@@ -32,15 +30,6 @@ resource "local_file" "stage2" {
|
||||
)
|
||||
}
|
||||
|
||||
resource "local_file" "flags" {
|
||||
for_each = local.clusters
|
||||
filename = each.value.flags_path
|
||||
file_permission = "0600"
|
||||
content = <<-EOT
|
||||
has_metrics_server: ${module.clusters[each.key].has_metrics_server}
|
||||
EOT
|
||||
}
|
||||
|
||||
resource "local_file" "kubeconfig" {
|
||||
for_each = local.clusters
|
||||
filename = each.value.kubeconfig_path
|
||||
@@ -70,8 +59,8 @@ resource "null_resource" "wait_for_nodes" {
|
||||
}
|
||||
|
||||
data "external" "externalips" {
|
||||
for_each = local.clusters
|
||||
depends_on = [null_resource.wait_for_nodes]
|
||||
for_each = local.clusters
|
||||
depends_on = [ null_resource.wait_for_nodes ]
|
||||
program = [
|
||||
"sh",
|
||||
"-c",
|
||||
@@ -1,13 +1,12 @@
|
||||
resource "digitalocean_kubernetes_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
tags = var.common_tags
|
||||
# Region is mandatory, so let's provide a default value.
|
||||
region = var.location != null ? var.location : "nyc1"
|
||||
name = var.cluster_name
|
||||
tags = local.common_tags
|
||||
region = var.region
|
||||
version = var.k8s_version
|
||||
|
||||
node_pool {
|
||||
name = "x86"
|
||||
tags = var.common_tags
|
||||
name = "dok-x86"
|
||||
tags = local.common_tags
|
||||
size = local.node_type
|
||||
auto_scale = true
|
||||
min_nodes = var.min_nodes_per_pool
|
||||
@@ -5,7 +5,3 @@ output "kubeconfig" {
|
||||
output "cluster_id" {
|
||||
value = digitalocean_kubernetes_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = false
|
||||
}
|
||||
@@ -8,6 +8,10 @@ variable "common_tags" {
|
||||
default = []
|
||||
}
|
||||
|
||||
locals {
|
||||
common_tags = [for tag in var.common_tags : replace(tag, "=", "-")]
|
||||
}
|
||||
|
||||
variable "node_size" {
|
||||
type = string
|
||||
default = "M"
|
||||
@@ -44,9 +48,9 @@ locals {
|
||||
|
||||
# To view supported regions, run:
|
||||
# doctl compute region list
|
||||
variable "location" {
|
||||
variable "region" {
|
||||
type = string
|
||||
default = null
|
||||
default = "nyc1"
|
||||
}
|
||||
|
||||
# To view supported versions, run:
|
||||
@@ -1,8 +1,7 @@
|
||||
resource "linode_lke_cluster" "_" {
|
||||
label = var.cluster_name
|
||||
tags = var.common_tags
|
||||
# "region" is mandatory, so let's provide a default value if none was given.
|
||||
region = var.location != null ? var.location : "eu-central"
|
||||
label = var.cluster_name
|
||||
tags = var.common_tags
|
||||
region = var.region
|
||||
k8s_version = var.k8s_version
|
||||
|
||||
pool {
|
||||
@@ -5,7 +5,3 @@ output "kubeconfig" {
|
||||
output "cluster_id" {
|
||||
value = linode_lke_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = false
|
||||
}
|
||||
@@ -42,11 +42,11 @@ locals {
|
||||
node_type = var.node_types[var.node_size]
|
||||
}
|
||||
|
||||
# To view supported regions, run:
|
||||
# To view supported versions, run:
|
||||
# linode-cli regions list
|
||||
variable "location" {
|
||||
variable "region" {
|
||||
type = string
|
||||
default = null
|
||||
default = "us-east"
|
||||
}
|
||||
|
||||
# To view supported versions, run:
|
||||
@@ -1,7 +1,6 @@
|
||||
resource "oci_identity_compartment" "_" {
|
||||
name = var.cluster_name
|
||||
description = var.cluster_name
|
||||
enable_delete = true
|
||||
name = var.cluster_name
|
||||
description = var.cluster_name
|
||||
}
|
||||
|
||||
locals {
|
||||
@@ -9,7 +9,3 @@ output "kubeconfig" {
|
||||
output "cluster_id" {
|
||||
value = oci_containerengine_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = false
|
||||
}
|
||||
@@ -70,13 +70,6 @@ locals {
|
||||
node_type = var.node_types[var.node_size]
|
||||
}
|
||||
|
||||
# To view supported regions, run:
|
||||
# oci iam region list | jq .data[].name
|
||||
variable "location" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# To view supported versions, run:
|
||||
# oci ce cluster-options get --cluster-option-id all | jq -r '.data["kubernetes-versions"][]'
|
||||
variable "k8s_version" {
|
||||
@@ -1,6 +1,5 @@
|
||||
resource "scaleway_k8s_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
region = var.location
|
||||
tags = var.common_tags
|
||||
version = var.k8s_version
|
||||
cni = var.cni
|
||||
@@ -9,7 +8,7 @@ resource "scaleway_k8s_cluster" "_" {
|
||||
|
||||
resource "scaleway_k8s_pool" "_" {
|
||||
cluster_id = scaleway_k8s_cluster._.id
|
||||
name = "x86"
|
||||
name = "scw-x86"
|
||||
tags = var.common_tags
|
||||
node_type = local.node_type
|
||||
size = var.min_nodes_per_pool
|
||||
@@ -5,7 +5,3 @@ output "kubeconfig" {
|
||||
output "cluster_id" {
|
||||
value = scaleway_k8s_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = sort([var.k8s_version, "1.22"])[0] == "1.22"
|
||||
}
|
||||
@@ -47,12 +47,7 @@ variable "cni" {
|
||||
default = "cilium"
|
||||
}
|
||||
|
||||
variable "location" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# To view supported versions, run:
|
||||
# See supported versions with:
|
||||
# scw k8s version list -o json | jq -r .[].name
|
||||
variable "k8s_version" {
|
||||
type = string
|
||||
@@ -1,49 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
TIME=$(which time)
|
||||
|
||||
PROVIDER=$1
|
||||
[ "$PROVIDER" ] || {
|
||||
echo "Please specify a provider as first argument, or 'ALL' for parallel mode."
|
||||
echo "Available providers:"
|
||||
ls -1 source/modules
|
||||
exit 1
|
||||
}
|
||||
|
||||
[ "$TAG" ] || {
|
||||
TIMESTAMP=$(date +%Y-%m-%d-%H-%M-%S)
|
||||
RANDOMTAG=$(base64 /dev/urandom | tr A-Z a-z | tr -d /+ | head -c5)
|
||||
export TAG=tag-$TIMESTAMP-$RANDOMTAG
|
||||
}
|
||||
|
||||
[ "$PROVIDER" = "ALL" ] && {
|
||||
for PROVIDER in $(ls -1 source/modules); do
|
||||
$TERMINAL -T $TAG-$PROVIDER -e sh -c "
|
||||
export TAG=$TAG-$PROVIDER
|
||||
$0 $PROVIDER
|
||||
cd $TAG-$PROVIDER
|
||||
bash
|
||||
" &
|
||||
done
|
||||
exit 0
|
||||
}
|
||||
|
||||
[ -d "source/modules/$PROVIDER" ] || {
|
||||
echo "Provider '$PROVIDER' not found."
|
||||
echo "Available providers:"
|
||||
ls -1 source/modules
|
||||
exit 1
|
||||
}
|
||||
|
||||
export LINODE_TOKEN=$(grep ^token ~/.config/linode-cli | cut -d= -f2 | tr -d " ")
|
||||
export DIGITALOCEAN_ACCESS_TOKEN=$(grep ^access-token ~/.config/doctl/config.yaml | cut -d: -f2 | tr -d " ")
|
||||
|
||||
cp -a source $TAG
|
||||
cd $TAG
|
||||
cp -r modules/$PROVIDER modules/PROVIDER
|
||||
$TIME -o time.1.init terraform init
|
||||
$TIME -o time.2.stage1 terraform apply -auto-approve
|
||||
cd stage2
|
||||
$TIME -o ../time.3.init terraform init
|
||||
$TIME -o ../time.4.stage2 terraform apply -auto-approve
|
||||
@@ -1,19 +0,0 @@
|
||||
resource "random_string" "_" {
|
||||
length = 4
|
||||
number = false
|
||||
special = false
|
||||
upper = false
|
||||
}
|
||||
|
||||
resource "time_static" "_" {}
|
||||
|
||||
locals {
|
||||
timestamp = formatdate("YYYY-MM-DD-hh-mm", time_static._.rfc3339)
|
||||
tag = random_string._.result
|
||||
# Common tags to be assigned to all resources
|
||||
common_tags = [
|
||||
"created-by-terraform",
|
||||
format("created-at-%s", local.timestamp),
|
||||
format("created-for-%s", local.tag)
|
||||
]
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
resource "google_container_cluster" "_" {
|
||||
name = var.cluster_name
|
||||
project = local.project
|
||||
location = local.location
|
||||
min_master_version = var.k8s_version
|
||||
|
||||
# To deploy private clusters, uncomment the section below,
|
||||
# and uncomment the block in network.tf.
|
||||
# Private clusters require extra resources (Cloud NAT,
|
||||
# router, network, subnet) and the quota for some of these
|
||||
# resources is fairly low on GCP; so if you want to deploy
|
||||
# a lot of private clusters (more than 10), you can use these
|
||||
# blocks as a base but you will probably have to refactor
|
||||
# things quite a bit (you will at least need to define a single
|
||||
# shared router and use it across all the clusters).
|
||||
/*
|
||||
network = google_compute_network._.name
|
||||
subnetwork = google_compute_subnetwork._.name
|
||||
|
||||
private_cluster_config {
|
||||
enable_private_nodes = true
|
||||
# This must be set to "false".
|
||||
# (Otherwise, access to the public endpoint is disabled.)
|
||||
enable_private_endpoint = false
|
||||
# This must be set to a /28.
|
||||
# I think it shouldn't collide with the pod network subnet.
|
||||
master_ipv4_cidr_block = "10.255.255.0/28"
|
||||
}
|
||||
# Private clusters require "VPC_NATIVE" networking mode
|
||||
# (as opposed to the legacy "ROUTES").
|
||||
networking_mode = "VPC_NATIVE"
|
||||
# ip_allocation_policy is required for VPC_NATIVE clusters.
|
||||
ip_allocation_policy {
|
||||
# This is the block that will be used for pods.
|
||||
cluster_ipv4_cidr_block = "10.0.0.0/12"
|
||||
# The services block is optional
|
||||
# (GKE will pick one automatically).
|
||||
#services_ipv4_cidr_block = ""
|
||||
}
|
||||
*/
|
||||
|
||||
node_pool {
|
||||
name = "x86"
|
||||
node_config {
|
||||
tags = var.common_tags
|
||||
machine_type = local.node_type
|
||||
}
|
||||
initial_node_count = var.min_nodes_per_pool
|
||||
autoscaling {
|
||||
min_node_count = var.min_nodes_per_pool
|
||||
max_node_count = max(var.min_nodes_per_pool, var.max_nodes_per_pool)
|
||||
}
|
||||
}
|
||||
|
||||
# This is not strictly necessary.
|
||||
# We'll see if we end up using it.
|
||||
# (If it is removed, make sure to also remove the corresponding
|
||||
# key+cert variables from outputs.tf!)
|
||||
master_auth {
|
||||
client_certificate_config {
|
||||
issue_client_certificate = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
resource "google_compute_network" "_" {
|
||||
name = var.cluster_name
|
||||
project = local.project
|
||||
# The default is to create subnets automatically.
|
||||
# However, this creates one subnet per zone in all regions,
|
||||
# which causes a quick exhaustion of the subnet quota.
|
||||
auto_create_subnetworks = false
|
||||
}
|
||||
|
||||
resource "google_compute_subnetwork" "_" {
|
||||
name = var.cluster_name
|
||||
ip_cidr_range = "10.254.0.0/16"
|
||||
region = local.region
|
||||
network = google_compute_network._.id
|
||||
project = local.project
|
||||
}
|
||||
|
||||
resource "google_compute_router" "_" {
|
||||
name = var.cluster_name
|
||||
region = local.region
|
||||
network = google_compute_network._.name
|
||||
project = local.project
|
||||
}
|
||||
|
||||
resource "google_compute_router_nat" "_" {
|
||||
name = var.cluster_name
|
||||
router = google_compute_router._.name
|
||||
region = local.region
|
||||
project = local.project
|
||||
# Everyone in the network is allowed to NAT out.
|
||||
# (We would change this if we only wanted to allow specific subnets to NAT out.)
|
||||
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
|
||||
# Pick NAT addresses automatically.
|
||||
# (We would change this if we wanted to use specific addresses to NAT out.)
|
||||
nat_ip_allocate_option = "AUTO_ONLY"
|
||||
}
|
||||
*/
|
||||
@@ -1,35 +0,0 @@
|
||||
data "google_client_config" "_" {}
|
||||
|
||||
output "kubeconfig" {
|
||||
value = <<-EOT
|
||||
apiVersion: v1
|
||||
kind: Config
|
||||
current-context: ${google_container_cluster._.name}
|
||||
clusters:
|
||||
- name: ${google_container_cluster._.name}
|
||||
cluster:
|
||||
server: https://${google_container_cluster._.endpoint}
|
||||
certificate-authority-data: ${google_container_cluster._.master_auth[0].cluster_ca_certificate}
|
||||
contexts:
|
||||
- name: ${google_container_cluster._.name}
|
||||
context:
|
||||
cluster: ${google_container_cluster._.name}
|
||||
user: client-token
|
||||
users:
|
||||
- name: client-cert
|
||||
user:
|
||||
client-key-data: ${google_container_cluster._.master_auth[0].client_key}
|
||||
client-certificate-data: ${google_container_cluster._.master_auth[0].client_certificate}
|
||||
- name: client-token
|
||||
user:
|
||||
token: ${data.google_client_config._.access_token}
|
||||
EOT
|
||||
}
|
||||
|
||||
output "cluster_id" {
|
||||
value = google_container_cluster._.id
|
||||
}
|
||||
|
||||
output "has_metrics_server" {
|
||||
value = true
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
terraform {
|
||||
required_providers {
|
||||
google = {
|
||||
source = "hashicorp/google"
|
||||
version = "4.5.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
variable "cluster_name" {
|
||||
type = string
|
||||
default = "deployed-with-terraform"
|
||||
}
|
||||
|
||||
variable "common_tags" {
|
||||
type = list(string)
|
||||
default = []
|
||||
}
|
||||
|
||||
variable "node_size" {
|
||||
type = string
|
||||
default = "M"
|
||||
}
|
||||
|
||||
variable "min_nodes_per_pool" {
|
||||
type = number
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "max_nodes_per_pool" {
|
||||
type = number
|
||||
default = 5
|
||||
}
|
||||
|
||||
# FIXME
|
||||
variable "enable_arm_pool" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "node_types" {
|
||||
type = map(string)
|
||||
default = {
|
||||
"S" = "e2-small"
|
||||
"M" = "e2-medium"
|
||||
"L" = "e2-standard-2"
|
||||
}
|
||||
}
|
||||
|
||||
locals {
|
||||
node_type = var.node_types[var.node_size]
|
||||
}
|
||||
|
||||
# To view supported locations, run:
|
||||
# gcloud compute zones list
|
||||
variable "location" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# To view supported versions, run:
|
||||
# gcloud container get-server-config --region=europe-north1 '--format=flattened(channels)'
|
||||
# But it's also possible to just specify e.g. "1.20" and it figures it out.
|
||||
variable "k8s_version" {
|
||||
type = string
|
||||
default = "1.21"
|
||||
}
|
||||
|
||||
locals {
|
||||
location = var.location != null ? var.location : "europe-north1-a"
|
||||
region = replace(local.location, "/-[a-z]$/", "")
|
||||
# Unfortunately, the following line doesn't work
|
||||
# (that attribute just returns an empty string)
|
||||
# so we have to hard-code the project name.
|
||||
#project = data.google_client_config._.project
|
||||
project = "prepare-tf"
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
variable "how_many_clusters" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "node_size" {
|
||||
type = string
|
||||
default = "M"
|
||||
# Can be S, M, L.
|
||||
# We map these values to different specific instance types for each provider,
|
||||
# but the idea is that they shoudl correspond to the following sizes:
|
||||
# S = 2 GB RAM
|
||||
# M = 4 GB RAM
|
||||
# L = 8 GB RAM
|
||||
}
|
||||
|
||||
variable "min_nodes_per_pool" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "max_nodes_per_pool" {
|
||||
type = number
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "enable_arm_pool" {
|
||||
type = bool
|
||||
default = false
|
||||
}
|
||||
|
||||
variable "location" {
|
||||
type = string
|
||||
default = null
|
||||
}
|
||||
|
||||
# TODO: perhaps handle if it's space-separated instead of newline?
|
||||
locals {
|
||||
locations = var.location == null ? [null] : split("\n", var.location)
|
||||
}
|
||||
@@ -2,7 +2,7 @@ terraform {
|
||||
required_providers {
|
||||
kubernetes = {
|
||||
source = "hashicorp/kubernetes"
|
||||
version = "2.7.1"
|
||||
version = "2.0.3"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -119,11 +119,6 @@ resource "kubernetes_cluster_role_binding" "shpod_${index}" {
|
||||
name = "shpod"
|
||||
namespace = "shpod"
|
||||
}
|
||||
subject {
|
||||
api_group = "rbac.authorization.k8s.io"
|
||||
kind = "Group"
|
||||
name = "shpod-cluster-admins"
|
||||
}
|
||||
}
|
||||
|
||||
resource "random_string" "shpod_${index}" {
|
||||
@@ -140,10 +135,6 @@ provider "helm" {
|
||||
}
|
||||
|
||||
resource "helm_release" "metrics_server_${index}" {
|
||||
# Some providers pre-install metrics-server.
|
||||
# Some don't. Let's install metrics-server,
|
||||
# but only if it's not already installed.
|
||||
count = yamldecode(file("./flags.${index}"))["has_metrics_server"] ? 0 : 1
|
||||
provider = helm.cluster_${index}
|
||||
repository = "https://charts.bitnami.com/bitnami"
|
||||
chart = "metrics-server"
|
||||
@@ -191,7 +182,7 @@ resource "kubernetes_config_map" "kubeconfig_${index}" {
|
||||
- name: cluster-admin
|
||||
user:
|
||||
client-key-data: $${base64encode(tls_private_key.cluster_admin_${index}.private_key_pem)}
|
||||
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request_v1.cluster_admin_${index}.certificate)}
|
||||
client-certificate-data: $${base64encode(kubernetes_certificate_signing_request.cluster_admin_${index}.certificate)}
|
||||
EOT
|
||||
}
|
||||
}
|
||||
@@ -205,14 +196,11 @@ resource "tls_cert_request" "cluster_admin_${index}" {
|
||||
private_key_pem = tls_private_key.cluster_admin_${index}.private_key_pem
|
||||
subject {
|
||||
common_name = "cluster-admin"
|
||||
# Note: CSR API v1 doesn't allow issuing certs with "system:masters" anymore.
|
||||
#organization = "system:masters"
|
||||
# We'll use this custom group name instead.cluster-admin user.
|
||||
organization = "shpod-cluster-admins"
|
||||
organization = "system:masters"
|
||||
}
|
||||
}
|
||||
|
||||
resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
|
||||
resource "kubernetes_certificate_signing_request" "cluster_admin_${index}" {
|
||||
provider = kubernetes.cluster_${index}
|
||||
metadata {
|
||||
name = "cluster-admin"
|
||||
@@ -220,7 +208,6 @@ resource "kubernetes_certificate_signing_request_v1" "cluster_admin_${index}" {
|
||||
spec {
|
||||
usages = ["client auth"]
|
||||
request = tls_cert_request.cluster_admin_${index}.cert_request_pem
|
||||
signer_name = "kubernetes.io/kube-apiserver-client"
|
||||
}
|
||||
auto_approve = true
|
||||
}
|
||||
28
prepare-tf/variables.tf
Normal file
28
prepare-tf/variables.tf
Normal file
@@ -0,0 +1,28 @@
|
||||
variable "how_many_clusters" {
|
||||
type = number
|
||||
default = 2
|
||||
}
|
||||
|
||||
variable "node_size" {
|
||||
type = string
|
||||
default = "M"
|
||||
# Can be S, M, L.
|
||||
# S = 2 GB RAM
|
||||
# M = 4 GB RAM
|
||||
# L = 8 GB RAM
|
||||
}
|
||||
|
||||
variable "min_nodes_per_pool" {
|
||||
type = number
|
||||
default = 1
|
||||
}
|
||||
|
||||
variable "max_nodes_per_pool" {
|
||||
type = number
|
||||
default = 0
|
||||
}
|
||||
|
||||
variable "enable_arm_pool" {
|
||||
type = bool
|
||||
default = true
|
||||
}
|
||||
@@ -14,9 +14,7 @@ These tools can help you to create VMs on:
|
||||
|
||||
- [Docker](https://docs.docker.com/engine/installation/)
|
||||
- [Docker Compose](https://docs.docker.com/compose/install/)
|
||||
- [Parallel SSH](https://github.com/lilydjwg/pssh)
|
||||
(should be installable with `pip install git+https://github.com/lilydjwg/pssh`;
|
||||
on a Mac, try `brew install pssh`)
|
||||
- [Parallel SSH](https://code.google.com/archive/p/parallel-ssh/) (on a Mac: `brew install pssh`)
|
||||
|
||||
Depending on the infrastructure that you want to use, you also need to install
|
||||
the CLI that is specific to that cloud. For OpenStack deployments, you will
|
||||
|
||||
@@ -314,12 +314,11 @@ _cmd_kube() {
|
||||
SETTINGS=tags/$TAG/settings.yaml
|
||||
KUBEVERSION=$(awk '/^kubernetes_version:/ {print $2}' $SETTINGS)
|
||||
if [ "$KUBEVERSION" ]; then
|
||||
pssh "
|
||||
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
||||
Package: kubectl kubeadm kubelet
|
||||
Pin: version $KUBEVERSION*
|
||||
Pin-Priority: 1000
|
||||
EOF"
|
||||
EXTRA_APTGET="=$KUBEVERSION-00"
|
||||
EXTRA_KUBEADM="kubernetesVersion: v$KUBEVERSION"
|
||||
else
|
||||
EXTRA_APTGET=""
|
||||
EXTRA_KUBEADM=""
|
||||
fi
|
||||
|
||||
# Install packages
|
||||
@@ -330,8 +329,7 @@ EOF"
|
||||
sudo tee /etc/apt/sources.list.d/kubernetes.list"
|
||||
pssh --timeout 200 "
|
||||
sudo apt-get update -q &&
|
||||
sudo apt-get install -qy kubelet kubeadm kubectl &&
|
||||
sudo apt-mark hold kubelet kubeadm kubectl
|
||||
sudo apt-get install -qy kubelet$EXTRA_APTGET kubeadm$EXTRA_APTGET kubectl$EXTRA_APTGET &&
|
||||
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
|
||||
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
|
||||
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
|
||||
@@ -343,11 +341,6 @@ EOF"
|
||||
sudo swapoff -a"
|
||||
fi
|
||||
|
||||
# Re-enable CRI interface in containerd
|
||||
pssh "
|
||||
echo '# Use default parameters for containerd.' | sudo tee /etc/containerd/config.toml
|
||||
sudo systemctl restart containerd"
|
||||
|
||||
# Initialize kube control plane
|
||||
pssh --timeout 200 "
|
||||
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
|
||||
@@ -357,38 +350,19 @@ kind: InitConfiguration
|
||||
apiVersion: kubeadm.k8s.io/v1beta2
|
||||
bootstrapTokens:
|
||||
- token: \$(cat /tmp/token)
|
||||
nodeRegistration:
|
||||
# Comment out the next line to switch back to Docker.
|
||||
criSocket: /run/containerd/containerd.sock
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
---
|
||||
kind: JoinConfiguration
|
||||
apiVersion: kubeadm.k8s.io/v1beta2
|
||||
discovery:
|
||||
bootstrapToken:
|
||||
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
|
||||
token: \$(cat /tmp/token)
|
||||
unsafeSkipCAVerification: true
|
||||
nodeRegistration:
|
||||
# Comment out the next line to switch back to Docker.
|
||||
criSocket: /run/containerd/containerd.sock
|
||||
ignorePreflightErrors:
|
||||
- NumCPU
|
||||
---
|
||||
kind: KubeletConfiguration
|
||||
apiVersion: kubelet.config.k8s.io/v1beta1
|
||||
# The following line is necessary when using Docker.
|
||||
# It doesn't seem necessary when using containerd.
|
||||
#cgroupDriver: cgroupfs
|
||||
cgroupDriver: cgroupfs
|
||||
---
|
||||
kind: ClusterConfiguration
|
||||
apiVersion: kubeadm.k8s.io/v1beta2
|
||||
apiServer:
|
||||
certSANs:
|
||||
- \$(cat /tmp/ipv4)
|
||||
$EXTRA_KUBEADM
|
||||
EOF
|
||||
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
|
||||
sudo kubeadm init --config=/tmp/kubeadm-config.yaml --ignore-preflight-errors=NumCPU
|
||||
fi"
|
||||
|
||||
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
|
||||
@@ -412,8 +386,8 @@ EOF
|
||||
pssh --timeout 200 "
|
||||
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
|
||||
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
|
||||
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
|
||||
sudo kubeadm join --config /tmp/kubeadm-config.yaml
|
||||
TOKEN=\$(ssh $SSHOPTS \$FIRSTNODE cat /tmp/token) &&
|
||||
sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
|
||||
fi"
|
||||
|
||||
# Install metrics server
|
||||
@@ -504,7 +478,7 @@ EOF
|
||||
if [ ! -x /usr/local/bin/kustomize ]; then
|
||||
curl -fsSL $URL |
|
||||
sudo tar -C /usr/local/bin -zx kustomize
|
||||
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
|
||||
echo complete -C /usr/local/bin/kustomize kustomize | sudo tee /etc/bash_completion.d/kustomize
|
||||
kustomize version
|
||||
fi"
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
#!/bin/sh
|
||||
|
||||
# https://open-api.netlify.com/#tag/dnsZone
|
||||
[ "$1" ] || {
|
||||
echo ""
|
||||
echo "Add a record in Netlify DNS."
|
||||
echo "This script is hardcoded to add a record to container.training".
|
||||
echo ""
|
||||
echo "Syntax:"
|
||||
echo "$0 list"
|
||||
echo "$0 add <name> <ipaddr>"
|
||||
echo "$0 del <recordid>"
|
||||
echo "$0 <name> <ipaddr>"
|
||||
echo ""
|
||||
echo "Example to create a A record for eu.container.training:"
|
||||
echo "$0 add eu 185.145.250.0"
|
||||
echo "$0 eu 185.145.250.0"
|
||||
echo ""
|
||||
exit 1
|
||||
}
|
||||
|
||||
NAME=$1.container.training
|
||||
ADDR=$2
|
||||
|
||||
NETLIFY_USERID=$(jq .userId < ~/.config/netlify/config.json)
|
||||
NETLIFY_TOKEN=$(jq -r .users[$NETLIFY_USERID].auth.token < ~/.config/netlify/config.json)
|
||||
|
||||
@@ -29,54 +29,19 @@ netlify() {
|
||||
ZONE_ID=$(netlify dns_zones |
|
||||
jq -r '.[] | select ( .name == "container.training" ) | .id')
|
||||
|
||||
_list() {
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq -r '.[] | select(.type=="A") | [.hostname, .type, .value, .id] | @tsv'
|
||||
}
|
||||
# It looks like if we create two identical records, then delete one of them,
|
||||
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
|
||||
# though it's still visible through the API and the website?)
|
||||
|
||||
_add() {
|
||||
NAME=$1.container.training
|
||||
ADDR=$2
|
||||
if netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
|
||||
grep .
|
||||
then
|
||||
echo "It looks like that record already exists. Refusing to create it."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
|
||||
|
||||
# It looks like if we create two identical records, then delete one of them,
|
||||
# Netlify DNS ends up in a weird state (the name doesn't resolve anymore even
|
||||
# though it's still visible through the API and the website?)
|
||||
|
||||
if netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'" and .type=="A" and .value=="'$ADDR'")' |
|
||||
grep .
|
||||
then
|
||||
echo "It looks like that record already exists. Refusing to create it."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records type=A hostname=$NAME value=$ADDR ttl=300
|
||||
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'")'
|
||||
}
|
||||
|
||||
_del() {
|
||||
RECORD_ID=$1
|
||||
# OK, since that one is dangerous, I'm putting the whole request explicitly here
|
||||
http DELETE \
|
||||
https://api.netlify.com/api/v1/dns_zones/$ZONE_ID/dns_records/$RECORD_ID \
|
||||
"Authorization:Bearer $NETLIFY_TOKEN"
|
||||
}
|
||||
|
||||
case "$1" in
|
||||
list)
|
||||
_list
|
||||
;;
|
||||
add)
|
||||
_add $2 $3
|
||||
;;
|
||||
del)
|
||||
_del $2
|
||||
;;
|
||||
*)
|
||||
echo "Unknown command '$1'."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
netlify dns_zones/$ZONE_ID/dns_records |
|
||||
jq '.[] | select(.hostname=="'$NAME'")'
|
||||
|
||||
@@ -14,9 +14,7 @@ paper_size: A4
|
||||
user_login: k8s
|
||||
user_password: training
|
||||
|
||||
# For a list of old versions, check:
|
||||
# https://kubernetes.io/releases/patch-releases/#non-active-branch-history
|
||||
kubernetes_version: 1.18.20
|
||||
kubernetes_version: 1.19.16
|
||||
|
||||
image:
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
#/ /kube-halfday.yml.html 200!
|
||||
#/ /kube-fullday.yml.html 200!
|
||||
#/ /kube-twodays.yml.html 200!
|
||||
/ /kube.yml.html 200!
|
||||
/ /k8s.yml.html 200!
|
||||
|
||||
# And this allows to do "git clone https://container.training".
|
||||
/info/refs service=git-upload-pack https://github.com/jpetazzo/container.training/info/refs?service=git-upload-pack
|
||||
|
||||
@@ -109,7 +109,7 @@ class: extra-details
|
||||
|
||||
- Example: [ctr.run](https://ctr.run/)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Use ctr.run to automatically build a container image and run it:
|
||||
```bash
|
||||
|
||||
@@ -28,7 +28,7 @@ class: self-paced
|
||||
- Likewise, it will take more than merely *reading* these slides
|
||||
to make you an expert
|
||||
|
||||
- These slides include *tons* of demos, exercises, and examples
|
||||
- These slides include *tons* of exercises and examples
|
||||
|
||||
- They assume that you have access to a machine running Docker
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
(we will use the `rng` service in the dockercoins app)
|
||||
|
||||
- See what happens when the load increses
|
||||
- Observe the correct behavior of the readiness probe
|
||||
|
||||
(spoiler alert: it involves timeouts!)
|
||||
(when deploying e.g. an invalid image)
|
||||
|
||||
- Observe the behavior of the liveness probe
|
||||
|
||||
@@ -2,85 +2,34 @@
|
||||
|
||||
- We want to add healthchecks to the `rng` service in dockercoins
|
||||
|
||||
- The `rng` service exhibits an interesting behavior under load:
|
||||
|
||||
*its latency increases (which will cause probes to time out!)*
|
||||
|
||||
- We want to see:
|
||||
|
||||
- what happens when the readiness probe fails
|
||||
|
||||
- what happens when the liveness probe fails
|
||||
|
||||
- how to set "appropriate" probes and probe parameters
|
||||
|
||||
---
|
||||
|
||||
## Setup
|
||||
|
||||
- First, deploy a new copy of dockercoins
|
||||
|
||||
(for instance, in a brand new namespace)
|
||||
- Then, add a readiness probe on the `rng` service
|
||||
|
||||
- Pro tip #1: ping (e.g. with `httping`) the `rng` service at all times
|
||||
|
||||
- it should initially show a few milliseconds latency
|
||||
|
||||
- that will increase when we scale up
|
||||
|
||||
- it will also let us detect when the service goes "boom"
|
||||
|
||||
- Pro tip #2: also keep an eye on the web UI
|
||||
|
||||
---
|
||||
|
||||
## Readiness
|
||||
|
||||
- Add a readiness probe to `rng`
|
||||
|
||||
- this requires editing the pod template in the Deployment manifest
|
||||
|
||||
- use a simple HTTP check on the `/` route of the service
|
||||
|
||||
- keep all other parameters (timeouts, thresholds...) at their default values
|
||||
(using a simple HTTP check on the `/` route of the service)
|
||||
|
||||
- Check what happens when deploying an invalid image for `rng` (e.g. `alpine`)
|
||||
|
||||
*(If the probe was set up correctly, the app will continue to work,
|
||||
because Kubernetes won't switch over the traffic to the `alpine` containers,
|
||||
because they don't pass the readiness probe.)*
|
||||
- Then roll back `rng` to the original image and add a liveness probe
|
||||
|
||||
(with the same parameters)
|
||||
|
||||
- Scale up the `worker` service (to 15+ workers) and observe
|
||||
|
||||
- What happens?
|
||||
|
||||
---
|
||||
|
||||
## Readiness under load
|
||||
## Goal
|
||||
|
||||
- Then roll back `rng` to the original image
|
||||
- *Before* adding the readiness probe:
|
||||
|
||||
- Check what happens when we scale up the `worker` Deployment to 15+ workers
|
||||
updating the image of the `rng` service with `alpine` should break it
|
||||
|
||||
(get the latency above 1 second)
|
||||
- *After* adding the readiness probe:
|
||||
|
||||
*(We should now observe intermittent unavailability of the service, i.e. every
|
||||
30 seconds it will be unreachable for a bit, then come back, then go away again, etc.)*
|
||||
updating the image of the `rng` service with `alpine` shouldn't break it
|
||||
|
||||
---
|
||||
- When adding the liveness probe, nothing special should happen
|
||||
|
||||
## Liveness
|
||||
|
||||
- Now replace the readiness probe with a liveness probe
|
||||
|
||||
- What happens now?
|
||||
|
||||
*(At first the behavior looks the same as with the readiness probe:
|
||||
service becomes unreachable, then reachable again, etc.; but there is
|
||||
a significant difference behind the scenes. What is it?)*
|
||||
|
||||
---
|
||||
|
||||
## Readiness and liveness
|
||||
|
||||
- Bonus questions!
|
||||
|
||||
- What happens if we enable both probes at the same time?
|
||||
|
||||
- What strategies can we use so that both probes are useful?
|
||||
- Scaling the `worker` service will then cause disruptions
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
## Goal
|
||||
|
||||
- We want to be able to access the web app using a URL like:
|
||||
- We want to be able to access the web app using an URL like:
|
||||
|
||||
http://webapp.localdev.me
|
||||
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
⚠️ BROKEN EXERCISE - DO NOT USE
|
||||
|
||||
## Exercise — Ingress Secret Policy
|
||||
|
||||
*Implement policy to limit impact of ingress controller vulnerabilities.*
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
⚠️ BROKEN EXERCISE - DO NOT USE
|
||||
|
||||
# Exercise — Ingress Secret Policy
|
||||
|
||||
- Most ingress controllers have access to all Secrets
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
## Exercise — Generating Ingress With Kyverno
|
||||
|
||||
- When a Service gets created, automatically generate an Ingress
|
||||
|
||||
- Step 1: expose all services with a hard-coded domain name
|
||||
|
||||
- Step 2: only expose services that have a port named `http`
|
||||
|
||||
- Step 3: configure the domain name with a per-namespace ConfigMap
|
||||
@@ -1,33 +0,0 @@
|
||||
# Exercise — Generating Ingress With Kyverno
|
||||
|
||||
When a Service gets created...
|
||||
|
||||
*(for instance, Service `blue` in Namespace `rainbow`)*
|
||||
|
||||
...Automatically generate an Ingress.
|
||||
|
||||
*(for instance, with host name `blue.rainbow.MYDOMAIN.COM`)*
|
||||
|
||||
---
|
||||
|
||||
## Goals
|
||||
|
||||
- Step 1: expose all services with a hard-coded domain name
|
||||
|
||||
- Step 2: only expose services that have a port named `http`
|
||||
|
||||
- Step 3: configure the domain name with a per-namespace ConfigMap
|
||||
|
||||
(e.g. `kubectl create configmap ingress-domain-name --from-literal=domain=1.2.3.4.nip.io`)
|
||||
|
||||
---
|
||||
|
||||
## Hints
|
||||
|
||||
- We want to use a Kyverno `generate` ClusterPolicy
|
||||
|
||||
- For step 1, check [Generate Resources](https://kyverno.io/docs/writing-policies/generate/) documentation
|
||||
|
||||
- For step 2, check [Preconditions](https://kyverno.io/docs/writing-policies/preconditions/) documentation
|
||||
|
||||
- For step 3, check [External Data Sources](https://kyverno.io/docs/writing-policies/external-data-sources/) documentation
|
||||
@@ -1,9 +0,0 @@
|
||||
## Exercise — Terraform Node Pools
|
||||
|
||||
- Write a Terraform configuration to deploy a cluster
|
||||
|
||||
- The cluster should have two node pools with autoscaling
|
||||
|
||||
- Deploy two apps, each using exclusively one node pool
|
||||
|
||||
- Bonus: deploy an app balanced across both node pools
|
||||
@@ -1,69 +0,0 @@
|
||||
# Exercise — Terraform Node Pools
|
||||
|
||||
- Write a Terraform configuration to deploy a cluster
|
||||
|
||||
- The cluster should have two node pools with autoscaling
|
||||
|
||||
- Deploy two apps, each using exclusively one node pool
|
||||
|
||||
- Bonus: deploy an app balanced across both node pools
|
||||
|
||||
---
|
||||
|
||||
## Cluster deployment
|
||||
|
||||
- Write a Terraform configuration to deploy a cluster
|
||||
|
||||
- We want to have two node pools with autoscaling
|
||||
|
||||
- Example for sizing:
|
||||
|
||||
- 4 GB / 1 CPU per node
|
||||
|
||||
- pools of 1 to 4 nodes
|
||||
|
||||
---
|
||||
|
||||
## Cluster autoscaling
|
||||
|
||||
- Deploy an app on the cluster
|
||||
|
||||
(you can use `nginx`, `jpetazzo/color`...)
|
||||
|
||||
- Set a resource request (e.g. 1 GB RAM)
|
||||
|
||||
- Scale up and verify that the autoscaler kicks in
|
||||
|
||||
---
|
||||
|
||||
## Pool isolation
|
||||
|
||||
- We want to deploy two apps
|
||||
|
||||
- The first app should be deployed exclusively on the first pool
|
||||
|
||||
- The second app should be deployed exclusively on the second pool
|
||||
|
||||
- Check the next slide for hints!
|
||||
|
||||
---
|
||||
|
||||
## Hints
|
||||
|
||||
- One solution involves adding a `nodeSelector` to the pod templates
|
||||
|
||||
- Another solution involves adding:
|
||||
|
||||
- `taints` to the node pools
|
||||
|
||||
- matching `tolerations` to the pod templates
|
||||
|
||||
---
|
||||
|
||||
## Balancing
|
||||
|
||||
- Step 1: make sure that the pools are not balanced
|
||||
|
||||
- Step 2: deploy a new app, check that it goes to the emptiest pool
|
||||
|
||||
- Step 3: update the app so that it balances (as much as possible) between pools
|
||||
@@ -1,60 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
# The materials for a given training live in their own branch.
|
||||
# Sometimes, we write custom content (or simply new content) for a training,
|
||||
# and that content doesn't get merged back to main. This script tries to
|
||||
# detect that with the following heuristics:
|
||||
# - list all remote branches
|
||||
# - for each remote branch, list the changes that weren't merged into main
|
||||
# (using "diff main...$BRANCH", three dots)
|
||||
# - ignore a bunch of training-specific files that change all the time anyway
|
||||
# - for the remaining files, compute the diff between main and the branch
|
||||
# (using "diff main..$BRANCH", two dots)
|
||||
# - ignore changes of less than 10 lines
|
||||
# - also ignore a few red herrings
|
||||
# - display whatever is left
|
||||
|
||||
# For "git diff" (in the filter function) to work correctly, we must be
|
||||
# at the root of the repo.
|
||||
cd $(git rev-parse --show-toplevel)
|
||||
|
||||
BRANCHES=$(git branch -r | grep -v origin/HEAD | grep origin/2)
|
||||
|
||||
filter() {
|
||||
threshold=10
|
||||
while read filename; do
|
||||
case $filename in
|
||||
# Generic training-specific files
|
||||
slides/*.html) continue;;
|
||||
slides/*.yml) continue;;
|
||||
slides/logistics*.md) continue;;
|
||||
# Specific content that can be ignored
|
||||
#slides/containers/Local_Environment.md) threshold=100;;
|
||||
# Content that was moved/refactored enough to confuse us
|
||||
slides/containers/Local_Environment.md) threshold=100;;
|
||||
slides/exercises.md) continue;;
|
||||
slides/k8s/batch-jobs) threshold=20;;
|
||||
# Renames
|
||||
*/{*}*) continue;;
|
||||
esac
|
||||
git diff --find-renames --numstat main..$BRANCH -- "$filename" | {
|
||||
# If the files are identical, the diff will be empty, and "read" will fail.
|
||||
read plus minus filename || return
|
||||
# Ignore binary files (FIXME though?)
|
||||
if [ $plus = - ]; then
|
||||
return
|
||||
fi
|
||||
diff=$((plus-minus))
|
||||
if [ $diff -gt $threshold ]; then
|
||||
echo git diff main..$BRANCH -- $filename
|
||||
fi
|
||||
}
|
||||
done
|
||||
}
|
||||
|
||||
for BRANCH in $BRANCHES; do
|
||||
if FILES=$(git diff --find-renames --name-only main...$BRANCH | filter | grep .); then
|
||||
echo "🌳 $BRANCH:"
|
||||
echo "$FILES"
|
||||
fi
|
||||
done
|
||||
@@ -1,11 +1,13 @@
|
||||
title: |
|
||||
Kubernetes
|
||||
Kubernetes Training
|
||||
|
||||
chat: "[Chat room](https://lumen.container.training/mattermost)"
|
||||
#chat: "[Slack](https://dockercommunity.slack.com/messages/C7GKACWDV)"
|
||||
#chat: "[Gitter](https://gitter.im/jpetazzo/workshop-yyyymmdd-city)"
|
||||
chat: "[Mattermost](https://ardanlive.container.training/mattermost/)"
|
||||
|
||||
gitrepo: github.com/jpetazzo/container.training
|
||||
|
||||
slides: https://2022-01-lumen.container.training/
|
||||
slides: https://2021-12-k8s.container.training/
|
||||
|
||||
#slidenumberprefix: "#SomeHashTag — "
|
||||
|
||||
@@ -15,19 +17,25 @@ exclude:
|
||||
content:
|
||||
- shared/title.md
|
||||
- logistics.md
|
||||
- exercises/k8sfundamentals-brief.md
|
||||
- exercises/localcluster-brief.md
|
||||
- exercises/remotecluster-brief.md
|
||||
- exercises/healthchecks-brief.md
|
||||
- exercises/appconfig-brief.md
|
||||
- exercises/ingress-brief.md
|
||||
- k8s/intro.md
|
||||
- shared/about-slides.md
|
||||
- shared/chat-room-im.md
|
||||
#- shared/chat-room-slack.md
|
||||
#- shared/chat-room-zoom-meeting.md
|
||||
#- shared/chat-room-zoom-webinar.md
|
||||
- shared/toc.md
|
||||
-
|
||||
- # DAY 1
|
||||
- shared/prereqs.md
|
||||
#- shared/webssh.md
|
||||
- shared/connecting.md
|
||||
#- k8s/versions-k8s.md
|
||||
- shared/sampleapp.md
|
||||
#- shared/composescale.md
|
||||
#- shared/hastyconclusions.md
|
||||
- shared/composedown.md
|
||||
- k8s/concepts-k8s.md
|
||||
- k8s/kubectlget.md
|
||||
@@ -35,10 +43,9 @@ content:
|
||||
- k8s/kubenet.md
|
||||
- k8s/kubectlexpose.md
|
||||
- k8s/shippingimages.md
|
||||
#- k8s/buildshiprun-dockerhub.md
|
||||
- exercises/k8sfundamentals-details.md
|
||||
-
|
||||
- k8s/ourapponkube.md
|
||||
- # DAY 2
|
||||
- shared/declarative.md
|
||||
- k8s/declarative.md
|
||||
- k8s/deploymentslideshow.md
|
||||
@@ -48,50 +55,42 @@ content:
|
||||
- k8s/namespaces.md
|
||||
- k8s/yamldeploy.md
|
||||
- k8s/authoring-yaml.md
|
||||
- k8s/setup-overview.md
|
||||
- k8s/setup-devel.md
|
||||
- k8s/setup-managed.md
|
||||
#- k8s/setup-selfhosted.md
|
||||
- k8s/localkubeconfig.md
|
||||
- k8s/accessinternal.md
|
||||
#- k8s/kubectlproxy.md
|
||||
- exercises/localcluster-details.md
|
||||
- exercises/remotecluster-details.md
|
||||
- # DAY 3
|
||||
- k8s/scalingdockercoins.md
|
||||
- shared/hastyconclusions.md
|
||||
- k8s/daemonset.md
|
||||
- k8s/setup-overview.md
|
||||
- k8s/setup-devel.md
|
||||
#- k8s/setup-managed.md
|
||||
#- k8s/setup-selfhosted.md
|
||||
#- k8s/dashboard.md
|
||||
- k8s/localkubeconfig.md
|
||||
- k8s/accessinternal.md
|
||||
- exercises/localcluster-details.md
|
||||
-
|
||||
- k8s/rollout.md
|
||||
- k8s/healthchecks.md
|
||||
- exercises/healthchecks-details.md
|
||||
- k8s/ingress.md
|
||||
- exercises/ingress-details.md
|
||||
#- k8s/ingress-tls.md
|
||||
- k8s/kustomize.md
|
||||
- k8s/k9s.md
|
||||
- k8s/tilt.md
|
||||
-
|
||||
#- k8s/healthchecks-more.md
|
||||
- exercises/healthchecks-details.md
|
||||
- # DAY 4
|
||||
- k8s/volumes.md
|
||||
- k8s/configuration.md
|
||||
- k8s/secrets.md
|
||||
- k8s/ingress.md
|
||||
#- k8s/ingress-tls.md
|
||||
- exercises/appconfig-details.md
|
||||
- exercises/ingress-details.md
|
||||
- # DAY 5
|
||||
- k8s/netpol.md
|
||||
- k8s/authn-authz.md
|
||||
- k8s/resource-limits.md
|
||||
- k8s/metrics-server.md
|
||||
- k8s/cluster-sizing.md
|
||||
- k8s/horizontal-pod-autoscaler.md
|
||||
-
|
||||
- k8s/volumes.md
|
||||
- k8s/configuration.md
|
||||
- k8s/secrets.md
|
||||
- k8s/statefulsets.md
|
||||
- k8s/consul.md
|
||||
- k8s/pv-pvc-sc.md
|
||||
- k8s/volume-claim-templates.md
|
||||
#- k8s/portworx.md
|
||||
- k8s/openebs.md
|
||||
- k8s/stateful-failover.md
|
||||
#- k8s/batch-jobs.md
|
||||
-
|
||||
- |
|
||||
# (Extra content)
|
||||
- k8s/operators.md
|
||||
- k8s/sealed-secrets.md
|
||||
- k8s/eck.md
|
||||
- shared/thankyou.md
|
||||
#- k8s/horizontal-pod-autoscaler.md
|
||||
#-
|
||||
# - k8s/helm-intro.md
|
||||
# - k8s/helm-chart-format.md
|
||||
# - k8s/helm-create-basic-chart.md
|
||||
# - k8s/helm-create-better-chart.md
|
||||
@@ -32,7 +32,7 @@
|
||||
|
||||
- You're welcome to use whatever you like (e.g. AWS profiles)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Set the AWS region, API access key, and secret key:
|
||||
```bash
|
||||
@@ -58,7 +58,7 @@
|
||||
|
||||
- register it in our kubeconfig file
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Update our kubeconfig file:
|
||||
```bash
|
||||
|
||||
@@ -20,13 +20,13 @@
|
||||
|
||||
## Suspension of disbelief
|
||||
|
||||
The labs and demos in this section assume that we have set up `kubectl` on our
|
||||
The exercises in this section assume that we have set up `kubectl` on our
|
||||
local machine in order to access a remote cluster.
|
||||
|
||||
We will therefore show how to access services and pods of the remote cluster,
|
||||
from our local machine.
|
||||
|
||||
You can also run these commands directly on the cluster (if you haven't
|
||||
You can also run these exercises directly on the cluster (if you haven't
|
||||
installed and set up `kubectl` locally).
|
||||
|
||||
Running commands locally will be less useful
|
||||
@@ -58,7 +58,7 @@ installed and set up `kubectl` to communicate with your cluster.
|
||||
|
||||
- Let's access the `webui` service through `kubectl proxy`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Run an API proxy in the background:
|
||||
```bash
|
||||
@@ -101,7 +101,7 @@ installed and set up `kubectl` to communicate with your cluster.
|
||||
|
||||
- Let's access our remote Redis server
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Forward connections from local port 10000 to remote port 6379:
|
||||
```bash
|
||||
|
||||
@@ -198,7 +198,7 @@ Some examples ...
|
||||
|
||||
(the Node "echo" app, the Flask app, and one ngrok tunnel for each of them)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Go to the webhook directory:
|
||||
```bash
|
||||
@@ -244,7 +244,7 @@ class: extra-details
|
||||
|
||||
- We need to update the configuration with the correct `url`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the webhook configuration manifest:
|
||||
```bash
|
||||
@@ -271,7 +271,7 @@ class: extra-details
|
||||
|
||||
(so if the webhook server is down, we can still create pods)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Register the webhook:
|
||||
```bash
|
||||
@@ -288,7 +288,7 @@ It is strongly recommended to tail the logs of the API server while doing that.
|
||||
|
||||
- Let's create a pod and try to set a `color` label
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a pod named `chroma`:
|
||||
```bash
|
||||
@@ -328,7 +328,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
## Update the webhook configuration
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- First, check the ngrok URL of the tunnel for the Flask app:
|
||||
```bash
|
||||
@@ -395,7 +395,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
## Let's get to work!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Make sure we're in the right directory:
|
||||
```bash
|
||||
@@ -424,7 +424,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
... we'll store it in a ConfigMap, and install dependencies on the fly
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Load the webhook source in a ConfigMap:
|
||||
```bash
|
||||
@@ -446,7 +446,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
(of course, there are plenty others options; e.g. `cfssl`)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate a self-signed certificate:
|
||||
```bash
|
||||
@@ -470,7 +470,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
- Let's reconfigure the webhook to use our Service instead of ngrok
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the webhook configuration manifest:
|
||||
```bash
|
||||
@@ -504,7 +504,7 @@ Note: the webhook doesn't do anything (other than printing the request payload).
|
||||
|
||||
Shell to the rescue!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Load up our cert and encode it in base64:
|
||||
```bash
|
||||
|
||||
@@ -66,7 +66,7 @@
|
||||
|
||||
- We'll ask `kubectl` to show us the exacts requests that it's making
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the URI for a cluster-scope, "core" resource, e.g. a Node:
|
||||
```bash
|
||||
@@ -122,7 +122,7 @@ class: extra-details
|
||||
|
||||
- What about namespaced resources?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the URI for a namespaced, "core" resource, e.g. a Service:
|
||||
```bash
|
||||
@@ -169,7 +169,7 @@ class: extra-details
|
||||
|
||||
## Accessing a subresource
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List `kube-proxy` pods:
|
||||
```bash
|
||||
@@ -200,7 +200,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
|
||||
|
||||
- There are at least three useful commands to introspect the API server
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List resources types, their group, kind, short names, and scope:
|
||||
```bash
|
||||
@@ -249,7 +249,7 @@ command=echo&command=hello&command=world&container=kube-proxy&stderr=true&stdout
|
||||
|
||||
The following assumes that `metrics-server` is deployed on your cluster.
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that the metrics.k8s.io is registered with `metrics-server`:
|
||||
```bash
|
||||
@@ -271,7 +271,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
|
||||
|
||||
- We can have multiple resources with the same name
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look for resources named `node`:
|
||||
```bash
|
||||
@@ -298,7 +298,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
|
||||
|
||||
- But we can look at the raw data (with `-o json` or `-o yaml`)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look at NodeMetrics objects with one of these commands:
|
||||
```bash
|
||||
@@ -320,7 +320,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
|
||||
|
||||
--
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Display node metrics:
|
||||
```bash
|
||||
@@ -342,7 +342,7 @@ The following assumes that `metrics-server` is deployed on your cluster.
|
||||
|
||||
- Then we can register that server by creating an APIService resource
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the definition used for the `metrics-server`:
|
||||
```bash
|
||||
|
||||
@@ -103,7 +103,7 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## `WithWaitGroup`
|
||||
## `WithWaitGroup`,
|
||||
|
||||
- When we shutdown, tells clients (with in-flight requests) to retry
|
||||
|
||||
|
||||
@@ -203,9 +203,9 @@ What does that mean?
|
||||
|
||||
## Let's experiment a bit!
|
||||
|
||||
- For this section, connect to the first node of the `test` cluster
|
||||
- For the exercises in this section, connect to the first node of the `test` cluster
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- SSH to the first node of the test cluster
|
||||
|
||||
@@ -224,7 +224,7 @@ What does that mean?
|
||||
|
||||
- Let's create a simple object
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a namespace with the following command:
|
||||
```bash
|
||||
@@ -246,7 +246,7 @@ This is equivalent to `kubectl create namespace hello`.
|
||||
|
||||
- Let's retrieve the object we just created
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Read back our object:
|
||||
```bash
|
||||
@@ -354,7 +354,7 @@ class: extra-details
|
||||
|
||||
- The easiest way is to use `kubectl label`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In one terminal, watch namespaces:
|
||||
```bash
|
||||
@@ -402,7 +402,7 @@ class: extra-details
|
||||
|
||||
- DELETED resources
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In one terminal, watch pods, displaying full events:
|
||||
```bash
|
||||
|
||||
@@ -361,7 +361,7 @@ class: extra-details
|
||||
|
||||
## Listing service accounts
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- The resource name is `serviceaccount` or `sa` for short:
|
||||
```bash
|
||||
@@ -378,7 +378,7 @@ class: extra-details
|
||||
|
||||
## Finding the secret
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List the secrets for the `default` service account:
|
||||
```bash
|
||||
@@ -398,7 +398,7 @@ class: extra-details
|
||||
|
||||
- The token is stored in the secret, wrapped with base64 encoding
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View the secret:
|
||||
```bash
|
||||
@@ -421,7 +421,7 @@ class: extra-details
|
||||
|
||||
- Let's send a request to the API, without and with the token
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Find the ClusterIP for the `kubernetes` service:
|
||||
```bash
|
||||
@@ -616,7 +616,7 @@ class: extra-details
|
||||
|
||||
- Nixery automatically generates images with the requested packages
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Run our pod:
|
||||
```bash
|
||||
@@ -632,7 +632,7 @@ class: extra-details
|
||||
|
||||
- Normally, at this point, we don't have any API permission
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check our permissions with `kubectl`:
|
||||
```bash
|
||||
@@ -658,7 +658,7 @@ class: extra-details
|
||||
|
||||
(but again, we could call it `view` or whatever we like)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the new role binding:
|
||||
```bash
|
||||
@@ -716,7 +716,7 @@ It's important to note a couple of details in these flags...
|
||||
|
||||
- We should be able to *view* things, but not to *edit* them
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check our permissions with `kubectl`:
|
||||
```bash
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
|
||||
- We can use the `--dry-run=client` option
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate the YAML for a Deployment without creating it:
|
||||
```bash
|
||||
@@ -128,7 +128,7 @@ class: extra-details
|
||||
|
||||
## The limits of `kubectl apply --dry-run=client`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate the YAML for a deployment:
|
||||
```bash
|
||||
@@ -161,7 +161,7 @@ class: extra-details
|
||||
|
||||
(all validation and mutation hooks will be executed)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try the same YAML file as earlier, with server-side dry run:
|
||||
```bash
|
||||
@@ -200,7 +200,7 @@ class: extra-details
|
||||
|
||||
- `kubectl diff` does a server-side dry run, *and* shows differences
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try `kubectl diff` on the YAML that we tweaked earlier:
|
||||
```bash
|
||||
|
||||
@@ -1,693 +0,0 @@
|
||||
# Amazon EKS
|
||||
|
||||
- Elastic Kubernetes Service
|
||||
|
||||
- AWS runs the Kubernetes control plane
|
||||
|
||||
(all we see is an API server endpoint)
|
||||
|
||||
- Pods can run on any combination of:
|
||||
|
||||
- EKS-managed nodes
|
||||
|
||||
- self-managed nodes
|
||||
|
||||
- Fargate
|
||||
|
||||
- Leverages and integrates with AWS services and APIs
|
||||
|
||||
---
|
||||
|
||||
## Some integrations
|
||||
|
||||
- Authenticate with IAM users and roles
|
||||
|
||||
- Associate IAM roles to Kubernetes ServiceAccounts
|
||||
|
||||
- Load balance traffic with ALB/ELB/NLB
|
||||
|
||||
- Persist data with EBS/EFS
|
||||
|
||||
- Label nodes with instance ID, instance type, region, AZ ...
|
||||
|
||||
- Pods can be "first class citizens" of VPC
|
||||
|
||||
---
|
||||
|
||||
## Pros/cons
|
||||
|
||||
- Fully managed control plane
|
||||
|
||||
- Handles deployment, upgrade, scaling of the control plane
|
||||
|
||||
- Available versions and features tend to lag a bit
|
||||
|
||||
- Doesn't fit the most demanding users
|
||||
|
||||
("demanding" starts somewhere between 100 and 1000 nodes)
|
||||
|
||||
---
|
||||
|
||||
## Good to know ...
|
||||
|
||||
- Some integrations are specific to EKS
|
||||
|
||||
(some authentication models)
|
||||
|
||||
- Many integrations are *not* specific to EKS
|
||||
|
||||
- The Cloud Controller Manager can run outside of EKS
|
||||
|
||||
(and provide LoadBalancer services, EBS volumes, and more)
|
||||
|
||||
---
|
||||
|
||||
# Provisioning clusters
|
||||
|
||||
- AWS console, API, CLI
|
||||
|
||||
- `eksctl`
|
||||
|
||||
- Infrastructure-as-Code
|
||||
|
||||
---
|
||||
|
||||
## AWS "native" provisioning
|
||||
|
||||
- AWS web console
|
||||
|
||||
- click-click-click!
|
||||
|
||||
- difficulty: low
|
||||
|
||||
- AWS API or CLI
|
||||
|
||||
- must provide subnets, ARNs
|
||||
|
||||
- difficulty: medium
|
||||
|
||||
---
|
||||
|
||||
## `eksctl`
|
||||
|
||||
- Originally developed by Weave
|
||||
|
||||
(back when AWS "native" provisioning wasn't very good)
|
||||
|
||||
- `eksctl create cluster` just works™
|
||||
|
||||
- Has been "adopted" by AWS
|
||||
|
||||
(is listed in official documentations)
|
||||
|
||||
---
|
||||
|
||||
## Infrastructure-as-Code
|
||||
|
||||
- Cloud Formation
|
||||
|
||||
- Terraform
|
||||
|
||||
[terraform-aws-eks](https://github.com/terraform-aws-modules/terraform-aws-eks)
|
||||
by the community
|
||||
([example](https://github.com/terraform-aws-modules/terraform-aws-eks/tree/master/examples/basic))
|
||||
|
||||
[terraform-provider-aws](https://github.com/hashicorp/terraform-provider-aws)
|
||||
by Hashicorp
|
||||
([example](https://github.com/hashicorp/terraform-provider-aws/tree/main/examples/eks-getting-started))
|
||||
|
||||
[Kubestack](https://www.kubestack.com/)
|
||||
|
||||
---
|
||||
|
||||
## Node groups
|
||||
|
||||
- Virtually all provisioning models have a concept of "node group"
|
||||
|
||||
- Node group = group of similar nodes in an ASG
|
||||
|
||||
- can span multiple AZ
|
||||
|
||||
- can have instances of different types¹
|
||||
|
||||
- A cluster will need at least one node group
|
||||
|
||||
.footnote[¹As I understand it, to specify fallbacks if one instance type is unavailable or out of capacity.]
|
||||
|
||||
---
|
||||
|
||||
# IAM → EKS authentication
|
||||
|
||||
- Access EKS clusters using IAM users and roles
|
||||
|
||||
- No special role, permission, or policy is needed in IAM
|
||||
|
||||
(but the `eks:DescribeCluster` permission can be useful, see later)
|
||||
|
||||
- Users and roles need to be explicitly listed in the cluster
|
||||
|
||||
- Configuration is done through a ConfigMap in the cluster
|
||||
|
||||
---
|
||||
|
||||
## Setting it up
|
||||
|
||||
- Nothing to do when creating the cluster
|
||||
|
||||
(feature is always enabled)
|
||||
|
||||
- Users and roles are *mapped* to Kubernetes users and groups
|
||||
|
||||
(through the `aws-auth` ConfigMap in `kube-system`)
|
||||
|
||||
- That's it!
|
||||
|
||||
---
|
||||
|
||||
## Mapping
|
||||
|
||||
- The `aws-auth` ConfigMap can contain two entries:
|
||||
|
||||
- `mapRoles` (map IAM roles)
|
||||
|
||||
- `mapUsers` (map IAM users)
|
||||
|
||||
- Each entry is a YAML file
|
||||
|
||||
- Each entry includes:
|
||||
|
||||
- `rolearn` or `userarn` to map
|
||||
|
||||
- `username` (as a string)
|
||||
|
||||
- `groups` (as a list; can be empty)
|
||||
|
||||
---
|
||||
|
||||
## Example
|
||||
|
||||
```yaml
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
namespace: kube-system
|
||||
name: aws-auth
|
||||
data:
|
||||
mapRoles: `|`
|
||||
- rolearn: arn:aws:iam::111122223333:role/blah
|
||||
username: blah
|
||||
groups: [ devs, ops ]
|
||||
mapUsers: `|`
|
||||
- userarn: arn:aws:iam::111122223333:user/alice
|
||||
username: alice
|
||||
groups: [ system:masters ]
|
||||
- userarn: arn:aws:iam::111122223333:user/bob
|
||||
username: bob
|
||||
groups: [ system:masters ]
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Client setup
|
||||
|
||||
- We need either the `aws` CLI or the `aws-iam-authenticator`
|
||||
|
||||
- We use them as `exec` plugins in `~/.kube/config`
|
||||
|
||||
- Done automatically by `eksctl`
|
||||
|
||||
- Or manually with `aws eks update-kubeconfig`
|
||||
|
||||
- Discovering the address of the API server requires one IAM permission
|
||||
|
||||
```json
|
||||
"Action": [
|
||||
"eks:DescribeCluster"
|
||||
],
|
||||
"Resource": "arn:aws:eks:<region>:<account>:cluster/<cluster-name>"
|
||||
```
|
||||
|
||||
(wildcards can be used when specifying the resource)
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## How it works
|
||||
|
||||
- The helper generates a token
|
||||
|
||||
(with `aws eks get-token` or `aws-iam-authenticator token`)
|
||||
|
||||
- Note: these calls will always succeed!
|
||||
|
||||
(even if AWS API keys are invalid)
|
||||
|
||||
- The token is used to authenticate with the Kubernetes API
|
||||
|
||||
- AWS' Kubernetes API server will decode and validate the token
|
||||
|
||||
(and map the underlying user or role accordingly)
|
||||
|
||||
---
|
||||
|
||||
## Read The Fine Manual
|
||||
|
||||
https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
|
||||
|
||||
---
|
||||
|
||||
# EKS → IAM authentication
|
||||
|
||||
- Access AWS services from workloads running on EKS
|
||||
|
||||
(e.g.: access S3 bucket from code running in a Pod)
|
||||
|
||||
- This works by associating an IAM role to a K8S ServiceAccount
|
||||
|
||||
- There are also a few specific roles used internally by EKS
|
||||
|
||||
(e.g. to let the nodes establish network configurations)
|
||||
|
||||
- ... We won't talk about these
|
||||
|
||||
---
|
||||
|
||||
## The big picture
|
||||
|
||||
- One-time setup task
|
||||
|
||||
([create an OIDC provider associated to our EKS cluster](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html))
|
||||
|
||||
- Create (or update) a role with an appropriate *trust policy*
|
||||
|
||||
(more on that later)
|
||||
|
||||
- Annotate service accounts to map them to that role
|
||||
|
||||
`eks.amazonaws.com/role-arn=arn:aws:iam::111122223333:role/some-iam-role`
|
||||
|
||||
- Create (or re-create) pods using that ServiceAccount
|
||||
|
||||
- The pods can now use that role!
|
||||
|
||||
---
|
||||
|
||||
## Trust policies
|
||||
|
||||
- IAM roles have a *trust policy* (aka *assume role policy*)
|
||||
|
||||
(cf `aws iam create-role ... --assume-role-policy-document ...`)
|
||||
|
||||
- That policy contains a *statement* list
|
||||
|
||||
- This list indicates who/what is allowed to assume (use) the role
|
||||
|
||||
- In the current scenario, that policy will contain something saying:
|
||||
|
||||
*ServiceAccount S on EKS cluster C is allowed to use this role*
|
||||
|
||||
---
|
||||
|
||||
## Trust policy for a single ServiceAccount
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Principal": {
|
||||
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
|
||||
},
|
||||
"Action": "sts:AssumeRoleWithWebIdentity",
|
||||
"Condition": {
|
||||
"StringEquals": {
|
||||
"${OIDC_PROVIDER}:sub":
|
||||
"system:serviceaccount:<namespace>:<service-account>"
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Trust policy for multiple ServiceAccounts
|
||||
|
||||
```json
|
||||
{
|
||||
"Version": "2012-10-17",
|
||||
"Statement": [
|
||||
{
|
||||
"Effect": "Allow",
|
||||
"Principal": {
|
||||
"Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
|
||||
},
|
||||
"Action": "sts:AssumeRoleWithWebIdentity",
|
||||
"Condition": {
|
||||
"StringLike": {
|
||||
"${OIDC_PROVIDER}:sub":
|
||||
["system:serviceaccount:container-training:*"]
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## The little details
|
||||
|
||||
- When pods are created, they are processed by a mutating webhook
|
||||
|
||||
(typically named `pod-identity-webhook`)
|
||||
|
||||
- Pods using a ServiceAccount with the right annotation get:
|
||||
|
||||
- an extra token
|
||||
<br/>
|
||||
(mounted in `/var/run/secrets/eks.amazonaws.com/serviceaccount/token`)
|
||||
|
||||
- a few env vars
|
||||
<br/>
|
||||
(including `AWS_WEB_IDENTITY_TOKEN_FILE` and `AWS_ROLE_ARN`)
|
||||
|
||||
- AWS client libraries and tooling will work this that
|
||||
|
||||
(see [this list](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts-minimum-sdk.html) for supported versions)
|
||||
|
||||
---
|
||||
|
||||
# CNI
|
||||
|
||||
- EKS is a compliant Kubernetes implementation
|
||||
|
||||
(which means we can use a wide range of CNI plugins)
|
||||
|
||||
- However, the recommended CNI plugin is the "AWS VPC CNI"
|
||||
|
||||
(https://github.com/aws/amazon-vpc-cni-k8s)
|
||||
|
||||
- Pods are then "first class citizens" of AWS VPC
|
||||
|
||||
---
|
||||
|
||||
## AWS VPC CNI
|
||||
|
||||
- Each Pod gets an address in a VPC subnet
|
||||
|
||||
- No overlay network, no encapsulation, no overhead
|
||||
|
||||
(other than AWS network fabric, obviously)
|
||||
|
||||
- Probably the fastest network option when running on AWS
|
||||
|
||||
- Allows "direct" load balancing (more on that later)
|
||||
|
||||
- Can use security groups with Pod traffic
|
||||
|
||||
- But: limits the number of Pods per Node
|
||||
|
||||
- But: more complex configuration (more on that later)
|
||||
|
||||
---
|
||||
|
||||
## Number of Pods per Node
|
||||
|
||||
- Each Pod gets an IP address on an ENI
|
||||
|
||||
(Elastic Network Interface)
|
||||
|
||||
- EC2 instances can only have a limited number of ENIs
|
||||
|
||||
(the exact limit depends on the instance type)
|
||||
|
||||
- ENIs can only have a limited number of IP addresses
|
||||
|
||||
(with variations here as well)
|
||||
|
||||
- This gives limits of e.g. 35 pods on `t3.large`, 29 on `c5.large` ...
|
||||
|
||||
(see
|
||||
[full list of limits per instance type](https://github.com/awslabs/amazon-eks-ami/blob/master/files/eni-max-pods.txt
|
||||
)
|
||||
and
|
||||
[ENI/IP details](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/pkg/awsutils/vpc_ip_resource_limit.go
|
||||
))
|
||||
|
||||
---
|
||||
|
||||
## Limits?
|
||||
|
||||
- These limits might seem low
|
||||
|
||||
- They're not *that* low if you compute e.g. the RAM/Pod ratio
|
||||
|
||||
- Except if you're running lots if tiny pods
|
||||
|
||||
- Bottom line: do the math!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Pre-loading
|
||||
|
||||
- It can take a little while to allocate/attach an ENI
|
||||
|
||||
- The AWS VPC CNI can keep a few extra addresses on each Node
|
||||
|
||||
(by default, one ENI worth of IP addresses)
|
||||
|
||||
- This is tunable if needed
|
||||
|
||||
(see [the docs](https://github.com/aws/amazon-vpc-cni-k8s/blob/master/docs/eni-and-ip-target.md
|
||||
) for details)
|
||||
|
||||
---
|
||||
|
||||
## Better load balancing
|
||||
|
||||
- The default path for inbound traffic is:
|
||||
|
||||
Load balancer → NodePort → Pod
|
||||
|
||||
- With the AWS VPC CNI, it becomes possible to do:
|
||||
|
||||
Load balancer → Pod
|
||||
|
||||
- More on that in the load balancing section!
|
||||
|
||||
---
|
||||
|
||||
## Configuration complexity
|
||||
|
||||
- The AWS VPC CNI is a very good solution when running EKS
|
||||
|
||||
- It brings optimized solutions to various use-cases:
|
||||
|
||||
- direct load balancing
|
||||
- user authentication
|
||||
- interconnection with other infrastructure
|
||||
- etc.
|
||||
|
||||
- Keep in mind that all these solutions are AWS-specific
|
||||
|
||||
- They can require a non-trivial amount of specific configuration
|
||||
|
||||
- Especially when moving from a simple POC to an IAC deployment!
|
||||
|
||||
---
|
||||
|
||||
# Load Balancers
|
||||
|
||||
- Here be dragons!
|
||||
|
||||
- Multiple options, each with different pros/cons
|
||||
|
||||
- It's necessary to know both AWS products and K8S concepts
|
||||
|
||||
---
|
||||
|
||||
## AWS load balancers
|
||||
|
||||
- CLB / Classic Load Balancer (formerly known as ELB)
|
||||
|
||||
- can work in L4 (TCP) or L7 (HTTP) mode
|
||||
- can do TLS unrolling
|
||||
- can't do websockets, HTTP/2, content-based routing ...
|
||||
|
||||
- NLB / Network Load Balancer
|
||||
|
||||
- high-performance L4 load balancer with TLS support
|
||||
|
||||
- ALB / Application Load Balancer
|
||||
|
||||
- HTTP load balancer
|
||||
- can do TLS unrolling
|
||||
- can do websockets, HTTP/2, content-based routing ...
|
||||
|
||||
---
|
||||
|
||||
## Load balancing modes
|
||||
|
||||
- "IP targets"
|
||||
|
||||
- send traffic directly from LB to Pods
|
||||
|
||||
- Pods must use the AWS VPC CNI
|
||||
|
||||
- compatible with Fargate Pods
|
||||
|
||||
- "Instance targets"
|
||||
|
||||
- send traffic to a NodePort (generally incurs an extra hop)
|
||||
|
||||
- Pods can use any CNI
|
||||
|
||||
- not compatible with Fargate Pods
|
||||
|
||||
- Each LB (Service) can use a different mode, if necessary
|
||||
|
||||
---
|
||||
|
||||
## Kubernetes load balancers
|
||||
|
||||
- Service (L4)
|
||||
|
||||
- ClusterIP: internal load balancing
|
||||
- NodePort: external load balancing on ports >30000
|
||||
- LoadBalancer: external load balancing on the port you want
|
||||
- ExternalIP: external load balancing directly on nodes
|
||||
|
||||
- Ingress (L7 HTTP)
|
||||
|
||||
- partial content-based routing (`Host` header, request path)
|
||||
- requires an Ingress Controller (in front)
|
||||
- works with Services (in back)
|
||||
|
||||
---
|
||||
|
||||
## Two controllers are available
|
||||
|
||||
- Kubernetes "in-tree" load balancer controller
|
||||
|
||||
- always available
|
||||
- used by default for LoadBalancer Services
|
||||
- creates CLB by default; can also do NLB
|
||||
- can only do "instance targets"
|
||||
- can use extra CLB features (TLS, HTTP)
|
||||
|
||||
- AWS Load Balancer Controller (fka AWS ALB Ingress Controller)
|
||||
|
||||
- optional add-on (requires additional config)
|
||||
- primarily meant to be an Ingress Controller
|
||||
- creates NLB and ALB
|
||||
- can do "instance targets" and "IP targets"
|
||||
- can also be used for LoadBalancer Services with type `nlb-ip`
|
||||
|
||||
- They can run side by side
|
||||
|
||||
---
|
||||
|
||||
## Which one should we use?
|
||||
|
||||
- AWS Load Balancer Controller supports "IP targets"
|
||||
|
||||
(which means direct routing of traffic to Pods)
|
||||
|
||||
- It can be used as an Ingress controller
|
||||
|
||||
- It *seems* to be the perfect solution for EKS!
|
||||
|
||||
- However ...
|
||||
|
||||
---
|
||||
|
||||
## Caveats
|
||||
|
||||
- AWS Load Balancer Controller requires extensive configuration
|
||||
|
||||
- a few hours to a few days to get it to work in a POC ...
|
||||
|
||||
- a few days to a few weeks to industrialize that process?
|
||||
|
||||
- It's AWS-specific
|
||||
|
||||
- It still introduces an extra hop, even if that hop is invisible
|
||||
|
||||
- Other ingress controllers can have interesting features
|
||||
|
||||
(canary deployment, A/B testing ...)
|
||||
|
||||
---
|
||||
|
||||
## Noteworthy annotations and docs
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb-ip`
|
||||
|
||||
- LoadBalancer Service with "IP targets" ([docs](https://kubernetes-sigs.github.io/aws-load-balancer-controller/latest/guide/service/nlb_ip_mode/))
|
||||
- requires AWS Load Balancer Controller
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-internal: "true"`
|
||||
|
||||
- internal load balancer (for private VPC)
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-type: nlb`
|
||||
|
||||
- opt for NLB instead of CLB with in-tree controller
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-proxy-protocol: "*"`
|
||||
|
||||
- use HAProxy [PROXY protocol](https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt)
|
||||
|
||||
---
|
||||
|
||||
## TLS-related annotations
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-ssl-cert`
|
||||
|
||||
- enable TLS and use that certificate
|
||||
- example value: `arn:aws:acm:<region>:<account>:certificate/<cert-id>`
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-ssl-ports`
|
||||
|
||||
- enable TLS *only* on the specified ports (when multiple ports are exposed)
|
||||
- example value: `"443,8443"`
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-ssl-negotiation-policy`
|
||||
|
||||
- specify ciphers and other TLS parameters to use (see [that list](https://docs.aws.amazon.com/elasticloadbalancing/latest/classic/elb-security-policy-table.html))
|
||||
- example value: `"ELBSecurityPolicy-TLS-1-2-2017-01"`
|
||||
|
||||
---
|
||||
|
||||
## To HTTP(S) or not to HTTP(S)
|
||||
|
||||
- `service.beta.kubernetes.io/aws-load-balancer-backend-protocol`
|
||||
|
||||
- can be either `http`, `https`, `ssl`, or `tcp`
|
||||
|
||||
- if `https` or `ssl`: enable TLS to the backend
|
||||
|
||||
- if `http` or `https`: enable HTTP `x-forwarded-for` headers (with `http` or `https`)
|
||||
|
||||
???
|
||||
|
||||
## Cluster autoscaling
|
||||
|
||||
## Logging
|
||||
|
||||
https://docs.aws.amazon.com/eks/latest/userguide/logging-using-cloudtrail.html
|
||||
|
||||
:EN:- Working with EKS
|
||||
:EN:- Cluster and user provisioning
|
||||
:EN:- Networking and load balancing
|
||||
|
||||
:FR:- Travailler avec EKS
|
||||
:FR:- Outils de déploiement
|
||||
:FR:- Intégration avec IAM
|
||||
:FR:- Fonctionalités réseau
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
- or we hit the *backoff limit* of the Job (default=6)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a Job that has a 50% chance of success:
|
||||
```bash
|
||||
@@ -49,7 +49,7 @@
|
||||
|
||||
- If the Pod fails, the Job creates another Pod
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the status of the Pod(s) created by the Job:
|
||||
```bash
|
||||
@@ -108,7 +108,7 @@ class: extra-details
|
||||
|
||||
(The Cron Job will not hold if a previous job is still running)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the Cron Job:
|
||||
```bash
|
||||
@@ -135,7 +135,7 @@ class: extra-details
|
||||
|
||||
(re-creating another one if it fails, for instance if its node fails)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the Jobs that are created:
|
||||
```bash
|
||||
|
||||
@@ -98,7 +98,7 @@
|
||||
|
||||
- Let's list our bootstrap tokens on a cluster created with kubeadm
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `test1`
|
||||
|
||||
@@ -145,7 +145,7 @@ class: extra-details
|
||||
|
||||
- The token we need to use has the form `abcdef.1234567890abcdef`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that it is accepted by the API server:
|
||||
```bash
|
||||
@@ -177,7 +177,7 @@ class: extra-details
|
||||
|
||||
- That information is stored in a public ConfigMap
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Retrieve that ConfigMap:
|
||||
```bash
|
||||
|
||||
@@ -88,7 +88,7 @@ spec:
|
||||
|
||||
- Let's try this out!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the port used by our self-hosted registry:
|
||||
```bash
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
|
||||
- Let's build the image for the DockerCoins `worker` service with Kaniko
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Find the port number for our self-hosted registry:
|
||||
```bash
|
||||
@@ -160,7 +160,7 @@ spec:
|
||||
|
||||
- The YAML for the pod is in `k8s/kaniko-build.yaml`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the pod:
|
||||
```bash
|
||||
|
||||
@@ -37,7 +37,7 @@ so that your build pipeline is automated.*
|
||||
|
||||
- We will deploy a registry container, and expose it with a NodePort
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the registry service:
|
||||
```bash
|
||||
@@ -57,7 +57,7 @@ so that your build pipeline is automated.*
|
||||
|
||||
- We need to find out which port has been allocated
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View the service details:
|
||||
```bash
|
||||
@@ -78,7 +78,7 @@ so that your build pipeline is automated.*
|
||||
|
||||
- A convenient Docker registry API route to remember is `/v2/_catalog`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
<!-- ```hide kubectl wait deploy/registry --for condition=available```-->
|
||||
|
||||
@@ -102,7 +102,7 @@ We should see:
|
||||
|
||||
- We can retag a small image, and push it to the registry
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Make sure we have the busybox image, and retag it:
|
||||
```bash
|
||||
@@ -123,7 +123,7 @@ We should see:
|
||||
|
||||
- Let's use the same endpoint as before
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Ensure that our busybox image is now in the local registry:
|
||||
```bash
|
||||
@@ -143,7 +143,7 @@ The curl command should now output:
|
||||
|
||||
- We are going to use a convenient feature of Docker Compose
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Go to the `stacks` directory:
|
||||
```bash
|
||||
@@ -217,7 +217,7 @@ class: extra-details
|
||||
|
||||
- All our images should now be in the registry
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Re-run the same `curl` command as earlier:
|
||||
```bash
|
||||
@@ -232,4 +232,4 @@ variable, so that we can quickly switch from
|
||||
the self-hosted registry to pre-built images
|
||||
hosted on the Docker Hub. So make sure that
|
||||
this $REGISTRY variable is set correctly when
|
||||
running these commands!*
|
||||
running the exercises!*
|
||||
@@ -56,7 +56,7 @@
|
||||
|
||||
- It can be installed with a YAML manifest, or with Helm
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Let's install the cert-manager Helm chart with this one-liner:
|
||||
```bash
|
||||
@@ -86,7 +86,7 @@
|
||||
|
||||
- The manifest shown on the previous slide is in @@LINK[k8s/cm-clusterissuer.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the ClusterIssuer:
|
||||
```bash
|
||||
@@ -115,7 +115,7 @@
|
||||
|
||||
- The manifest shown on the previous slide is in @@LINK[k8s/cm-certificate.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the Certificate to update the domain name
|
||||
|
||||
@@ -140,7 +140,7 @@
|
||||
|
||||
- then it waits for the challenge to complete
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View the resources created by cert-manager:
|
||||
```bash
|
||||
@@ -158,7 +158,7 @@
|
||||
|
||||
`http://<our-domain>/.well-known/acme-challenge/<token>`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the *path* of the Ingress in particular:
|
||||
```bash
|
||||
@@ -176,7 +176,7 @@
|
||||
|
||||
An Ingress Controller! 😅
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install an Ingress Controller:
|
||||
```bash
|
||||
|
||||
@@ -1,445 +0,0 @@
|
||||
# Cluster autoscaler
|
||||
|
||||
- When the cluster is full, we need to add more nodes
|
||||
|
||||
- This can be done manually:
|
||||
|
||||
- deploy new machines and add them to the cluster
|
||||
|
||||
- if using managed Kubernetes, use some API/CLI/UI
|
||||
|
||||
- Or automatically with the cluster autoscaler:
|
||||
|
||||
https://github.com/kubernetes/autoscaler
|
||||
|
||||
---
|
||||
|
||||
## Use-cases
|
||||
|
||||
- Batch job processing
|
||||
|
||||
"once in a while, we need to execute these 1000 jobs in parallel"
|
||||
|
||||
"...but the rest of the time there is almost nothing running on the cluster"
|
||||
|
||||
- Dynamic workload
|
||||
|
||||
"a few hours per day or a few days per week, we have a lot of traffic"
|
||||
|
||||
"...but the rest of the time, the load is much lower"
|
||||
|
||||
---
|
||||
|
||||
## Pay for what you use
|
||||
|
||||
- The point of the cloud is to "pay for what you use"
|
||||
|
||||
- If you have a fixed number of cloud instances running at all times:
|
||||
|
||||
*you're doing in wrong (except if your load is always the same)*
|
||||
|
||||
- If you're not using some kind of autoscaling, you're wasting money
|
||||
|
||||
(except if you like lining the pockets of your cloud provider)
|
||||
|
||||
---
|
||||
|
||||
## Running the cluster autoscaler
|
||||
|
||||
- We must run nodes on a supported infrastructure
|
||||
|
||||
- See [here] for a non-exhaustive list of supported providers
|
||||
|
||||
- Sometimes, the cluster autoscaler is installed automatically
|
||||
|
||||
(or by setting a flag / checking a box when creating the cluster)
|
||||
|
||||
- Sometimes, it requires additional work
|
||||
|
||||
(which is often non-trivial and highly provider-specific)
|
||||
|
||||
[here]: https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler/cloudprovider
|
||||
|
||||
---
|
||||
|
||||
## Scaling up in theory
|
||||
|
||||
IF a Pod is `Pending`,
|
||||
|
||||
AND adding a Node would allow this Pod to be scheduled,
|
||||
|
||||
THEN add a Node.
|
||||
|
||||
---
|
||||
|
||||
## Fine print 1
|
||||
|
||||
*IF a Pod is `Pending`...*
|
||||
|
||||
- First of all, the Pod must exist
|
||||
|
||||
- Pod creation might be blocked by e.g. a namespace quota
|
||||
|
||||
- In that case, the cluster autoscaler will never trigger
|
||||
|
||||
---
|
||||
|
||||
## Fine print 2
|
||||
|
||||
*IF a Pod is `Pending`...*
|
||||
|
||||
- If our Pods do not have resource requests:
|
||||
|
||||
*they will be in the `BestEffort` class*
|
||||
|
||||
- Generally, Pods in the `BestEffort` class are schedulable
|
||||
|
||||
- except if they have anti-affinity placement constraints
|
||||
|
||||
- except if all Nodes already run the max number of pods (110 by default)
|
||||
|
||||
- Therefore, if we want to leverage cluster autoscaling:
|
||||
|
||||
*our Pods should have resource requests*
|
||||
|
||||
---
|
||||
|
||||
## Fine print 3
|
||||
|
||||
*AND adding a Node would allow this Pod to be scheduled...*
|
||||
|
||||
- The autoscaler won't act if:
|
||||
|
||||
- the Pod is too big to fit on a single Node
|
||||
|
||||
- the Pod has impossible placement constraints
|
||||
|
||||
- Examples:
|
||||
|
||||
- "run one Pod per datacenter" with 4 pods and 3 datacenters
|
||||
|
||||
- "use this nodeSelector" but no such Node exists
|
||||
|
||||
---
|
||||
|
||||
## Trying it out
|
||||
|
||||
- We're going to check how much capacity is available on the cluster
|
||||
|
||||
- Then we will create a basic deployment
|
||||
|
||||
- We will add resource requests to that deployment
|
||||
|
||||
- Then scale the deployment to exceed the available capacity
|
||||
|
||||
- **The following commands require a working cluster autoscaler!**
|
||||
|
||||
---
|
||||
|
||||
## Checking available resources
|
||||
|
||||
.lab[
|
||||
|
||||
- Check how much CPU is allocatable on the cluster:
|
||||
```bash
|
||||
kubectl get nodes -o jsonpath={..allocatable.cpu}
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- If we see e.g. `2800m 2800m 2800m`, that means:
|
||||
|
||||
3 nodes with 2.8 CPUs allocatable each
|
||||
|
||||
- To trigger autoscaling, we will create 7 pods requesting 1 CPU each
|
||||
|
||||
(each node can fit 2 such pods)
|
||||
|
||||
---
|
||||
|
||||
## Creating our test Deployment
|
||||
|
||||
.lab[
|
||||
|
||||
- Create the Deployment:
|
||||
```bash
|
||||
kubectl create deployment blue --image=jpetazzo/color
|
||||
```
|
||||
|
||||
- Add a request for 1 CPU:
|
||||
```bash
|
||||
kubectl patch deployment blue --patch='
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: color
|
||||
resources:
|
||||
requests:
|
||||
cpu: 1
|
||||
'
|
||||
```
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Scaling up in practice
|
||||
|
||||
- This assumes that we have strictly less than 7 CPUs available
|
||||
|
||||
(adjust the numbers if necessary!)
|
||||
|
||||
.lab[
|
||||
|
||||
- Scale up the Deployment:
|
||||
```bash
|
||||
kubectl scale deployment blue --replicas=7
|
||||
```
|
||||
|
||||
- Check that we have a new Pod, and that it's `Pending`:
|
||||
```bash
|
||||
kubectl get pods
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Cluster autoscaling
|
||||
|
||||
- After a few minutes, a new Node should appear
|
||||
|
||||
- When that Node becomes `Ready`, the Pod will be assigned to it
|
||||
|
||||
- The Pod will then be `Running`
|
||||
|
||||
- Reminder: the `AGE` of the Pod indicates when the Pod was *created*
|
||||
|
||||
(it doesn't indicate when the Pod was scheduled or started!)
|
||||
|
||||
- To see other state transitions, check the `status.conditions` of the Pod
|
||||
|
||||
---
|
||||
|
||||
## Scaling down in theory
|
||||
|
||||
IF a Node has less than 50% utilization for 10 minutes,
|
||||
|
||||
AND all its Pods can be scheduled on other Nodes,
|
||||
|
||||
AND all its Pods are *evictable*,
|
||||
|
||||
AND the Node doesn't have a "don't scale me down" annotation¹,
|
||||
|
||||
THEN drain the Node and shut it down.
|
||||
|
||||
.footnote[¹The annotation is: `cluster-autoscaler.kubernetes.io/scale-down-disabled=true`]
|
||||
|
||||
---
|
||||
|
||||
## When is a Pod "evictable"?
|
||||
|
||||
By default, Pods are evictable, except if any of the following is true.
|
||||
|
||||
- They have a restrictive Pod Disruption Budget
|
||||
|
||||
- They are "standalone" (not controlled by a ReplicaSet/Deployment, StatefulSet, Job...)
|
||||
|
||||
- They are in `kube-system` and don't have a Pod Disruption Budget
|
||||
|
||||
- They have local storage (that includes `EmptyDir`!)
|
||||
|
||||
This can be overridden by setting the annotation:
|
||||
<br/>
|
||||
`cluster-autoscaler.kubernetes.io/safe-to-evict`
|
||||
<br/>(it can be set to `true` or `false`)
|
||||
|
||||
---
|
||||
|
||||
## Pod Disruption Budget
|
||||
|
||||
- Special resource to configure how many Pods can be *disrupted*
|
||||
|
||||
(i.e. shutdown/terminated)
|
||||
|
||||
- Applies to Pods matching a given selector
|
||||
|
||||
(typically matching the selector of a Deployment)
|
||||
|
||||
- Only applies to *voluntary disruption*
|
||||
|
||||
(e.g. cluster autoscaler draining a node, planned maintenance...)
|
||||
|
||||
- Can express `minAvailable` or `maxUnavailable`
|
||||
|
||||
- See [documentation] for details and examples
|
||||
|
||||
[documentation]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/
|
||||
|
||||
---
|
||||
|
||||
## Local storage
|
||||
|
||||
- If our Pods use local storage, they will prevent scaling down
|
||||
|
||||
- If we have e.g. an `EmptyDir` volume for caching/sharing:
|
||||
|
||||
make sure to set the `.../safe-to-evict` annotation to `true`!
|
||||
|
||||
- Even if the volume...
|
||||
|
||||
- ...only has a PID file or UNIX socket
|
||||
|
||||
- ...is empty
|
||||
|
||||
- ...is not mounted by any container in the Pod!
|
||||
|
||||
---
|
||||
|
||||
## Expensive batch jobs
|
||||
|
||||
- Careful if we have long-running batch jobs!
|
||||
|
||||
(e.g. jobs that take many hours/days to complete)
|
||||
|
||||
- These jobs could get evicted before they complete
|
||||
|
||||
(especially if they use less than 50% of the allocatable resources)
|
||||
|
||||
- Make sure to set the `.../safe-to-evict` annotation to `false`!
|
||||
|
||||
---
|
||||
|
||||
## Node groups
|
||||
|
||||
- Easy scenario: all nodes have the same size
|
||||
|
||||
- Realistic scenario: we have nodes of different sizes
|
||||
|
||||
- e.g. mix of CPU and GPU nodes
|
||||
|
||||
- e.g. small nodes for control plane, big nodes for batch jobs
|
||||
|
||||
- e.g. leveraging spot capacity
|
||||
|
||||
- The cluster autoscaler can handle it!
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Leveraging spot capacity
|
||||
|
||||
- AWS, Azure, and Google Cloud are typically more expensive than their competitors
|
||||
|
||||
- However, they offer *spot* capacity (spot instances, spot VMs...)
|
||||
|
||||
- *Spot* capacity:
|
||||
|
||||
- has a much lower cost (see e.g. AWS [spot instance advisor][awsspot])
|
||||
|
||||
- has a cost that varies continuously depending on regions, instance type...
|
||||
|
||||
- can be preempted at all times
|
||||
|
||||
- To be cost-effective, it is strongly recommended to leverage spot capacity
|
||||
|
||||
[awsspot]: https://aws.amazon.com/ec2/spot/instance-advisor/
|
||||
|
||||
---
|
||||
|
||||
## Node groups in practice
|
||||
|
||||
- The cluster autoscaler maps nodes to *node groups*
|
||||
|
||||
- this is an internal, provider-dependent mechanism
|
||||
|
||||
- the node group is sometimes visible through a proprietary label or annotation
|
||||
|
||||
- Each node group is scaled independently
|
||||
|
||||
- The cluster autoscaler uses [expanders] to decide which node group to scale up
|
||||
|
||||
(the default expander is "random", i.e. pick a node group at random!)
|
||||
|
||||
- Of course, only acceptable node groups will be considered
|
||||
|
||||
(i.e. node groups that could accommodate the `Pending` Pods)
|
||||
|
||||
[expanders]: https://github.com/kubernetes/autoscaler/blob/master/cluster-autoscaler/FAQ.md#what-are-expanders
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Scaling to zero
|
||||
|
||||
- *In general,* a node group needs to have at least one node at all times
|
||||
|
||||
(the cluster autoscaler uses that node to figure out the size, labels, taints... of the group)
|
||||
|
||||
- *On some providers,* there are special ways to specify labels and/or taints
|
||||
|
||||
(but if you want to scale to zero, check that the provider supports it!)
|
||||
|
||||
---
|
||||
|
||||
## Warning
|
||||
|
||||
- Autoscaling up is easy
|
||||
|
||||
- Autoscaling down is harder
|
||||
|
||||
- It might get stuck because Pods are not evictable
|
||||
|
||||
- Do at least a dry run to make sure that the cluster scales down correctly!
|
||||
|
||||
- Have alerts on cloud spend
|
||||
|
||||
- *Especially when using big/expensive nodes (e.g. with GPU!)*
|
||||
|
||||
---
|
||||
|
||||
## Preferred vs. Required
|
||||
|
||||
- Some Kubernetes mechanisms allow to express "soft preferences":
|
||||
|
||||
- affinity (`requiredDuringSchedulingIgnoredDuringExecution` vs `preferredDuringSchedulingIgnoredDuringExecution`)
|
||||
|
||||
- taints (`NoSchedule`/`NoExecute` vs `PreferNoSchedule`)
|
||||
|
||||
- Remember that these "soft preferences" can be ignored
|
||||
|
||||
(and given enough time and churn on the cluster, they will!)
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
- The cluster autoscaler publishes its status on a ConfigMap
|
||||
|
||||
.lab[
|
||||
|
||||
- Check the cluster autoscaler status:
|
||||
```bash
|
||||
kubectl describe configmap --namespace kube-system cluster-autoscaler-status
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- We can also check the logs of the autoscaler
|
||||
|
||||
(except on managed clusters where it's running internally, not visible to us)
|
||||
|
||||
---
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
Special thanks to [@s0ulshake] for their help with this section!
|
||||
|
||||
If you need help to run your data science workloads on Kubernetes,
|
||||
<br/>they're available for consulting.
|
||||
|
||||
(Get in touch with them through https://www.linkedin.com/in/ajbowen/)
|
||||
|
||||
[@s0ulshake]: https://twitter.com/s0ulshake
|
||||
@@ -18,9 +18,9 @@
|
||||
|
||||
- It's easy to check the version for the API server
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `oldversion1`
|
||||
- Log into node `test1`
|
||||
|
||||
- Check the version of kubectl and of the API server:
|
||||
```bash
|
||||
@@ -39,7 +39,7 @@
|
||||
|
||||
- It's also easy to check the version of kubelet
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check node versions (includes kubelet, kernel, container engine):
|
||||
```bash
|
||||
@@ -60,7 +60,7 @@
|
||||
|
||||
- If the control plane is self-hosted (running in pods), we can check it
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Show image versions for all pods in `kube-system` namespace:
|
||||
```bash
|
||||
@@ -81,7 +81,7 @@
|
||||
|
||||
## What version are we running anyway?
|
||||
|
||||
- When I say, "I'm running Kubernetes 1.18", is that the version of:
|
||||
- When I say, "I'm running Kubernetes 1.15", is that the version of:
|
||||
|
||||
- kubectl
|
||||
|
||||
@@ -157,15 +157,15 @@
|
||||
|
||||
## Kubernetes uses semantic versioning
|
||||
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.18.20:
|
||||
- Kubernetes versions look like MAJOR.MINOR.PATCH; e.g. in 1.17.2:
|
||||
|
||||
- MAJOR = 1
|
||||
- MINOR = 18
|
||||
- PATCH = 20
|
||||
- MINOR = 17
|
||||
- PATCH = 2
|
||||
|
||||
- It's always possible to mix and match different PATCH releases
|
||||
|
||||
(e.g. 1.18.20 and 1.18.15 are compatible)
|
||||
(e.g. 1.16.1 and 1.16.6 are compatible)
|
||||
|
||||
- It is recommended to run the latest PATCH release
|
||||
|
||||
@@ -181,9 +181,9 @@
|
||||
|
||||
- All components support a difference of one¹ MINOR version
|
||||
|
||||
- This allows live upgrades (since we can mix e.g. 1.18 and 1.19)
|
||||
- This allows live upgrades (since we can mix e.g. 1.15 and 1.16)
|
||||
|
||||
- It also means that going from 1.18 to 1.20 requires going through 1.19
|
||||
- It also means that going from 1.14 to 1.16 requires going through 1.15
|
||||
|
||||
.footnote[¹Except kubelet, which can be up to two MINOR behind API server,
|
||||
and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
@@ -214,7 +214,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
- We will change the version of the API server
|
||||
|
||||
- We will work with cluster `oldversion` (nodes `oldversion1`, `oldversion2`, `oldversion3`)
|
||||
- We will work with cluster `test` (nodes `test1`, `test2`, `test3`)
|
||||
|
||||
---
|
||||
|
||||
@@ -240,9 +240,9 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
- We will edit the YAML file to use a different image version
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `oldversion1`
|
||||
- Log into node `test1`
|
||||
|
||||
- Check API server version:
|
||||
```bash
|
||||
@@ -254,7 +254,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
sudo vim /etc/kubernetes/manifests/kube-apiserver.yaml
|
||||
```
|
||||
|
||||
- Look for the `image:` line, and update it to e.g. `v1.19.0`
|
||||
- Look for the `image:` line, and update it to e.g. `v1.16.0`
|
||||
|
||||
]
|
||||
|
||||
@@ -264,7 +264,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
- The API server will be briefly unavailable while kubelet restarts it
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the API server version:
|
||||
```bash
|
||||
@@ -299,7 +299,7 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
(note: this is possible only because the cluster was installed with kubeadm)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check what will be upgraded:
|
||||
```bash
|
||||
@@ -308,11 +308,11 @@ and kubectl, which can be one MINOR ahead or behind API server.]
|
||||
|
||||
]
|
||||
|
||||
Note 1: kubeadm thinks that our cluster is running 1.19.0.
|
||||
Note 1: kubeadm thinks that our cluster is running 1.16.0.
|
||||
<br/>It is confused by our manual upgrade of the API server!
|
||||
|
||||
Note 2: kubeadm itself is still version 1.18.20.
|
||||
<br/>It doesn't know how to upgrade to 1.19.X.
|
||||
Note 2: kubeadm itself is still version 1.15.9.
|
||||
<br/>It doesn't know how to upgrade to 1.16.X.
|
||||
|
||||
---
|
||||
|
||||
@@ -320,7 +320,7 @@ Note 2: kubeadm itself is still version 1.18.20..
|
||||
|
||||
- First things first: we need to upgrade kubeadm
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Upgrade kubeadm:
|
||||
```
|
||||
@@ -335,28 +335,28 @@ Note 2: kubeadm itself is still version 1.18.20..
|
||||
]
|
||||
|
||||
Problem: kubeadm doesn't know how to handle
|
||||
upgrades from version 1.18.
|
||||
upgrades from version 1.15.
|
||||
|
||||
This is because we installed version 1.22 (or even later).
|
||||
This is because we installed version 1.17 (or even later).
|
||||
|
||||
We need to install kubeadm version 1.19.X.
|
||||
We need to install kubeadm version 1.16.X.
|
||||
|
||||
---
|
||||
|
||||
## Downgrading kubeadm
|
||||
|
||||
- We need to go back to version 1.19.X.
|
||||
- We need to go back to version 1.16.X (e.g. 1.16.6)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View available versions for package `kubeadm`:
|
||||
```bash
|
||||
apt show kubeadm -a | grep ^Version | grep 1.19
|
||||
apt show kubeadm -a | grep ^Version | grep 1.16
|
||||
```
|
||||
|
||||
- Downgrade kubeadm:
|
||||
```
|
||||
sudo apt install kubeadm=1.19.8-00
|
||||
sudo apt install kubeadm=1.16.6-00
|
||||
```
|
||||
|
||||
- Check what kubeadm tells us:
|
||||
@@ -366,7 +366,7 @@ We need to install kubeadm version 1.19.X.
|
||||
|
||||
]
|
||||
|
||||
kubeadm should now agree to upgrade to 1.19.8.
|
||||
kubeadm should now agree to upgrade to 1.16.6.
|
||||
|
||||
---
|
||||
|
||||
@@ -378,11 +378,11 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
- Or we can try the upgrade anyway
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Perform the upgrade:
|
||||
```bash
|
||||
sudo kubeadm upgrade apply v1.19.8
|
||||
sudo kubeadm upgrade apply v1.16.6
|
||||
```
|
||||
|
||||
]
|
||||
@@ -395,9 +395,9 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
- We can therefore use `apt` or `apt-get`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `oldversion3`
|
||||
- Log into node `test3`
|
||||
|
||||
- View available versions for package `kubelet`:
|
||||
```bash
|
||||
@@ -406,7 +406,7 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
- Upgrade kubelet:
|
||||
```bash
|
||||
sudo apt install kubelet=1.19.8-00
|
||||
sudo apt install kubelet=1.16.6-00
|
||||
```
|
||||
|
||||
]
|
||||
@@ -415,9 +415,9 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
## Checking what we've done
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `oldversion1`
|
||||
- Log into node `test1`
|
||||
|
||||
- Check node versions:
|
||||
```bash
|
||||
@@ -458,15 +458,15 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
(after upgrading the control plane)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Download the configuration on each node, and upgrade kubelet:
|
||||
```bash
|
||||
for N in 1 2 3; do
|
||||
ssh oldversion$N "
|
||||
sudo apt install kubeadm=1.19.8-00 &&
|
||||
ssh test$N "
|
||||
sudo apt install kubeadm=1.16.6-00 &&
|
||||
sudo kubeadm upgrade node &&
|
||||
sudo apt install kubelet=1.19.8-00"
|
||||
sudo apt install kubelet=1.16.6-00"
|
||||
done
|
||||
```
|
||||
]
|
||||
@@ -475,9 +475,9 @@ kubeadm should now agree to upgrade to 1.19.8.
|
||||
|
||||
## Checking what we've done
|
||||
|
||||
- All our nodes should now be updated to version 1.19.8
|
||||
- All our nodes should now be updated to version 1.16.6
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check nodes versions:
|
||||
```bash
|
||||
@@ -492,13 +492,13 @@ class: extra-details
|
||||
|
||||
## Skipping versions
|
||||
|
||||
- This example worked because we went from 1.18 to 1.19
|
||||
- This example worked because we went from 1.15 to 1.16
|
||||
|
||||
- If you are upgrading from e.g. 1.16, you will have to go through 1.17 first
|
||||
- If you are upgrading from e.g. 1.14, you will have to go through 1.15 first
|
||||
|
||||
- This means upgrading kubeadm to 1.17.X, then using it to upgrade the cluster
|
||||
- This means upgrading kubeadm to 1.15.X, then using it to upgrade the cluster
|
||||
|
||||
- Then upgrading kubeadm to 1.18.X, etc.
|
||||
- Then upgrading kubeadm to 1.16.X, etc.
|
||||
|
||||
- **Make sure to read the release notes before upgrading!**
|
||||
|
||||
|
||||
@@ -204,7 +204,7 @@ class: extra-details
|
||||
|
||||
## Logging into the new cluster
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into node `kuberouter1`
|
||||
|
||||
@@ -228,7 +228,7 @@ class: extra-details
|
||||
|
||||
- By default, kubelet gets the CNI configuration from `/etc/cni/net.d`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the content of `/etc/cni/net.d`
|
||||
|
||||
@@ -262,7 +262,7 @@ class: extra-details
|
||||
|
||||
(where `C` is our cluster number)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the Compose file to set the Cluster CIDR:
|
||||
```bash
|
||||
@@ -298,7 +298,7 @@ class: extra-details
|
||||
|
||||
(where `A.B.C.D` is the public address of `kuberouter1`, running the control plane)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the YAML file to set the API server address:
|
||||
```bash
|
||||
@@ -320,7 +320,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
|
||||
|
||||
- This is similar to what we did for the `kubenet` cluster
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate the kubeconfig file (replacing `X.X.X.X` with the address of `kuberouter1`):
|
||||
```bash
|
||||
@@ -338,7 +338,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
|
||||
|
||||
- We need to copy that kubeconfig file to the other nodes
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Copy `kubeconfig` to the other nodes:
|
||||
```bash
|
||||
@@ -359,7 +359,7 @@ Note: the DaemonSet won't create any pods (yet) since there are no nodes (yet).
|
||||
|
||||
- We need to pass `--network-plugin=cni`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Join the first node:
|
||||
```bash
|
||||
@@ -384,7 +384,7 @@ class: extra-details
|
||||
|
||||
(in `/etc/cni/net.d`)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the content of `/etc/cni/net.d`
|
||||
|
||||
@@ -400,7 +400,7 @@ class: extra-details
|
||||
|
||||
- Let's create a Deployment and expose it with a Service
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a Deployment running a web server:
|
||||
```bash
|
||||
@@ -423,7 +423,7 @@ class: extra-details
|
||||
|
||||
## Checking that everything works
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Get the ClusterIP address for the service:
|
||||
```bash
|
||||
@@ -449,7 +449,7 @@ class: extra-details
|
||||
|
||||
- What if we need to check that everything is working properly?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the IP addresses of our pods:
|
||||
```bash
|
||||
@@ -490,7 +490,7 @@ class: extra-details
|
||||
|
||||
## Trying `kubectl logs` / `kubectl exec`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to show the logs of a kube-router pod:
|
||||
```bash
|
||||
|
||||
@@ -384,7 +384,7 @@ We'll cover them just after!*
|
||||
|
||||
- We can create each Namespace, Deployment, and Service by hand, or...
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- We can deploy the app with a YAML manifest:
|
||||
```bash
|
||||
@@ -403,7 +403,7 @@ We'll cover them just after!*
|
||||
|
||||
- Since the `cluster.local` suffix can change, we'll use `x.y.svc`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that the app is up and running:
|
||||
```bash
|
||||
@@ -427,7 +427,7 @@ Here is the file that we will use, @@LINK[k8s/haproxy.cfg]:
|
||||
|
||||
## Creating the ConfigMap
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a ConfigMap named `haproxy` and holding the configuration file:
|
||||
```bash
|
||||
@@ -455,7 +455,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
|
||||
|
||||
## Creating the Pod
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the HAProxy Pod:
|
||||
```bash
|
||||
@@ -480,7 +480,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
|
||||
|
||||
(one request to `blue`, one request to `green`, one request to `blue`, etc.)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Send a few requests:
|
||||
```bash
|
||||
@@ -509,7 +509,7 @@ Here is @@LINK[k8s/haproxy.yaml], a Pod manifest using that ConfigMap:
|
||||
|
||||
## Creating the configmap
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Our configmap will have a single key, `http.addr`:
|
||||
```bash
|
||||
@@ -539,7 +539,7 @@ We are going to use the following pod definition:
|
||||
|
||||
- The resource definition from the previous slide is in @@LINK[k8s/registry.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the registry pod:
|
||||
```bash
|
||||
|
||||
@@ -86,7 +86,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
|
||||
|
||||
- We'll use the provided YAML file
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the stateful set and associated service:
|
||||
```bash
|
||||
@@ -177,7 +177,7 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
|
||||
|
||||
(pods will be replaced one by one)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy a better Consul cluster:
|
||||
```bash
|
||||
|
||||
@@ -74,7 +74,7 @@
|
||||
|
||||
- Let's create the Custom Resource Definition for our Coffee resource
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Load the CRD:
|
||||
```bash
|
||||
@@ -103,7 +103,7 @@ spec:
|
||||
taste: strong
|
||||
```
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a few types of coffee beans:
|
||||
```bash
|
||||
@@ -118,7 +118,7 @@ spec:
|
||||
|
||||
- By default, `kubectl get` only shows name and age of custom resources
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View the coffee beans that we just created:
|
||||
```bash
|
||||
@@ -195,7 +195,7 @@ There are many possibilities!
|
||||
|
||||
- Let's update our CRD using @@LINK[k8s/coffee-3.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Update the CRD:
|
||||
```bash
|
||||
|
||||
@@ -186,7 +186,7 @@ class: extra-details
|
||||
|
||||
.warning[If you want to use another name than `jean.doe`, update the YAML file!]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the global namespace for all users:
|
||||
```bash
|
||||
@@ -208,7 +208,7 @@ class: extra-details
|
||||
|
||||
(the token will be their password)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List the user's secrets:
|
||||
```bash
|
||||
@@ -228,7 +228,7 @@ class: extra-details
|
||||
|
||||
- Let's create a new context that will use that token to access the API
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add a new identity to our kubeconfig file:
|
||||
```bash
|
||||
@@ -254,7 +254,7 @@ class: extra-details
|
||||
|
||||
- Let's check that our access rights are set properly
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to access any resource:
|
||||
```bash
|
||||
@@ -280,7 +280,7 @@ class: extra-details
|
||||
|
||||
(many people prefer cfssl, easyrsa, or other tools; that's fine too!)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate the key and certificate signing request:
|
||||
```bash
|
||||
@@ -313,7 +313,7 @@ The command above generates:
|
||||
|
||||
## Sending the CSR to Kubernetes
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate and create the CSR resource:
|
||||
```bash
|
||||
@@ -344,7 +344,7 @@ The command above generates:
|
||||
|
||||
- For now, this is configured [through an experimental controller manager flag](https://github.com/kubernetes/kubernetes/issues/67324)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the static pod definition for the controller manager:
|
||||
```bash
|
||||
@@ -366,7 +366,7 @@ The command above generates:
|
||||
|
||||
- Let's inspect the CSR, and if it is valid, approve it
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Switch back to `cluster-admin`:
|
||||
```bash
|
||||
@@ -389,7 +389,7 @@ The command above generates:
|
||||
|
||||
## Obtaining the certificate
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Switch back to the user's identity:
|
||||
```bash
|
||||
@@ -414,7 +414,7 @@ The command above generates:
|
||||
|
||||
## Using the certificate
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add the key and certificate to kubeconfig:
|
||||
```bash
|
||||
|
||||
@@ -83,7 +83,7 @@
|
||||
|
||||
- Let's start with the YAML file for the current `rng` resource
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Dump the `rng` resource in YAML:
|
||||
```bash
|
||||
@@ -102,7 +102,7 @@
|
||||
|
||||
(It can't be that easy, right?)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Change `kind: Deployment` to `kind: DaemonSet`
|
||||
|
||||
@@ -169,7 +169,7 @@ We all knew this couldn't be that easy, right!
|
||||
|
||||
- The `--force` flag's actual name is `--validate=false`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to load our YAML file and ignore errors:
|
||||
```bash
|
||||
@@ -192,7 +192,7 @@ Wait ... Now, can it be *that* easy?
|
||||
|
||||
- Did we transform our `deployment` into a `daemonset`?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look at the resources that we have now:
|
||||
```bash
|
||||
@@ -289,7 +289,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
|
||||
|
||||
- That set of pods is defined by the *selector* of the `rng` service
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the *selector* in the `rng` service definition:
|
||||
```bash
|
||||
@@ -312,7 +312,7 @@ The master node has [taints](https://kubernetes.io/docs/concepts/configuration/t
|
||||
|
||||
- For instance, with `kubectl get`, `kubectl logs`, `kubectl delete` ... and more
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Get the list of pods matching selector `app=rng`:
|
||||
```bash
|
||||
@@ -480,7 +480,7 @@ be any interruption.*
|
||||
|
||||
- `kubectl label` can use selectors itself
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add `active=yes` to all pods that have `app=rng`:
|
||||
```bash
|
||||
@@ -501,7 +501,7 @@ be any interruption.*
|
||||
|
||||
- the selector of the service (that's the one we want to change)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Update the service to add `active: yes` to its selector:
|
||||
```bash
|
||||
@@ -546,7 +546,7 @@ be any interruption.*
|
||||
|
||||
## Updating the service selector, take 2
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Update the YAML manifest of the service
|
||||
|
||||
@@ -592,7 +592,7 @@ If we did everything correctly, the web UI shouldn't show any change.
|
||||
|
||||
## Removing a pod from the load balancer
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In one window, check the logs of that pod:
|
||||
```bash
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
|
||||
- The guest/admin account
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create all the dashboard resources, with the following command:
|
||||
```bash
|
||||
@@ -69,7 +69,7 @@
|
||||
|
||||
## Connecting to the dashboard
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check which port the dashboard is on:
|
||||
```bash
|
||||
@@ -81,7 +81,7 @@
|
||||
You'll want the `3xxxx` port.
|
||||
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Connect to http://oneofournodes:3xxxx/
|
||||
|
||||
@@ -115,7 +115,7 @@ The dashboard will then ask you which authentication you want to use.
|
||||
|
||||
- Seriously, don't leave that thing running!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Remove what we just created:
|
||||
```bash
|
||||
@@ -160,7 +160,7 @@ The dashboard will then ask you which authentication you want to use.
|
||||
|
||||
(named `kubernetes-dashboard:cluster-admin`)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create all the dashboard resources, with the following command:
|
||||
```bash
|
||||
@@ -177,7 +177,7 @@ The dashboard will then ask you which authentication you want to use.
|
||||
|
||||
- Kubernetes will automatically generate a token for that ServiceAccount
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Display the token:
|
||||
```bash
|
||||
@@ -197,7 +197,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
|
||||
|
||||
## Connecting to the dashboard
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check which port the dashboard is on:
|
||||
```bash
|
||||
@@ -209,7 +209,7 @@ Note that the secret name will actually be `cluster-admin-token-xxxxx`.
|
||||
You'll want the `3xxxx` port.
|
||||
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Connect to http://oneofournodes:3xxxx/
|
||||
|
||||
|
||||
@@ -1,157 +0,0 @@
|
||||
# Our demo apps
|
||||
|
||||
- We are going to use a few demo apps for demos and labs
|
||||
|
||||
- Let's get acquainted with them before we dive in!
|
||||
|
||||
---
|
||||
|
||||
## The `color` app
|
||||
|
||||
- Image name: `jpetazzo/color`, `ghcr.io/jpetazzo/color`
|
||||
|
||||
- Available for linux/amd64, linux/arm64, linux/arm/v7 platforms
|
||||
|
||||
- HTTP server listening on port 80
|
||||
|
||||
- Serves a web page with a single line of text
|
||||
|
||||
- The background of the page is derived from the hostname
|
||||
|
||||
(e.g. if the hostname is `blue-xyz-123`, the background is `blue`)
|
||||
|
||||
- The web page is "curl-friendly"
|
||||
|
||||
(it contains `\r` characters to hide HTML tags and declutter the output)
|
||||
|
||||
---
|
||||
|
||||
## The `color` app in action
|
||||
|
||||
- Create a Deployment called `blue` using image `jpetazzo/color`
|
||||
|
||||
- Expose that Deployment with a Service
|
||||
|
||||
- Connect to the Service with a web browser
|
||||
|
||||
- Connect to the Service with `curl`
|
||||
|
||||
---
|
||||
|
||||
## Dockercoins
|
||||
|
||||
- App with 5 microservices:
|
||||
|
||||
- `worker` (runs an infinite loop connecting to the other services)
|
||||
|
||||
- `rng` (web service; generates random numbers)
|
||||
|
||||
- `hasher` (web service; computes SHA sums)
|
||||
|
||||
- `redis` (holds a single counter incremented by the `worker` at each loop)
|
||||
|
||||
- `webui` (web app; displays a graph showing the rate of increase of the counter)
|
||||
|
||||
- Uses a mix of Node, Python, Ruby
|
||||
|
||||
- Very simple components (approx. 50 lines of code for the most complicated one)
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## Deploying Dockercoins
|
||||
|
||||
- Pre-built images available as `dockercoins/<component>:v0.1`
|
||||
|
||||
(e.g. `dockercoins/worker:v0.1`)
|
||||
|
||||
- Containers "discover" each other through DNS
|
||||
|
||||
(e.g. worker connects to `http://hasher/`)
|
||||
|
||||
- A Kubernetes YAML manifest is available in *the* repo
|
||||
|
||||
---
|
||||
|
||||
## The repository
|
||||
|
||||
- When we refer to "the" repository, it means:
|
||||
|
||||
https://github.com/jpetazzo/container.training
|
||||
|
||||
- It hosts slides, demo apps, deployment scripts...
|
||||
|
||||
- All the sample commands, labs, etc. will assume that it's available in:
|
||||
|
||||
`~/container.training`
|
||||
|
||||
- Let's clone the repo in our environment!
|
||||
|
||||
---
|
||||
|
||||
## Cloning the repo
|
||||
|
||||
.lab[
|
||||
|
||||
- There is a convenient shortcut to clone the repository:
|
||||
```bash
|
||||
git clone https://container.training
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
While the repository clones, fork it, star it ~~subscribe and hit the bell!~~
|
||||
|
||||
---
|
||||
|
||||
## Running Dockercoins
|
||||
|
||||
- All the Kubernetes manifests are in the `k8s` subdirectory
|
||||
|
||||
- This directory has a `dockercoins.yaml` manifest
|
||||
|
||||
.lab[
|
||||
|
||||
- Deploy Dockercoins:
|
||||
```bash
|
||||
kubectl apply -f ~/container.training/k8s/dockercoins.yaml
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
- The `webui` is exposed with a `NodePort` service
|
||||
|
||||
- Connect to it (through the `NodePort` or `port-forward`)
|
||||
|
||||
- Note, it might take a minute for the worker to start
|
||||
|
||||
---
|
||||
|
||||
## Details
|
||||
|
||||
- If the `worker` Deployment is scaled up, the graph should go up
|
||||
|
||||
- The `rng` Service is meant to be a bottleneck
|
||||
|
||||
(capping the graph to 10/second until `rng` is scaled up)
|
||||
|
||||
- There is artificial latency in the different services
|
||||
|
||||
(so that the app doesn't consume CPU/RAM/network)
|
||||
|
||||
---
|
||||
|
||||
## More colors
|
||||
|
||||
- The repository also contains a `rainbow.yaml` manifest
|
||||
|
||||
- It creates three namespaces (`blue`, `green`, `red`)
|
||||
|
||||
- In each namespace, there is an instance of the `color` app
|
||||
|
||||
(we can use that later to do *literal* blue-green deployment!)
|
||||
@@ -52,7 +52,7 @@
|
||||
|
||||
- Let's make sure we have everything we need first
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Log into the `dmuc1` machine
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
|
||||
## Starting API server
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to start the API server:
|
||||
```bash
|
||||
@@ -118,7 +118,7 @@ it cannot start without it.
|
||||
|
||||
## Starting etcd
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to start etcd:
|
||||
```bash
|
||||
@@ -144,7 +144,7 @@ serving insecure client requests on 127.0.0.1:2379, this is strongly discouraged
|
||||
|
||||
- That argument should be a comma-separated list of URLs
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start API server:
|
||||
```bash
|
||||
@@ -161,7 +161,7 @@ Success!
|
||||
|
||||
- Let's try a few "classic" commands
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List nodes:
|
||||
```bash
|
||||
@@ -201,7 +201,7 @@ class: extra-details
|
||||
|
||||
- Let's run a web server!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a Deployment with NGINX:
|
||||
```bash
|
||||
@@ -216,7 +216,7 @@ Success?
|
||||
|
||||
## Checking our Deployment status
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look at pods, deployments, etc.:
|
||||
```bash
|
||||
@@ -249,7 +249,7 @@ And, there is no ReplicaSet, and no Pod.
|
||||
|
||||
## Starting the controller manager
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to start the controller manager:
|
||||
```bash
|
||||
@@ -289,7 +289,7 @@ Using the inClusterConfig. This might not work.
|
||||
|
||||
## Starting the controller manager (for real)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start the controller manager:
|
||||
```bash
|
||||
@@ -304,7 +304,7 @@ Success!
|
||||
|
||||
## Checking our Deployment status
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check all our resources again:
|
||||
```bash
|
||||
@@ -371,7 +371,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
|
||||
|
||||
- The ReplicaSet controller will no longer create pods referencing the (missing) token
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Programmatically change the `default` ServiceAccount:
|
||||
```bash
|
||||
@@ -402,7 +402,7 @@ Of course, we don't need to perform *all* the solutions mentioned here.
|
||||
|
||||
- Once we patch the default service account, the ReplicaSet can create a Pod
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that we now have a pod:
|
||||
```bash
|
||||
@@ -437,7 +437,7 @@ If we're impatient, we can restart the controller manager.
|
||||
|
||||
- We're going to use Docker (because it's the default option)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start the Docker Engine:
|
||||
```bash
|
||||
@@ -479,7 +479,7 @@ docker run alpine echo hello world
|
||||
|
||||
- Or we can generate the file with `kubectl`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the file `~/.kube/config` with `kubectl`:
|
||||
```bash
|
||||
@@ -519,7 +519,7 @@ clusters:
|
||||
|
||||
## Starting kubelet
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start kubelet with that kubeconfig file:
|
||||
```bash
|
||||
@@ -536,7 +536,7 @@ Success!
|
||||
|
||||
- Let's check that our node registered correctly
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List the nodes in our cluster:
|
||||
```bash
|
||||
@@ -555,7 +555,7 @@ Its name will be its hostname (it should be `dmuc1`).
|
||||
|
||||
- Let's check if our pod is running
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List all resources:
|
||||
```bash
|
||||
@@ -594,7 +594,7 @@ Which is normal: it needs to be *scheduled*.
|
||||
|
||||
- Just like for controller manager, we can use `--kubeconfig` or `--master`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start the scheduler:
|
||||
```bash
|
||||
@@ -613,7 +613,7 @@ Which is normal: it needs to be *scheduled*.
|
||||
|
||||
- Then it will be `Running`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check pod status:
|
||||
```bash
|
||||
@@ -654,7 +654,7 @@ class: extra-details
|
||||
|
||||
- Let's check that our pod correctly runs NGINX
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check our pod's IP address:
|
||||
```bash
|
||||
@@ -676,7 +676,7 @@ We should see the `Welcome to nginx!` page.
|
||||
|
||||
- We can now create a Service associated with this Deployment
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Expose the Deployment's port 80:
|
||||
```bash
|
||||
@@ -705,7 +705,7 @@ This won't work. We need kube-proxy to enable internal communication.
|
||||
|
||||
(although that will be deprecated in the future)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Start kube-proxy:
|
||||
```bash
|
||||
@@ -720,7 +720,7 @@ This won't work. We need kube-proxy to enable internal communication.
|
||||
|
||||
- Now that kube-proxy is running, we should be able to connect
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the Service's ClusterIP again, and retry connecting:
|
||||
```bash
|
||||
@@ -742,7 +742,7 @@ class: extra-details
|
||||
|
||||
- When a Service is created or updated, kube-proxy creates iptables rules
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check out the `OUTPUT` chain in the `nat` table:
|
||||
```bash
|
||||
@@ -766,7 +766,7 @@ class: extra-details
|
||||
|
||||
- The last command showed a chain named `KUBE-SVC-...` corresponding to our service
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that `KUBE-SVC-...` chain:
|
||||
```bash
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
|
||||
- ... But losing a node = losing the volumes on that node!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install the local path storage provisioner:
|
||||
```bash
|
||||
@@ -49,7 +49,7 @@
|
||||
|
||||
- Or we need to tag a StorageClass to be used as the default one
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List StorageClasses:
|
||||
```bash
|
||||
@@ -68,7 +68,7 @@ We should see the `local-path` StorageClass.
|
||||
|
||||
`storageclass.kubernetes.io/is-default-class: true`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Tag the StorageClass so that it's the default one:
|
||||
```bash
|
||||
@@ -99,7 +99,7 @@ Now, the StorageClass should have `(default)` next to its name.
|
||||
|
||||
- All these resources are grouped in a convenient YAML file
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install the operator:
|
||||
```bash
|
||||
@@ -114,7 +114,7 @@ Now, the StorageClass should have `(default)` next to its name.
|
||||
|
||||
- Let's see which CRDs were created
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List all CRDs:
|
||||
```bash
|
||||
@@ -135,7 +135,7 @@ This operator supports ElasticSearch, but also Kibana and APM. Cool!
|
||||
|
||||
- We need to create that namespace
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the `eck-demo` namespace:
|
||||
```bash
|
||||
@@ -180,7 +180,7 @@ ServiceAccount is located.
|
||||
- whether to use TLS or not
|
||||
- etc.
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create our ElasticSearch cluster:
|
||||
```bash
|
||||
@@ -197,7 +197,7 @@ ServiceAccount is located.
|
||||
|
||||
- It will report our cluster status through the CRD
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the logs of the operator:
|
||||
```bash
|
||||
@@ -231,7 +231,7 @@ ServiceAccount is located.
|
||||
|
||||
- But let's check at least if ElasticSearch is up!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Get the ClusterIP of our ES instance:
|
||||
```bash
|
||||
@@ -255,7 +255,7 @@ We get an authentication error. Our cluster is protected!
|
||||
|
||||
- It generates a random password and stores it in a Secret
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Extract the password:
|
||||
```bash
|
||||
@@ -280,7 +280,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
|
||||
|
||||
- We'll deploy a filebeat DaemonSet to collect node logs
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy filebeat:
|
||||
```bash
|
||||
@@ -314,7 +314,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
|
||||
|
||||
- Let's give it a try!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy a Kibana instance:
|
||||
```bash
|
||||
@@ -345,7 +345,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
|
||||
|
||||
- It's using the same user/password as ElasticSearch
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Get the NodePort allocated to Kibana:
|
||||
```bash
|
||||
@@ -364,7 +364,7 @@ We should see a JSON payload with the `"You Know, for Search"` tagline.
|
||||
|
||||
After the Kibana UI loads, we need to click around a bit
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Pick "explore on my own"
|
||||
|
||||
@@ -404,7 +404,7 @@ After the Kibana UI loads, we need to click around a bit
|
||||
|
||||
- We prepared yet another manifest for that!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy Cerebro:
|
||||
```bash
|
||||
@@ -428,7 +428,7 @@ After the Kibana UI loads, we need to click around a bit
|
||||
|
||||
- Let's change that!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the ElasticSearch cluster manifest:
|
||||
```bash
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
- When we use `kubectl describe` on an object, `kubectl` retrieves the associated events
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- See the API requests happening when we use `kubectl describe`:
|
||||
```bash
|
||||
@@ -82,7 +82,7 @@
|
||||
|
||||
- Let's create an event related to a Node, based on @@LINK[k8s/event-node.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `k8s/event-node.yaml`
|
||||
|
||||
@@ -100,7 +100,7 @@
|
||||
|
||||
- Let's create an event related to a Pod, based on @@LINK[k8s/event-pod.yaml]
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a pod
|
||||
|
||||
|
||||
@@ -77,18 +77,17 @@
|
||||
|
||||
- Create a new branch in your fork; e.g. `prod`
|
||||
|
||||
(e.g. with "branch" dropdown through the GitHub web UI)
|
||||
(e.g. by adding a line in the README through the GitHub web UI)
|
||||
|
||||
- This is the branch that we are going to use for deployment
|
||||
|
||||
---
|
||||
|
||||
## Setting up Flux with kustomize
|
||||
## Setting up Flux
|
||||
|
||||
- Clone the Flux repository:
|
||||
```bash
|
||||
```
|
||||
git clone https://github.com/fluxcd/flux
|
||||
cd flux
|
||||
```
|
||||
|
||||
- Edit `deploy/flux-deployment.yaml`
|
||||
@@ -100,27 +99,8 @@
|
||||
```
|
||||
|
||||
- Apply all the YAML:
|
||||
```bash
|
||||
kubectl apply -k deploy/
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Setting up Flux with Helm
|
||||
|
||||
- Add Flux helm repo:
|
||||
```bash
|
||||
helm repo add fluxcd https://charts.fluxcd.io
|
||||
```
|
||||
|
||||
- Install Flux:
|
||||
```bash
|
||||
kubectl create namespace flux
|
||||
helm upgrade --install flux \
|
||||
--set git.url=git@github.com:your-git-username/kubercoins \
|
||||
--set git.branch=prod \
|
||||
--namespace flux \
|
||||
fluxcd/flux
|
||||
kubectl apply -f deploy/
|
||||
```
|
||||
|
||||
---
|
||||
@@ -130,8 +110,8 @@
|
||||
- When it starts, Flux generates an SSH key
|
||||
|
||||
- Display that key:
|
||||
```bash
|
||||
kubectl -n flux logs deployment/flux | grep identity.pub | cut -d '"' -f2
|
||||
```
|
||||
kubectl logs deployment/flux | grep identity
|
||||
```
|
||||
|
||||
- Then add that key to the repository, giving it **write** access
|
||||
@@ -177,14 +157,14 @@
|
||||
## Setting up Gitkube
|
||||
|
||||
- Install the CLI:
|
||||
```bash
|
||||
```
|
||||
sudo curl -L -o /usr/local/bin/gitkube \
|
||||
https://github.com/hasura/gitkube/releases/download/v0.2.1/gitkube_linux_amd64
|
||||
sudo chmod +x /usr/local/bin/gitkube
|
||||
```
|
||||
|
||||
- Install Gitkube on the cluster:
|
||||
```bash
|
||||
```
|
||||
gitkube install --expose ClusterIP
|
||||
```
|
||||
|
||||
@@ -216,20 +196,20 @@
|
||||
## Pushing to our remote
|
||||
|
||||
- Get the `gitkubed` IP address:
|
||||
```bash
|
||||
```
|
||||
kubectl -n kube-system get svc gitkubed
|
||||
IP=$(kubectl -n kube-system get svc gitkubed -o json |
|
||||
jq -r .spec.clusterIP)
|
||||
```
|
||||
|
||||
- Get ourselves a sample repository with resource YAML files:
|
||||
```bash
|
||||
```
|
||||
git clone git://github.com/jpetazzo/kubercoins
|
||||
cd kubercoins
|
||||
```
|
||||
|
||||
- Add the remote and push to it:
|
||||
```bash
|
||||
```
|
||||
git remote add k8s ssh://default-example@$IP/~/git/default-example
|
||||
git push k8s master
|
||||
```
|
||||
|
||||
@@ -79,9 +79,9 @@
|
||||
|
||||
## Creating a new namespace
|
||||
|
||||
- This will make sure that we don't collide / conflict with previous labs and exercises
|
||||
- This will make sure that we don't collide / conflict with previous exercises
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the yellow namespace:
|
||||
```bash
|
||||
@@ -103,7 +103,7 @@
|
||||
|
||||
https://github.com/jpetazzo/kubercoins
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Clone that repository:
|
||||
```bash
|
||||
@@ -152,7 +152,7 @@ It will use the default success threshold (1 successful attempt = alive).
|
||||
|
||||
- Let's add the liveness probe, then deploy DockerCoins
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `rng-deployment.yaml` and add the liveness probe
|
||||
```bash
|
||||
@@ -180,7 +180,7 @@ It will use the default success threshold (1 successful attempt = alive).
|
||||
|
||||
- Let's generate traffic and see what happens!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Get the ClusterIP address of the rng service:
|
||||
```bash
|
||||
@@ -195,7 +195,7 @@ It will use the default success threshold (1 successful attempt = alive).
|
||||
|
||||
- Each command below will show us what's happening on a different level
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In one window, monitor cluster events:
|
||||
```bash
|
||||
@@ -220,7 +220,7 @@ It will use the default success threshold (1 successful attempt = alive).
|
||||
|
||||
- Let's use `ab` to send concurrent requests to rng
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In yet another window, generate traffic:
|
||||
```bash
|
||||
|
||||
@@ -1,18 +1,16 @@
|
||||
# Healthchecks
|
||||
|
||||
- Containers can have *healthchecks*
|
||||
- Kubernetes provides two kinds of healthchecks: liveness and readiness
|
||||
|
||||
- There are three kinds of healthchecks, corresponding to very different use-cases:
|
||||
- Healthchecks are *probes* that apply to *containers* (not to pods)
|
||||
|
||||
- liveness = detect when a container is "dead" and needs to be restarted
|
||||
- Each container can have two (optional) probes:
|
||||
|
||||
- readiness = detect when a container is ready to serve traffic
|
||||
- liveness = is this container dead or alive?
|
||||
|
||||
- startup = detect if a container has finished booting
|
||||
- readiness = is this container ready to serve traffic?
|
||||
|
||||
- These healthchecks are optional (we can use none, all, or some of them)
|
||||
|
||||
- Different probes are available (HTTP request, TCP connection, program execution)
|
||||
- Different probes are available (HTTP, TCP, program execution)
|
||||
|
||||
- Let's see the difference and how to use them!
|
||||
|
||||
@@ -20,13 +18,11 @@
|
||||
|
||||
## Liveness probe
|
||||
|
||||
*This container is dead, we don't know how to fix it, other than restarting it.*
|
||||
|
||||
- Indicates if the container is dead or alive
|
||||
|
||||
- A dead container cannot come back to life
|
||||
|
||||
- If the liveness probe fails, the container is killed (destroyed)
|
||||
- If the liveness probe fails, the container is killed
|
||||
|
||||
(to make really sure that it's really dead; no zombies or undeads!)
|
||||
|
||||
@@ -54,31 +50,9 @@
|
||||
|
||||
---
|
||||
|
||||
## Readiness probe (1)
|
||||
## Readiness probe
|
||||
|
||||
*Make sure that a container is ready before continuing a rolling update.*
|
||||
|
||||
- Indicates if the container is ready to handle traffic
|
||||
|
||||
- When doing a rolling update, the Deployment controller waits for Pods to be ready
|
||||
|
||||
(a Pod is ready when all the containers in the Pod are ready)
|
||||
|
||||
- Improves reliability and safety of rolling updates:
|
||||
|
||||
- don't roll out a broken version (that doesn't pass readiness checks)
|
||||
|
||||
- don't lose processing capacity during a rolling update
|
||||
|
||||
---
|
||||
|
||||
## Readiness probe (2)
|
||||
|
||||
*Temporarily remove a container (overloaded or otherwise) from a Service load balancer.*
|
||||
|
||||
- A container can mark itself "not ready" temporarily
|
||||
|
||||
(e.g. if it's overloaded or needs to reload/restart/garbage collect...)
|
||||
- Indicates if the container is ready to serve traffic
|
||||
|
||||
- If a container becomes "unready" it might be ready again soon
|
||||
|
||||
@@ -106,9 +80,9 @@
|
||||
|
||||
- runtime is busy doing garbage collection or initial data load
|
||||
|
||||
- To redirect new connections to other Pods
|
||||
- For processes that take a long time to start
|
||||
|
||||
(e.g. fail the readiness probe when the Pod's load is too high)
|
||||
(more on that later)
|
||||
|
||||
---
|
||||
|
||||
@@ -146,35 +120,27 @@
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Startup probe
|
||||
|
||||
*The container takes too long to start, and is killed by the liveness probe!*
|
||||
- Kubernetes 1.16 introduces a third type of probe: `startupProbe`
|
||||
|
||||
- By default, probes (including liveness) start immediately
|
||||
(it is in `alpha` in Kubernetes 1.16)
|
||||
|
||||
- With the default probe interval and failure threshold:
|
||||
- It can be used to indicate "container not ready *yet*"
|
||||
|
||||
*a container must respond in less than 30 seconds, or it will be killed!*
|
||||
- process is still starting
|
||||
|
||||
- There are two ways to avoid that:
|
||||
- loading external data, priming caches
|
||||
|
||||
- set `initialDelaySeconds` (a fixed, rigid delay)
|
||||
- Before Kubernetes 1.16, we had to use the `initialDelaySeconds` parameter
|
||||
|
||||
- use a `startupProbe`
|
||||
(available for both liveness and readiness probes)
|
||||
|
||||
- Kubernetes will run only the startup probe, and when it succeeds, run the other probes
|
||||
- `initialDelaySeconds` is a rigid delay (always wait X before running probes)
|
||||
|
||||
---
|
||||
|
||||
## When to use a startup probe
|
||||
|
||||
- For containers that take a long time to start
|
||||
|
||||
(more than 30 seconds)
|
||||
|
||||
- Especially if that time can vary a lot
|
||||
|
||||
(e.g. fast in dev, slow in prod, or the other way around)
|
||||
- `startupProbe` works better when a container start time can vary a lot
|
||||
|
||||
---
|
||||
|
||||
@@ -224,16 +190,17 @@ Here is a pod template for the `rng` web service of the DockerCoins app:
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: healthy-app
|
||||
name: rng-with-liveness
|
||||
spec:
|
||||
containers:
|
||||
- name: myapp
|
||||
image: myregistry.io/myapp:v1.0
|
||||
- name: rng
|
||||
image: dockercoins/rng:v0.1
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
path: /
|
||||
port: 80
|
||||
periodSeconds: 5
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 1
|
||||
```
|
||||
|
||||
If the backend serves an error, or takes longer than 1s, 3 times in a row, it gets killed.
|
||||
@@ -300,7 +267,7 @@ If the Redis process becomes unresponsive, it will be killed.
|
||||
|
||||
(In that context, worker = process that doesn't accept connections)
|
||||
|
||||
- Readiness is useful mostly for rolling updates
|
||||
- Readiness isn't useful
|
||||
|
||||
(because workers aren't backends for a service)
|
||||
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
|
||||
- If you haven't done it before, you need to add the repo for that chart
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add the repo that holds the chart for the OWASP Juice Shop:
|
||||
```bash
|
||||
@@ -63,7 +63,7 @@
|
||||
|
||||
- We can use `helm pull` to download a chart from a repo
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Download the tarball for `juice/juice-shop`:
|
||||
```bash
|
||||
@@ -85,7 +85,7 @@
|
||||
|
||||
- Let's look at the files and directories in the `juice-shop` chart
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Display the tree structure of the chart we just downloaded:
|
||||
```bash
|
||||
@@ -108,7 +108,7 @@ We see the components mentioned above: `Chart.yaml`, `templates/`, `values.yaml`
|
||||
|
||||
(using the standard Go template library)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look at the template file for the Service resource:
|
||||
```bash
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
(Resource names, service types, number of replicas...)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a sample chart:
|
||||
```bash
|
||||
@@ -27,7 +27,7 @@
|
||||
|
||||
- There is a convenient `dockercoins.yml` in the repo
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Copy the YAML file to the `templates` subdirectory in the chart:
|
||||
```bash
|
||||
@@ -50,7 +50,7 @@
|
||||
|
||||
(as surprising as it might seem!)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Let's try to install the chart:
|
||||
```
|
||||
@@ -79,7 +79,7 @@ kind: Service, namespace: default, name: hasher
|
||||
|
||||
- we can also tell Helm to use a different namespace
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a new namespace:
|
||||
```bash
|
||||
@@ -99,7 +99,7 @@ kind: Service, namespace: default, name: hasher
|
||||
|
||||
- Let's try to see the release that we just deployed
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List Helm releases:
|
||||
```bash
|
||||
@@ -118,7 +118,7 @@ We have to specify its namespace (or switch to that namespace).
|
||||
|
||||
- Try again, with the correct namespace
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List Helm releases in `helmcoins`:
|
||||
```bash
|
||||
@@ -133,7 +133,7 @@ We have to specify its namespace (or switch to that namespace).
|
||||
|
||||
- We can check the worker logs, or the web UI
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Retrieve the NodePort number of the web UI:
|
||||
```bash
|
||||
@@ -181,7 +181,7 @@ have details about recommended annotations and labels.
|
||||
|
||||
- Let's remove that chart before moving on
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Delete the release (don't forget to specify the namespace):
|
||||
```bash
|
||||
|
||||
@@ -24,7 +24,7 @@
|
||||
|
||||
- This will give us a basic chart that we will customize
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a basic chart:
|
||||
```bash
|
||||
@@ -81,7 +81,7 @@ This creates a basic chart in the directory `helmcoins`.
|
||||
|
||||
- Exception: for redis, we want to use the official image redis:latest
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Write YAML files for the 5 components, with the following model:
|
||||
```yaml
|
||||
@@ -98,7 +98,7 @@ This creates a basic chart in the directory `helmcoins`.
|
||||
|
||||
- For convenience, let's work in a separate namespace
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a new namespace (if it doesn't already exist):
|
||||
```bash
|
||||
@@ -126,7 +126,7 @@ This creates a basic chart in the directory `helmcoins`.
|
||||
helm upgrade COMPONENT-NAME CHART-DIRECTORY --install
|
||||
```
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install the 5 components of DockerCoins:
|
||||
```bash
|
||||
@@ -165,7 +165,7 @@ class: extra-details
|
||||
|
||||
- Let's see if DockerCoins is working!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the logs of the worker:
|
||||
```bash
|
||||
@@ -187,7 +187,7 @@ There are *many* issues to fix!
|
||||
|
||||
- It looks like our images can't be found
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Use `kubectl describe` on any of the pods in error
|
||||
|
||||
@@ -205,7 +205,7 @@ There are *many* issues to fix!
|
||||
|
||||
(and try to find the one generating the Deployment resource)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Show the structure of the `helmcoins` chart that Helm generated:
|
||||
```bash
|
||||
@@ -228,7 +228,7 @@ There are *many* issues to fix!
|
||||
|
||||
- Let's look for `AppVersion` there!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the file `helmcoins/Chart.yaml`
|
||||
|
||||
@@ -250,7 +250,7 @@ There are *many* issues to fix!
|
||||
|
||||
(to match what we've specified in our values YAML files)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `helmcoins/templates/deployment.yaml`
|
||||
|
||||
@@ -266,7 +266,7 @@ There are *many* issues to fix!
|
||||
|
||||
- To use the new template, we need to *upgrade* the release to use that chart
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Upgrade all components:
|
||||
```bash
|
||||
@@ -306,7 +306,7 @@ We should see all pods "Running". But ... not all of them are READY.
|
||||
|
||||
(`kubectl describe` will retrieve the events related to the object)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the events for the redis pods:
|
||||
```bash
|
||||
@@ -345,7 +345,7 @@ It's failing both its liveness and readiness probes!
|
||||
|
||||
`{{ end }}` at the end
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `helmcoins/templates/deployment.yaml`
|
||||
|
||||
@@ -386,7 +386,7 @@ This is what the new YAML should look like (added lines in yellow):
|
||||
|
||||
- We need to upgrade all the services again to use the new chart
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Upgrade all components:
|
||||
```bash
|
||||
@@ -410,7 +410,7 @@ Everything should now be running!
|
||||
|
||||
- Is this working now?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Let's check the logs of the worker:
|
||||
```bash
|
||||
@@ -429,7 +429,7 @@ Typically, that error means that the `redis` service doesn't exist.
|
||||
|
||||
- What about the services created by our chart?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the list of services:
|
||||
```bash
|
||||
@@ -452,7 +452,7 @@ We need to change that!
|
||||
|
||||
- `include` indicates a *template block* defined somewhere else
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Find where that `fullname` thing is defined:
|
||||
```bash
|
||||
@@ -473,7 +473,7 @@ We can look at the definition, but it's fairly complex ...
|
||||
|
||||
- The name of the release is available as `{{ .Release.Name }}`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `helmcoins/templates/service.yaml`
|
||||
|
||||
@@ -528,7 +528,7 @@ We can look at the definition, but it's fairly complex ...
|
||||
|
||||
- Let's add a `service.port` value to the redis release
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `redis.yaml` to add:
|
||||
```yaml
|
||||
@@ -563,7 +563,7 @@ We can look at the definition, but it's fairly complex ...
|
||||
|
||||
## Changing the deployment template
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `helmcoins/templates/deployment.yaml`
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
|
||||
- First, let's edit `Chart.yaml`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- In `Chart.yaml`, fill the `dependencies` section:
|
||||
```yaml
|
||||
@@ -93,7 +93,7 @@ use Bitnami's Redis chart.
|
||||
|
||||
- After adding the dependency, we ask Helm to pin and download it
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Ask Helm:
|
||||
```bash
|
||||
@@ -262,7 +262,7 @@ class: extra-details
|
||||
|
||||
## Embedding a dependency
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Decompress the chart:
|
||||
```yaml
|
||||
|
||||
@@ -203,7 +203,7 @@ class: extra-details
|
||||
|
||||
- If the `helm` CLI is not installed in your environment, install it
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check if `helm` is installed:
|
||||
```bash
|
||||
@@ -232,7 +232,7 @@ class: extra-details
|
||||
|
||||
- They can be managed (installed, upgraded...) with the `helm` CLI
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy Tiller:
|
||||
```bash
|
||||
@@ -258,7 +258,7 @@ class: extra-details
|
||||
- In a more realistic deployment, you might create per-user or per-team
|
||||
service accounts, roles, and role bindings
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Grant `cluster-admin` role to `kube-system:default` service account:
|
||||
```bash
|
||||
@@ -329,7 +329,7 @@ class: extra-details
|
||||
|
||||
- We can use `helm search hub <keyword>`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look for the OWASP Juice Shop app:
|
||||
```bash
|
||||
@@ -351,7 +351,7 @@ Then go to → https://artifacthub.io/packages/helm/securecodebox/juice-shop
|
||||
|
||||
- We can also use the Artifact Hub search feature
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Go to https://artifacthub.io/
|
||||
|
||||
@@ -367,7 +367,7 @@ Then go to → https://artifacthub.io/packages/helm/seccurecodebox/juice-shop
|
||||
|
||||
- Click on the "Install" button, it will show instructions
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- First, add the repository for that chart:
|
||||
```bash
|
||||
@@ -393,7 +393,7 @@ Note: it is also possible to install directly a chart, with `--repo https://...`
|
||||
|
||||
- We can also use `--generate-name` to ask Helm to generate a name for us
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List the releases:
|
||||
```bash
|
||||
@@ -433,7 +433,7 @@ class: extra-details
|
||||
|
||||
- We can use a selector to see these resources
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- List all the resources created by this release:
|
||||
```bash
|
||||
@@ -472,7 +472,7 @@ It is defined in that chart. In other words, not all charts will provide this la
|
||||
|
||||
- We can inspect a chart with `helm show` or `helm inspect`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look at the README for the app:
|
||||
```bash
|
||||
@@ -500,7 +500,7 @@ The `readme` may or may not have (accurate) explanations for the values.
|
||||
|
||||
- We are going to update `my-juice-shop` to change the type of the service
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Update `my-juice-shop`:
|
||||
```bash
|
||||
@@ -523,7 +523,7 @@ All unspecified values will take the default values defined in the chart.
|
||||
|
||||
- Let's check the app that we just installed
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the node port allocated to the service:
|
||||
```bash
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
- If you haven't done it before, you need to add the repo for that chart
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add the repo that holds the chart for the OWASP Juice Shop:
|
||||
```bash
|
||||
@@ -33,7 +33,7 @@
|
||||
|
||||
- Let's use the `juice/juice-shop` chart as an example
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install a release called `orange` with the chart `juice/juice-shop`:
|
||||
```bash
|
||||
@@ -53,7 +53,7 @@
|
||||
|
||||
- Helm stores successive revisions of each release
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- View the history for that release:
|
||||
```bash
|
||||
@@ -76,7 +76,7 @@ Where does that come from?
|
||||
|
||||
- ConfigMaps, Secrets?
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Look for ConfigMaps and Secrets:
|
||||
```bash
|
||||
@@ -95,7 +95,7 @@ We should see a number of secrets with TYPE `helm.sh/release.v1`.
|
||||
|
||||
- Let's find out what is in these Helm secrets
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Examine the secret corresponding to the second release of `orange`:
|
||||
```bash
|
||||
@@ -113,7 +113,7 @@ There is a key named `release`.
|
||||
|
||||
- Let's see what's in this `release` thing!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Dump the secret:
|
||||
```bash
|
||||
@@ -131,7 +131,7 @@ Secrets are encoded in base64. We need to decode that!
|
||||
|
||||
- We can pipe the output through `base64 -d` or use go-template's `base64decode`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Decode the secret:
|
||||
```bash
|
||||
@@ -155,7 +155,7 @@ Let's try one more round of decoding!
|
||||
|
||||
- Just add one more base64 decode filter
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Decode it twice:
|
||||
```bash
|
||||
@@ -175,7 +175,7 @@ Let's try one more round of decoding!
|
||||
|
||||
- We could use `file` to figure out the data type
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Pipe the decoded release through `file -`:
|
||||
```bash
|
||||
@@ -196,7 +196,7 @@ Gzipped data! It can be decoded with `gunzip -c`.
|
||||
|
||||
- Let's uncompress the data and save it to a file
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Rerun the previous command, but with `| gunzip -c > release-info` :
|
||||
```bash
|
||||
|
||||
@@ -119,7 +119,7 @@
|
||||
|
||||
- Let's try to install a couple releases with that schema!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try an invalid `pullPolicy`:
|
||||
```bash
|
||||
@@ -147,7 +147,7 @@
|
||||
|
||||
- We can fix that with `"additionalProperties": false`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `values.schema.json` to add `"additionalProperties": false`
|
||||
```json
|
||||
@@ -165,7 +165,7 @@
|
||||
|
||||
## Testing with unknown properties
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Try to pass an extra property:
|
||||
```bash
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
|
||||
(it is a web server that will use 1s of CPU for each HTTP request)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy the web server:
|
||||
```bash
|
||||
@@ -101,7 +101,7 @@
|
||||
|
||||
- Let's start a bunch of commands to watch what is happening
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Monitor pod CPU usage:
|
||||
```bash
|
||||
@@ -143,7 +143,7 @@
|
||||
|
||||
- We will use `ab` (Apache Bench) to send traffic
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Send a lot of requests to the service, with a concurrency level of 3:
|
||||
```bash
|
||||
@@ -170,7 +170,7 @@ The CPU utilization should increase to 100%.
|
||||
|
||||
- There is a helper command to do that for us: `kubectl autoscale`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the HPA policy for the `busyhttp` deployment:
|
||||
```bash
|
||||
@@ -209,7 +209,7 @@ This can also be set with `--cpu-percent=`.
|
||||
|
||||
- Since our server can use up to 1 core, let's request 1 core
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the Deployment definition:
|
||||
```bash
|
||||
@@ -287,7 +287,7 @@ This can also be set with `--cpu-percent=`.
|
||||
|
||||
- Since `busyhttp` uses CPU cycles, let's stop it before moving on
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Delete the `busyhttp` Deployment:
|
||||
```bash
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
|
||||
- That's the easy part!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create a new namespace and switch to it:
|
||||
```bash
|
||||
@@ -90,7 +90,7 @@
|
||||
|
||||
(by about 100ms per `worker` Pod after the 3rd worker)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check the `webui` port and open it in your browser:
|
||||
```bash
|
||||
@@ -114,7 +114,7 @@
|
||||
|
||||
- It monitors exactly one URL, that must be passed as a command-line argument
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Deploy `httplat`:
|
||||
```bash
|
||||
@@ -148,7 +148,7 @@ class: extra-details
|
||||
|
||||
(because we can configure it dynamically with annotations)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- If it's not installed yet on the cluster, install Prometheus:
|
||||
```bash
|
||||
@@ -169,7 +169,7 @@ class: extra-details
|
||||
|
||||
- We can use annotations to tell Prometheus to collect the metrics
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Tell Prometheus to "scrape" our latency exporter:
|
||||
```bash
|
||||
@@ -191,7 +191,7 @@ You'll need to instruct it to scrape http://httplat.customscaling.svc:9080/metri
|
||||
|
||||
- Before moving on, confirm that Prometheus has our metrics
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Connect to Prometheus
|
||||
|
||||
@@ -407,7 +407,7 @@ Putting togeher @@LINK[k8s/hpa-v2-pa-httplat.yaml]:
|
||||
|
||||
- Of course, it won't quite work yet (we're missing the *Prometheus adapter*)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the HorizontalPodAutoscaler:
|
||||
```bash
|
||||
@@ -469,7 +469,7 @@ no custom metrics API (custom.metrics.k8s.io) registered
|
||||
|
||||
- There is ~~an app~~ a Helm chart for that
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Install the Prometheus adapter:
|
||||
```bash
|
||||
@@ -534,7 +534,7 @@ Here is the rule that we need to add to the configuration:
|
||||
|
||||
## Editing the adapter's configuration
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit the adapter's ConfigMap:
|
||||
```bash
|
||||
|
||||
@@ -1,181 +0,0 @@
|
||||
## Optimizing request flow
|
||||
|
||||
- With most ingress controllers, requests follow this path:
|
||||
|
||||
HTTP client → load balancer → NodePort → ingress controller Pod → app Pod
|
||||
|
||||
- Sometimes, some of these components can be on the same machine
|
||||
|
||||
(e.g. ingress controller Pod and app Pod)
|
||||
|
||||
- But they can also be on different machines
|
||||
|
||||
(each arrow = a potential hop)
|
||||
|
||||
- This could add some unwanted latency!
|
||||
|
||||
(See following diagrams)
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## External traffic policy
|
||||
|
||||
- The Service manifest has a field `spec.externalTrafficPolicy`
|
||||
|
||||
- Possible values are:
|
||||
|
||||
- `Cluster` (default) - load balance connections to all pods
|
||||
|
||||
- `Local` - only send connections to local pods (on the same node)
|
||||
|
||||
- When the policy is set to `Local`, we avoid one hop:
|
||||
|
||||
HTTP client → load balancer → NodePort .red[**→**] ingress controller Pod → app Pod
|
||||
|
||||
(See diagram on next slide)
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
## What if there is no Pod?
|
||||
|
||||
- If a connection for a Service arrives on a Node through a NodePort...
|
||||
|
||||
- ...And that Node doesn't host a Pod matching the selector of that Service...
|
||||
|
||||
(i.e. there is no local Pod)
|
||||
|
||||
- ...Then the connection is refused
|
||||
|
||||
- This can be detected from outside (by the external load balancer)
|
||||
|
||||
- The external load balancer won't send connections to these nodes
|
||||
|
||||
(See diagram on next slide)
|
||||
|
||||
---
|
||||
|
||||
class: pic
|
||||
|
||||

|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Internal traffic policy
|
||||
|
||||
- Since Kubernetes 1.21, there is also `spec.internalTrafficPolicy`
|
||||
|
||||
- It works similarly but for internal traffic
|
||||
|
||||
- It's an *alpha* feature
|
||||
|
||||
(not available by default; needs special steps to be enabled on the control plane)
|
||||
|
||||
- See the [documentation] for more details
|
||||
|
||||
[documentation]: https://kubernetes.io/docs/concepts/services-networking/service-traffic-policy/
|
||||
|
||||
---
|
||||
|
||||
## Other ways to save hops
|
||||
|
||||
- Run the ingress controller as a DaemonSet, using port 80 on the nodes:
|
||||
|
||||
HTTP client → load balancer → ingress controller on Node port 80 → app Pod
|
||||
|
||||
- Then simplify further by setting a set of DNS records pointing to the nodes:
|
||||
|
||||
HTTP client → ingress controller on Node port 80 → app Pod
|
||||
|
||||
- Or run a combined load balancer / ingress controller at the edge of the cluster:
|
||||
|
||||
HTTP client → edge ingress controller → app Pod
|
||||
|
||||
---
|
||||
|
||||
## Source IP address
|
||||
|
||||
- Obtaining the IP address of the HTTP client (from the app Pod) can be tricky!
|
||||
|
||||
- We should consider (at least) two steps:
|
||||
|
||||
- obtaining the IP address of the HTTP client (from the ingress controller)
|
||||
|
||||
- passing that IP address from the ingress controller to the HTTP client
|
||||
|
||||
- The second step is usually done by injecting an HTTP header
|
||||
|
||||
(typically `x-forwarded-for`)
|
||||
|
||||
- Most ingress controllers do that out of the box
|
||||
|
||||
- But how does the ingress controller obtain the IP address of the HTTP client? 🤔
|
||||
|
||||
---
|
||||
|
||||
## Scenario 1, direct connection
|
||||
|
||||
- If the HTTP client connects directly to the ingress controller: easy!
|
||||
|
||||
- e.g. when running a combined load balancer / ingress controller
|
||||
|
||||
- or when running the ingress controller as a Daemon Set directly on port 80
|
||||
|
||||
---
|
||||
|
||||
## Scenario 2, external load balancer
|
||||
|
||||
- Most external load balancers running in TCP mode don't expose client addresses
|
||||
|
||||
(HTTP client connects to load balancer; load balancer connects to ingress controller)
|
||||
|
||||
- The ingress controller will "see" the IP address of the load balancer
|
||||
|
||||
(instead of the IP address of the client)
|
||||
|
||||
- Many external load balancers support the [Proxy Protocol]
|
||||
|
||||
- This enables the ingress controller to "see" the IP address of the HTTP client
|
||||
|
||||
- It needs to be enabled on both ends (ingress controller and load balancer)
|
||||
|
||||
[ProxyProtocol]: https://www.haproxy.com/blog/haproxy/proxy-protocol/
|
||||
|
||||
---
|
||||
|
||||
## Scenario 3, leveraging `externalTrafficPolicy`
|
||||
|
||||
- In some cases, the external load balancer will preserve the HTTP client address
|
||||
|
||||
- It is then possible to set `externalTrafficPolicy` to `Local`
|
||||
|
||||
- The ingress controller will then "see" the HTTP client address
|
||||
|
||||
- If `externalTrafficPolicy` is set to `Cluster`:
|
||||
|
||||
- sometimes the client address will be visible
|
||||
|
||||
- when bouncing the connection to another node, the address might be changed
|
||||
|
||||
- This is a big "it depends!"
|
||||
|
||||
- Bottom line: rely on the two other techniques instead?
|
||||
@@ -85,7 +85,7 @@ class: extra-details
|
||||
|
||||
- Let's set it now
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Set the `DOMAIN` environment variable:
|
||||
```bash
|
||||
@@ -120,7 +120,7 @@ class: extra-details
|
||||
|
||||
- Thanks to `openssl`, generating a self-signed cert is just one command away!
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Generate a key and certificate:
|
||||
```bash
|
||||
@@ -175,7 +175,7 @@ class: extra-details
|
||||
|
||||
- Let's use a volume to get easy access to the generated key and certificate
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Obtain a certificate from Let's Encrypt:
|
||||
```bash
|
||||
@@ -203,7 +203,7 @@ Remove `--test-cert` to obtain a *real* certificate.
|
||||
|
||||
- they are owned by `root`
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Grant ourselves permissions on these files:
|
||||
```bash
|
||||
@@ -265,7 +265,7 @@ Remove `--test-cert` to obtain a *real* certificate.
|
||||
|
||||
- However, the Endpoints needs to be adapted to put the current node's address
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Edit `~/containers.training/k8s/certbot.yaml`
|
||||
|
||||
@@ -286,7 +286,7 @@ Remove `--test-cert` to obtain a *real* certificate.
|
||||
|
||||
(i.e. 8000)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Run `certbot`:
|
||||
```bash
|
||||
@@ -312,7 +312,7 @@ Remove `--test-cert` to get a production certificate.
|
||||
|
||||
(and owned by root)
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Grand ourselves permissions on these files:
|
||||
```bash
|
||||
@@ -338,7 +338,7 @@ Remove `--test-cert` to get a production certificate.
|
||||
|
||||
- We can create a Secret to hold them
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Create the Secret:
|
||||
```bash
|
||||
@@ -402,7 +402,7 @@ class: extra-details
|
||||
|
||||
## Using the certificate
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Add the `tls` section to an existing Ingress
|
||||
|
||||
|
||||
@@ -37,19 +37,18 @@
|
||||
- Service with `type: LoadBalancer`
|
||||
|
||||
- requires a particular controller (e.g. CCM, MetalLB)
|
||||
- costs a bit of money for each service
|
||||
- if TLS is desired, it has to be implemented by the app
|
||||
- works for any TCP protocol (not just HTTP)
|
||||
- doesn't interpret the HTTP protocol (no fancy routing)
|
||||
- costs a bit of money for each service
|
||||
|
||||
- Ingress
|
||||
|
||||
- requires an ingress controller
|
||||
- flat cost regardless of number of ingresses
|
||||
- can implement TLS transparently for the app
|
||||
- only supports HTTP
|
||||
- can do content-based routing (e.g. per URI)
|
||||
- lower cost per service
|
||||
<br/>(exact pricing depends on provider's model)
|
||||
|
||||
---
|
||||
|
||||
@@ -123,46 +122,18 @@
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Special cases
|
||||
|
||||
- GKE has "[GKE Ingress]", a custom ingress controller
|
||||
|
||||
(enabled by default)
|
||||
|
||||
- EKS has "AWS ALB Ingress Controller" as well
|
||||
|
||||
(not enabled by default, requires extra setup)
|
||||
|
||||
- They leverage cloud-specific HTTP load balancers
|
||||
|
||||
(GCP HTTP LB, AWS ALB)
|
||||
|
||||
- They typically a cost *per ingress resource*
|
||||
|
||||
[GKE Ingress]: https://cloud.google.com/kubernetes-engine/docs/concepts/ingress
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Single or multiple LoadBalancer
|
||||
|
||||
- Most ingress controllers will create a LoadBalancer Service
|
||||
|
||||
(and will receive all HTTP/HTTPS traffic through it)
|
||||
|
||||
- We need to point our DNS entries to the IP address of that LB
|
||||
|
||||
- Some rare ingress controllers will allocate one LB per ingress resource
|
||||
|
||||
(example: the GKE Ingress and ALB Ingress mentioned previously)
|
||||
(example: by default, the AWS ingress controller based on ALBs)
|
||||
|
||||
- This leads to increased costs
|
||||
|
||||
- Note that it's possible to have multiple "rules" per ingress resource
|
||||
|
||||
(this will reduce costs but may be less convenient to manage)
|
||||
|
||||
---
|
||||
|
||||
## Ingress in action
|
||||
@@ -251,22 +222,15 @@ class: extra-details
|
||||
|
||||
## Running Traefik
|
||||
|
||||
- The [Traefik documentation][traefikdoc] recommends to use a Helm chart
|
||||
- The [Traefik documentation](https://docs.traefik.io/user-guide/kubernetes/#deploy-trfik-using-a-deployment-or-daemonset) tells us to pick between Deployment and Daemon Set
|
||||
|
||||
- For simplicity, we're going to use a custom YAML manifest
|
||||
- We are going to use a Daemon Set so that each node can accept connections
|
||||
|
||||
- Our manifest will:
|
||||
|
||||
- use a Daemon Set so that each node can accept connections
|
||||
- We will do two minor changes to the [YAML provided by Traefik](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-ds.yaml):
|
||||
|
||||
- enable `hostNetwork`
|
||||
|
||||
- add a *toleration* so that Traefik also runs on all nodes
|
||||
|
||||
- We could do the same with the official [Helm chart][traefikchart]
|
||||
|
||||
[traefikdoc]: https://doc.traefik.io/traefik/getting-started/install-traefik/#use-the-helm-chart
|
||||
[traefikchart]: https://artifacthub.io/packages/helm/traefik/traefik
|
||||
- add a *toleration* so that Traefik also runs on `node1`
|
||||
|
||||
---
|
||||
|
||||
@@ -290,7 +254,7 @@ class: extra-details
|
||||
|
||||
## Checking taints on our nodes
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check our nodes specs:
|
||||
```bash
|
||||
@@ -341,7 +305,7 @@ class: extra-details
|
||||
|
||||
## Checking tolerations on the control plane
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check tolerations for CoreDNS:
|
||||
```bash
|
||||
@@ -367,7 +331,7 @@ class: extra-details
|
||||
|
||||
## Special tolerations
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check tolerations on `kube-proxy`:
|
||||
```bash
|
||||
@@ -396,7 +360,7 @@ This one is a special case that means "ignore all taints and run anyway."
|
||||
|
||||
- [Traefik's RBAC rules](https://github.com/containous/traefik/blob/v1.7/examples/k8s/traefik-rbac.yaml) allowing it to watch necessary API objects
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Apply the YAML:
|
||||
```bash
|
||||
@@ -411,7 +375,7 @@ This one is a special case that means "ignore all taints and run anyway."
|
||||
|
||||
- If Traefik started correctly, we now have a web server listening on each node
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Check that Traefik is serving 80/tcp:
|
||||
```bash
|
||||
@@ -430,7 +394,7 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
|
||||
- To make our lives easier, we will use [nip.io](http://nip.io)
|
||||
|
||||
- Check out `http://red.A.B.C.D.nip.io`
|
||||
- Check out `http://cheddar.A.B.C.D.nip.io`
|
||||
|
||||
(replacing A.B.C.D with the IP address of `node1`)
|
||||
|
||||
@@ -446,7 +410,7 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
|
||||
- With the current install method, it's listening on port 8080
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Go to `http://node1:8080` (replacing `node1` with its IP address)
|
||||
|
||||
@@ -458,36 +422,38 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
|
||||
## Setting up host-based routing ingress rules
|
||||
|
||||
- We are going to use the `jpetazzo/color` image
|
||||
- We are going to use `errm/cheese` images
|
||||
|
||||
- This image contains a simple static HTTP server on port 80
|
||||
(there are [3 tags available](https://hub.docker.com/r/errm/cheese/tags/): wensleydale, cheddar, stilton)
|
||||
|
||||
- We will run 3 deployments (`red`, `green`, `blue`)
|
||||
- These images contain a simple static HTTP server sending a picture of cheese
|
||||
|
||||
- We will run 3 deployments (one for each cheese)
|
||||
|
||||
- We will create 3 services (one for each deployment)
|
||||
|
||||
- Then we will create 3 ingress rules (one for each service)
|
||||
|
||||
- We will route `<color>.A.B.C.D.nip.io` to the corresponding deployment
|
||||
- We will route `<name-of-cheese>.A.B.C.D.nip.io` to the corresponding deployment
|
||||
|
||||
---
|
||||
|
||||
## Running colorful web servers
|
||||
## Running cheesy web servers
|
||||
|
||||
.lab[
|
||||
.exercise[
|
||||
|
||||
- Run all three deployments:
|
||||
```bash
|
||||
kubectl create deployment red --image=jpetazzo/color
|
||||
kubectl create deployment green --image=jpetazzo/color
|
||||
kubectl create deployment blue --image=jpetazzo/color
|
||||
kubectl create deployment cheddar --image=errm/cheese:cheddar
|
||||
kubectl create deployment stilton --image=errm/cheese:stilton
|
||||
kubectl create deployment wensleydale --image=errm/cheese:wensleydale
|
||||
```
|
||||
|
||||
- Create a service for each of them:
|
||||
```bash
|
||||
kubectl expose deployment red --port=80
|
||||
kubectl expose deployment green --port=80
|
||||
kubectl expose deployment blue --port=80
|
||||
kubectl expose deployment cheddar --port=80
|
||||
kubectl expose deployment stilton --port=80
|
||||
kubectl expose deployment wensleydale --port=80
|
||||
```
|
||||
|
||||
]
|
||||
@@ -503,17 +469,17 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
- Since Kubernetes 1.19, we can use `kubectl create ingress`
|
||||
|
||||
```bash
|
||||
kubectl create ingress red \
|
||||
--rule=red.`A.B.C.D`.nip.io/*=red:80
|
||||
kubectl create ingress cheddar \
|
||||
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
|
||||
```
|
||||
|
||||
- We can specify multiple rules per resource
|
||||
|
||||
```bash
|
||||
kubectl create ingress rgb \
|
||||
--rule=red.`A.B.C.D`.nip.io/*=red:80 \
|
||||
--rule=green.`A.B.C.D`.nip.io/*=green:80 \
|
||||
--rule=blue.`A.B.C.D`.nip.io/*=blue:80
|
||||
kubectl create ingress cheeses \
|
||||
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80 \
|
||||
--rule=stilton.`A.B.C.D`.nip.io/*=stilton:80 \
|
||||
--rule=wensleydale.`A.B.C.D`.nip.io/*=wensleydale:80
|
||||
```
|
||||
|
||||
---
|
||||
@@ -523,14 +489,14 @@ This is normal: we haven't provided any ingress rule yet.
|
||||
- The `*` is important:
|
||||
|
||||
```
|
||||
--rule=red.A.B.C.D.nip.io/`*`=red:80
|
||||
--rule=cheddar.A.B.C.D.nip.io/`*`=cheddar:80
|
||||
```
|
||||
|
||||
- It means "all URIs below that path"
|
||||
|
||||
- Without the `*`, it means "only that exact path"
|
||||
|
||||
(if we omit it, requests for e.g. `red.A.B.C.D.nip.io/hello` will 404)
|
||||
(and requests for e.g. images or other URIs won't work)
|
||||
|
||||
---
|
||||
|
||||
@@ -542,15 +508,15 @@ Here is a minimal host-based ingress resource:
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: red
|
||||
name: cheddar
|
||||
spec:
|
||||
rules:
|
||||
- host: red.`A.B.C.D`.nip.io
|
||||
- host: cheddar.`A.B.C.D`.nip.io
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
backend:
|
||||
serviceName: red
|
||||
serviceName: cheddar
|
||||
servicePort: 80
|
||||
|
||||
```
|
||||
@@ -574,8 +540,8 @@ class: extra-details
|
||||
- If we want to see "modern" YAML, we can use `-o yaml --dry-run=client`:
|
||||
|
||||
```bash
|
||||
kubectl create ingress red -o yaml --dry-run=client \
|
||||
--rule=red.`A.B.C.D`.nip.io/*=red:80
|
||||
kubectl create ingress cheddar -o yaml --dry-run=client \
|
||||
--rule=cheddar.`A.B.C.D`.nip.io/*=cheddar:80
|
||||
|
||||
```
|
||||
|
||||
@@ -643,21 +609,13 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## Vendor-specific example
|
||||
## A special feature in action
|
||||
|
||||
- Let's see how to implement *canary releases*
|
||||
- We're going to see how to implement *canary releases* with Traefik
|
||||
|
||||
- The example here will use Traefik v1
|
||||
- This feature is available on multiple ingress controllers
|
||||
|
||||
(which is obsolete)
|
||||
|
||||
- It won't work on your Kubernetes cluster!
|
||||
|
||||
(unless you're running an oooooold version of Kubernetes)
|
||||
|
||||
(and an equally oooooooold version of Traefik)
|
||||
|
||||
- We've left it here just as an example!
|
||||
- ... But it is configured very differently on each of them
|
||||
|
||||
---
|
||||
|
||||
@@ -698,7 +656,7 @@ class: extra-details
|
||||
|
||||
---
|
||||
|
||||
## Canary releases with Traefik v1
|
||||
## Canary releases with Traefik
|
||||
|
||||
- We need to deploy the canary and expose it with a separate service
|
||||
|
||||
@@ -710,6 +668,14 @@ class: extra-details
|
||||
|
||||
- If we want, we can send requests to more than 2 services
|
||||
|
||||
- Let's send requests to our 3 cheesy services!
|
||||
|
||||
.exercise[
|
||||
|
||||
- Create the resource shown on the next slide
|
||||
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## The Ingress resource
|
||||
@@ -719,34 +685,63 @@ class: extra-details
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: rgb
|
||||
name: cheeseplate
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/service-weights: |
|
||||
red: 50%
|
||||
green: 25%
|
||||
blue: 25%
|
||||
cheddar: 50%
|
||||
wensleydale: 25%
|
||||
stilton: 25%
|
||||
spec:
|
||||
rules:
|
||||
- host: rgb.`A.B.C.D`.nip.io
|
||||
- host: cheeseplate.`A.B.C.D`.nip.io
|
||||
http:
|
||||
paths:
|
||||
- path: /
|
||||
backend:
|
||||
serviceName: red
|
||||
serviceName: cheddar
|
||||
servicePort: 80
|
||||
- path: /
|
||||
backend:
|
||||
serviceName: green
|
||||
serviceName: wensleydale
|
||||
servicePort: 80
|
||||
- path: /
|
||||
backend:
|
||||
serviceName: blue
|
||||
serviceName: stilton
|
||||
servicePort: 80
|
||||
```
|
||||
]
|
||||
|
||||
---
|
||||
|
||||
## Testing the canary
|
||||
|
||||
- Let's check the percentage of requests going to each service
|
||||
|
||||
.exercise[
|
||||
|
||||
- Continuously send HTTP requests to the new ingress:
|
||||
```bash
|
||||
while sleep 0.1; do
|
||||
curl -s http://cheeseplate.A.B.C.D.nip.io/
|
||||
done
|
||||
```
|
||||
|
||||
]
|
||||
|
||||
We should see a 50/25/25 request mix.
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Load balancing fairness
|
||||
|
||||
Note: if we use odd request ratios, the load balancing algorithm might appear to be broken on a small scale (when sending a small number of requests), but on a large scale (with many requests) it will be fair.
|
||||
|
||||
For instance, with a 11%/89% ratio, we can see 79 requests going to the 89%-weighted service, and then requests alternating between the two services; then 79 requests again, etc.
|
||||
|
||||
---
|
||||
|
||||
class: extra-details
|
||||
|
||||
## Other ingress controllers
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user