merge from master with 1.19 updates

This commit is contained in:
Jerome Petazzoni
2020-09-16 08:58:29 +02:00
31 changed files with 1032 additions and 145 deletions

k8s/certbot.yaml Normal file
View File

@@ -0,0 +1,33 @@
kind: Service
apiVersion: v1
metadata:
name: certbot
spec:
ports:
- port: 80
protocol: TCP
---
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
name: certbot
spec:
rules:
- http:
paths:
- path: /.well-known/acme-challenge/
backend:
serviceName: certbot
servicePort: 80
---
apiVersion: v1
kind: Endpoints
metadata:
name: certbot
subsets:
- addresses:
- ip: A.B.C.D
ports:
- port: 8000
protocol: TCP

k8s/consul-1.yaml Normal file
View File

@@ -0,0 +1,77 @@
# Basic Consul cluster using Cloud Auto-Join.
# Caveats:
# - no actual persistence
# - scaling down to 1 will break the cluster
# - pods may be colocated
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: consul
rules:
- apiGroups: [""]
resources:
- pods
verbs:
- get
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: consul
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: consul
subjects:
- kind: ServiceAccount
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
spec:
ports:
- port: 8500
name: http
selector:
app: consul
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: consul
spec:
serviceName: consul
replicas: 3
selector:
matchLabels:
app: consul
template:
metadata:
labels:
app: consul
spec:
serviceAccountName: consul
containers:
- name: consul
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
args:
- "agent"
- "-bootstrap-expect=3"
- "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
- "-client=0.0.0.0"
- "-data-dir=/consul/data"
- "-server"
- "-ui"

View File

@@ -1,5 +1,9 @@
# Better Consul cluster.
# There is still no actual persistence, but:
# - podAntiAffinity prevents pod colocation
# - the cluster keeps working when scaling down to 1 (thanks to a lifecycle hook)
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
kind: Role
metadata:
name: consul
rules:
@@ -11,17 +15,16 @@ rules:
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
kind: RoleBinding
metadata:
name: consul
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
kind: Role
name: consul
subjects:
- kind: ServiceAccount
name: consul
namespace: default
---
apiVersion: v1
kind: ServiceAccount
@@ -68,11 +71,16 @@ spec:
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.6"
image: "consul:1.8"
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
args:
- "agent"
- "-bootstrap-expect=3"
- "-retry-join=provider=k8s label_selector=\"app=consul\""
- "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
- "-client=0.0.0.0"
- "-data-dir=/consul/data"
- "-server"

k8s/consul-3.yaml Normal file
View File

@@ -0,0 +1,104 @@
# Even better Consul cluster.
# That one uses a volumeClaimTemplate to achieve true persistence.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: consul
rules:
- apiGroups: [""]
resources:
- pods
verbs:
- get
- list
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: consul
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: consul
subjects:
- kind: ServiceAccount
name: consul
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: consul
---
apiVersion: v1
kind: Service
metadata:
name: consul
spec:
ports:
- port: 8500
name: http
selector:
app: consul
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: consul
spec:
serviceName: consul
replicas: 3
selector:
matchLabels:
app: consul
volumeClaimTemplates:
- metadata:
name: data
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 1Gi
template:
metadata:
labels:
app: consul
spec:
serviceAccountName: consul
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- consul
topologyKey: kubernetes.io/hostname
terminationGracePeriodSeconds: 10
containers:
- name: consul
image: "consul:1.8"
volumeMounts:
- name: data
mountPath: /consul/data
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
args:
- "agent"
- "-bootstrap-expect=3"
- "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
- "-client=0.0.0.0"
- "-data-dir=/consul/data"
- "-server"
- "-ui"
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- consul leave

View File

@@ -3,6 +3,10 @@ kind: Ingress
metadata:
name: whatever
spec:
#tls:
#- secretName: whatever.A.B.C.D.nip.io
# hosts:
# - whatever.A.B.C.D.nip.io
rules:
- host: whatever.A.B.C.D.nip.io
http:

View File

@@ -50,8 +50,10 @@ spec:
- --api.insecure
- --log.level=INFO
- --metrics.prometheus
- --providers.kubernetescrd
- --providers.kubernetesingress
- --entrypoints.http.Address=:80
- --entrypoints.https.Address=:443
- --entrypoints.https.http.tls.certResolver=default
---
kind: Service
apiVersion: v1

View File

@@ -1 +1 @@
traefik-v1.yaml
traefik-v2.yaml

View File

@@ -0,0 +1,24 @@
INFRACLASS=openstack-cli
# Copy this file (e.g. to "openstack" or "ovh"), then customize it.
# Some OpenStack providers (like OVHcloud) will let you download
# a file containing credentials. That's what you need to use.
# The values below are just examples.
export OS_AUTH_URL=https://auth.cloud.ovh.net/v3/
export OS_IDENTITY_API_VERSION=3
export OS_USER_DOMAIN_NAME=${OS_USER_DOMAIN_NAME:-"Default"}
export OS_PROJECT_DOMAIN_NAME=${OS_PROJECT_DOMAIN_NAME:-"Default"}
export OS_TENANT_ID=abcd1234
export OS_TENANT_NAME="0123456"
export OS_USERNAME="user-xyz123"
export OS_PASSWORD=AbCd1234
export OS_REGION_NAME="GRA7"
# And then some values to indicate server type, image, etc.
# You can see available flavors with `openstack flavor list`
export OS_FLAVOR=s1-4
# You can see available images with `openstack image list`
export OS_IMAGE=896c5f54-51dc-44f0-8c22-ce99ba7164df
# You can create a key with `openstack keypair create --public-key ~/.ssh/id_rsa.pub containertraining`
export OS_KEY=containertraining

View File

@@ -1,4 +1,5 @@
INFRACLASS=openstack
INFRACLASS=openstack-tf
# If you are using OpenStack, copy this file (e.g. to "openstack" or "enix")
# and customize the variables below.
export TF_VAR_user="jpetazzo"
@@ -6,4 +7,4 @@ export TF_VAR_tenant="training"
export TF_VAR_domain="Default"
export TF_VAR_password="..."
export TF_VAR_auth_url="https://api.r1.nxs.enix.io/v3"
export TF_VAR_flavor="GP1.S"
export TF_VAR_flavor="GP1.S"

View File

@@ -0,0 +1,5 @@
INFRACLASS=hetzner
if ! [ -f ~/.config/hcloud/cli.toml ]; then
warn "~/.config/hcloud/cli.toml not found."
warn "Make sure that the Hetzner CLI (hcloud) is installed and configured."
fi

View File

@@ -1,5 +1 @@
INFRACLASS=scaleway
if ! [ -f ~/.config/scw/config.yaml ]; then
warn "~/.config/scw/config.yaml not found."
warn "Make sure that the scaleway CLI is installed and configured."
fi

View File

@@ -73,6 +73,19 @@ _cmd_deploy() {
apt-get update && apt-get install sudo -y"
fi
# FIXME
# Special case for hetzner since it doesn't have an ubuntu user
#if [ "$INFRACLASS" = "hetzner" ]; then
# pssh -l root "
#[ -d /home/ubuntu ] ||
# useradd ubuntu -m -s /bin/bash
#echo 'ubuntu ALL=(ALL:ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
#[ -d /home/ubuntu/.ssh ] ||
# install --owner=ubuntu --mode=700 --directory /home/ubuntu/.ssh
#[ -f /home/ubuntu/.ssh/authorized_keys ] ||
# install --owner=ubuntu --mode=600 /root/.ssh/authorized_keys --target-directory /home/ubuntu/.ssh"
#fi
# Copy settings and install Python YAML parser
pssh -I tee /tmp/settings.yaml <tags/$TAG/settings.yaml
pssh "
@@ -187,7 +200,7 @@ _cmd_kube() {
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
kubeadm token generate > /tmp/token &&
sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4)
sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4) --ignore-preflight-errors=NumCPU
fi"
# Put kubeconfig in ubuntu's and docker's accounts
@@ -224,13 +237,13 @@ _cmd_kube() {
# Install kubectx and kubens
pssh "
[ -d kubectx ] || git clone https://github.com/ahmetb/kubectx &&
sudo ln -sf /home/ubuntu/kubectx/kubectx /usr/local/bin/kctx &&
sudo ln -sf /home/ubuntu/kubectx/kubens /usr/local/bin/kns &&
sudo cp /home/ubuntu/kubectx/completion/*.bash /etc/bash_completion.d &&
sudo ln -sf \$HOME/kubectx/kubectx /usr/local/bin/kctx &&
sudo ln -sf \$HOME/kubectx/kubens /usr/local/bin/kns &&
sudo cp \$HOME/kubectx/completion/*.bash /etc/bash_completion.d &&
[ -d kube-ps1 ] || git clone https://github.com/jonmosco/kube-ps1 &&
sudo -u docker sed -i s/docker-prompt/kube_ps1/ /home/docker/.bashrc &&
sudo -u docker tee -a /home/docker/.bashrc <<EOF
. /home/ubuntu/kube-ps1/kube-ps1.sh
. \$HOME/kube-ps1/kube-ps1.sh
KUBE_PS1_PREFIX=""
KUBE_PS1_SUFFIX=""
KUBE_PS1_SYMBOL_ENABLE="false"
@@ -303,25 +316,12 @@ _cmd_kubetest() {
set -e
if i_am_first_node; then
which kubectl
for NODE in \$(awk /[0-9]\$/\ {print\ \\\$2} /etc/hosts); do
for NODE in \$(grep [0-9]\$ /etc/hosts | grep -v ^127 | awk {print\ \\\$2}); do
echo \$NODE ; kubectl get nodes | grep -w \$NODE | grep -w Ready
done
fi"
}
_cmd ids "(FIXME) List the instance IDs belonging to a given tag or token"
_cmd_ids() {
TAG=$1
need_tag $TAG
info "Looking up by tag:"
aws_get_instance_ids_by_tag $TAG
# Just in case we managed to create instances but weren't able to tag them
info "Looking up by token:"
aws_get_instance_ids_by_client_token $TAG
}
_cmd ips "Show the IP addresses for a given tag"
_cmd_ips() {
TAG=$1
@@ -338,10 +338,22 @@ _cmd_ips() {
done < tags/$TAG/ips.txt
}
_cmd list "List available groups for a given infrastructure"
_cmd list "List all VMs on a given infrastructure (or all infras if no arg given)"
_cmd_list() {
need_infra $1
infra_list
case "$1" in
"")
for INFRA in infra/*; do
$0 list $INFRA
done
;;
*/example.*)
;;
*)
need_infra $1
sep "Listing instances for $1"
infra_list
;;
esac
}
_cmd listall "List VMs running on all configured infrastructures"
@@ -435,16 +447,6 @@ _cmd_opensg() {
infra_opensg
}
_cmd portworx "Prepare the nodes for Portworx deployment"
_cmd_portworx() {
TAG=$1
need_tag
pssh "
sudo truncate --size 10G /portworx.blk &&
sudo losetup /dev/loop4 /portworx.blk"
}
_cmd disableaddrchecks "Disable source/destination IP address checks"
_cmd_disableaddrchecks() {
TAG=$1
@@ -489,18 +491,6 @@ _cmd_quotas() {
infra_quotas
}
_cmd retag "(FIXME) Apply a new tag to a group of VMs"
_cmd_retag() {
OLDTAG=$1
NEWTAG=$2
TAG=$OLDTAG
need_tag
if [[ -z "$NEWTAG" ]]; then
die "You must specify a new tag to apply."
fi
aws_tag_instances $OLDTAG $NEWTAG
}
_cmd ssh "Open an SSH session to the first node of a tag"
_cmd_ssh() {
TAG=$1
@@ -687,11 +677,12 @@ _cmd_webssh() {
sudo apt-get update &&
sudo apt-get install python-tornado python-paramiko -y"
pssh "
[ -d webssh ] || git clone https://github.com/jpetazzo/webssh"
cd /opt
[ -d webssh ] || sudo git clone https://github.com/jpetazzo/webssh"
pssh "
for KEYFILE in /etc/ssh/*.pub; do
read a b c < \$KEYFILE; echo localhost \$a \$b
done > webssh/known_hosts"
done | sudo tee /opt/webssh/known_hosts"
pssh "cat >webssh.service <<EOF
[Unit]
Description=webssh
@@ -700,7 +691,7 @@ Description=webssh
WantedBy=multi-user.target
[Service]
WorkingDirectory=/home/ubuntu/webssh
WorkingDirectory=/opt/webssh
ExecStart=/usr/bin/env python run.py --fbidhttp=false --port=1080 --policy=reject
User=nobody
Group=nogroup
@@ -723,11 +714,6 @@ _cmd_www() {
python3 -m http.server
}
greet() {
IAMUSER=$(aws iam get-user --query 'User.UserName')
info "Hello! You seem to be UNIX user $USER, and IAM user $IAMUSER."
}
pull_tag() {
# Pre-pull a bunch of images
pssh --timeout 900 'for I in \
@@ -817,27 +803,3 @@ make_key_name() {
SHORT_FINGERPRINT=$(ssh-add -l | grep RSA | head -n1 | cut -d " " -f 2 | tr -d : | cut -c 1-8)
echo "${SHORT_FINGERPRINT}-${USER}"
}
sync_keys() {
# make sure ssh-add -l contains "RSA"
ssh-add -l | grep -q RSA \
|| die "The output of \`ssh-add -l\` doesn't contain 'RSA'. Start the agent, add your keys?"
AWS_KEY_NAME=$(make_key_name)
info "Syncing keys... "
if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
aws ec2 import-key-pair --key-name $AWS_KEY_NAME \
--public-key-material "$(ssh-add -L \
| grep -i RSA \
| head -n1 \
| cut -d " " -f 1-2)" &>/dev/null
if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
die "Somehow, importing the key didn't work. Make sure that 'ssh-add -l | grep RSA | head -n1' returns an RSA key?"
else
info "Imported new key $AWS_KEY_NAME."
fi
else
info "Using existing key $AWS_KEY_NAME."
fi
}

View File

@@ -1,9 +1,14 @@
if ! command -v aws >/dev/null; then
warn "AWS CLI (aws) not found."
fi
infra_list() {
aws_display_tags
aws ec2 describe-instances --output json |
jq -r '.Reservations[].Instances[] | [.InstanceId, .ClientToken, .State.Name, .InstanceType ] | @tsv'
}
infra_quotas() {
greet
aws_greet
max_instances=$(aws ec2 describe-account-attributes \
--attribute-names max-instances \
@@ -21,10 +26,10 @@ infra_start() {
COUNT=$1
# Print our AWS username, to ease the pain of credential-juggling
greet
aws_greet
# Upload our SSH keys to AWS if needed, to be added to each VM's authorized_keys
key_name=$(sync_keys)
key_name=$(aws_sync_keys)
AMI=$(aws_get_ami) # Retrieve the AWS image ID
if [ -z "$AMI" ]; then
@@ -61,7 +66,7 @@ infra_start() {
aws_tag_instances $TAG $TAG
# Wait until EC2 API tells us that the instances are running
wait_until_tag_is_running $TAG $COUNT
aws_wait_until_tag_is_running $TAG $COUNT
aws_get_instance_ips_by_tag $TAG > tags/$TAG/ips.txt
}
@@ -98,7 +103,7 @@ infra_disableaddrchecks() {
done
}
wait_until_tag_is_running() {
aws_wait_until_tag_is_running() {
max_retry=100
i=0
done_count=0
@@ -214,3 +219,32 @@ aws_get_ami() {
##VERSION##
find_ubuntu_ami -r $AWS_DEFAULT_REGION -a amd64 -v 18.04 -t hvm:ebs -N -q
}
aws_greet() {
IAMUSER=$(aws iam get-user --query 'User.UserName')
info "Hello! You seem to be UNIX user $USER, and IAM user $IAMUSER."
}
aws_sync_keys() {
# make sure ssh-add -l contains "RSA"
ssh-add -l | grep -q RSA \
|| die "The output of \`ssh-add -l\` doesn't contain 'RSA'. Start the agent, add your keys?"
AWS_KEY_NAME=$(make_key_name)
info "Syncing keys... "
if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
aws ec2 import-key-pair --key-name $AWS_KEY_NAME \
--public-key-material "$(ssh-add -L \
| grep -i RSA \
| head -n1 \
| cut -d " " -f 1-2)" &>/dev/null
if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
die "Somehow, importing the key didn't work. Make sure that 'ssh-add -l | grep RSA | head -n1' returns an RSA key?"
else
info "Imported new key $AWS_KEY_NAME."
fi
else
info "Using existing key $AWS_KEY_NAME."
fi
}

View File

@@ -0,0 +1,57 @@
if ! command -v hcloud >/dev/null; then
warn "Hetzner CLI (hcloud) not found."
fi
if ! [ -f ~/.config/hcloud/cli.toml ]; then
warn "~/.config/hcloud/cli.toml not found."
fi
infra_list() {
[ "$(hcloud server list -o json)" = "null" ] && return
hcloud server list -o json |
jq -r '.[] | [.id, .name , .status, .server_type.name] | @tsv'
}
infra_start() {
COUNT=$1
HETZNER_INSTANCE_TYPE=${HETZNER_INSTANCE_TYPE-cx21}
HETZNER_DATACENTER=${HETZNER_DATACENTER-nbg1-dc3}
HETZNER_IMAGE=${HETZNER_IMAGE-168855}
for I in $(seq 1 $COUNT); do
NAME=$(printf "%s-%03d" $TAG $I)
sep "Starting instance $I/$COUNT"
info " Datacenter: $HETZNER_DATACENTER"
info " Name: $NAME"
info " Instance type: $HETZNER_INSTANCE_TYPE"
hcloud server create \
--type=${HETZNER_INSTANCE_TYPE} \
--datacenter=${HETZNER_DATACENTER} \
--image=${HETZNER_IMAGE} \
--name=$NAME \
--label=tag=$TAG \
--ssh-key ~/.ssh/id_rsa.pub
done
hetzner_get_ips_by_tag $TAG > tags/$TAG/ips.txt
}
infra_stop() {
for ID in $(hetzner_get_ids_by_tag $TAG); do
info "Scheduling deletion of instance $ID..."
hcloud server delete $ID &
done
info "Waiting for deletion to complete..."
wait
}
hetzner_get_ids_by_tag() {
TAG=$1
hcloud server list --selector=tag=$TAG -o json | jq -r .[].name
}
hetzner_get_ips_by_tag() {
TAG=$1
hcloud server list --selector=tag=$TAG -o json | jq -r .[].public_net.ipv4.ip
}

View File

@@ -0,0 +1,53 @@
infra_list() {
openstack server list -f json |
jq -r '.[] | [.ID, .Name , .Status, .Flavor] | @tsv'
}
infra_start() {
COUNT=$1
sep "Starting $COUNT instances"
info " Region: $OS_REGION_NAME"
info " User: $OS_USERNAME"
info " Flavor: $OS_FLAVOR"
info " Image: $OS_IMAGE"
openstack server create \
--flavor $OS_FLAVOR \
--image $OS_IMAGE \
--key-name $OS_KEY \
--min $COUNT --max $COUNT \
--property workshopctl=$TAG \
$TAG
sep "Waiting for IP addresses to be available"
GOT=0
while [ "$GOT" != "$COUNT" ]; do
echo "Got $GOT/$COUNT IP addresses."
oscli_get_ips_by_tag $TAG > tags/$TAG/ips.txt
GOT="$(wc -l < tags/$TAG/ips.txt)"
done
}
infra_stop() {
info "Counting instances..."
oscli_get_instances_json $TAG |
jq -r .[].Name |
wc -l
info "Deleting instances..."
oscli_get_instances_json $TAG |
jq -r .[].Name |
xargs -P10 -n1 openstack server delete
info "Done."
}
oscli_get_instances_json() {
TAG=$1
openstack server list -f json --name "${TAG}-[0-9]*"
}
oscli_get_ips_by_tag() {
TAG=$1
oscli_get_instances_json $TAG |
jq -r .[].Networks | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' || true
}

View File

@@ -1,15 +1,18 @@
infra_list() {
die "unimplemented"
}
if ! command -v scw >/dev/null; then
warn "Scaleway CLI (scw) not found."
fi
if ! [ -f ~/.config/scw/config.yaml ]; then
warn "~/.config/scw/config.yaml not found."
fi
infra_quotas() {
die "unimplemented"
infra_list() {
scw instance server list -o json |
jq -r '.[] | [.id, .name, .state, .commercial_type] | @tsv'
}
infra_start() {
COUNT=$1
AWS_KEY_NAME=$(make_key_name)
SCW_INSTANCE_TYPE=${SCW_INSTANCE_TYPE-DEV1-M}
SCW_ZONE=${SCW_ZONE-fr-par-1}
@@ -29,12 +32,12 @@ infra_start() {
}
infra_stop() {
for ID in $(scw_get_ids_by_tag $TAG); do
info "Scheduling deletion of instance $ID..."
scw instance server delete force-shutdown=true server-id=$ID &
done
info "Waiting for deletion to complete..."
wait
info "Counting instances..."
scw_get_ids_by_tag $TAG | wc -l
info "Deleting instances..."
scw_get_ids_by_tag $TAG |
xargs -n1 -P10 -I@@ \
scw instance server delete force-shutdown=true server-id=@@
}
scw_get_ids_by_tag() {
@@ -46,11 +49,3 @@ scw_get_ips_by_tag() {
TAG=$1
scw instance server list name=$TAG -o json | jq -r .[].public_ip.address
}
infra_opensg() {
die "unimplemented"
}
infra_disableaddrchecks() {
die "unimplemented"
}

View File

@@ -0,0 +1,23 @@
infra_disableaddrchecks() {
die "unimplemented"
}
infra_list() {
die "unimplemented"
}
infra_opensg() {
die "unimplemented"
}
infra_quotas() {
die "unimplemented"
}
infra_start() {
die "unimplemented"
}
infra_stop() {
die "unimplemented"
}

View File

@@ -37,7 +37,7 @@ def system(cmd):
td = str(t2-t1)[:5]
f.write(bold("[{}] in {}s\n".format(retcode, td)))
STEP += 1
with open("/home/ubuntu/.bash_history", "a") as f:
with open(os.environ["HOME"] + "/.bash_history", "a") as f:
f.write("{}\n".format(cmd))
if retcode != 0:
msg = "The following command failed with exit code {}:\n".format(retcode)

View File

@@ -18,7 +18,13 @@ pssh() {
echo "[parallel-ssh] $@"
export PSSH=$(which pssh || which parallel-ssh)
$PSSH -h $HOSTFILE -l ubuntu \
if [ "$INFRACLASS" = hetzner ]; then
LOGIN=root
else
LOGIN=ubuntu
fi
$PSSH -h $HOSTFILE -l $LOGIN \
--par 100 \
-O LogLevel=ERROR \
-O UserKnownHostsFile=/dev/null \

View File

@@ -29,6 +29,7 @@ apiurl = "https://dns.api.gandi.net/api/v5/domains"
if len(sys.argv) == 2:
tag = sys.argv[1]
domains = open(domains_file).read().split()
domains = [ d for d in domains if not d.startswith('#') ]
ips = open(f"tags/{tag}/ips.txt").read().split()
settings_file = f"tags/{tag}/settings.yaml"
clustersize = yaml.safe_load(open(settings_file))["clustersize"]

View File

@@ -1,7 +1,7 @@
resource "openstack_compute_instance_v2" "machine" {
count = "${var.count}"
name = "${format("%s-%04d", "${var.prefix}", count.index+1)}"
image_name = "Ubuntu 16.04.5 (Xenial Xerus)"
image_name = "Ubuntu 18.04.4 20200324"
flavor_name = "${var.flavor}"
security_groups = ["${openstack_networking_secgroup_v2.full_access.name}"]
key_pair = "${openstack_compute_keypair_v2.ssh_deploy_key.name}"

View File

@@ -15,7 +15,6 @@ for lib in lib/*.sh; do
done
DEPENDENCIES="
aws
ssh
curl
jq

View File

@@ -58,6 +58,7 @@
speaker: jpetazzo
title: Intensive Docker Online Workshop
attend: https://fwdays.com/en/event/intensive-docker-workshop
slides: https://2020-08-fwdays.container.training/
- date: [2020-09-12, 2020-09-13]
country: www
@@ -66,6 +67,7 @@
speaker: jpetazzo
title: Kubernetes Intensive Online Workshop
attend: https://fwdays.com/en/event/kubernetes-intensive-workshop
slides: https://2020-09-fwdays.container.training/
- date: [2020-07-07, 2020-07-09]
country: www

slides/k8s/ingress-tls.md Normal file
View File

@@ -0,0 +1,403 @@
# Ingress and TLS certificates
- Most ingress controllers support TLS connections
(in a way that is standard across controllers)
- The TLS key and certificate are stored in a Secret
- The Secret is then referenced in the Ingress resource:
```yaml
spec:
tls:
- secretName: XXX
hosts:
- YYY
rules:
- ZZZ
```
---
## Obtaining a certificate
- In the next section, we will need a TLS key and certificate
- These usually come in [PEM](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail) format:
```
-----BEGIN CERTIFICATE-----
MIIDATCCAemg...
...
-----END CERTIFICATE-----
```
- We will see how to generate a self-signed certificate
(easy, fast, but won't be recognized by web browsers)
- We will also see how to obtain a certificate from [Let's Encrypt](https://letsencrypt.org/)
(requires the cluster to be reachable through a domain name)
---
class: extra-details
## In production ...
- A very popular option is to use the [cert-manager](https://cert-manager.io/docs/) operator
- It's a flexible, modular approach to automated certificate management
- For simplicity, in this section, we will use [certbot](https://certbot.eff.org/)
- The method shown here works well for one-time certs, but lacks:
- automation
- renewal
---
## Which domain to use
- If you're doing this in a training:
*the instructor will tell you what to use*
- If you're doing this on your own Kubernetes cluster:
*you should use a domain that points to your cluster*
- More precisely:
*you should use a domain that points to your ingress controller*
- If you don't have a domain name, you can use [nip.io](https://nip.io/)
(if your ingress controller is on 1.2.3.4, you can use `whatever.1.2.3.4.nip.io`)
---
## Setting `$DOMAIN`
- We will use `$DOMAIN` in the following section
- Let's set it now
.exercise[
- Set the `DOMAIN` environment variable:
```bash
export DOMAIN=...
```
]
---
## Method 1, self-signed certificate
- Thanks to `openssl`, generating a self-signed cert is just one command away!
.exercise[
- Generate a key and certificate:
```bash
openssl req \
-newkey rsa -nodes -keyout privkey.pem \
-x509 -days 30 -subj /CN=$DOMAIN/ -out cert.pem
```
]
This will create two files, `privkey.pem` and `cert.pem`.
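If we want to double-check the result, `openssl` can also display the certificate's subject and validity period:
```bash
# Inspect the self-signed certificate we just generated
openssl x509 -in cert.pem -noout -subject -dates
```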
---
## Method 2, Let's Encrypt with certbot
- `certbot` is an [ACME](https://tools.ietf.org/html/rfc8555) client
(Automatic Certificate Management Environment)
- We can use it to obtain certificates from Let's Encrypt
- It needs to listen to port 80
(to complete the [HTTP-01 challenge](https://letsencrypt.org/docs/challenge-types/))
- If port 80 is already taken by our ingress controller, see method 3
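Before invoking `certbot`, we can optionally confirm that `$DOMAIN` resolves to our machine and that port 80 is reachable from the outside. Here is one way to do it (a quick sketch, assuming Docker is available and port 80 is currently free):
```bash
# Start a throwaway web server on port 80 ...
docker run --name port80check --rm -d -p 80:80 nginx
# ... check that it answers on our public name ...
curl -I http://$DOMAIN/
# ... and stop it, since certbot needs port 80 for itself
docker stop port80check
```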
---
class: extra-details
## HTTP-01 challenge
- `certbot` contacts Let's Encrypt, asking for a cert for `$DOMAIN`
- Let's Encrypt gives a token to `certbot`
- Let's Encrypt then tries to access the following URL:
`http://$DOMAIN/.well-known/acme-challenge/<token>`
- That URL needs to be routed to `certbot`
- Once Let's Encrypt gets the response from `certbot`, it issues the certificate
---
## Running certbot
- There is a very convenient container image, `certbot/certbot`
- Let's use a volume to get easy access to the generated key and certificate
.exercise[
- Obtain a certificate from Let's Encrypt:
```bash
EMAIL=your.address@example.com
docker run --rm -p 80:80 -v $PWD/letsencrypt:/etc/letsencrypt \
certbot/certbot certonly \
-m $EMAIL \
--standalone --agree-tos -n \
--domain $DOMAIN \
--test-cert
```
]
This will get us a "staging" certificate.
Remove `--test-cert` to obtain a *real* certificate.
---
## Copying the key and certificate
- If everything went fine:
- the key and certificate files are in `letsencrypt/live/$DOMAIN`
- they are owned by `root`
.exercise[
- Grant ourselves permissions on these files:
```bash
sudo chown -R $USER letsencrypt
```
- Copy the certificate and key to the current directory:
```bash
cp letsencrypt/live/$DOMAIN/{cert,privkey}.pem .
```
]
---
## Method 3, certbot with Ingress
- Sometimes, we can't simply listen to port 80:
- we might already have an ingress controller there
- our nodes might be on an internal network
- But we can define an Ingress to route the HTTP-01 challenge to `certbot`!
- Our Ingress needs to route all requests for `/.well-known/acme-challenge` to `certbot`
- There are at least two ways to do that:
- run `certbot` in a Pod (and extract the cert+key when it's done)
- run `certbot` in a container on a node (and manually route traffic to it)
- We're going to use the second option
(mostly because it will give us an excuse to tinker with Endpoints resources!)
---
## The plan
- We need the following resources:
- an Endpoints¹ listing a hard-coded IP address and port
<br/>(where our `certbot` container will be listening)
- a Service corresponding to that Endpoints
- an Ingress sending requests to `/.well-known/acme-challenge/*` to that Service
<br/>(we don't even need to include a domain name in it)
- Then we need to start `certbot` so that it's listening on the right address+port
.footnote[¹Endpoints is always plural, because even a single resource is a list of endpoints.]
---
## Creating resources
- We prepared a YAML file to create the three resources
- However, the Endpoints resource needs to be updated with the current node's address
.exercise[
- Edit `~/container.training/k8s/certbot.yaml`
(replace `A.B.C.D` with the current node's address)
- Create the resources:
```bash
kubectl apply -f ~/container.training/k8s/certbot.yaml
```
]
---
## Obtaining the certificate
- Now we can run `certbot`, listening on the port listed in the Endpoints
(i.e. 8000)
.exercise[
- Run `certbot`:
```bash
EMAIL=your.address@example.com
docker run --rm -p 8000:80 -v $PWD/letsencrypt:/etc/letsencrypt \
certbot/certbot certonly \
-m $EMAIL \
--standalone --agree-tos -n \
--domain $DOMAIN \
--test-cert
```
]
This is using the staging environment.
Remove `--test-cert` to get a production certificate.
---
## Copying the certificate
- Just like in the previous method, the certificate is in `letsencrypt/live/$DOMAIN`
(and owned by root)
.exercise[
- Grant ourselves permissions on these files:
```bash
sudo chown -R $USER letsencrypt
```
- Copy the certificate and key to the current directory:
```bash
cp letsencrypt/live/$DOMAIN/{cert,privkey}.pem .
```
]
---
## Creating the Secret
- We now have two files:
- `privkey.pem` (the private key)
- `cert.pem` (the certificate)
- We can create a Secret to hold them
.exercise[
- Create the Secret:
```bash
kubectl create secret tls $DOMAIN --cert=cert.pem --key=privkey.pem
```
]
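If we want to verify what was created, we can describe the Secret: its type should be `kubernetes.io/tls`, and it should contain `tls.crt` and `tls.key` entries.
```bash
# Optional check on the freshly created Secret
kubectl describe secret $DOMAIN
```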
---
## Ingress with TLS
- To enable TLS for an Ingress, we need to add a `tls` section to the Ingress:
```yaml
spec:
tls:
- secretName: DOMAIN
hosts:
- DOMAIN
rules: ...
```
- The list of hosts will be used by the ingress controller
(to know which certificate to use with [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication))
- Of course, the name of the secret can be different
(here, for clarity and convenience, we set it to match the domain)
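For reference, here is a sketch of a complete Ingress with TLS (assuming a Service named `whatever` listening on port 80, and the `nip.io` naming used earlier; adapt the names to your own resources):
```yaml
apiVersion: networking.k8s.io/v1beta1
kind: Ingress
metadata:
  name: whatever
spec:
  tls:
  - secretName: whatever.A.B.C.D.nip.io   # Secret created with "kubectl create secret tls"
    hosts:
    - whatever.A.B.C.D.nip.io
  rules:
  - host: whatever.A.B.C.D.nip.io
    http:
      paths:
      - path: /
        backend:
          serviceName: whatever
          servicePort: 80
```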
---
class: extra-details
## About the ingress controller
- Many ingress controllers can use different "stores" for keys and certificates
- Our ingress controller needs to be configured to use secrets
(as opposed to, e.g., obtaining certificates directly with Let's Encrypt)
---
## Using the certificate
.exercise[
- Edit the Ingress manifest, `~/container.training/k8s/ingress.yaml`
- Uncomment the `tls` section
- Update the `secretName` and `hosts` list
- Create or update the Ingress:
```bash
kubectl apply -f ~/container.training/k8s/ingress.yaml
```
- Check that the URL now works over `https`
(it might take a minute to be picked up by the ingress controller)
]
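One way to check it with `curl` (since the staging certificate isn't trusted by default, we skip verification):
```bash
# -k skips certificate verification (needed with a staging certificate);
# -v shows the TLS handshake and which certificate the ingress presents
curl -kv https://$DOMAIN/
```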
---
## Discussion
*To repeat something mentioned earlier ...*
- The methods presented here are for *educational purposes only*
- In most production scenarios, the certificates will be obtained automatically
- A very popular option is to use the [cert-manager](https://cert-manager.io/docs/) operator
???
:EN:- Ingress and TLS
:FR:- Certificats TLS et *ingress*

View File

@@ -149,6 +149,28 @@
---
class: extra-details
## Supporting other CPU architectures
- The `jpetazzo/httpenv` image is currently only available for `x86_64`
(the "classic" Intel 64 bits architecture found on most PCs and Macs)
- That image won't work on other architectures
(e.g. Raspberry Pi or other ARM-based machines)
- Note that Docker supports [multi-arch](https://www.docker.com/blog/multi-arch-build-and-images-the-simple-way/) images
(so *technically* we could make it work across multiple architectures)
- If you want to build `httpenv` for your own platform, here is the source:
https://github.com/jpetazzo/httpenv
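For instance, with Docker's `buildx` plugin, a multi-arch image could be built and pushed roughly like this (a sketch; `yourname/httpenv` is a placeholder, and the platform list is up to you):
```bash
# Build httpenv for several architectures and push the resulting
# multi-arch image (requires the buildx plugin and a configured builder)
git clone https://github.com/jpetazzo/httpenv
cd httpenv
docker buildx build --platform linux/amd64,linux/arm64,linux/arm/v7 \
       -t yourname/httpenv --push .
```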
---
## Creating a deployment for our HTTP server
- We will create a deployment with `kubectl create deployment`

View File

@@ -191,6 +191,8 @@ are a few tools that can help us.*
## Developer experience
*These questions constitute a quick "smoke test" for our strategy:*
- How do we on-board a new developer?
- What do they need to install to get a dev stack?
@@ -199,8 +201,6 @@ are a few tools that can help us.*
- How does someone add a component to a stack?
*These questions are good "sanity checks" to validate our strategy!*
---
## Some guidelines

View File

@@ -58,7 +58,7 @@
## Deploying Consul
- We will use a slightly different YAML file
- Let's use a new manifest for our Consul cluster
- The only differences between that file and the previous one are:
@@ -66,15 +66,11 @@
- the corresponding `volumeMounts` in the Pod spec
- the label `consul` has been changed to `persistentconsul`
<br/>
(to avoid conflicts with the other Stateful Set)
.exercise[
- Apply the persistent Consul YAML file:
```bash
kubectl apply -f ~/container.training/k8s/persistent-consul.yaml
kubectl apply -f ~/container.training/k8s/consul-3.yaml
```
]
@@ -97,7 +93,7 @@
kubectl get pv
```
- The Pod `persistentconsul-0` is not scheduled yet:
- The Pod `consul-0` is not scheduled yet:
```bash
kubectl get pods -o wide
```
@@ -112,9 +108,9 @@
- In a Stateful Set, the Pods are started one by one
- `persistentconsul-1` won't be created until `persistentconsul-0` is running
- `consul-1` won't be created until `consul-0` is running
- `persistentconsul-0` has a dependency on an unbound Persistent Volume Claim
- `consul-0` has a dependency on an unbound Persistent Volume Claim
- The scheduler won't schedule the Pod until the PVC is bound
@@ -152,7 +148,7 @@
- Once a PVC is bound, its pod can start normally
- Once the pod `persistentconsul-0` has started, `persistentconsul-1` can be created, etc.
- Once the pod `consul-0` has started, `consul-1` can be created, etc.
- Eventually, our Consul cluster is up, and backed by "persistent" volumes
@@ -160,7 +156,7 @@
- Check that our Consul cluster indeed has 3 members:
```bash
kubectl exec persistentconsul-0 -- consul members
kubectl exec consul-0 -- consul members
```
]

View File

@@ -218,7 +218,9 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
- Replace X.X.X.X and Y.Y.Y.Y with the addresses of other nodes
- The same command-line can be used on all nodes (convenient!)
- A node can add its own address (it will work fine)
- ... Which means that we can use the same command-line on all nodes (convenient!)
---
@@ -258,19 +260,13 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
## Putting it all together
- The file `k8s/consul.yaml` defines the required resources
- The file `k8s/consul-1.yaml` defines the required resources
(service account, cluster role, cluster role binding, service, stateful set)
(service account, role, role binding, service, stateful set)
- It has a few extra touches:
- Inspired by this [excellent tutorial](https://github.com/kelseyhightower/consul-on-kubernetes) by Kelsey Hightower
- a `podAntiAffinity` prevents two pods from running on the same node
- a `preStop` hook makes the pod leave the cluster when it is shut down gracefully
This was inspired by this [excellent tutorial](https://github.com/kelseyhightower/consul-on-kubernetes) by Kelsey Hightower.
Some features from the original tutorial (TLS authentication between
nodes and encryption of gossip traffic) were removed for simplicity.
(many features from the original tutorial were removed for simplicity)
---
@@ -282,7 +278,7 @@ nodes and encryption of gossip traffic) were removed for simplicity.
- Create the stateful set and associated service:
```bash
kubectl apply -f ~/container.training/k8s/consul.yaml
kubectl apply -f ~/container.training/k8s/consul-1.yaml
```
- Check the logs as the pods come up one after another:
@@ -306,6 +302,88 @@ nodes and encryption of gossip traffic) were removed for simplicity.
## Caveats
- The scheduler may place two Consul pods on the same node
- if that node fails, we lose two Consul pods at the same time
- this will cause the cluster to fail
- Scaling down the cluster will cause it to fail
- when a Consul member leaves the cluster, it needs to inform the others
- otherwise, the last remaining node doesn't have quorum and stops functioning
- This Consul cluster doesn't use real persistence yet
- data is stored in the containers' ephemeral filesystem
- if a pod fails, its replacement starts from a blank slate
---
## Improving pod placement
- We need to tell the scheduler:
*do not put two of these pods on the same node!*
- This is done with an `affinity` section like the following one:
```yaml
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: app
operator: In
values:
- consul
topologyKey: kubernetes.io/hostname
```
---
## Using a lifecycle hook
- When a Consul member leaves the cluster, it needs to execute:
```bash
consul leave
```
- This is done with a `lifecycle` section like the following one:
```yaml
lifecycle:
preStop:
exec:
command:
- /bin/sh
- -c
- consul leave
```
---
## Running a better Consul cluster
- Let's try to add the scheduling constraint and lifecycle hook
- We can do that in the same namespace or another one (as we like)
- If we do that in the same namespace, we will see a rolling update
(pods will be replaced one by one)
.exercise[
- Deploy a better Consul cluster:
```bash
kubectl apply -f ~/container.training/k8s/consul-2.yaml
```
]
---
## Still no persistence, though
- We aren't using actual persistence yet
(no `volumeClaimTemplate`, Persistent Volume, etc.)

View File

@@ -77,6 +77,7 @@ content:
- k8s/kubectlproxy.md
-
- k8s/ingress.md
- k8s/ingress-tls.md
- k8s/kustomize.md
- k8s/helm-intro.md
- k8s/helm-chart-format.md

View File

@@ -74,6 +74,7 @@ content:
#- k8s/dryrun.md
#- k8s/exercise-yaml.md
- k8s/ingress.md
#- k8s/ingress-tls.md
-
- k8s/volumes.md
#- k8s/exercise-configmap.md