diff --git a/k8s/certbot.yaml b/k8s/certbot.yaml
new file mode 100644
index 00000000..c784cc3a
--- /dev/null
+++ b/k8s/certbot.yaml
@@ -0,0 +1,33 @@
+kind: Service
+apiVersion: v1
+metadata:
+  name: certbot
+spec:
+  ports:
+  - port: 80
+    protocol: TCP
+---
+apiVersion: networking.k8s.io/v1beta1
+kind: Ingress
+metadata:
+  name: certbot
+spec:
+  rules:
+  - http:
+      paths:
+      - path: /.well-known/acme-challenge/
+        backend:
+          serviceName: certbot
+          servicePort: 80
+---
+apiVersion: v1
+kind: Endpoints
+metadata:
+  name: certbot
+subsets:
+- addresses:
+  - ip: A.B.C.D
+  ports:
+  - port: 8000
+    protocol: TCP
+
diff --git a/k8s/consul-1.yaml b/k8s/consul-1.yaml
new file mode 100644
index 00000000..f913201f
--- /dev/null
+++ b/k8s/consul-1.yaml
@@ -0,0 +1,77 @@
+# Basic Consul cluster using Cloud Auto-Join.
+# Caveats:
+# - no actual persistence
+# - scaling down to 1 will break the cluster
+# - pods may be colocated
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: consul
+rules:
+  - apiGroups: [""]
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: consul
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: consul
+subjects:
+  - kind: ServiceAccount
+    name: consul
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: consul
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: consul
+spec:
+  ports:
+  - port: 8500
+    name: http
+  selector:
+    app: consul
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: consul
+spec:
+  serviceName: consul
+  replicas: 3
+  selector:
+    matchLabels:
+      app: consul
+  template:
+    metadata:
+      labels:
+        app: consul
+    spec:
+      serviceAccountName: consul
+      containers:
+      - name: consul
+        image: "consul:1.8"
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        args:
+        - "agent"
+        - "-bootstrap-expect=3"
+        - "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
+        - "-client=0.0.0.0"
+        - "-data-dir=/consul/data"
+        - "-server"
+        - "-ui"
diff --git a/k8s/consul.yaml b/k8s/consul-2.yaml
similarity index 78%
rename from k8s/consul.yaml
rename to k8s/consul-2.yaml
index d8452a0c..e683aacd 100644
--- a/k8s/consul.yaml
+++ b/k8s/consul-2.yaml
@@ -1,5 +1,9 @@
+# Better Consul cluster.
+# There is still no actual persistence, but:
+# - podAntiAffinity prevents pod colocation
+# - the cluster works when scaling down to 1 (thanks to a lifecycle hook)
 apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
+kind: Role
 metadata:
   name: consul
 rules:
@@ -11,17 +15,16 @@ rules:
   - list
 ---
 apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRoleBinding
+kind: RoleBinding
 metadata:
   name: consul
 roleRef:
   apiGroup: rbac.authorization.k8s.io
-  kind: ClusterRole
+  kind: Role
   name: consul
 subjects:
   - kind: ServiceAccount
     name: consul
-    namespace: default
 ---
 apiVersion: v1
 kind: ServiceAccount
@@ -68,11 +71,16 @@ spec:
       terminationGracePeriodSeconds: 10
       containers:
      - name: consul
-        image: "consul:1.6"
+        image: "consul:1.8"
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
         args:
         - "agent"
         - "-bootstrap-expect=3"
-        - "-retry-join=provider=k8s label_selector=\"app=consul\""
+        - "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
         - "-client=0.0.0.0"
         - "-data-dir=/consul/data"
         - "-server"
diff --git a/k8s/consul-3.yaml b/k8s/consul-3.yaml
new file mode 100644
index 00000000..af62fe0e
--- /dev/null
+++ b/k8s/consul-3.yaml
@@ -0,0 +1,104 @@
+# Even better Consul cluster.
+# This one uses a volumeClaimTemplate to achieve true persistence.
+apiVersion: rbac.authorization.k8s.io/v1
+kind: Role
+metadata:
+  name: consul
+rules:
+  - apiGroups: [""]
+    resources:
+      - pods
+    verbs:
+      - get
+      - list
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: consul
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: Role
+  name: consul
+subjects:
+  - kind: ServiceAccount
+    name: consul
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: consul
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: consul
+spec:
+  ports:
+  - port: 8500
+    name: http
+  selector:
+    app: consul
+---
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: consul
+spec:
+  serviceName: consul
+  replicas: 3
+  selector:
+    matchLabels:
+      app: consul
+  volumeClaimTemplates:
+  - metadata:
+      name: data
+    spec:
+      accessModes:
+        - ReadWriteOnce
+      resources:
+        requests:
+          storage: 1Gi
+  template:
+    metadata:
+      labels:
+        app: consul
+    spec:
+      serviceAccountName: consul
+      affinity:
+        podAntiAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+          - labelSelector:
+              matchExpressions:
+              - key: app
+                operator: In
+                values:
+                - consul
+            topologyKey: kubernetes.io/hostname
+      terminationGracePeriodSeconds: 10
+      containers:
+      - name: consul
+        image: "consul:1.8"
+        volumeMounts:
+        - name: data
+          mountPath: /consul/data
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        args:
+        - "agent"
+        - "-bootstrap-expect=3"
+        - "-retry-join=provider=k8s label_selector=\"app=consul\" namespace=\"$(NAMESPACE)\""
+        - "-client=0.0.0.0"
+        - "-data-dir=/consul/data"
+        - "-server"
+        - "-ui"
+        lifecycle:
+          preStop:
+            exec:
+              command:
+              - /bin/sh
+              - -c
+              - consul leave
diff --git a/k8s/ingress.yaml b/k8s/ingress.yaml
index 19a7a427..0e1967ad 100644
--- a/k8s/ingress.yaml
+++ b/k8s/ingress.yaml
@@ -3,6 +3,10 @@ kind: Ingress
 metadata:
   name: whatever
 spec:
+  #tls:
+  #- secretName: whatever.A.B.C.D.nip.io
+  #  hosts:
+  #  - whatever.A.B.C.D.nip.io
   rules:
   - host: whatever.A.B.C.D.nip.io
     http:
diff --git a/k8s/traefik-v2.yaml b/k8s/traefik-v2.yaml
index 64060532..c8705a7e 100644
--- a/k8s/traefik-v2.yaml
+++ b/k8s/traefik-v2.yaml
@@ -50,8 +50,10 @@ spec:
         - --api.insecure
         - --log.level=INFO
         - --metrics.prometheus
         - --providers.kubernetescrd
         - --providers.kubernetesingress
+        - --entrypoints.http.Address=:80
+        - --entrypoints.https.Address=:443
+        - --entrypoints.https.http.tls.certResolver=default
 ---
 kind: Service
 apiVersion: v1
diff --git a/k8s/traefik.yaml b/k8s/traefik.yaml
index 1ab529aa..8ac2c242 120000
--- a/k8s/traefik.yaml
+++ b/k8s/traefik.yaml
@@ -1 +1 @@
-traefik-v1.yaml
\ No newline at end of file
+traefik-v2.yaml
\ No newline at end of file
diff --git a/prepare-vms/infra/example.openstack-cli b/prepare-vms/infra/example.openstack-cli
new file mode 100644
index 00000000..d20f79d6
--- /dev/null
+++ b/prepare-vms/infra/example.openstack-cli
@@ -0,0 +1,24 @@
+INFRACLASS=openstack-cli
+
+# Copy this file to e.g. "openstack" or "ovh", then customize it.
+# Some OpenStack providers (like OVHcloud) let you download
+# a file containing credentials; that's what you need to use here.
+# The values below are examples.
+export OS_AUTH_URL=https://auth.cloud.ovh.net/v3/
+export OS_IDENTITY_API_VERSION=3
+export OS_USER_DOMAIN_NAME=${OS_USER_DOMAIN_NAME:-"Default"}
+export OS_PROJECT_DOMAIN_NAME=${OS_PROJECT_DOMAIN_NAME:-"Default"}
+export OS_TENANT_ID=abcd1234
+export OS_TENANT_NAME="0123456"
+export OS_USERNAME="user-xyz123"
+export OS_PASSWORD=AbCd1234
+export OS_REGION_NAME="GRA7"
+
+# And then some values to indicate server type, image, etc.
+# You can see available flavors with `openstack flavor list`
+export OS_FLAVOR=s1-4
+# You can see available images with `openstack image list`
+export OS_IMAGE=896c5f54-51dc-44f0-8c22-ce99ba7164df
+# You can create a key with `openstack keypair create --public-key ~/.ssh/id_rsa.pub containertraining`
+export OS_KEY=containertraining
+
diff --git a/prepare-vms/infra/example.openstack b/prepare-vms/infra/example.openstack-tf
similarity index 84%
rename from prepare-vms/infra/example.openstack
rename to prepare-vms/infra/example.openstack-tf
index a99c43af..bad23dfa 100644
--- a/prepare-vms/infra/example.openstack
+++ b/prepare-vms/infra/example.openstack-tf
@@ -1,4 +1,5 @@
-INFRACLASS=openstack
+INFRACLASS=openstack-tf
+
 # If you are using OpenStack, copy this file (e.g. to "openstack" or "enix")
 # and customize the variables below.
 export TF_VAR_user="jpetazzo"
@@ -6,4 +7,4 @@ export TF_VAR_tenant="training"
 export TF_VAR_domain="Default"
 export TF_VAR_password="..."
 export TF_VAR_auth_url="https://api.r1.nxs.enix.io/v3"
-export TF_VAR_flavor="GP1.S"
\ No newline at end of file
+export TF_VAR_flavor="GP1.S"
diff --git a/prepare-vms/infra/hetzner b/prepare-vms/infra/hetzner
new file mode 100644
index 00000000..77547446
--- /dev/null
+++ b/prepare-vms/infra/hetzner
@@ -0,0 +1,5 @@
+INFRACLASS=hetzner
+if ! [ -f ~/.config/hcloud/cli.toml ]; then
+  warn "~/.config/hcloud/cli.toml not found."
+  warn "Make sure that the Hetzner CLI (hcloud) is installed and configured."
+fi
diff --git a/prepare-vms/infra/scaleway b/prepare-vms/infra/scaleway
index 7615e024..53f232e7 100644
--- a/prepare-vms/infra/scaleway
+++ b/prepare-vms/infra/scaleway
@@ -1,5 +1 @@
 INFRACLASS=scaleway
-if ! [ -f ~/.config/scw/config.yaml ]; then
-  warn "~/.config/scw/config.yaml not found."
-  warn "Make sure that the scaleway CLI is installed and configured."
-fi
diff --git a/prepare-vms/lib/commands.sh b/prepare-vms/lib/commands.sh
index 1a190b7e..9b00b203 100644
--- a/prepare-vms/lib/commands.sh
+++ b/prepare-vms/lib/commands.sh
@@ -73,6 +73,19 @@ _cmd_deploy() {
             apt-get update && apt-get install sudo -y"
     fi
 
+    # FIXME
+    # Special case for hetzner since it doesn't have an ubuntu user
+    #if [ "$INFRACLASS" = "hetzner" ]; then
+    #    pssh -l root "
+    #    [ -d /home/ubuntu ] ||
+    #        useradd ubuntu -m -s /bin/bash
+    #    echo 'ubuntu ALL=(ALL:ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
+    #    [ -d /home/ubuntu/.ssh ] ||
+    #        install --owner=ubuntu --mode=700 --directory /home/ubuntu/.ssh
+    #    [ -f /home/ubuntu/.ssh/authorized_keys ] ||
+    #        install --owner=ubuntu --mode=600 /root/.ssh/authorized_keys --target-directory /home/ubuntu/.ssh"
+    #fi
+
     # Copy settings and install Python YAML parser
     pssh -I tee /tmp/settings.yaml /tmp/token &&
-    sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4)
+    sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4) --ignore-preflight-errors=NumCPU
     fi"
 
     # Put kubeconfig in ubuntu's and docker's accounts
@@ -224,13 +237,13 @@ _cmd_kube() {
     # Install kubectx and kubens
     pssh "
     [ -d kubectx ] || git clone https://github.com/ahmetb/kubectx &&
-    sudo ln -sf /home/ubuntu/kubectx/kubectx /usr/local/bin/kctx &&
-    sudo ln -sf /home/ubuntu/kubectx/kubens /usr/local/bin/kns &&
-    sudo cp /home/ubuntu/kubectx/completion/*.bash /etc/bash_completion.d &&
+    sudo ln -sf \$HOME/kubectx/kubectx /usr/local/bin/kctx &&
+    sudo ln -sf \$HOME/kubectx/kubens /usr/local/bin/kns &&
+    sudo cp \$HOME/kubectx/completion/*.bash /etc/bash_completion.d &&
     [ -d kube-ps1 ] || git clone https://github.com/jonmosco/kube-ps1 &&
     sudo -u docker sed -i s/docker-prompt/kube_ps1/ /home/docker/.bashrc &&
     sudo -u docker tee -a /home/docker/.bashrc < webssh/known_hosts"
+    done | sudo tee /opt/webssh/known_hosts"
     pssh "cat >webssh.service </dev/null; then
-        aws ec2 import-key-pair --key-name $AWS_KEY_NAME \
-            --public-key-material "$(ssh-add -L \
-            | grep -i RSA \
-            | head -n1 \
-            | cut -d " " -f 1-2)" &>/dev/null
-
-        if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
-            die "Somehow, importing the key didn't work. Make sure that 'ssh-add -l | grep RSA | head -n1' returns an RSA key?"
-        else
-            info "Imported new key $AWS_KEY_NAME."
-        fi
-    else
-        info "Using existing key $AWS_KEY_NAME."
-    fi
-}
diff --git a/prepare-vms/lib/infra/aws.sh b/prepare-vms/lib/infra/aws.sh
index f7721003..6e0b4ad3 100644
--- a/prepare-vms/lib/infra/aws.sh
+++ b/prepare-vms/lib/infra/aws.sh
@@ -1,9 +1,14 @@
+if ! command -v aws >/dev/null; then
+    warn "AWS CLI (aws) not found."
+fi
+
 infra_list() {
-    aws_display_tags
+    aws ec2 describe-instances --output json |
+        jq -r '.Reservations[].Instances[] | [.InstanceId, .ClientToken, .State.Name, .InstanceType] | @tsv'
 }
 
 infra_quotas() {
-    greet
+    aws_greet
 
     max_instances=$(aws ec2 describe-account-attributes \
         --attribute-names max-instances \
@@ -21,10 +26,10 @@ infra_start() {
     COUNT=$1
 
     # Print our AWS username, to ease the pain of credential-juggling
-    greet
+    aws_greet
 
     # Upload our SSH keys to AWS if needed, to be added to each VM's authorized_keys
-    key_name=$(sync_keys)
+    key_name=$(aws_sync_keys)
 
     AMI=$(aws_get_ami) # Retrieve the AWS image ID
     if [ -z "$AMI" ]; then
@@ -61,7 +66,7 @@ infra_start() {
     aws_tag_instances $TAG $TAG
 
     # Wait until EC2 API tells us that the instances are running
-    wait_until_tag_is_running $TAG $COUNT
+    aws_wait_until_tag_is_running $TAG $COUNT
 
     aws_get_instance_ips_by_tag $TAG > tags/$TAG/ips.txt
 }
@@ -98,7 +103,7 @@ infra_disableaddrchecks() {
     done
 }
 
-wait_until_tag_is_running() {
+aws_wait_until_tag_is_running() {
     max_retry=100
     i=0
     done_count=0
@@ -214,3 +219,32 @@ aws_get_ami() {
     ##VERSION## find_ubuntu_ami -r $AWS_DEFAULT_REGION -a amd64 -v 18.04 -t hvm:ebs -N -q
 }
+
+aws_greet() {
+    IAMUSER=$(aws iam get-user --query 'User.UserName')
+    info "Hello! You seem to be UNIX user $USER, and IAM user $IAMUSER."
+}
+
+aws_sync_keys() {
+    # make sure ssh-add -l contains "RSA"
+    ssh-add -l | grep -q RSA \
+        || die "The output of \`ssh-add -l\` doesn't contain 'RSA'. Start the agent, add your keys?"
+
+    AWS_KEY_NAME=$(make_key_name)
+    info "Syncing keys... "
+    if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
+        aws ec2 import-key-pair --key-name $AWS_KEY_NAME \
+            --public-key-material "$(ssh-add -L \
+            | grep -i RSA \
+            | head -n1 \
+            | cut -d " " -f 1-2)" &>/dev/null
+
+        if ! aws ec2 describe-key-pairs --key-name "$AWS_KEY_NAME" &>/dev/null; then
+            die "Somehow, importing the key didn't work. Make sure that 'ssh-add -l | grep RSA | head -n1' returns an RSA key?"
+        else
+            info "Imported new key $AWS_KEY_NAME."
+        fi
+    else
+        info "Using existing key $AWS_KEY_NAME."
+    fi
+}
diff --git a/prepare-vms/lib/infra/hetzner.sh b/prepare-vms/lib/infra/hetzner.sh
new file mode 100644
index 00000000..bdc29611
--- /dev/null
+++ b/prepare-vms/lib/infra/hetzner.sh
@@ -0,0 +1,57 @@
+if ! command -v hcloud >/dev/null; then
+    warn "Hetzner CLI (hcloud) not found."
+fi
+if ! [ -f ~/.config/hcloud/cli.toml ]; then
+    warn "~/.config/hcloud/cli.toml not found."
+fi
+
+infra_list() {
+    [ "$(hcloud server list -o json)" = "null" ] && return
+
+    hcloud server list -o json |
+        jq -r '.[] | [.id, .name, .status, .server_type.name] | @tsv'
+}
+
+infra_start() {
+    COUNT=$1
+
+    HETZNER_INSTANCE_TYPE=${HETZNER_INSTANCE_TYPE-cx21}
+    HETZNER_DATACENTER=${HETZNER_DATACENTER-nbg1-dc3}
+    HETZNER_IMAGE=${HETZNER_IMAGE-168855}
+
+    for I in $(seq 1 $COUNT); do
+        NAME=$(printf "%s-%03d" $TAG $I)
+        sep "Starting instance $I/$COUNT"
+        info "   Datacenter: $HETZNER_DATACENTER"
+        info "         Name: $NAME"
+        info "Instance type: $HETZNER_INSTANCE_TYPE"
+        hcloud server create \
+            --type=${HETZNER_INSTANCE_TYPE} \
+            --datacenter=${HETZNER_DATACENTER} \
+            --image=${HETZNER_IMAGE} \
+            --name=$NAME \
+            --label=tag=$TAG \
+            --ssh-key ~/.ssh/id_rsa.pub
+    done
+
+    hetzner_get_ips_by_tag $TAG > tags/$TAG/ips.txt
+}
+
+infra_stop() {
+    for ID in $(hetzner_get_ids_by_tag $TAG); do
+        info "Scheduling deletion of instance $ID..."
+        hcloud server delete $ID &
+    done
+    info "Waiting for deletion to complete..."
+    wait
+}
+
+hetzner_get_ids_by_tag() {
+    TAG=$1
+    hcloud server list --selector=tag=$TAG -o json | jq -r .[].id
+}
+
+hetzner_get_ips_by_tag() {
+    TAG=$1
+    hcloud server list --selector=tag=$TAG -o json | jq -r .[].public_net.ipv4.ip
+}
diff --git a/prepare-vms/lib/infra/openstack-cli.sh b/prepare-vms/lib/infra/openstack-cli.sh
new file mode 100644
index 00000000..19d77d4b
--- /dev/null
+++ b/prepare-vms/lib/infra/openstack-cli.sh
@@ -0,0 +1,53 @@
+infra_list() {
+    openstack server list -f json |
+        jq -r '.[] | [.ID, .Name, .Status, .Flavor] | @tsv'
+}
+
+infra_start() {
+    COUNT=$1
+
+    sep "Starting $COUNT instances"
+    info "Region: $OS_REGION_NAME"
+    info "  User: $OS_USERNAME"
+    info "Flavor: $OS_FLAVOR"
+    info " Image: $OS_IMAGE"
+    openstack server create \
+        --flavor $OS_FLAVOR \
+        --image $OS_IMAGE \
+        --key-name $OS_KEY \
+        --min $COUNT --max $COUNT \
+        --property workshopctl=$TAG \
+        $TAG
+
+    sep "Waiting for IP addresses to be available"
+    GOT=0
+    while [ "$GOT" != "$COUNT" ]; do
+        echo "Got $GOT/$COUNT IP addresses."
+        oscli_get_ips_by_tag $TAG > tags/$TAG/ips.txt
+        GOT="$(wc -l < tags/$TAG/ips.txt)"
+    done
+}
+
+infra_stop() {
+    info "Counting instances..."
+    oscli_get_instances_json $TAG |
+        jq -r .[].Name |
+        wc -l
+    info "Deleting instances..."
+    oscli_get_instances_json $TAG |
+        jq -r .[].Name |
+        xargs -P10 -n1 openstack server delete
+    info "Done."
+}
+
+oscli_get_instances_json() {
+    TAG=$1
+    openstack server list -f json --name "${TAG}-[0-9]*"
+}
+
+oscli_get_ips_by_tag() {
+    TAG=$1
+    oscli_get_instances_json $TAG |
+        jq -r .[].Networks | grep -oE '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' || true
+}
diff --git a/prepare-vms/lib/infra/openstack.sh b/prepare-vms/lib/infra/openstack-tf.sh
similarity index 100%
rename from prepare-vms/lib/infra/openstack.sh
rename to prepare-vms/lib/infra/openstack-tf.sh
diff --git a/prepare-vms/lib/infra/scaleway.sh b/prepare-vms/lib/infra/scaleway.sh
index 9bd07ede..5311b7ec 100644
--- a/prepare-vms/lib/infra/scaleway.sh
+++ b/prepare-vms/lib/infra/scaleway.sh
@@ -1,15 +1,18 @@
-infra_list() {
-    die "unimplemented"
-}
+if ! command -v scw >/dev/null; then
+    warn "Scaleway CLI (scw) not found."
+fi
+if ! [ -f ~/.config/scw/config.yaml ]; then
+    warn "~/.config/scw/config.yaml not found."
+fi
 
-infra_quotas() {
-    die "unimplemented"
+infra_list() {
+    scw instance server list -o json |
+        jq -r '.[] | [.id, .name, .state, .commercial_type] | @tsv'
 }
 
 infra_start() {
     COUNT=$1
 
-    AWS_KEY_NAME=$(make_key_name)
     SCW_INSTANCE_TYPE=${SCW_INSTANCE_TYPE-DEV1-M}
     SCW_ZONE=${SCW_ZONE-fr-par-1}
 
@@ -29,12 +32,12 @@ infra_start() {
 }
 
 infra_stop() {
-    for ID in $(scw_get_ids_by_tag $TAG); do
-        info "Scheduling deletion of instance $ID..."
-        scw instance server delete force-shutdown=true server-id=$ID &
-    done
-    info "Waiting for deletion to complete..."
-    wait
+    info "Counting instances..."
+    scw_get_ids_by_tag $TAG | wc -l
+    info "Deleting instances..."
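+    # xargs runs up to 10 deletions in parallel (-P10), one server per invocation (-n1)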
+    scw_get_ids_by_tag $TAG |
+        xargs -n1 -P10 -I@@ \
+        scw instance server delete force-shutdown=true server-id=@@
 }
 
 scw_get_ids_by_tag() {
@@ -46,11 +49,3 @@ scw_get_ips_by_tag() {
     TAG=$1
     scw instance server list name=$TAG -o json | jq -r .[].public_ip.address
 }
-
-infra_opensg() {
-    die "unimplemented"
-}
-
-infra_disableaddrchecks() {
-    die "unimplemented"
-}
diff --git a/prepare-vms/lib/infra/unimplemented.sh b/prepare-vms/lib/infra/unimplemented.sh
new file mode 100644
index 00000000..c32e2356
--- /dev/null
+++ b/prepare-vms/lib/infra/unimplemented.sh
@@ -0,0 +1,23 @@
+infra_disableaddrchecks() {
+    die "unimplemented"
+}
+
+infra_list() {
+    die "unimplemented"
+}
+
+infra_opensg() {
+    die "unimplemented"
+}
+
+infra_quotas() {
+    die "unimplemented"
+}
+
+infra_start() {
+    die "unimplemented"
+}
+
+infra_stop() {
+    die "unimplemented"
+}
diff --git a/prepare-vms/lib/postprep.py b/prepare-vms/lib/postprep.py
index e60079bf..e14bd9c4 100755
--- a/prepare-vms/lib/postprep.py
+++ b/prepare-vms/lib/postprep.py
@@ -37,7 +37,7 @@ def system(cmd):
         td = str(t2-t1)[:5]
         f.write(bold("[{}] in {}s\n".format(retcode, td)))
     STEP += 1
-    with open("/home/ubuntu/.bash_history", "a") as f:
+    with open(os.environ["HOME"] + "/.bash_history", "a") as f:
         f.write("{}\n".format(cmd))
     if retcode != 0:
         msg = "The following command failed with exit code {}:\n".format(retcode)
diff --git a/prepare-vms/lib/pssh.sh b/prepare-vms/lib/pssh.sh
index abb94539..ca3bc639 100644
--- a/prepare-vms/lib/pssh.sh
+++ b/prepare-vms/lib/pssh.sh
@@ -18,7 +18,13 @@ pssh() {
     echo "[parallel-ssh] $@"
     export PSSH=$(which pssh || which parallel-ssh)
 
-    $PSSH -h $HOSTFILE -l ubuntu \
+    if [ "$INFRACLASS" = hetzner ]; then
+        LOGIN=root
+    else
+        LOGIN=ubuntu
+    fi
+
+    $PSSH -h $HOSTFILE -l $LOGIN \
         --par 100 \
         -O LogLevel=ERROR \
         -O UserKnownHostsFile=/dev/null \
diff --git a/prepare-vms/map-dns.py b/prepare-vms/map-dns.py
index 6dcdfc2c..95fee748 100755
--- a/prepare-vms/map-dns.py
+++ b/prepare-vms/map-dns.py
@@ -29,6 +29,7 @@ apiurl = "https://dns.api.gandi.net/api/v5/domains"
 if len(sys.argv) == 2:
     tag = sys.argv[1]
     domains = open(domains_file).read().split()
+    domains = [ d for d in domains if not d.startswith('#') ]
     ips = open(f"tags/{tag}/ips.txt").read().split()
     settings_file = f"tags/{tag}/settings.yaml"
     clustersize = yaml.safe_load(open(settings_file))["clustersize"]
diff --git a/prepare-vms/terraform/machines.tf b/prepare-vms/terraform/machines.tf
index 41ff96e7..78b9da6a 100644
--- a/prepare-vms/terraform/machines.tf
+++ b/prepare-vms/terraform/machines.tf
@@ -1,7 +1,7 @@
 resource "openstack_compute_instance_v2" "machine" {
   count = "${var.count}"
   name = "${format("%s-%04d", "${var.prefix}", count.index+1)}"
-  image_name = "Ubuntu 16.04.5 (Xenial Xerus)"
+  image_name = "Ubuntu 18.04.4 20200324"
   flavor_name = "${var.flavor}"
   security_groups = ["${openstack_networking_secgroup_v2.full_access.name}"]
   key_pair = "${openstack_compute_keypair_v2.ssh_deploy_key.name}"
diff --git a/prepare-vms/workshopctl b/prepare-vms/workshopctl
index 870d3e06..9ebd6ceb 100755
--- a/prepare-vms/workshopctl
+++ b/prepare-vms/workshopctl
@@ -15,7 +15,6 @@ for lib in lib/*.sh; do
 done
 
 DEPENDENCIES="
-aws
 ssh
 curl
 jq
diff --git a/slides/index.yaml b/slides/index.yaml
index aaa83c44..0f68e9e1 100644
--- a/slides/index.yaml
+++ b/slides/index.yaml
@@ -58,6 +58,7 @@
   speaker: jpetazzo
   title: Intensive Docker Online Workshop
   attend: https://fwdays.com/en/event/intensive-docker-workshop
+  slides: https://2020-08-fwdays.container.training/
 
 - date: [2020-09-12, 2020-09-13]
   country: www
@@ -66,6 +67,7 @@
   speaker: jpetazzo
   title: Kubernetes Intensive Online Workshop
   attend: https://fwdays.com/en/event/kubernetes-intensive-workshop
+  slides: https://2020-09-fwdays.container.training/
 
 - date: [2020-07-07, 2020-07-09]
   country: www
diff --git a/slides/k8s/ingress-tls.md b/slides/k8s/ingress-tls.md
new file mode 100644
index 00000000..6799e344
--- /dev/null
+++ b/slides/k8s/ingress-tls.md
@@ -0,0 +1,403 @@
+# Ingress and TLS certificates
+
+- Most ingress controllers support TLS connections
+
+  (in a way that is standard across controllers)
+
+- The TLS key and certificate are stored in a Secret
+
+- The Secret is then referenced in the Ingress resource:
+  ```yaml
+  spec:
+    tls:
+    - secretName: XXX
+      hosts:
+      - YYY
+    rules:
+    - ZZZ
+  ```
+
+---
+
+## Obtaining a certificate
+
+- In the next section, we will need a TLS key and certificate
+
+- These usually come in [PEM](https://en.wikipedia.org/wiki/Privacy-Enhanced_Mail) format:
+  ```
+  -----BEGIN CERTIFICATE-----
+  MIIDATCCAemg...
+  ...
+  -----END CERTIFICATE-----
+  ```
+
+- We will see how to generate a self-signed certificate
+
+  (easy, fast, but won't be recognized by web browsers)
+
+- We will also see how to obtain a certificate from [Let's Encrypt](https://letsencrypt.org/)
+
+  (requires the cluster to be reachable through a domain name)
+
+---
+
+class: extra-details
+
+## In production ...
+
+- A very popular option is to use the [cert-manager](https://cert-manager.io/docs/) operator
+
+- It's a flexible, modular approach to automated certificate management
+
+- For simplicity, in this section, we will use [certbot](https://certbot.eff.org/)
+
+- The method shown here works well for one-time certs, but lacks:
+
+  - automation
+
+  - renewal
+
+---
+
+## Which domain to use
+
+- If you're doing this in a training:
+
+  *the instructor will tell you what to use*
+
+- If you're doing this on your own Kubernetes cluster:
+
+  *you should use a domain that points to your cluster*
+
+- More precisely:
+
+  *you should use a domain that points to your ingress controller*
+
+- If you don't have a domain name, you can use [nip.io](https://nip.io/)
+
+  (if your ingress controller is on 1.2.3.4, you can use `whatever.1.2.3.4.nip.io`)
+
+---
+
+## Setting `$DOMAIN`
+
+- We will use `$DOMAIN` in the following section
+
+- Let's set it now
+
+.exercise[
+
+- Set the `DOMAIN` environment variable:
+  ```bash
+  export DOMAIN=...
+  ```
+
+]
+
+---
+
+## Method 1, self-signed certificate
+
+- Thanks to `openssl`, generating a self-signed cert is just one command away!
+
+.exercise[
+
+- Generate a key and certificate:
+  ```bash
+  openssl req \
+    -newkey rsa -nodes -keyout privkey.pem \
+    -x509 -days 30 -subj /CN=$DOMAIN/ -out cert.pem
+  ```
+
+]
+
+This will create two files, `privkey.pem` and `cert.pem`.
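+
+If we want to double-check the result (optional; `-subject` and `-dates` are
+standard `openssl x509` flags), we can display the subject and validity dates:
+  ```bash
+  openssl x509 -in cert.pem -noout -subject -dates
+  ```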
+
+---
+
+## Method 2, Let's Encrypt with certbot
+
+- `certbot` is an [ACME](https://tools.ietf.org/html/rfc8555) client
+
+  (Automatic Certificate Management Environment)
+
+- We can use it to obtain certificates from Let's Encrypt
+
+- It needs to listen on port 80
+
+  (to complete the [HTTP-01 challenge](https://letsencrypt.org/docs/challenge-types/))
+
+- If port 80 is already taken by our ingress controller, see method 3
+
+---
+
+class: extra-details
+
+## HTTP-01 challenge
+
+- `certbot` contacts Let's Encrypt, asking for a cert for `$DOMAIN`
+
+- Let's Encrypt gives a token to `certbot`
+
+- Let's Encrypt then tries to access the following URL:
+
+  `http://$DOMAIN/.well-known/acme-challenge/<token>`
+
+- That URL needs to be routed to `certbot`
+
+- Once Let's Encrypt gets the response from `certbot`, it issues the certificate
+
+---
+
+## Running certbot
+
+- There is a very convenient container image, `certbot/certbot`
+
+- Let's use a volume to get easy access to the generated key and certificate
+
+.exercise[
+
+- Obtain a certificate from Let's Encrypt:
+  ```bash
+  EMAIL=your.address@example.com
+  docker run --rm -p 80:80 -v $PWD/letsencrypt:/etc/letsencrypt \
+    certbot/certbot certonly \
+    -m $EMAIL \
+    --standalone --agree-tos -n \
+    --domain $DOMAIN \
+    --test-cert
+  ```
+
+]
+
+This will get us a "staging" certificate.
+Remove `--test-cert` to obtain a *real* certificate.
+
+---
+
+## Copying the key and certificate
+
+- If everything went fine:
+
+  - the key and certificate files are in `letsencrypt/live/$DOMAIN`
+
+  - they are owned by `root`
+
+.exercise[
+
+- Grant ourselves permissions on these files:
+  ```bash
+  sudo chown -R $USER letsencrypt
+  ```
+
+- Copy the certificate and key to the current directory:
+  ```bash
+  cp letsencrypt/live/$DOMAIN/{cert,privkey}.pem .
+  ```
+
+]
+
+---
+
+## Method 3, certbot with Ingress
+
+- Sometimes, we can't simply listen on port 80:
+
+  - we might already have an ingress controller there
+
+  - our nodes might be on an internal network
+
+- But we can define an Ingress to route the HTTP-01 challenge to `certbot`!
+
+- Our Ingress needs to route all requests to `/.well-known/acme-challenge` to `certbot`
+
+- There are at least two ways to do that:
+
+  - run `certbot` in a Pod (and extract the cert+key when it's done)
+
+  - run `certbot` in a container on a node (and manually route traffic to it)
+
+- We're going to use the second option
+
+  (mostly because it will give us an excuse to tinker with Endpoints resources!)
+
+---
+
+## The plan
+
+- We need the following resources:
+
+  - an Endpoints¹ listing a hard-coded IP address and port
+    (where our `certbot` container will be listening)
+
+  - a Service corresponding to that Endpoints
+
+  - an Ingress sending requests to `/.well-known/acme-challenge/*` to that Service
+    (we don't even need to include a domain name in it)
+
+- Then we need to start `certbot` so that it's listening on the right address+port
+
+.footnote[¹Endpoints is always plural, because even a single resource is a list of endpoints.]
+
+---
+
+## Creating resources
+
+- We prepared a YAML file to create the three resources
+
+- However, the Endpoints resource needs to be adapted to contain the current node's address
+
+.exercise[
+
+- Edit `~/container.training/k8s/certbot.yaml`
+
+  (replace `A.B.C.D` with the current node's address)
+
+- Create the resources:
+  ```bash
+  kubectl apply -f ~/container.training/k8s/certbot.yaml
+  ```
+
+]
+
+---
+
+## Obtaining the certificate
+
+- Now we can run `certbot`, listening on the port listed in the Endpoints
+
+  (i.e. 8000)
+
+.exercise[
+
+- Run `certbot`:
+  ```bash
+  EMAIL=your.address@example.com
+  docker run --rm -p 8000:80 -v $PWD/letsencrypt:/etc/letsencrypt \
+    certbot/certbot certonly \
+    -m $EMAIL \
+    --standalone --agree-tos -n \
+    --domain $DOMAIN \
+    --test-cert
+  ```
+
+]
+
+This is using the staging environment.
+Remove `--test-cert` to get a production certificate.
+
+---
+
+## Copying the certificate
+
+- Just like in the previous method, the certificate is in `letsencrypt/live/$DOMAIN`
+
+  (and owned by root)
+
+.exercise[
+
+- Grant ourselves permissions on these files:
+  ```bash
+  sudo chown -R $USER letsencrypt
+  ```
+
+- Copy the certificate and key to the current directory:
+  ```bash
+  cp letsencrypt/live/$DOMAIN/{cert,privkey}.pem .
+  ```
+
+]
+
+---
+
+## Creating the Secret
+
+- We now have two files:
+
+  - `privkey.pem` (the private key)
+
+  - `cert.pem` (the certificate)
+
+- We can create a Secret to hold them
+
+.exercise[
+
+- Create the Secret:
+  ```bash
+  kubectl create secret tls $DOMAIN --cert=cert.pem --key=privkey.pem
+  ```
+
+]
+
+---
+
+## Ingress with TLS
+
+- To enable TLS for an Ingress, we need to add a `tls` section to the Ingress:
+  ```yaml
+  spec:
+    tls:
+    - secretName: DOMAIN
+      hosts:
+      - DOMAIN
+    rules: ...
+  ```
+
+- The list of hosts will be used by the ingress controller
+
+  (to know which certificate to use with [SNI](https://en.wikipedia.org/wiki/Server_Name_Indication))
+
+- Of course, the name of the secret can be different
+
+  (here, for clarity and convenience, we set it to match the domain)
+
+---
+
+class: extra-details
+
+## About the ingress controller
+
+- Many ingress controllers can use different "stores" for keys and certificates
+
+- Our ingress controller needs to be configured to use Secrets
+
+  (as opposed to, e.g., obtaining certificates directly from Let's Encrypt)
+
+---
+
+## Using the certificate
+
+.exercise[
+
+- Edit the Ingress manifest, `~/container.training/k8s/ingress.yaml`
+
+- Uncomment the `tls` section
+
+- Update the `secretName` and `hosts` list
+
+- Create or update the Ingress:
+  ```bash
+  kubectl apply -f ~/container.training/k8s/ingress.yaml
+  ```
+
+- Check that the URL now works over `https`
+
+  (it might take a minute to be picked up by the ingress controller)
+
+]
+
+---
+
+## Discussion
+
+*To repeat something mentioned earlier ...*
+
+- The methods presented here are for *educational purposes only*
+
+- In most production scenarios, the certificates will be obtained automatically
+
+- A very popular option is to use the [cert-manager](https://cert-manager.io/docs/) operator
+
+???
+
+:EN:- Ingress and TLS
+:FR:- Certificats TLS et *ingress*
diff --git a/slides/k8s/kubectlexpose.md b/slides/k8s/kubectlexpose.md
index 0d3bec75..35b0538d 100644
--- a/slides/k8s/kubectlexpose.md
+++ b/slides/k8s/kubectlexpose.md
@@ -149,6 +149,28 @@
 
 ---
 
+class: extra-details
+
+## Supporting other CPU architectures
+
+- The `jpetazzo/httpenv` image is currently only available for `x86_64`
+
+  (the "classic" 64-bit Intel architecture found on most PCs and Macs)
+
+- That image won't work on other architectures
+
+  (e.g. Raspberry Pi or other ARM-based machines)
+
+- Note that Docker supports [multi-arch](https://www.docker.com/blog/multi-arch-build-and-images-the-simple-way/) images
+
+  (so *technically* we could make it work across multiple architectures)
+
+- If you want to build `httpenv` for your own platform, here is the source:
+
+  https://github.com/jpetazzo/httpenv
+
+---
+
 ## Creating a deployment for our HTTP server
 
 - We will create a deployment with `kubectl create deployment`
diff --git a/slides/k8s/lastwords.md b/slides/k8s/lastwords.md
index 0d65ca00..d98d9583 100644
--- a/slides/k8s/lastwords.md
+++ b/slides/k8s/lastwords.md
@@ -191,6 +191,8 @@ are a few tools that can help us.*
 
 ## Developer experience
 
+*These questions constitute a quick "smoke test" for our strategy:*
+
 - How do we on-board a new developer?
 
 - What do they need to install to get a dev stack?
 
@@ -199,8 +201,6 @@ are a few tools that can help us.*
 
 - How does someone add a component to a stack?
 
-*These questions are good "sanity checks" to validate our strategy!*
-
 ---
 
 ## Some guidelines
diff --git a/slides/k8s/local-persistent-volumes.md b/slides/k8s/local-persistent-volumes.md
index fda28eae..5910b575 100644
--- a/slides/k8s/local-persistent-volumes.md
+++ b/slides/k8s/local-persistent-volumes.md
@@ -58,7 +58,7 @@
 
 ## Deploying Consul
 
-- We will use a slightly different YAML file
+- Let's use a new manifest for our Consul cluster
 
 - The only differences between that file and the previous one are:
 
@@ -66,15 +66,11 @@
 
   - the corresponding `volumeMounts` in the Pod spec
 
-  - the label `consul` has been changed to `persistentconsul`
-
-    (to avoid conflicts with the other Stateful Set)
-
 .exercise[
 
 - Apply the persistent Consul YAML file:
   ```bash
-  kubectl apply -f ~/container.training/k8s/persistent-consul.yaml
+  kubectl apply -f ~/container.training/k8s/consul-3.yaml
   ```
 
 ]
 
@@ -97,7 +93,7 @@
   kubectl get pv
   ```
 
-- The Pod `persistentconsul-0` is not scheduled yet:
+- The Pod `consul-0` is not scheduled yet:
   ```bash
   kubectl get pods -o wide
   ```
 
@@ -112,9 +108,9 @@
 
 - In a Stateful Set, the Pods are started one by one
 
-- `persistentconsul-1` won't be created until `persistentconsul-0` is running
+- `consul-1` won't be created until `consul-0` is running
 
-- `persistentconsul-0` has a dependency on an unbound Persistent Volume Claim
+- `consul-0` has a dependency on an unbound Persistent Volume Claim
 
 - The scheduler won't schedule the Pod until the PVC is bound
 
@@ -152,7 +148,7 @@
 
 - Once a PVC is bound, its pod can start normally
 
-- Once the pod `persistentconsul-0` has started, `persistentconsul-1` can be created, etc.
+- Once the pod `consul-0` has started, `consul-1` can be created, etc.
 
 - Eventually, our Consul cluster is up, and backed by "persistent" volumes
 
.exercise[
 
 - Check that our Consul cluster indeed has 3 members:
   ```bash
-  kubectl exec persistentconsul-0 -- consul members
+  kubectl exec consul-0 -- consul members
   ```
 
 ]
diff --git a/slides/k8s/statefulsets.md b/slides/k8s/statefulsets.md
index e1a1feb1..84f35018 100644
--- a/slides/k8s/statefulsets.md
+++ b/slides/k8s/statefulsets.md
@@ -218,7 +218,9 @@ consul agent -data-dir=/consul/data -client=0.0.0.0 -server -ui \
 
 - Replace X.X.X.X and Y.Y.Y.Y with the addresses of other nodes
 
-- The same command-line can be used on all nodes (convenient!)
+- A node can add its own address (it will work fine)
+
+- ... which means that we can use the same command-line on all nodes (convenient!)
 
---
 
@@ -258,19 +260,13 @@
 
 ## Putting it all together
 
-- The file `k8s/consul.yaml` defines the required resources
+- The file `k8s/consul-1.yaml` defines the required resources
 
-  (service account, cluster role, cluster role binding, service, stateful set)
+  (service account, role, role binding, service, stateful set)
 
-- It has a few extra touches:
+- Inspired by this [excellent tutorial](https://github.com/kelseyhightower/consul-on-kubernetes) by Kelsey Hightower
 
-  - a `podAntiAffinity` prevents two pods from running on the same node
-
-  - a `preStop` hook makes the pod leave the cluster when shutdown gracefully
-
-This was inspired by this [excellent tutorial](https://github.com/kelseyhightower/consul-on-kubernetes) by Kelsey Hightower.
-Some features from the original tutorial (TLS authentication between
-nodes and encryption of gossip traffic) were removed for simplicity.
+  (many features from the original tutorial were removed for simplicity)
 
@@ -282,7 +278,7 @@
 
 - Create the stateful set and associated service:
   ```bash
-  kubectl apply -f ~/container.training/k8s/consul.yaml
+  kubectl apply -f ~/container.training/k8s/consul-1.yaml
   ```
 
 - Check the logs as the pods come up one after another:
 
@@ -306,6 +302,88 @@
 ## Caveats
 
+- The scheduler may place two Consul pods on the same node
+
+  - if that node fails, we lose two Consul pods at the same time
+
+  - this will cause the cluster to fail
+
+- Scaling down the cluster will cause it to fail
+
+  - when a Consul member leaves the cluster, it needs to inform the others
+
+  - otherwise, the last remaining node doesn't have quorum and stops functioning
+
+- This Consul cluster doesn't use real persistence yet
+
+  - data is stored in the containers' ephemeral filesystem
+
+  - if a pod fails, its replacement starts from a blank slate
+
+---
+
+## Improving pod placement
+
+- We need to tell the scheduler:
+
+  *do not put two of these pods on the same node!*
+
+- This is done with an `affinity` section like the following one:
+  ```yaml
+  affinity:
+    podAntiAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+      - labelSelector:
+          matchExpressions:
+          - key: app
+            operator: In
+            values:
+            - consul
+        topologyKey: kubernetes.io/hostname
+  ```
+
+---
+
+## Using a lifecycle hook
+
+- When a Consul member leaves the cluster, it needs to execute:
+  ```bash
+  consul leave
+  ```
+
+- This is done with a `lifecycle` section like the following one:
+  ```yaml
+  lifecycle:
+    preStop:
+      exec:
+        command:
+        - /bin/sh
+        - -c
+        - consul leave
+  ```
+
+---
+
+## Running a better Consul cluster
+
+- Let's try to add the scheduling constraint and lifecycle hook
+
+- We can do that in the same namespace or another one (as we like)
+
+- If we do that in the same namespace, we will see a rolling update
+
+  (pods will be replaced one by one)
+
+.exercise[
+
+- Deploy a better Consul cluster:
+  ```bash
+  kubectl apply -f ~/container.training/k8s/consul-2.yaml
+  ```
+
+]
+
+---
+
+## Still no persistence, though
+
 - We aren't using actual persistence yet
 
   (no `volumeClaimTemplate`, Persistent Volume, etc.)
diff --git a/slides/kube-selfpaced.yml b/slides/kube-selfpaced.yml
index aa5a2a2b..6315c163 100644
--- a/slides/kube-selfpaced.yml
+++ b/slides/kube-selfpaced.yml
@@ -77,6 +77,7 @@ content:
   - k8s/kubectlproxy.md
 -
   - k8s/ingress.md
+  - k8s/ingress-tls.md
   - k8s/kustomize.md
   - k8s/helm-intro.md
   - k8s/helm-chart-format.md
diff --git a/slides/kube.yml b/slides/kube.yml
index a649026f..f7207db4 100644
--- a/slides/kube.yml
+++ b/slides/kube.yml
@@ -74,6 +74,7 @@ content:
   #- k8s/dryrun.md
   #- k8s/exercise-yaml.md
   - k8s/ingress.md
+  #- k8s/ingress-tls.md
 -
   - k8s/volumes.md
   #- k8s/exercise-configmap.md