Files
container.training/prepare-vms/lib/commands.sh
2021-02-21 16:29:49 +01:00

884 lines
26 KiB
Bash

export AWS_DEFAULT_OUTPUT=text
HELP=""
_cmd() {
HELP="$(printf "%s\n%-20s %s\n" "$HELP" "$1" "$2")"
}
_cmd help "Show available commands"
_cmd_help() {
printf "$(basename $0) - the container training swiss army knife\n"
printf "Commands:"
printf "%s" "$HELP" | sort
}
_cmd build "Build the Docker image to run this program in a container"
_cmd_build() {
docker-compose build
}
_cmd wrap "Run this program in a container"
_cmd_wrap() {
docker-compose run --rm workshopctl "$@"
}
_cmd cards "Generate ready-to-print cards for a group of VMs"
_cmd_cards() {
TAG=$1
need_tag
# This will process ips.txt to generate two files: ips.pdf and ips.html
(
cd tags/$TAG
../../lib/ips-txt-to-html.py settings.yaml
)
ln -sf ../tags/$TAG/ips.html www/$TAG.html
ln -sf ../tags/$TAG/ips.pdf www/$TAG.pdf
info "Cards created. You can view them with:"
info "xdg-open tags/$TAG/ips.html tags/$TAG/ips.pdf (on Linux)"
info "open tags/$TAG/ips.html (on macOS)"
info "Or you can start a web server with:"
info "$0 www"
}
_cmd clean "Remove information about stopped clusters"
_cmd_clean() {
for TAG in tags/*; do
if grep -q ^stopped$ "$TAG/status"; then
info "Removing $TAG..."
rm -rf "$TAG"
fi
done
}
_cmd deploy "Install Docker on a bunch of running VMs"
_cmd_deploy() {
TAG=$1
need_tag
# wait until all hosts are reachable before trying to deploy
info "Trying to reach $TAG instances..."
while ! tag_is_reachable; do
>/dev/stderr echo -n "."
sleep 2
done
>/dev/stderr echo ""
echo deploying > tags/$TAG/status
sep "Deploying tag $TAG"
# Wait for cloudinit to be done
pssh "
while [ ! -f /var/lib/cloud/instance/boot-finished ]; do
sleep 1
done"
# Special case for scaleway since it doesn't come with sudo
if [ "$INFRACLASS" = "scaleway" ]; then
pssh -l root "
grep DEBIAN_FRONTEND /etc/environment || echo DEBIAN_FRONTEND=noninteractive >> /etc/environment
grep cloud-init /etc/sudoers && rm /etc/sudoers
apt-get update && apt-get install sudo -y"
fi
# FIXME
# Special case for hetzner since it doesn't have an ubuntu user
#if [ "$INFRACLASS" = "hetzner" ]; then
# pssh -l root "
#[ -d /home/ubuntu ] ||
# useradd ubuntu -m -s /bin/bash
#echo 'ubuntu ALL=(ALL:ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
#[ -d /home/ubuntu/.ssh ] ||
# install --owner=ubuntu --mode=700 --directory /home/ubuntu/.ssh
#[ -f /home/ubuntu/.ssh/authorized_keys ] ||
# install --owner=ubuntu --mode=600 /root/.ssh/authorized_keys --target-directory /home/ubuntu/.ssh"
#fi
# Copy settings and install Python YAML parser
pssh -I tee /tmp/settings.yaml <tags/$TAG/settings.yaml
pssh "
sudo apt-get update &&
sudo apt-get install -y python-yaml"
# Copy postprep.py to the remote machines, and execute it, feeding it the list of IP addresses
pssh -I tee /tmp/postprep.py <lib/postprep.py
pssh --timeout 900 --send-input "python /tmp/postprep.py >>/tmp/pp.out 2>>/tmp/pp.err" <tags/$TAG/ips.txt
# Install docker-prompt script
pssh -I sudo tee /usr/local/bin/docker-prompt <lib/docker-prompt
pssh sudo chmod +x /usr/local/bin/docker-prompt
# If /home/docker/.ssh/id_rsa doesn't exist, copy it from the first node
pssh "
sudo -u docker [ -f /home/docker/.ssh/id_rsa ] ||
ssh -o StrictHostKeyChecking=no \$(cat /etc/name_of_first_node) sudo -u docker tar -C /home/docker -cvf- .ssh |
sudo -u docker tar -C /home/docker -xf-"
# if 'docker@' doesn't appear in /home/docker/.ssh/authorized_keys, copy it there
pssh "
grep docker@ /home/docker/.ssh/authorized_keys ||
cat /home/docker/.ssh/id_rsa.pub |
sudo -u docker tee -a /home/docker/.ssh/authorized_keys"
# On the first node, create and deploy TLS certs using Docker Machine
# (Currently disabled.)
true || pssh "
if i_am_first_node; then
grep '[0-9]\$' /etc/hosts |
xargs -n2 sudo -H -u docker \
docker-machine create -d generic --generic-ssh-user docker --generic-ip-address
fi"
sep "Deployed tag $TAG"
echo deployed > tags/$TAG/status
info "You may want to run one of the following commands:"
info "$0 kube $TAG"
info "$0 pull_images $TAG"
info "$0 cards $TAG"
}
_cmd disabledocker "Stop Docker Engine and don't restart it automatically"
_cmd_disabledocker() {
TAG=$1
need_tag
pssh "
sudo systemctl disable docker.service
sudo systemctl disable docker.socket
sudo systemctl stop docker
sudo killall containerd
"
}
_cmd kubebins "Install Kubernetes and CNI binaries but don't start anything"
_cmd_kubebins() {
TAG=$1
need_tag
pssh --timeout 300 "
set -e
cd /usr/local/bin
if ! [ -x etcd ]; then
##VERSION##
curl -L https://github.com/etcd-io/etcd/releases/download/v3.4.9/etcd-v3.4.9-linux-amd64.tar.gz \
| sudo tar --strip-components=1 --wildcards -zx '*/etcd' '*/etcdctl'
fi
if ! [ -x hyperkube ]; then
##VERSION##
curl -L https://dl.k8s.io/v1.18.10/kubernetes-server-linux-amd64.tar.gz \
| sudo tar --strip-components=3 -zx \
kubernetes/server/bin/kube{ctl,let,-proxy,-apiserver,-scheduler,-controller-manager}
fi
sudo mkdir -p /opt/cni/bin
cd /opt/cni/bin
if ! [ -x bridge ]; then
curl -L https://github.com/containernetworking/plugins/releases/download/v0.8.6/cni-plugins-linux-amd64-v0.8.6.tgz \
| sudo tar -zx
fi
"
}
_cmd kube "Setup kubernetes clusters with kubeadm (must be run AFTER deploy)"
_cmd_kube() {
TAG=$1
need_tag
# Optional version, e.g. 1.13.5
KUBEVERSION=$2
if [ "$KUBEVERSION" ]; then
EXTRA_APTGET="=$KUBEVERSION-00"
EXTRA_KUBEADM="--kubernetes-version=v$KUBEVERSION"
else
EXTRA_APTGET=""
EXTRA_KUBEADM=""
fi
# Install packages
pssh --timeout 200 "
curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg |
sudo apt-key add - &&
echo deb http://apt.kubernetes.io/ kubernetes-xenial main |
sudo tee /etc/apt/sources.list.d/kubernetes.list"
pssh --timeout 200 "
sudo apt-get update -q &&
sudo apt-get install -qy kubelet$EXTRA_APTGET kubeadm$EXTRA_APTGET kubectl$EXTRA_APTGET &&
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
echo 'alias k=kubectl' | sudo tee /etc/bash_completion.d/k &&
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
# Initialize kube master
pssh --timeout 200 "
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
kubeadm token generate > /tmp/token &&
sudo kubeadm init $EXTRA_KUBEADM --token \$(cat /tmp/token) --apiserver-cert-extra-sans \$(cat /tmp/ipv4) --ignore-preflight-errors=NumCPU
fi"
# Put kubeconfig in ubuntu's and docker's accounts
pssh "
if i_am_first_node; then
sudo mkdir -p \$HOME/.kube /home/docker/.kube &&
sudo cp /etc/kubernetes/admin.conf \$HOME/.kube/config &&
sudo cp /etc/kubernetes/admin.conf /home/docker/.kube/config &&
sudo chown -R \$(id -u) \$HOME/.kube &&
sudo chown -R docker /home/docker/.kube
fi"
# Install weave as the pod network
pssh "
if i_am_first_node; then
kubever=\$(kubectl version | base64 | tr -d '\n') &&
kubectl apply -f https://cloud.weave.works/k8s/net?k8s-version=\$kubever
fi"
# Join the other nodes to the cluster
pssh --timeout 200 "
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
TOKEN=\$(ssh -o StrictHostKeyChecking=no \$FIRSTNODE cat /tmp/token) &&
sudo kubeadm join --discovery-token-unsafe-skip-ca-verification --token \$TOKEN \$FIRSTNODE:6443
fi"
# Install metrics server
pssh "
if i_am_first_node; then
kubectl apply -f https://raw.githubusercontent.com/jpetazzo/container.training/master/k8s/metrics-server.yaml
fi"
}
_cmd kubetools "Install a bunch of CLI tools for Kubernetes"
_cmd_kubetools() {
TAG=$1
need_tag
# Install kubectx and kubens
pssh "
[ -d kubectx ] || git clone https://github.com/ahmetb/kubectx &&
sudo ln -sf \$HOME/kubectx/kubectx /usr/local/bin/kctx &&
sudo ln -sf \$HOME/kubectx/kubens /usr/local/bin/kns &&
sudo cp \$HOME/kubectx/completion/*.bash /etc/bash_completion.d &&
[ -d kube-ps1 ] || git clone https://github.com/jonmosco/kube-ps1 &&
sudo -u docker sed -i s/docker-prompt/kube_ps1/ /home/docker/.bashrc &&
sudo -u docker tee -a /home/docker/.bashrc <<EOF
. \$HOME/kube-ps1/kube-ps1.sh
KUBE_PS1_PREFIX=""
KUBE_PS1_SUFFIX=""
KUBE_PS1_SYMBOL_ENABLE="false"
KUBE_PS1_CTX_COLOR="green"
KUBE_PS1_NS_COLOR="green"
EOF"
# Install stern
pssh "
if [ ! -x /usr/local/bin/stern ]; then
##VERSION##
sudo curl -L -o /usr/local/bin/stern https://github.com/wercker/stern/releases/download/1.11.0/stern_linux_amd64 &&
sudo chmod +x /usr/local/bin/stern &&
stern --completion bash | sudo tee /etc/bash_completion.d/stern
fi"
# Install helm
pssh "
if [ ! -x /usr/local/bin/helm ]; then
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get-helm-3 | sudo bash &&
helm completion bash | sudo tee /etc/bash_completion.d/helm
fi"
# Install kustomize
pssh "
if [ ! -x /usr/local/bin/kustomize ]; then
##VERSION##
curl -L https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/v3.6.1/kustomize_v3.6.1_linux_amd64.tar.gz |
sudo tar -C /usr/local/bin -zx kustomize
echo complete -C /usr/local/bin/kustomize kustomize | sudo tee /etc/bash_completion.d/kustomize
fi"
# Install ship
# Note: 0.51.3 is the last version that doesn't display GIN-debug messages
# (don't want to get folks confused by that!)
pssh "
if [ ! -x /usr/local/bin/ship ]; then
##VERSION##
curl -L https://github.com/replicatedhq/ship/releases/download/v0.51.3/ship_0.51.3_linux_amd64.tar.gz |
sudo tar -C /usr/local/bin -zx ship
fi"
# Install the AWS IAM authenticator
pssh "
if [ ! -x /usr/local/bin/aws-iam-authenticator ]; then
##VERSION##
sudo curl -o /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.7/2019-03-27/bin/linux/amd64/aws-iam-authenticator
sudo chmod +x /usr/local/bin/aws-iam-authenticator
fi"
# Install the krew package manager
pssh "
if [ ! -d /home/docker/.krew ]; then
cd /tmp &&
curl -fsSL https://github.com/kubernetes-sigs/krew/releases/latest/download/krew.tar.gz |
tar -zxf- &&
sudo -u docker -H ./krew-linux_amd64 install krew &&
echo export PATH=/home/docker/.krew/bin:\\\$PATH | sudo -u docker tee -a /home/docker/.bashrc
fi"
# Install k9s and popeye
pssh "
if [ ! -x /usr/local/bin/k9s ]; then
FILENAME=k9s_\$(uname -s)_\$(uname -m).tar.gz &&
curl -sSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin k9s
fi
if [ ! -x /usr/local/bin/popeye ]; then
FILENAME=popeye_\$(uname -s)_\$(uname -m).tar.gz &&
curl -sSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
sudo tar -zxvf- -C /usr/local/bin popeye
fi"
# Install Tilt
pssh "
if [ ! -x /usr/local/bin/tilt ]; then
curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash
fi"
# Install Skaffold
pssh "
if [ ! -x /usr/local/bin/skaffold ]; then
curl -Lo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-amd64 &&
sudo install skaffold /usr/local/bin/
fi"
# Install Kompose
pssh "
if [ ! -x /usr/local/bin/kompose ]; then
curl -Lo kompose https://github.com/kubernetes/kompose/releases/latest/download/kompose-linux-amd64 &&
sudo install kompose /usr/local/bin
fi"
pssh "
if [ ! -x /usr/local/bin/kubeseal ]; then
curl -Lo kubeseal https://github.com/bitnami-labs/sealed-secrets/releases/download/v0.13.1/kubeseal-linux-amd64 &&
sudo install kubeseal /usr/local/bin
fi"
}
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
_cmd_kubereset() {
TAG=$1
need_tag
pssh "sudo kubeadm reset --force"
}
_cmd kubetest "Check that all nodes are reporting as Ready"
_cmd_kubetest() {
TAG=$1
need_tag
# There are way too many backslashes in the command below.
# Feel free to make that better ♥
pssh "
set -e
if i_am_first_node; then
which kubectl
for NODE in \$(grep [0-9]\$ /etc/hosts | grep -v ^127 | awk {print\ \\\$2}); do
echo \$NODE ; kubectl get nodes | grep -w \$NODE | grep -w Ready
done
fi"
}
_cmd ips "Show the IP addresses for a given tag"
_cmd_ips() {
TAG=$1
need_tag $TAG
SETTINGS=tags/$TAG/settings.yaml
CLUSTERSIZE=$(awk '/^clustersize:/ {print $2}' $SETTINGS)
while true; do
for I in $(seq $CLUSTERSIZE); do
read ip || return 0
printf "%s\t" "$ip"
done
printf "\n"
done < tags/$TAG/ips.txt
}
_cmd list "List all VMs on a given infrastructure (or all infras if no arg given)"
_cmd_list() {
case "$1" in
"")
for INFRA in infra/*; do
$0 list $INFRA
done
;;
*/example.*)
;;
*)
need_infra $1
sep "Listing instances for $1"
infra_list
;;
esac
}
_cmd listall "List VMs running on all configured infrastructures"
_cmd_listall() {
for infra in infra/*; do
case $infra in
infra/example.*)
;;
*)
info "Listing infrastructure $infra:"
need_infra $infra
infra_list
;;
esac
done
}
_cmd maketag "Generate a quasi-unique tag for a group of instances"
_cmd_maketag() {
if [ -z $USER ]; then
export USER=anonymous
fi
MS=$(($(date +%N | tr -d 0)/1000000))
date +%Y-%m-%d-%H-%M-$MS-$USER
}
_cmd ping "Ping VMs in a given tag, to check that they have network access"
_cmd_ping() {
TAG=$1
need_tag
fping < tags/$TAG/ips.txt
}
_cmd netfix "Disable GRO and run a pinger job on the VMs"
_cmd_netfix () {
TAG=$1
need_tag
pssh "
sudo ethtool -K ens3 gro off
sudo tee /root/pinger.service <<EOF
[Unit]
Description=pinger
[Install]
WantedBy=multi-user.target
[Service]
WorkingDirectory=/
ExecStart=/bin/ping -w60 1.1
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /root/pinger.service
sudo systemctl start pinger"
}
_cmd tailhist "Install history viewer on port 1088"
_cmd_tailhist () {
TAG=$1
need_tag
pssh "
wget https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0_amd64.deb
sudo dpkg -i websocketd-0.3.0_amd64.deb
sudo mkdir -p /tmp/tailhist
sudo tee /root/tailhist.service <<EOF
[Unit]
Description=tailhist
[Install]
WantedBy=multi-user.target
[Service]
WorkingDirectory=/tmp/tailhist
ExecStart=/usr/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/docker/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
User=nobody
Group=nogroup
Restart=always
EOF
sudo systemctl enable /root/tailhist.service
sudo systemctl start tailhist"
pssh -I sudo tee /tmp/tailhist/index.html <lib/tailhist.html
}
_cmd opensg "Open the default security group to ALL ingress traffic"
_cmd_opensg() {
need_infra $1
infra_opensg
}
_cmd disableaddrchecks "Disable source/destination IP address checks"
_cmd_disableaddrchecks() {
TAG=$1
need_tag
infra_disableaddrchecks
}
_cmd pssh "Run an arbitrary command on all nodes"
_cmd_pssh() {
TAG=$1
need_tag
shift
pssh "$@"
}
_cmd pull_images "Pre-pull a bunch of Docker images"
_cmd_pull_images() {
TAG=$1
need_tag
pull_tag
}
_cmd remap_nodeports "Remap NodePort range to 10000-10999"
_cmd_remap_nodeports() {
TAG=$1
need_tag
FIND_LINE=" - --service-cluster-ip-range=10.96.0.0\/12"
ADD_LINE=" - --service-node-port-range=10000-10999"
MANIFEST_FILE=/etc/kubernetes/manifests/kube-apiserver.yaml
pssh "
if i_am_first_node && ! grep -q '$ADD_LINE' $MANIFEST_FILE; then
sudo sed -i 's/\($FIND_LINE\)\$/\1\n$ADD_LINE/' $MANIFEST_FILE
fi"
info "If you have manifests hard-coding nodePort values,"
info "you might want to patch them with a command like:"
info "
if i_am_first_node; then
kubectl -n kube-system patch svc prometheus-server \\
-p 'spec: { ports: [ {port: 80, nodePort: 10101} ]}'
fi
"
}
_cmd quotas "Check our infrastructure quotas (max instances)"
_cmd_quotas() {
need_infra $1
infra_quotas
}
_cmd ssh "Open an SSH session to the first node of a tag"
_cmd_ssh() {
TAG=$1
need_tag
IP=$(head -1 tags/$TAG/ips.txt)
info "Logging into $IP"
ssh docker@$IP
}
_cmd start "Start a group of VMs"
_cmd_start() {
while [ ! -z "$*" ]; do
case "$1" in
--infra) INFRA=$2; shift 2;;
--settings) SETTINGS=$2; shift 2;;
--count) COUNT=$2; shift 2;;
--tag) TAG=$2; shift 2;;
--students) STUDENTS=$2; shift 2;;
*) die "Unrecognized parameter: $1."
esac
done
if [ -z "$INFRA" ]; then
die "Please add --infra flag to specify which infrastructure file to use."
fi
if [ -z "$SETTINGS" ]; then
die "Please add --settings flag to specify which settings file to use."
fi
if [ -z "$COUNT" ]; then
CLUSTERSIZE=$(awk '/^clustersize:/ {print $2}' $SETTINGS)
if [ -z "$STUDENTS" ]; then
warning "Neither --count nor --students was specified."
warning "According to the settings file, the cluster size is $CLUSTERSIZE."
warning "Deploying one cluster of $CLUSTERSIZE nodes."
STUDENTS=1
fi
COUNT=$(($STUDENTS*$CLUSTERSIZE))
fi
# Check that the specified settings and infrastructure are valid.
need_settings $SETTINGS
need_infra $INFRA
if [ -z "$TAG" ]; then
TAG=$(_cmd_maketag)
fi
mkdir -p tags/$TAG
ln -s ../../$INFRA tags/$TAG/infra.sh
ln -s ../../$SETTINGS tags/$TAG/settings.yaml
echo creating > tags/$TAG/status
infra_start $COUNT
sep
info "Successfully created $COUNT instances with tag $TAG"
echo created > tags/$TAG/status
# If the settings.yaml file has a "steps" field,
# automatically execute all the actions listed in that field.
# If an action fails, retry it up to 10 times.
python -c 'if True: # hack to deal with indentation
import sys, yaml
settings = yaml.safe_load(sys.stdin)
print ("\n".join(settings.get("steps", [])))
' < tags/$TAG/settings.yaml \
| while read step; do
if [ -z "$step" ]; then
break
fi
sep
info "Automatically executing step '$step'."
TRY=1
MAXTRY=10
while ! $0 $step $TAG ; do
TRY=$(($TRY+1))
if [ $TRY -gt $MAXTRY ]; then
error "This step ($step) failed after $MAXTRY attempts."
info "You can troubleshoot the situation manually, or terminate these instances with:"
info "$0 stop $TAG"
die "Giving up."
else
sep
info "Step '$step' failed. Let's wait 10 seconds and try again."
info "(Attempt $TRY out of $MAXTRY.)"
sleep 10
fi
done
done
sep
info "Deployment successful."
info "To log into the first machine of that batch, you can run:"
info "$0 ssh $TAG"
info "To terminate these instances, you can run:"
info "$0 stop $TAG"
}
_cmd stop "Stop (terminate, shutdown, kill, remove, destroy...) instances"
_cmd_stop() {
TAG=$1
need_tag
infra_stop
echo stopped > tags/$TAG/status
}
_cmd tags "List groups of VMs known locally"
_cmd_tags() {
(
cd tags
echo "[#] [Status] [Tag] [Infra]" \
| awk '{ printf "%-7s %-12s %-25s %-25s\n", $1, $2, $3, $4}'
for tag in *; do
if [ -f $tag/ips.txt ]; then
count="$(wc -l < $tag/ips.txt)"
else
count="?"
fi
if [ -f $tag/status ]; then
status="$(cat $tag/status)"
else
status="?"
fi
if [ -f $tag/infra.sh ]; then
infra="$(basename $(readlink $tag/infra.sh))"
else
infra="?"
fi
echo "$count $status $tag $infra" \
| awk '{ printf "%-7s %-12s %-25s %-25s\n", $1, $2, $3, $4}'
done
)
}
_cmd test "Run tests (pre-flight checks) on a group of VMs"
_cmd_test() {
TAG=$1
need_tag
test_tag
}
_cmd tmux "Log into the first node and start a tmux server"
_cmd_tmux() {
TAG=$1
need_tag
IP=$(head -1 tags/$TAG/ips.txt)
info "Opening ssh+tmux with $IP"
rm -f /tmp/tmux-$UID/default
ssh -t -L /tmp/tmux-$UID/default:/tmp/tmux-1001/default docker@$IP tmux new-session -As 0
}
_cmd helmprom "Install Helm and Prometheus"
_cmd_helmprom() {
TAG=$1
need_tag
pssh "
if i_am_first_node; then
sudo -u docker -H helm repo add prometheus-community https://prometheus-community.github.io/helm-charts/
sudo -u docker -H helm install prometheus prometheus-community/prometheus \
--namespace kube-system \
--set server.service.type=NodePort \
--set server.service.nodePort=30090 \
--set server.persistentVolume.enabled=false \
--set alertmanager.enabled=false
fi"
}
# Sometimes, weave fails to come up on some nodes.
# Symptom: the pods on a node are unreachable (they don't even ping).
# Remedy: wipe out Weave state and delete weave pod on that node.
# Specifically, identify the weave pod that is defective, then:
# kubectl -n kube-system exec weave-net-XXXXX -c weave rm /weavedb/weave-netdata.db
# kubectl -n kube-system delete pod weave-net-XXXXX
_cmd weavetest "Check that weave seems properly setup"
_cmd_weavetest() {
TAG=$1
need_tag
pssh "
kubectl -n kube-system get pods -o name | grep weave | cut -d/ -f2 |
xargs -I POD kubectl -n kube-system exec POD -c weave -- \
sh -c \"./weave --local status | grep Connections | grep -q ' 1 failed' || ! echo POD \""
}
_cmd webssh "Install a WEB SSH server on the machines (port 1080)"
_cmd_webssh() {
TAG=$1
need_tag
pssh "
sudo apt-get update &&
sudo apt-get install python-tornado python-paramiko -y"
pssh "
cd /opt
[ -d webssh ] || sudo git clone https://github.com/jpetazzo/webssh"
pssh "
for KEYFILE in /etc/ssh/*.pub; do
read a b c < \$KEYFILE; echo localhost \$a \$b
done | sudo tee /opt/webssh/known_hosts"
pssh "cat >webssh.service <<EOF
[Unit]
Description=webssh
[Install]
WantedBy=multi-user.target
[Service]
WorkingDirectory=/opt/webssh
ExecStart=/usr/bin/env python run.py --fbidhttp=false --port=1080 --policy=reject
User=nobody
Group=nogroup
Restart=always
EOF"
pssh "
sudo systemctl enable \$PWD/webssh.service &&
sudo systemctl start webssh.service"
}
_cmd www "Run a web server to access card HTML and PDF"
_cmd_www() {
cd www
IPADDR=$(curl -sL canihazip.com/s)
info "The following files are available:"
for F in *; do
echo "http://$IPADDR:8000/$F"
done
info "Press Ctrl-C to stop server."
python3 -m http.server
}
pull_tag() {
# Pre-pull a bunch of images
pssh --timeout 900 'for I in \
debian:latest \
ubuntu:latest \
fedora:latest \
centos:latest \
elasticsearch:2 \
postgres \
redis \
alpine \
registry \
nicolaka/netshoot \
jpetazzo/trainingwheels \
golang \
training/namer \
dockercoins/hasher \
dockercoins/rng \
dockercoins/webui \
dockercoins/worker \
logstash \
prom/node-exporter \
google/cadvisor \
dockersamples/visualizer \
nathanleclaire/redisonrails; do
sudo -u docker docker pull $I
done'
info "Finished pulling images for $TAG."
}
tag_is_reachable() {
pssh -t 5 true 2>&1 >/dev/null
}
test_tag() {
ips_file=tags/$TAG/ips.txt
info "Picking a random IP address in $ips_file to run tests."
ip=$(shuf -n1 $ips_file)
test_vm $ip
info "Tests complete."
}
test_vm() {
ip=$1
info "Testing instance with IP address $ip."
user=ubuntu
errors=""
for cmd in "hostname" \
"whoami" \
"hostname -i" \
"ls -l /usr/local/bin/i_am_first_node" \
"grep . /etc/name_of_first_node /etc/ipv4_of_first_node" \
"cat /etc/hosts" \
"hostnamectl status" \
"docker version | grep Version -B1" \
"docker-compose version" \
"docker-machine version" \
"docker images" \
"docker ps" \
"curl --silent localhost:55555" \
"sudo ls -la /mnt/ | grep docker" \
"env" \
"ls -la /home/docker/.ssh"; do
sep "$cmd"
echo "$cmd" \
| ssh -A -q \
-o "UserKnownHostsFile /dev/null" \
-o "StrictHostKeyChecking=no" \
$user@$ip sudo -u docker -i \
|| {
status=$?
error "$cmd exit status: $status"
errors="[$status] $cmd\n$errors"
}
done
sep
if [ -n "$errors" ]; then
error "The following commands had non-zero exit codes:"
printf "$errors"
fi
info "Test VM was $ip."
}
make_key_name() {
SHORT_FINGERPRINT=$(ssh-add -l | grep RSA | head -n1 | cut -d " " -f 2 | tr -d : | cut -c 1-8)
echo "${SHORT_FINGERPRINT}-${USER}"
}