mirror of
https://github.com/jpetazzo/container.training.git
synced 2026-02-14 17:49:59 +00:00
Multiple small changes to allow deployment in IPv6-only environments. What we do: - detect if we are in an IPv6-only environment - if yes, specify a service CIDR and listening address (kubeadm will otherwise pick the IPv4 address for the API server) - switch to Cilium Also minor changes to pssh and terraform to handle pinging and connecting to IPv6 addresses.
1563 lines
50 KiB
Bash
1563 lines
50 KiB
Bash
# Ignore SSH key validation when connecting to these remote hosts.
|
|
# (Otherwise, deployment scripts break when a VM IP address reuse.)
|
|
SSHOPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o LogLevel=ERROR"
|
|
|
|
HELP=""
|
|
_cmd() {
|
|
HELP="$(printf "%s\n%-20s %s\n" "$HELP" "$1" "$2")"
|
|
}
|
|
|
|
_cmd help "Show available commands"
|
|
_cmd_help() {
|
|
printf "$(basename $0) - the container training swiss army knife\n"
|
|
printf "Commands:"
|
|
printf "%s" "$HELP" | sort
|
|
}
|
|
|
|
_cmd cards "Generate ready-to-print cards for a group of VMs"
|
|
_cmd_cards() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
OPTIONS_FILE=$2
|
|
[ -f "$OPTIONS_FILE" ] || die "Please specify a YAML options file as 2nd argument."
|
|
OPTIONS_FILE_PATH="$(readlink -f "$OPTIONS_FILE")"
|
|
|
|
# This will process logins.jsonl to generate two files: cards.pdf and cards.html
|
|
(
|
|
cd tags/$TAG
|
|
../../../lib/make-login-cards.py "$OPTIONS_FILE_PATH"
|
|
)
|
|
|
|
ln -sf ../tags/$TAG/cards.html www/$TAG.html
|
|
ln -sf ../tags/$TAG/cards.pdf www/$TAG.pdf
|
|
|
|
info "Cards created. You can view them with:"
|
|
info "xdg-open tags/$TAG/cards.html tags/$TAG/cards.pdf (on Linux)"
|
|
info "open tags/$TAG/cards.html (on macOS)"
|
|
info "Or you can start a web server with:"
|
|
info "$0 www"
|
|
}
|
|
|
|
_cmd clean "Remove information about destroyed clusters"
|
|
_cmd_clean() {
|
|
for TAG in tags/*; do
|
|
if grep -q ^destroyed$ "$TAG/status"; then
|
|
info "Removing $TAG..."
|
|
rm -rf "$TAG"
|
|
fi
|
|
done
|
|
}
|
|
|
|
_cmd codeserver "Install code-server on the clusters"
|
|
_cmd_codeserver() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
ARCH=${ARCHITECTURE-amd64}
|
|
CODESERVER_VERSION=4.96.4
|
|
CODESERVER_URL=https://github.com/coder/code-server/releases/download/v${CODESERVER_VERSION}/code-server-${CODESERVER_VERSION}-linux-${ARCH}.tar.gz
|
|
pssh "
|
|
set -e
|
|
i_am_first_node || exit 0
|
|
if ! [ -x /usr/local/bin/code-server ]; then
|
|
curl -fsSL $CODESERVER_URL | sudo tar zx -C /opt
|
|
sudo ln -s /opt/code-server-${CODESERVER_VERSION}-linux-${ARCH}/bin/code-server /usr/local/bin/code-server
|
|
sudo -u $USER_LOGIN -H code-server --install-extension ms-azuretools.vscode-docker
|
|
sudo -u $USER_LOGIN -H code-server --install-extension ms-kubernetes-tools.vscode-kubernetes-tools
|
|
sudo -u $USER_LOGIN -H mkdir -p /home/$USER_LOGIN/.local/share/code-server/User
|
|
echo '{\"workbench.startupEditor\": \"terminal\"}' | sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.local/share/code-server/User/settings.json
|
|
sudo -u $USER_LOGIN mkdir -p /home/$USER_LOGIN/.config/systemd/user
|
|
sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.config/systemd/user/code-server.service <<EOF
|
|
[Unit]
|
|
Description=code-server
|
|
|
|
[Install]
|
|
WantedBy=default.target
|
|
|
|
[Service]
|
|
ExecStart=/usr/local/bin/code-server --bind-addr [::]:1789
|
|
Restart=always
|
|
EOF
|
|
sudo systemctl --user -M $USER_LOGIN@ enable code-server.service --now
|
|
sudo loginctl enable-linger $USER_LOGIN
|
|
fi"
|
|
}
|
|
|
|
_cmd createuser "Create the user that students will use"
|
|
_cmd_createuser() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
set -e
|
|
# Create the user if it doesn't exist yet.
|
|
id $USER_LOGIN || sudo useradd -d /home/$USER_LOGIN -g users -m -s /bin/bash $USER_LOGIN
|
|
# Make sure there are at least exec permission on their home.
|
|
sudo chmod a+X /home/$USER_LOGIN
|
|
# Add them to the docker group, if there is one.
|
|
grep ^docker: /etc/group && sudo usermod -aG docker $USER_LOGIN
|
|
# Set their password.
|
|
echo $USER_LOGIN:$USER_PASSWORD | sudo chpasswd
|
|
# Add them to sudoers and allow passwordless authentication.
|
|
echo '$USER_LOGIN ALL=(ALL) NOPASSWD:ALL' | sudo tee /etc/sudoers.d/$USER_LOGIN
|
|
"
|
|
|
|
# The MaxAuthTries is here to help with folks who have many SSH keys.
|
|
pssh "
|
|
set -e
|
|
sudo sed -i 's/PasswordAuthentication no/PasswordAuthentication yes/' /etc/ssh/sshd_config
|
|
sudo sed -i 's/#MaxAuthTries 6/MaxAuthTries 42/' /etc/ssh/sshd_config
|
|
sudo systemctl restart ssh.service
|
|
"
|
|
|
|
pssh "
|
|
set -e
|
|
cd /home/$USER_LOGIN
|
|
sudo -u $USER_LOGIN mkdir -p .ssh
|
|
if i_am_first_node; then
|
|
# Generate a key pair with an empty passphrase.
|
|
if ! sudo -u $USER_LOGIN [ -f .ssh/id_rsa ]; then
|
|
sudo -u $USER_LOGIN ssh-keygen -t rsa -f .ssh/id_rsa -P ''
|
|
sudo -u $USER_LOGIN cp .ssh/id_rsa.pub .ssh/authorized_keys
|
|
fi
|
|
fi
|
|
"
|
|
|
|
# FIXME this is a gross hack to add the deployment key to our SSH agent,
|
|
# so that it can be used to bounce from host to host (which is necessary
|
|
# in the next deployment step). In the long run, we probably want to
|
|
# generate these keys locally and push them to the machines instead
|
|
# (once we move everything to Terraform).
|
|
ssh-add tags/$TAG/id_rsa
|
|
pssh "
|
|
set -e
|
|
cd /home/$USER_LOGIN
|
|
if ! i_am_first_node; then
|
|
# Copy keys from the first node.
|
|
ssh $SSHOPTS \$(cat /etc/name_of_first_node) sudo -u $USER_LOGIN tar -C /home/$USER_LOGIN -cvf- .ssh |
|
|
sudo -u $USER_LOGIN tar -xf-
|
|
fi
|
|
"
|
|
ssh-add -d tags/$TAG/id_rsa
|
|
|
|
# FIXME do this only once.
|
|
pssh -I "sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc" <<"SQRL"
|
|
|
|
# Fancy prompt courtesy of @soulshake.
|
|
export PS1='\e[1m\e[31m[$HOSTIP] \e[32m($(docker-prompt)) \e[34m\u@\h\e[35m \w\e[0m\n$ '
|
|
|
|
# Bigger history, in a different file, and saved before executing each command.
|
|
export HISTSIZE=9999
|
|
export HISTFILESIZE=9999
|
|
shopt -s histappend
|
|
trap 'history -a' DEBUG
|
|
export HISTFILE=~/.history
|
|
SQRL
|
|
|
|
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.vimrc" <<SQRL
|
|
syntax on
|
|
set autoindent
|
|
set expandtab
|
|
set number
|
|
set shiftwidth=2
|
|
set softtabstop=2
|
|
set nowrap
|
|
set laststatus=2
|
|
SQRL
|
|
|
|
pssh -I "sudo -u $USER_LOGIN tee /home/$USER_LOGIN/.tmux.conf" <<SQRL
|
|
set -g status-style bg=yellow,bold
|
|
|
|
bind h select-pane -L
|
|
bind j select-pane -D
|
|
bind k select-pane -U
|
|
bind l select-pane -R
|
|
|
|
# Allow using mouse to switch panes
|
|
set -g mouse on
|
|
|
|
# Make scrolling with wheels work
|
|
bind -n WheelUpPane if-shell -F -t = "#{mouse_any_flag}" "send-keys -M" "if -Ft= '#{pane_in_mode}' 'send-keys -M' 'select-pane -t=; copy-mode -e; send-keys -M'"
|
|
bind -n WheelDownPane select-pane -t= \; send-keys -M
|
|
|
|
# Retain one million lines
|
|
set-option -g history-limit 1000000
|
|
SQRL
|
|
|
|
# Install docker-prompt script
|
|
pssh -I sudo tee /usr/local/bin/docker-prompt <lib/docker-prompt
|
|
pssh sudo chmod +x /usr/local/bin/docker-prompt
|
|
|
|
echo user_ok > tags/$TAG/status
|
|
}
|
|
|
|
|
|
_cmd create "Create lab environments"
|
|
_cmd_create() {
|
|
while [ ! -z "$*" ]; do
|
|
case "$1" in
|
|
--mode) MODE=$2; shift 2;;
|
|
--provider) PROVIDER=$2; shift 2;;
|
|
--settings) SETTINGS=$2; shift 2;;
|
|
--students) STUDENTS=$2; shift 2;;
|
|
--tag) TAG=$2; shift 2;;
|
|
*) die "Unrecognized parameter: $1."
|
|
esac
|
|
done
|
|
|
|
if [ -z "$MODE" ]; then
|
|
info "Using default mode (pssh)."
|
|
MODE=pssh
|
|
fi
|
|
if [ -z "$PROVIDER" ]; then
|
|
die "Please add --provider flag to specify which provider to use."
|
|
fi
|
|
if [ -z "$SETTINGS" ]; then
|
|
die "Please add --settings flag to specify which settings file to use."
|
|
fi
|
|
if [ -z "$STUDENTS" ]; then
|
|
info "Defaulting to 1 student since --students flag wasn't specified."
|
|
STUDENTS=1
|
|
fi
|
|
|
|
case "$MODE" in
|
|
mk8s)
|
|
PROVIDER_BASE=terraform/one-kubernetes
|
|
;;
|
|
pssh)
|
|
PROVIDER_BASE=terraform/virtual-machines
|
|
;;
|
|
*) die "Invalid mode: $MODE (supported modes: mk8s, pssh)." ;;
|
|
esac
|
|
|
|
if ! [ -f "$SETTINGS" ]; then
|
|
die "Settings file ($SETTINGS) not found."
|
|
fi
|
|
|
|
# Check that the provider is valid.
|
|
if [ -d $PROVIDER_BASE/$PROVIDER ]; then
|
|
if [ -f $PROVIDER_BASE/$PROVIDER/requires_tfvars ]; then
|
|
die "Provider $PROVIDER cannot be used directly, because it requires a tfvars file."
|
|
fi
|
|
PROVIDER_DIRECTORY=$PROVIDER_BASE/$PROVIDER
|
|
TFVARS=""
|
|
elif [ -f $PROVIDER_BASE/$PROVIDER.tfvars ]; then
|
|
TFVARS=$PROVIDER_BASE/$PROVIDER.tfvars
|
|
PROVIDER_DIRECTORY=$(dirname $PROVIDER_BASE/$PROVIDER)
|
|
else
|
|
error "Provider $PROVIDER not found."
|
|
info "Available providers for mode $MODE:"
|
|
(
|
|
cd $PROVIDER_BASE
|
|
for P in *; do
|
|
if [ -d "$P" ]; then
|
|
[ -f "$P/requires_tfvars" ] || info "$P"
|
|
for V in $P/*.tfvars; do
|
|
[ -f "$V" ] && info "${V%.tfvars}"
|
|
done
|
|
fi
|
|
done
|
|
)
|
|
die "Please specify a valid provider."
|
|
fi
|
|
|
|
if [ -z "$TAG" ]; then
|
|
TAG=$(_cmd_maketag)
|
|
fi
|
|
mkdir -p tags/$TAG
|
|
echo creating > tags/$TAG/status
|
|
|
|
ln -s ../../$SETTINGS tags/$TAG/settings.env.orig
|
|
cp $SETTINGS tags/$TAG/settings.env
|
|
|
|
# For Google Cloud, it is necessary to specify which "project" to use.
|
|
# Unfortunately, the Terraform provider doesn't seem to have a way
|
|
# to detect which Google Cloud project you want to use; it has to be
|
|
# specified one way or another. Let's decide that it should be set with
|
|
# the GOOGLE_PROJECT env var; and if that var is not set, we'll try to
|
|
# figure it out from gcloud.
|
|
# (See https://github.com/hashicorp/terraform-provider-google/issues/10907#issuecomment-1015721600)
|
|
# Since we need that variable to be set each time we'll call Terraform
|
|
# (e.g. when destroying the environment), let's save it to the settings.env
|
|
# file.
|
|
if [ "$PROVIDER" = "googlecloud" ]; then
|
|
if ! [ "$GOOGLE_PROJECT" ]; then
|
|
info "PROVIDER=googlecloud but GOOGLE_PROJECT is not set. Detecting it."
|
|
GOOGLE_PROJECT=$(gcloud config get project)
|
|
info "GOOGLE_PROJECT will be set to '$GOOGLE_PROJECT'."
|
|
fi
|
|
echo "export GOOGLE_PROJECT=$GOOGLE_PROJECT" >> tags/$TAG/settings.env
|
|
fi
|
|
|
|
. tags/$TAG/settings.env
|
|
|
|
echo $MODE > tags/$TAG/mode
|
|
echo $PROVIDER > tags/$TAG/provider
|
|
case "$MODE" in
|
|
mk8s)
|
|
cp -d terraform/many-kubernetes/*.* tags/$TAG
|
|
mkdir tags/$TAG/one-kubernetes-module
|
|
cp $PROVIDER_DIRECTORY/*.tf tags/$TAG/one-kubernetes-module
|
|
mkdir tags/$TAG/one-kubernetes-config
|
|
mv tags/$TAG/one-kubernetes-module/config.tf tags/$TAG/one-kubernetes-config
|
|
;;
|
|
pssh)
|
|
cp $PROVIDER_DIRECTORY/*.tf tags/$TAG
|
|
if [ "$TFVARS" ]; then
|
|
cp "$TFVARS" "tags/$TAG/$(basename $TFVARS).auto.tfvars"
|
|
fi
|
|
;;
|
|
esac
|
|
(
|
|
cd tags/$TAG
|
|
terraform init
|
|
echo tag = \"$TAG\" >> terraform.tfvars
|
|
echo how_many_clusters = $STUDENTS >> terraform.tfvars
|
|
if [ "$CLUSTERSIZE" ]; then
|
|
echo nodes_per_cluster = $CLUSTERSIZE >> terraform.tfvars
|
|
fi
|
|
)
|
|
|
|
sep
|
|
|
|
# If the settings.env file has a "STEPS" field,
|
|
# automatically execute all the actions listed in that field.
|
|
# If an action fails, retry it up to 10 times.
|
|
for STEP in $(echo $STEPS); do
|
|
sep "$TAG -> $STEP"
|
|
TRY=1
|
|
MAXTRY=10
|
|
while ! $0 $STEP $TAG ; do
|
|
TRY=$(($TRY+1))
|
|
if [ $TRY -gt $MAXTRY ]; then
|
|
error "This step ($STEP) failed after $MAXTRY attempts."
|
|
info "You can troubleshoot the situation manually, or terminate these instances with:"
|
|
info "$0 destroy $TAG"
|
|
die "Giving up."
|
|
else
|
|
sep
|
|
info "Step '$STEP' failed for '$TAG'. Let's wait 10 seconds and try again."
|
|
info "(Attempt $TRY out of $MAXTRY.)"
|
|
sleep 10
|
|
fi
|
|
done
|
|
done
|
|
sep
|
|
info "Deployment successful."
|
|
info "To log into the first machine of that batch, you can run:"
|
|
info "$0 ssh $TAG"
|
|
info "To terminate these instances, you can run:"
|
|
info "$0 destroy $TAG"
|
|
}
|
|
|
|
_cmd destroy "Destroy lab environments"
|
|
_cmd_destroy() {
|
|
TAG=$1
|
|
need_tag
|
|
cd tags/$TAG
|
|
echo destroying > status
|
|
terraform destroy -auto-approve
|
|
echo destroyed > status
|
|
}
|
|
|
|
_cmd clusterize "Group VMs in clusters"
|
|
_cmd_clusterize() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
set -e
|
|
grep PSSH_ /etc/ssh/sshd_config || echo 'AcceptEnv PSSH_*' | sudo tee -a /etc/ssh/sshd_config
|
|
grep KUBECOLOR_ /etc/ssh/sshd_config || echo 'AcceptEnv KUBECOLOR_*' | sudo tee -a /etc/ssh/sshd_config
|
|
sudo systemctl restart ssh.service"
|
|
|
|
pssh -I < tags/$TAG/clusters.tsv "
|
|
grep -w \$PSSH_HOST | tr '\t' '\n' > /tmp/cluster"
|
|
pssh "
|
|
echo \$PSSH_HOST > /tmp/ipv4
|
|
head -n 1 /tmp/cluster | sudo tee /etc/ipv4_of_first_node
|
|
echo ${CLUSTERPREFIX}1 | sudo tee /etc/name_of_first_node
|
|
echo HOSTIP=\$PSSH_HOST | sudo tee -a /etc/environment
|
|
NODEINDEX=\$((\$PSSH_NODENUM%$CLUSTERSIZE+1))
|
|
if [ \$NODEINDEX = 1 ]; then
|
|
sudo ln -sf /bin/true /usr/local/bin/i_am_first_node
|
|
else
|
|
sudo ln -sf /bin/false /usr/local/bin/i_am_first_node
|
|
fi
|
|
echo $CLUSTERPREFIX\$NODEINDEX | sudo tee /etc/hostname
|
|
sudo hostname $CLUSTERPREFIX\$NODEINDEX
|
|
N=1
|
|
while read ip; do
|
|
grep -w \$ip /etc/hosts || echo \$ip $CLUSTERPREFIX\$N | sudo tee -a /etc/hosts
|
|
N=\$((\$N+1))
|
|
done < /tmp/cluster
|
|
"
|
|
|
|
jq --raw-input --compact-output \
|
|
--arg USER_LOGIN "$USER_LOGIN" --arg USER_PASSWORD "$USER_PASSWORD" '
|
|
{
|
|
"login": $USER_LOGIN,
|
|
"password": $USER_PASSWORD,
|
|
"ipaddrs": .
|
|
}' < tags/$TAG/clusters.tsv > tags/$TAG/logins.jsonl
|
|
|
|
echo cluster_ok > tags/$TAG/status
|
|
}
|
|
|
|
_cmd disabledocker "Stop Docker Engine and don't restart it automatically"
|
|
_cmd_disabledocker() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
sudo systemctl disable docker.socket --now
|
|
sudo systemctl disable docker.service --now
|
|
sudo systemctl disable containerd.service --now
|
|
"
|
|
}
|
|
|
|
_cmd docker "Install and start Docker"
|
|
_cmd_docker() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
set -e
|
|
# On EC2, the ephemeral disk might be mounted on /mnt.
|
|
# If /mnt is a mountpoint, place Docker workspace on it.
|
|
if mountpoint -q /mnt; then
|
|
sudo mkdir -p /mnt/docker
|
|
sudo ln -sfn /mnt/docker /var/lib/docker
|
|
fi
|
|
|
|
# This will install the latest Docker.
|
|
sudo apt-get -qy install apt-transport-https ca-certificates curl software-properties-common
|
|
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
|
|
sudo add-apt-repository 'deb https://download.docker.com/linux/ubuntu jammy stable'
|
|
sudo apt-get -q update
|
|
sudo apt-get -qy install docker-ce
|
|
|
|
# Add registry mirror configuration.
|
|
if ! [ -f /etc/docker/daemon.json ]; then
|
|
sudo mkdir -p /etc/docker
|
|
echo '{\"registry-mirrors\": [\"https://mirror.gcr.io\"]}' | sudo tee /etc/docker/daemon.json
|
|
sudo systemctl restart docker
|
|
fi
|
|
"
|
|
|
|
##VERSION## https://github.com/docker/compose/releases
|
|
COMPOSE_VERSION=v2.11.1
|
|
COMPOSE_PLATFORM='linux-$(uname -m)'
|
|
|
|
# Just in case you need Compose 1.X, you can use the following lines.
|
|
# (But it will probably only work for x86_64 machines.)
|
|
#COMPOSE_VERSION=1.29.2
|
|
#COMPOSE_PLATFORM='Linux-$(uname -m)'
|
|
|
|
pssh "
|
|
set -e
|
|
### Install docker-compose.
|
|
sudo curl -fsSL -o /usr/local/bin/docker-compose \
|
|
https://github.com/docker/compose/releases/download/$COMPOSE_VERSION/docker-compose-$COMPOSE_PLATFORM
|
|
sudo chmod +x /usr/local/bin/docker-compose
|
|
docker-compose version
|
|
|
|
### Install docker-machine.
|
|
##VERSION## https://github.com/docker/machine/releases
|
|
MACHINE_VERSION=v0.16.2
|
|
sudo curl -fsSL -o /usr/local/bin/docker-machine \
|
|
https://github.com/docker/machine/releases/download/\$MACHINE_VERSION/docker-machine-\$(uname -s)-\$(uname -m)
|
|
sudo chmod +x /usr/local/bin/docker-machine
|
|
docker-machine version
|
|
"
|
|
}
|
|
|
|
_cmd kubebins "Install Kubernetes and CNI binaries but don't start anything"
|
|
_cmd_kubebins() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
if [ "$KUBEVERSION" = "" ]; then
|
|
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
|
fi
|
|
|
|
##VERSION##
|
|
case "$KUBEVERSION" in
|
|
1.19.*)
|
|
ETCD_VERSION=v3.4.13
|
|
CNI_VERSION=v0.8.7
|
|
;;
|
|
*)
|
|
ETCD_VERSION=v3.5.10
|
|
CNI_VERSION=v1.3.0
|
|
;;
|
|
esac
|
|
|
|
K8SBIN_VERSION="v$KUBEVERSION"
|
|
ARCH=${ARCHITECTURE-amd64}
|
|
pssh --timeout 300 "
|
|
set -e
|
|
cd /usr/local/bin
|
|
if ! [ -x etcd ]; then
|
|
curl -L https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-$ARCH.tar.gz \
|
|
| sudo tar --strip-components=1 --wildcards -zx '*/etcd' '*/etcdctl'
|
|
fi
|
|
if ! [ -x kube-apiserver ]; then
|
|
##VERSION##
|
|
curl -L https://dl.k8s.io/$K8SBIN_VERSION/kubernetes-server-linux-$ARCH.tar.gz \
|
|
| sudo tar --strip-components=3 -zx \
|
|
kubernetes/server/bin/kube{ctl,let,-proxy,-apiserver,-scheduler,-controller-manager}
|
|
fi
|
|
sudo mkdir -p /opt/cni/bin
|
|
cd /opt/cni/bin
|
|
if ! [ -x bridge ]; then
|
|
curl -L https://github.com/containernetworking/plugins/releases/download/$CNI_VERSION/cni-plugins-linux-$ARCH-$CNI_VERSION.tgz \
|
|
| sudo tar -zx
|
|
fi
|
|
"
|
|
}
|
|
|
|
_cmd kubepkgs "Install Kubernetes packages (kubectl, kubeadm, kubelet)"
|
|
_cmd_kubepkgs() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
# Prior September 2023, there was a single Kubernetes package repo that
|
|
# contained packages for all versions, so we could just add that repo
|
|
# and install whatever was the latest version available there.
|
|
# Things have changed (versions after September 2023, e.g. 1.28.3 are
|
|
# not in the old repo) and now there is a different repo for each
|
|
# minor version, so we need to figure out what minor version we are
|
|
# installing to add the corresponding repo.
|
|
if [ "$KUBEVERSION" = "" ]; then
|
|
KUBEVERSION="$(curl -fsSL https://cdn.dl.k8s.io/release/stable.txt | sed s/^v//)"
|
|
fi
|
|
KUBEREPOVERSION="$(echo $KUBEVERSION | cut -d. -f1-2)"
|
|
|
|
# Since the new repo doesn't have older versions, add a safety check here.
|
|
MINORVERSION="$(echo $KUBEVERSION | cut -d. -f2)"
|
|
if [ "$MINORVERSION" -lt 24 ]; then
|
|
die "Cannot install kubepkgs for versions before 1.24."
|
|
fi
|
|
|
|
pssh "
|
|
sudo tee /etc/apt/preferences.d/kubernetes <<EOF
|
|
Package: kubectl kubeadm kubelet
|
|
Pin: version $KUBEVERSION-*
|
|
Pin-Priority: 1000
|
|
EOF"
|
|
|
|
# Install packages
|
|
pssh --timeout 200 "
|
|
curl -fsSL https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/Release.key |
|
|
gpg --dearmor | sudo tee /etc/apt/keyrings/kubernetes-apt-keyring.gpg &&
|
|
echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v$KUBEREPOVERSION/deb/ /' |
|
|
sudo tee /etc/apt/sources.list.d/kubernetes.list"
|
|
pssh --timeout 200 "
|
|
sudo apt-get update -q &&
|
|
sudo apt-get install -qy kubelet kubeadm kubectl &&
|
|
sudo apt-mark hold kubelet kubeadm kubectl &&
|
|
kubeadm completion bash | sudo tee /etc/bash_completion.d/kubeadm &&
|
|
kubectl completion bash | sudo tee /etc/bash_completion.d/kubectl &&
|
|
echo 'alias k=kubecolor' | sudo tee /etc/bash_completion.d/k &&
|
|
echo 'complete -F __start_kubectl k' | sudo tee -a /etc/bash_completion.d/k"
|
|
|
|
# Install helm early
|
|
# (so that we can use it to install e.g. Cilium etc.)
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/helm ]; then
|
|
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get-helm-3 | sudo bash &&
|
|
helm completion bash | sudo tee /etc/bash_completion.d/helm
|
|
helm version
|
|
fi"
|
|
}
|
|
|
|
_cmd kubeadm "Setup kubernetes clusters with kubeadm"
|
|
_cmd_kubeadm() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
if [ "$KUBEVERSION" ]; then
|
|
CLUSTER_CONFIGURATION_KUBERNETESVERSION='kubernetesVersion: "v'$KUBEVERSION'"'
|
|
IGNORE_SYSTEMVERIFICATION="- SystemVerification"
|
|
IGNORE_SWAP="- Swap"
|
|
IGNORE_IPTABLES="- FileContent--proc-sys-net-bridge-bridge-nf-call-iptables"
|
|
fi
|
|
|
|
# Install a valid configuration for containerd
|
|
# (first, the CRI interface needs to be re-enabled;
|
|
# also, the correct systemd cgroup driver must be selected,
|
|
# otherwise containerd just restarts containers for no good reason)
|
|
pssh -I "sudo tee /etc/containerd/config.toml" < lib/containerd-config.toml
|
|
pssh "sudo systemctl restart containerd"
|
|
|
|
# Initialize kube control plane
|
|
pssh --timeout 200 "
|
|
IPV6=\$(ip -json a | jq -r '.[].addr_info[] | select(.scope==\"global\" and .family==\"inet6\") | .local' | head -n1)
|
|
if [ \"\$IPV6\" ]; then
|
|
ADVERTISE=\"advertiseAddress: \$IPV6\"
|
|
SERVICE_SUBNET=\"serviceSubnet: fdff::/112\"
|
|
touch /tmp/install-cilium-ipv6-only
|
|
else
|
|
ADVERTISE=
|
|
SERVICE_SUBNET=
|
|
touch /tmp/install-weave
|
|
fi
|
|
echo IPV6=\$IPV6 ADVERTISE=\$ADVERTISE
|
|
if i_am_first_node && [ ! -f /etc/kubernetes/admin.conf ]; then
|
|
kubeadm token generate > /tmp/token &&
|
|
cat >/tmp/kubeadm-config.yaml <<EOF
|
|
kind: InitConfiguration
|
|
apiVersion: kubeadm.k8s.io/v1beta3
|
|
bootstrapTokens:
|
|
- token: \$(cat /tmp/token)
|
|
localAPIEndpoint:
|
|
\$ADVERTISE
|
|
nodeRegistration:
|
|
ignorePreflightErrors:
|
|
- NumCPU
|
|
- FileContent--proc-sys-net-ipv6-conf-default-forwarding
|
|
$IGNORE_SYSTEMVERIFICATION
|
|
$IGNORE_SWAP
|
|
$IGNORE_IPTABLES
|
|
---
|
|
kind: JoinConfiguration
|
|
apiVersion: kubeadm.k8s.io/v1beta3
|
|
discovery:
|
|
bootstrapToken:
|
|
apiServerEndpoint: \$(cat /etc/name_of_first_node):6443
|
|
token: \$(cat /tmp/token)
|
|
unsafeSkipCAVerification: true
|
|
nodeRegistration:
|
|
ignorePreflightErrors:
|
|
- NumCPU
|
|
$IGNORE_SYSTEMVERIFICATION
|
|
$IGNORE_SWAP
|
|
$IGNORE_IPTABLES
|
|
---
|
|
kind: KubeletConfiguration
|
|
apiVersion: kubelet.config.k8s.io/v1beta1
|
|
failSwapOn: false
|
|
---
|
|
kind: ClusterConfiguration
|
|
apiVersion: kubeadm.k8s.io/v1beta3
|
|
apiServer:
|
|
certSANs:
|
|
- \$(cat /tmp/ipv4)
|
|
networking:
|
|
\$SERVICE_SUBNET
|
|
$CLUSTER_CONFIGURATION_KUBERNETESVERSION
|
|
EOF
|
|
sudo kubeadm init --config=/tmp/kubeadm-config.yaml
|
|
fi"
|
|
|
|
# Put kubeconfig in ubuntu's and $USER_LOGIN's accounts
|
|
pssh "
|
|
if i_am_first_node; then
|
|
sudo mkdir -p \$HOME/.kube /home/$USER_LOGIN/.kube &&
|
|
sudo cp /etc/kubernetes/admin.conf \$HOME/.kube/config &&
|
|
sudo cp /etc/kubernetes/admin.conf /home/$USER_LOGIN/.kube/config &&
|
|
sudo chown -R \$(id -u) \$HOME/.kube &&
|
|
sudo chown -R $USER_LOGIN /home/$USER_LOGIN/.kube
|
|
fi"
|
|
|
|
# Install weave as the pod network
|
|
pssh "
|
|
if i_am_first_node; then
|
|
if [ -f /tmp/install-weave ]; then
|
|
curl -fsSL https://github.com/weaveworks/weave/releases/download/v2.8.1/weave-daemonset-k8s-1.11.yaml |
|
|
sed s,weaveworks/weave,quay.io/rackspace/weave, |
|
|
kubectl apply -f-
|
|
fi
|
|
if [ -f /tmp/install-cilium-ipv6-only ]; then
|
|
helm upgrade -i cilium cilium --repo https://helm.cilium.io/ \
|
|
--namespace kube-system \
|
|
--set ipv6.enabled=true \
|
|
--set ipv4.enabled=false \
|
|
--set underlayProtocol=ipv6 \
|
|
--version 1.18.3
|
|
fi
|
|
fi"
|
|
|
|
# FIXME this is a gross hack to add the deployment key to our SSH agent,
|
|
# so that it can be used to bounce from host to host (which is necessary
|
|
# in the next deployment step). In the long run, we probably want to
|
|
# generate these keys locally and push them to the machines instead
|
|
# (once we move everything to Terraform).
|
|
if [ -f "tags/$TAG/id_rsa" ]; then
|
|
ssh-add tags/$TAG/id_rsa
|
|
fi
|
|
# Join the other nodes to the cluster
|
|
pssh --timeout 200 "
|
|
if ! i_am_first_node && [ ! -f /etc/kubernetes/kubelet.conf ]; then
|
|
FIRSTNODE=\$(cat /etc/name_of_first_node) &&
|
|
ssh $SSHOPTS \$FIRSTNODE cat /tmp/kubeadm-config.yaml > /tmp/kubeadm-config.yaml &&
|
|
sudo kubeadm join --config /tmp/kubeadm-config.yaml
|
|
fi"
|
|
if [ -f "tags/$TAG/id_rsa" ]; then
|
|
ssh-add -d tags/$TAG/id_rsa
|
|
fi
|
|
|
|
# Install metrics server
|
|
pssh "
|
|
if i_am_first_node; then
|
|
kubectl apply -f https://raw.githubusercontent.com/jpetazzo/container.training/master/k8s/metrics-server.yaml
|
|
#helm upgrade --install metrics-server \
|
|
# --repo https://kubernetes-sigs.github.io/metrics-server/ metrics-server \
|
|
# --namespace kube-system --set args={--kubelet-insecure-tls}
|
|
fi"
|
|
}
|
|
|
|
_cmd kubetools "Install a bunch of CLI tools for Kubernetes"
|
|
_cmd_kubetools() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
ARCH=${ARCHITECTURE-amd64}
|
|
|
|
# Folks, please, be consistent!
|
|
# Either pick "uname -m" (on Linux, that's x86_64, aarch64, etc.)
|
|
# Or GOARCH (amd64, arm64, etc.)
|
|
# But don't mix both! Thank you ♥
|
|
case $ARCH in
|
|
amd64)
|
|
HERP_DERP_ARCH=x86_64
|
|
TILT_ARCH=x86_64
|
|
;;
|
|
*)
|
|
HERP_DERP_ARCH=$ARCH
|
|
TILT_ARCH=${ARCH}_ALPHA
|
|
;;
|
|
esac
|
|
|
|
# Install ArgoCD CLI
|
|
##VERSION## https://github.com/argoproj/argo-cd/releases/latest
|
|
URL=https://github.com/argoproj/argo-cd/releases/latest/download/argocd-linux-${ARCH}
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/argocd ]; then
|
|
sudo curl -o /usr/local/bin/argocd -fsSL $URL
|
|
sudo chmod +x /usr/local/bin/argocd
|
|
argocd completion bash | sudo tee /etc/bash_completion.d/argocd
|
|
argocd version --client
|
|
fi"
|
|
|
|
# Install Flux CLI
|
|
##VERSION## https://github.com/fluxcd/flux2/releases
|
|
FLUX_VERSION=2.3.0
|
|
FILENAME=flux_${FLUX_VERSION}_linux_${ARCH}
|
|
URL=https://github.com/fluxcd/flux2/releases/download/v$FLUX_VERSION/$FILENAME.tar.gz
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/flux ]; then
|
|
curl -fsSL $URL |
|
|
sudo tar -C /usr/local/bin -zx flux
|
|
sudo chmod +x /usr/local/bin/flux
|
|
flux completion bash | sudo tee /etc/bash_completion.d/flux
|
|
flux --version
|
|
fi"
|
|
|
|
# Install kubectx and kubens
|
|
pssh "
|
|
set -e
|
|
if ! [ -x /usr/local/bin/kctx ]; then
|
|
cd /tmp
|
|
git clone https://github.com/ahmetb/kubectx
|
|
sudo cp kubectx/kubectx /usr/local/bin/kctx
|
|
sudo cp kubectx/kubens /usr/local/bin/kns
|
|
sudo cp kubectx/completion/*.bash /etc/bash_completion.d
|
|
fi"
|
|
|
|
# Install kube-ps1
|
|
pssh "
|
|
set -e
|
|
if ! [ -d /opt/kube-ps1 ]; then
|
|
cd /tmp
|
|
git clone https://github.com/jonmosco/kube-ps1
|
|
sudo mv kube-ps1 /opt/kube-ps1
|
|
sudo -u $USER_LOGIN sed -i s/docker-prompt/kube_ps1/ /home/$USER_LOGIN/.bashrc &&
|
|
sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc <<EOF
|
|
. /opt/kube-ps1/kube-ps1.sh
|
|
KUBE_PS1_PREFIX=""
|
|
KUBE_PS1_SUFFIX=""
|
|
KUBE_PS1_SYMBOL_ENABLE="false"
|
|
KUBE_PS1_CTX_COLOR="green"
|
|
KUBE_PS1_NS_COLOR="green"
|
|
EOF
|
|
fi"
|
|
|
|
# Install stern
|
|
##VERSION## https://github.com/stern/stern/releases
|
|
STERN_VERSION=1.29.0
|
|
FILENAME=stern_${STERN_VERSION}_linux_${ARCH}
|
|
URL=https://github.com/stern/stern/releases/download/v$STERN_VERSION/$FILENAME.tar.gz
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/stern ]; then
|
|
curl -fsSL $URL |
|
|
sudo tar -C /usr/local/bin -zx stern
|
|
sudo chmod +x /usr/local/bin/stern
|
|
stern --completion bash | sudo tee /etc/bash_completion.d/stern
|
|
stern --version
|
|
fi"
|
|
|
|
# Install helm
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/helm ]; then
|
|
curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get-helm-3 | sudo bash &&
|
|
helm completion bash | sudo tee /etc/bash_completion.d/helm
|
|
helm version
|
|
fi"
|
|
|
|
# Install kustomize
|
|
##VERSION## https://github.com/kubernetes-sigs/kustomize/releases
|
|
KUSTOMIZE_VERSION=v5.4.1
|
|
URL=https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize/${KUSTOMIZE_VERSION}/kustomize_${KUSTOMIZE_VERSION}_linux_${ARCH}.tar.gz
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kustomize ]; then
|
|
curl -fsSL $URL |
|
|
sudo tar -C /usr/local/bin -zx kustomize
|
|
kustomize completion bash | sudo tee /etc/bash_completion.d/kustomize
|
|
kustomize version
|
|
fi"
|
|
|
|
# Install ship
|
|
# Note: 0.51.3 is the last version that doesn't display GIN-debug messages
|
|
# (don't want to get folks confused by that!)
|
|
# Only install ship on Intel platforms (no ARM 64 builds).
|
|
[ "$ARCH" = "amd64" ] &&
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/ship ]; then
|
|
##VERSION##
|
|
curl -fsSL https://github.com/replicatedhq/ship/releases/download/v0.51.3/ship_0.51.3_linux_$ARCH.tar.gz |
|
|
sudo tar -C /usr/local/bin -zx ship
|
|
fi"
|
|
|
|
# Install the AWS IAM authenticator
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/aws-iam-authenticator ]; then
|
|
##VERSION##
|
|
sudo curl -fsSLo /usr/local/bin/aws-iam-authenticator https://amazon-eks.s3-us-west-2.amazonaws.com/1.12.7/2019-03-27/bin/linux/$ARCH/aws-iam-authenticator
|
|
sudo chmod +x /usr/local/bin/aws-iam-authenticator
|
|
aws-iam-authenticator version
|
|
fi"
|
|
|
|
# Install jless (jless.io)
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/jless ]; then
|
|
##VERSION##
|
|
sudo apt-get install -y libxcb-render0 libxcb-shape0 libxcb-xfixes0
|
|
wget https://github.com/PaulJuliusMartinez/jless/releases/download/v0.9.0/jless-v0.9.0-x86_64-unknown-linux-gnu.zip
|
|
unzip jless-v0.9.0-x86_64-unknown-linux-gnu
|
|
sudo mv jless /usr/local/bin
|
|
fi"
|
|
|
|
# Install the krew package manager
|
|
pssh "
|
|
if [ ! -d /home/$USER_LOGIN/.krew ]; then
|
|
cd /tmp &&
|
|
KREW=krew-linux_$ARCH
|
|
curl -fsSL https://github.com/kubernetes-sigs/krew/releases/latest/download/\$KREW.tar.gz |
|
|
tar -zxf- &&
|
|
sudo -u $USER_LOGIN -H ./\$KREW install krew &&
|
|
echo export PATH=/home/$USER_LOGIN/.krew/bin:\\\$PATH | sudo -u $USER_LOGIN tee -a /home/$USER_LOGIN/.bashrc
|
|
fi"
|
|
|
|
# Install kubecolor
|
|
KUBECOLOR_VERSION=0.4.0
|
|
URL=https://github.com/kubecolor/kubecolor/releases/download/v${KUBECOLOR_VERSION}/kubecolor_${KUBECOLOR_VERSION}_linux_${ARCH}.tar.gz
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kubecolor ]; then
|
|
##VERSION##
|
|
curl -fsSL $URL |
|
|
sudo tar -C /usr/local/bin -zx kubecolor
|
|
fi"
|
|
|
|
# Install k9s
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/k9s ]; then
|
|
FILENAME=k9s_Linux_$ARCH.tar.gz &&
|
|
curl -fsSL https://github.com/derailed/k9s/releases/latest/download/\$FILENAME |
|
|
sudo tar -C /usr/local/bin -zx k9s
|
|
k9s version
|
|
fi"
|
|
|
|
# Install popeye
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/popeye ]; then
|
|
FILENAME=popeye_Linux_$ARCH.tar.gz &&
|
|
curl -fsSL https://github.com/derailed/popeye/releases/latest/download/\$FILENAME |
|
|
sudo tar -C /usr/local/bin -zx popeye
|
|
popeye version
|
|
fi"
|
|
|
|
# Install Tilt
|
|
# Official instructions:
|
|
# curl -fsSL https://raw.githubusercontent.com/tilt-dev/tilt/master/scripts/install.sh | bash
|
|
# But the install script is not arch-aware (see https://github.com/tilt-dev/tilt/pull/5050).
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/tilt ]; then
|
|
TILT_VERSION=0.33.13
|
|
FILENAME=tilt.\$TILT_VERSION.linux.$TILT_ARCH.tar.gz
|
|
curl -fsSL https://github.com/tilt-dev/tilt/releases/download/v\$TILT_VERSION/\$FILENAME |
|
|
sudo tar -C /usr/local/bin -zx tilt
|
|
tilt completion bash | sudo tee /etc/bash_completion.d/tilt
|
|
tilt version
|
|
fi"
|
|
|
|
# Install Skaffold
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/skaffold ]; then
|
|
curl -fsSLo skaffold https://storage.googleapis.com/skaffold/releases/latest/skaffold-linux-$ARCH &&
|
|
sudo install skaffold /usr/local/bin/
|
|
skaffold completion bash | sudo tee /etc/bash_completion.d/skaffold
|
|
skaffold version
|
|
fi"
|
|
|
|
# Install Kompose
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kompose ]; then
|
|
curl -fsSLo kompose https://github.com/kubernetes/kompose/releases/latest/download/kompose-linux-$ARCH &&
|
|
sudo install kompose /usr/local/bin
|
|
kompose completion bash | sudo tee /etc/bash_completion.d/kompose
|
|
kompose version
|
|
fi"
|
|
|
|
# Install KinD
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kind ]; then
|
|
curl -fsSLo kind https://github.com/kubernetes-sigs/kind/releases/latest/download/kind-linux-$ARCH &&
|
|
sudo install kind /usr/local/bin
|
|
kind completion bash | sudo tee /etc/bash_completion.d/kind
|
|
kind version
|
|
fi"
|
|
|
|
# Install YTT
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/ytt ]; then
|
|
curl -fsSLo ytt https://github.com/vmware-tanzu/carvel-ytt/releases/latest/download/ytt-linux-$ARCH &&
|
|
sudo install ytt /usr/local/bin
|
|
ytt completion bash | sudo tee /etc/bash_completion.d/ytt
|
|
ytt version
|
|
fi"
|
|
|
|
##VERSION## https://github.com/bitnami-labs/sealed-secrets/releases
|
|
KUBESEAL_VERSION=0.26.2
|
|
URL=https://github.com/bitnami-labs/sealed-secrets/releases/download/v${KUBESEAL_VERSION}/kubeseal-${KUBESEAL_VERSION}-linux-${ARCH}.tar.gz
|
|
#case $ARCH in
|
|
#amd64) FILENAME=kubeseal-linux-amd64;;
|
|
#arm64) FILENAME=kubeseal-arm64;;
|
|
#*) FILENAME=nope;;
|
|
#esac
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kubeseal ]; then
|
|
curl -fsSL $URL |
|
|
sudo tar -C /usr/local/bin -zx kubeseal
|
|
kubeseal --version
|
|
fi"
|
|
|
|
##VERSION## https://github.com/vmware-tanzu/velero/releases
|
|
VELERO_VERSION=1.13.2
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/velero ]; then
|
|
curl -fsSL https://github.com/vmware-tanzu/velero/releases/download/v$VELERO_VERSION/velero-v$VELERO_VERSION-linux-$ARCH.tar.gz |
|
|
sudo tar --strip-components=1 --wildcards -zx -C /usr/local/bin '*/velero'
|
|
velero completion bash | sudo tee /etc/bash_completion.d/velero
|
|
velero version --client-only
|
|
fi"
|
|
|
|
##VERSION## https://github.com/doitintl/kube-no-trouble/releases
|
|
KUBENT_VERSION=0.7.2
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/kubent ]; then
|
|
curl -fsSL https://github.com/doitintl/kube-no-trouble/releases/download/${KUBENT_VERSION}/kubent-${KUBENT_VERSION}-linux-$ARCH.tar.gz |
|
|
sudo tar -zxvf- -C /usr/local/bin kubent
|
|
kubent --version
|
|
fi"
|
|
|
|
# Ngrok. Note that unfortunately, this is the x86_64 binary.
|
|
# We might have to rethink how to handle this for multi-arch environments.
|
|
pssh "
|
|
if [ ! -x /usr/local/bin/ngrok ]; then
|
|
curl -fsSL https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz |
|
|
sudo tar -zxvf- -C /usr/local/bin ngrok
|
|
fi"
|
|
}
|
|
|
|
_cmd kubereset "Wipe out Kubernetes configuration on all nodes"
|
|
_cmd_kubereset() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "sudo kubeadm reset --force"
|
|
}
|
|
|
|
_cmd kubetest "Check that all nodes are reporting as Ready"
|
|
_cmd_kubetest() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
# There are way too many backslashes in the command below.
|
|
# Feel free to make that better ♥
|
|
pssh "
|
|
set -e
|
|
if i_am_first_node; then
|
|
which kubectl
|
|
for NODE in \$(grep [0-9]\$ /etc/hosts | grep -v ^127 | awk {print\ \\\$2}); do
|
|
echo \$NODE ; kubectl get nodes | grep -w \$NODE | grep -w Ready
|
|
done
|
|
fi"
|
|
echo kube_ok > tags/$TAG/status
|
|
}
|
|
|
|
_cmd ips "Show the IP addresses for a given tag"
|
|
_cmd_ips() {
|
|
TAG=$1
|
|
need_tag $TAG
|
|
|
|
while true; do
|
|
for I in $(seq $CLUSTERSIZE); do
|
|
read ip || return 0
|
|
printf "%s\t" "$ip"
|
|
done
|
|
printf "\n"
|
|
done < tags/$TAG/ips.txt
|
|
}
|
|
|
|
_cmd inventory "List all VMs on a given provider (or across all providers if no arg given)"
|
|
_cmd_inventory() {
|
|
FIXME
|
|
}
|
|
|
|
_cmd logins "Show login information for a group of instances"
|
|
_cmd_logins() {
|
|
TAG=$1
|
|
need_tag $TAG
|
|
|
|
cat tags/$TAG/logins.jsonl \
|
|
| jq -r '"\(if .codeServerPort then "\(.codeServerPort)\t" else "" end )\(.password)\tssh -l \(.login)\(if .port then " -p \(.port)" else "" end)\t\(.ipaddrs)"'
|
|
}
|
|
|
|
_cmd maketag "Generate a quasi-unique tag for a group of instances"
|
|
_cmd_maketag() {
|
|
if [ -z $USER ]; then
|
|
export USER=anonymous
|
|
fi
|
|
MS=$(($(date +%N | tr -d 0)/1000000))
|
|
date +%Y-%m-%d-%H-%M-$MS-$USER
|
|
}
|
|
|
|
_cmd netfix "Disable GRO and run a pinger job on the VMs"
|
|
_cmd_netfix () {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
sudo ethtool -K ens3 gro off
|
|
sudo tee /root/pinger.service <<EOF
|
|
[Unit]
|
|
Description=pinger
|
|
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
|
|
[Service]
|
|
WorkingDirectory=/
|
|
ExecStart=/bin/ping -w60 1.1
|
|
User=nobody
|
|
Group=nogroup
|
|
Restart=always
|
|
EOF
|
|
sudo systemctl enable /root/pinger.service
|
|
sudo systemctl start pinger"
|
|
}
|
|
|
|
_cmd ping "Ping VMs in a given tag, to check that they have network access"
|
|
_cmd_ping() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
# If we connect to our VMs over IPv6, the IP address is between brackets.
|
|
# Unfortunately, fping doesn't support that; so let's strip brackets here.
|
|
tr -d [] < tags/$TAG/ips.txt | fping
|
|
}
|
|
|
|
_cmd stage2 "Finalize the setup of managed Kubernetes clusters"
|
|
_cmd_stage2() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
cd tags/$TAG/stage2
|
|
terraform init -upgrade
|
|
terraform apply -auto-approve
|
|
terraform output -raw logins_jsonl > ../logins.jsonl
|
|
terraform output -raw ips_txt > ../ips.txt
|
|
echo "stage2_ok" > status
|
|
}
|
|
|
|
_cmd standardize "Deal with non-standard Ubuntu cloud images"
|
|
_cmd_standardize() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
# Try to log in as root.
|
|
# If successful, make sure than we have:
|
|
# - sudo
|
|
# - ubuntu user
|
|
# Note that on Scaleway, the keys of the root account get copied
|
|
# a little bit later after boot; so the first time we run "standardize"
|
|
# we might end up copying an incomplete authorized_keys file.
|
|
# That's why we copy it inconditionally here, rather than checking
|
|
# for existence and skipping if it already exists.
|
|
pssh -l root -t 5 true 2>&1 >/dev/null && {
|
|
pssh -l root "
|
|
grep DEBIAN_FRONTEND /etc/environment || echo DEBIAN_FRONTEND=noninteractive >> /etc/environment
|
|
#grep cloud-init /etc/sudoers && rm /etc/sudoers
|
|
apt-get update && apt-get install sudo -y
|
|
getent passwd ubuntu || {
|
|
useradd ubuntu -m -s /bin/bash
|
|
echo 'ubuntu ALL=(ALL:ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
|
|
}
|
|
install --owner=ubuntu --mode=700 --directory /home/ubuntu/.ssh
|
|
install --owner=ubuntu --mode=600 /root/.ssh/authorized_keys --target-directory /home/ubuntu/.ssh
|
|
"
|
|
}
|
|
|
|
# Now make sure that we have an ubuntu user
|
|
pssh true
|
|
|
|
# Disable unattended upgrades so that they don't mess up with the subsequent steps
|
|
pssh sudo rm -f /etc/apt/apt.conf.d/50unattended-upgrades
|
|
|
|
# Some cloud providers think that it's smart to disable password authentication.
|
|
# We need to re-neable it, though.
|
|
# Digital Ocecan
|
|
pssh "
|
|
if [ -f /etc/ssh/sshd_config.d/50-cloud-init.conf ]; then
|
|
sudo rm /etc/ssh/sshd_config.d/50-cloud-init.conf
|
|
sudo systemctl restart ssh.service
|
|
fi"
|
|
# AWS
|
|
pssh "if [ -f /etc/ssh/sshd_config.d/60-cloudimg-settings.conf ]; then
|
|
sudo rm /etc/ssh/sshd_config.d/60-cloudimg-settings.conf
|
|
sudo systemctl restart ssh.service
|
|
fi"
|
|
|
|
# Special case for oracle since their iptables blocks everything but SSH
|
|
pssh "
|
|
if [ -f /etc/iptables/rules.v4 ]; then
|
|
sudo sed -i 's/-A INPUT -j REJECT --reject-with icmp-host-prohibited//' /etc/iptables/rules.v4
|
|
sudo netfilter-persistent flush
|
|
sudo netfilter-persistent start
|
|
fi"
|
|
|
|
# oracle-cloud-agent upgrades pacakges in the background.
|
|
# This breaks our deployment scripts, because when we invoke apt-get, it complains
|
|
# that the lock already exists (symptom: random "Exited with error code 100").
|
|
# Workaround: if we detect oracle-cloud-agent, remove it.
|
|
# But this agent seems to also take care of installing/upgrading
|
|
# the unified-monitoring-agent package, so when we stop the snap,
|
|
# it can leave dpkg in a broken state. We "fix" it with the 2nd command.
|
|
pssh "
|
|
if [ -d /snap/oracle-cloud-agent ]; then
|
|
sudo snap remove oracle-cloud-agent
|
|
sudo dpkg --remove --force-remove-reinstreq unified-monitoring-agent
|
|
fi"
|
|
}
|
|
|
|
_cmd tailhist "Install history viewer on port 1088"
|
|
_cmd_tailhist () {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
ARCH=${ARCHITECTURE-amd64}
|
|
[ "$ARCH" = "aarch64" ] && ARCH=arm64
|
|
|
|
# We use "wget -c" here in case the download was aborted
|
|
# halfway through and we're actually trying to download it again.
|
|
pssh "
|
|
set -e
|
|
sudo apt-get install unzip -y
|
|
wget -c https://github.com/joewalnes/websocketd/releases/download/v0.3.0/websocketd-0.3.0-linux_$ARCH.zip
|
|
unzip -o websocketd-0.3.0-linux_$ARCH.zip websocketd
|
|
sudo mv websocketd /usr/local/bin/websocketd
|
|
sudo mkdir -p /opt/tailhist
|
|
sudo tee /opt/tailhist.service <<EOF
|
|
[Unit]
|
|
Description=tailhist
|
|
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
|
|
[Service]
|
|
WorkingDirectory=/opt/tailhist
|
|
ExecStart=/usr/local/bin/websocketd --port=1088 --staticdir=. sh -c \"tail -n +1 -f /home/$USER_LOGIN/.history || echo 'Could not read history file. Perhaps you need to \\\"chmod +r .history\\\"?'\"
|
|
User=nobody
|
|
Group=nogroup
|
|
Restart=always
|
|
EOF
|
|
sudo systemctl enable /opt/tailhist.service --now
|
|
"
|
|
|
|
pssh -I sudo tee /opt/tailhist/index.html <lib/tailhist.html
|
|
}
|
|
|
|
_cmd terraform "Apply Terraform configuration to provision resources."
|
|
_cmd_terraform() {
|
|
TAG=$1
|
|
need_tag
|
|
echo terraforming > tags/$TAG/status
|
|
(
|
|
cd tags/$TAG
|
|
terraform apply -auto-approve
|
|
# The Terraform provider for Proxmox has a bug; sometimes it fails
|
|
# to obtain VM address from the QEMU agent. In that case, we put
|
|
# ERROR in the ips.txt file (instead of the VM IP address). Detect
|
|
# that so that we run Terraform again (this typically solves the issue).
|
|
if grep -q ERROR ips.txt; then
|
|
die "Couldn't obtain IP address of some machines. Try to re-run terraform."
|
|
fi
|
|
)
|
|
echo terraformed > tags/$TAG/status
|
|
|
|
}
|
|
|
|
_cmd tools "Install a bunch of useful tools (editors, git, jq...)"
|
|
_cmd_tools() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
pssh "
|
|
set -e
|
|
sudo apt-get -q update
|
|
sudo apt-get -qy install apache2-utils argon2 emacs-nox git httping htop jid joe jq mosh tree unzip
|
|
# This is for VMs with broken PRNG (symptom: running docker-compose randomly hangs)
|
|
sudo apt-get -qy install haveged
|
|
"
|
|
}
|
|
|
|
_cmd pssh "Run an arbitrary command on all nodes"
|
|
_cmd_pssh() {
|
|
TAG=$1
|
|
need_tag
|
|
shift
|
|
|
|
pssh "$@"
|
|
}
|
|
|
|
_cmd pull_images "Pre-pull a bunch of Docker images"
|
|
_cmd_pull_images() {
|
|
TAG=$1
|
|
need_tag
|
|
pull_tag
|
|
}
|
|
|
|
_cmd remap_nodeports "Remap NodePort range to 10000-10999"
|
|
_cmd_remap_nodeports() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
FIND_LINE=" - --service-cluster-ip-range=10.96.0.0\/12"
|
|
ADD_LINE=" - --service-node-port-range=10000-10999"
|
|
MANIFEST_FILE=/etc/kubernetes/manifests/kube-apiserver.yaml
|
|
pssh "
|
|
if i_am_first_node && ! grep -q '$ADD_LINE' $MANIFEST_FILE; then
|
|
sudo sed -i 's/\($FIND_LINE\)\$/\1\n$ADD_LINE/' $MANIFEST_FILE
|
|
fi"
|
|
|
|
info "If you have manifests hard-coding nodePort values,"
|
|
info "you might want to patch them with a command like:"
|
|
info "
|
|
|
|
if i_am_first_node; then
|
|
kubectl -n kube-system patch svc prometheus-server \\
|
|
-p 'spec: { ports: [ {port: 80, nodePort: 10101} ]}'
|
|
fi
|
|
|
|
"
|
|
}
|
|
|
|
_cmd ssh "Open an SSH session to a node (first one by default)"
|
|
_cmd_ssh() {
|
|
TAG=$1
|
|
need_tag
|
|
if [ "$2" ]; then
|
|
ssh -l ubuntu -i tags/$TAG/id_rsa $2
|
|
else
|
|
IP=$(head -1 tags/$TAG/ips.txt)
|
|
info "Logging into $IP (default password: $USER_PASSWORD)"
|
|
ssh $SSHOPTS $USER_LOGIN@$IP
|
|
fi
|
|
}
|
|
|
|
_cmd tags "List groups of VMs known locally"
|
|
_cmd_tags() {
|
|
(
|
|
cd tags
|
|
echo "[#] [Status] [Tag] [Mode] [Provider]"
|
|
for tag in *; do
|
|
if [ -f $tag/logins.jsonl ]; then
|
|
count="$(wc -l < $tag/logins.jsonl)"
|
|
else
|
|
count="?"
|
|
fi
|
|
if [ -f $tag/status ]; then
|
|
status="$(cat $tag/status)"
|
|
else
|
|
status="?"
|
|
fi
|
|
if [ -f $tag/mode ]; then
|
|
mode="$(cat $tag/mode)"
|
|
else
|
|
mode="?"
|
|
fi
|
|
if [ -f $tag/provider ]; then
|
|
provider="$(cat $tag/provider)"
|
|
else
|
|
provider="?"
|
|
fi
|
|
echo "$count $status $tag $mode $provider"
|
|
done
|
|
) | column -t
|
|
}
|
|
|
|
_cmd test "Run tests (pre-flight checks) on a group of VMs"
|
|
_cmd_test() {
|
|
TAG=$1
|
|
need_tag
|
|
test_tag
|
|
}
|
|
|
|
_cmd tmux "Log into the first node and start a tmux server"
|
|
_cmd_tmux() {
|
|
TAG=$1
|
|
need_tag
|
|
IP=$(head -1 tags/$TAG/ips.txt)
|
|
info "Opening ssh+tmux with $IP"
|
|
rm -f /tmp/tmux-$UID/default
|
|
ssh $SSHOPTS -t -L /tmp/tmux-$UID/default:/tmp/tmux-1001/default docker@$IP tmux new-session -As 0
|
|
}
|
|
|
|
_cmd helmprom "Install Prometheus with Helm"
|
|
_cmd_helmprom() {
|
|
TAG=$1
|
|
need_tag
|
|
pssh "
|
|
if i_am_first_node; then
|
|
sudo -u $USER_LOGIN -H helm upgrade --install prometheus prometheus \
|
|
--repo https://prometheus-community.github.io/helm-charts/ \
|
|
--namespace prometheus --create-namespace \
|
|
--set server.service.type=NodePort \
|
|
--set server.service.nodePort=30090 \
|
|
--set server.persistentVolume.enabled=false \
|
|
--set alertmanager.enabled=false
|
|
fi"
|
|
}
|
|
|
|
_cmd passwords "Set individual passwords for each cluster"
|
|
_cmd_passwords() {
|
|
TAG=$1
|
|
need_tag
|
|
PASSWORDS_FILE="tags/$TAG/passwords"
|
|
if ! [ -f "$PASSWORDS_FILE" ]; then
|
|
error "File $PASSWORDS_FILE not found. Please create it first."
|
|
error "It should contain one password per line."
|
|
error "It should have as many lines as there are clusters."
|
|
die "Aborting."
|
|
fi
|
|
N_CLUSTERS=$($0 ips "$TAG" | wc -l)
|
|
N_PASSWORDS=$(wc -l < "$PASSWORDS_FILE")
|
|
if [ "$N_CLUSTERS" != "$N_PASSWORDS" ]; then
|
|
die "Found $N_CLUSTERS clusters and $N_PASSWORDS passwords. Aborting."
|
|
fi
|
|
$0 ips "$TAG" | paste "$PASSWORDS_FILE" - | while read password nodes; do
|
|
info "Setting password for $nodes..."
|
|
for node in $nodes; do
|
|
echo $USER_LOGIN $password | ssh $SSHOPTS -i tags/$TAG/id_rsa ubuntu@$node '
|
|
read login password
|
|
echo $login:$password | sudo chpasswd
|
|
hashedpassword=$(echo -n $password | argon2 saltysalt$RANDOM -e)
|
|
sudo -u $login mkdir -p /home/$login/.config/code-server
|
|
echo "hashed-password: \"$hashedpassword\"" | sudo -u $login tee /home/$login/.config/code-server/config.yaml >/dev/null
|
|
'
|
|
done
|
|
done
|
|
info "Done."
|
|
}
|
|
|
|
_cmd wait "Wait until VMs are ready (reachable, cloud init is done, ubuntu user is up)"
|
|
_cmd_wait() {
|
|
TAG=$1
|
|
need_tag
|
|
|
|
# Wait until all hosts are reachable.
|
|
info "Trying to reach $TAG instances..."
|
|
while >/dev/stderr echo -n "."; do
|
|
pssh -t 5 true 2>&1 >/dev/null && {
|
|
SSH_USER=ubuntu
|
|
break
|
|
}
|
|
pssh -l root -t 5 true 2>&1 >/dev/null && {
|
|
SSH_USER=root
|
|
break
|
|
}
|
|
sleep 2
|
|
done
|
|
>/dev/stderr echo ""
|
|
|
|
# If this VM image is using cloud-init,
|
|
# wait for cloud-init to be done
|
|
info "Waiting for cloud-init to be done on $TAG instances..."
|
|
pssh -l $SSH_USER "
|
|
if [ -d /var/lib/cloud ]; then
|
|
cloud-init status --wait
|
|
case $? in
|
|
0) exit 0;; # all is good
|
|
2) exit 0;; # recoverable error (happens with proxmox deprecated cloud-init payloads)
|
|
*) exit 1;; # all other problems
|
|
esac
|
|
fi"
|
|
}
|
|
|
|
# Sometimes, weave fails to come up on some nodes.
|
|
# Symptom: the pods on a node are unreachable (they don't even ping).
|
|
# Remedy: wipe out Weave state and delete weave pod on that node.
|
|
# Specifically, identify the weave pod that is defective, then:
|
|
# kubectl -n kube-system exec weave-net-XXXXX -c weave rm /weavedb/weave-netdata.db
|
|
# kubectl -n kube-system delete pod weave-net-XXXXX
|
|
_cmd weavetest "Check that weave seems properly setup"
|
|
_cmd_weavetest() {
|
|
TAG=$1
|
|
need_tag
|
|
pssh "
|
|
kubectl -n kube-system get pods -o name | grep weave | cut -d/ -f2 |
|
|
xargs -I POD kubectl -n kube-system exec POD -c weave -- \
|
|
sh -c \"./weave --local status | grep Connections | grep -q ' 1 failed' || ! echo POD \""
|
|
}
|
|
|
|
_cmd webssh "Install a WEB SSH server on the machines (port 1080)"
|
|
_cmd_webssh() {
|
|
TAG=$1
|
|
need_tag
|
|
pssh "
|
|
sudo apt-get update &&
|
|
sudo apt-get install python3-tornado python3-paramiko -y"
|
|
pssh "
|
|
cd /opt
|
|
[ -d webssh ] || sudo git clone https://github.com/jpetazzo/webssh"
|
|
pssh "
|
|
for KEYFILE in /etc/ssh/*.pub; do
|
|
read a b c < \$KEYFILE; echo localhost \$a \$b
|
|
done | sudo tee /opt/webssh/known_hosts"
|
|
pssh "cat >webssh.service <<EOF
|
|
[Unit]
|
|
Description=webssh
|
|
|
|
[Install]
|
|
WantedBy=multi-user.target
|
|
|
|
[Service]
|
|
WorkingDirectory=/opt/webssh
|
|
ExecStart=/usr/bin/env python3 run.py --fbidhttp=false --port=1080 --policy=reject
|
|
User=nobody
|
|
Group=nogroup
|
|
Restart=always
|
|
EOF"
|
|
pssh "
|
|
sudo systemctl enable \$PWD/webssh.service &&
|
|
sudo systemctl start webssh.service"
|
|
}
|
|
|
|
_cmd www "Run a web server to access card HTML and PDF"
|
|
_cmd_www() {
|
|
cd www
|
|
IPADDR=$(curl -fsSL canihazip.com/s || echo localhost)
|
|
info "The following files are available:"
|
|
for F in *; do
|
|
echo "http://$IPADDR:8000/$F"
|
|
done
|
|
info "Press Ctrl-C to stop server."
|
|
python3 -m http.server
|
|
}
|
|
|
|
pull_tag() {
|
|
# Pre-pull a bunch of images
|
|
pssh --timeout 900 'for I in \
|
|
debian:latest \
|
|
ubuntu:latest \
|
|
fedora:latest \
|
|
centos:latest \
|
|
elasticsearch:2 \
|
|
postgres \
|
|
redis \
|
|
alpine \
|
|
registry \
|
|
nicolaka/netshoot \
|
|
jpetazzo/trainingwheels \
|
|
golang \
|
|
training/namer \
|
|
dockercoins/hasher \
|
|
dockercoins/rng \
|
|
dockercoins/webui \
|
|
dockercoins/worker \
|
|
logstash \
|
|
prom/node-exporter \
|
|
google/cadvisor \
|
|
dockersamples/visualizer \
|
|
nathanleclaire/redisonrails; do
|
|
sudo docker pull $I
|
|
done'
|
|
|
|
info "Finished pulling images for $TAG."
|
|
}
|
|
|
|
test_tag() {
|
|
ips_file=tags/$TAG/ips.txt
|
|
info "Picking a random IP address in $ips_file to run tests."
|
|
ip=$(shuf -n1 $ips_file)
|
|
test_vm $ip
|
|
info "Tests complete."
|
|
}
|
|
|
|
test_vm() {
|
|
ip=$1
|
|
info "Testing instance with IP address $ip."
|
|
user=ubuntu
|
|
errors=""
|
|
|
|
for cmd in "hostname" \
|
|
"whoami" \
|
|
"hostname -i" \
|
|
"ls -l /usr/local/bin/i_am_first_node" \
|
|
"grep . /etc/name_of_first_node /etc/ipv4_of_first_node" \
|
|
"cat /etc/hosts" \
|
|
"hostnamectl status" \
|
|
"docker version | grep Version -B1" \
|
|
"docker-compose version" \
|
|
"docker-machine version" \
|
|
"docker images" \
|
|
"docker ps" \
|
|
"curl --silent localhost:55555" \
|
|
"sudo ls -la /mnt/ | grep docker" \
|
|
"env" \
|
|
"ls -la /home/docker/.ssh"; do
|
|
sep "$cmd"
|
|
echo "$cmd" \
|
|
| ssh -A $SSHOPTS $user@$ip sudo -u docker -i \
|
|
|| {
|
|
status=$?
|
|
error "$cmd exit status: $status"
|
|
errors="[$status] $cmd\n$errors"
|
|
}
|
|
done
|
|
sep
|
|
if [ -n "$errors" ]; then
|
|
error "The following commands had non-zero exit codes:"
|
|
printf "$errors"
|
|
fi
|
|
info "Test VM was $ip."
|
|
}
|
|
|
|
make_key_name() {
|
|
SHORT_FINGERPRINT=$(ssh-add -l | grep RSA | head -n1 | cut -d " " -f 2 | tr -d : | cut -c 1-8)
|
|
echo "${SHORT_FINGERPRINT}-${USER}"
|
|
}
|