Files
weave-scope/scope
Iago López Galeiras 9920c4ea48 Add eBPF connection tracking without dependencies on kernel headers
Based on work from Lorenzo, updated by Iago, Alban, Alessandro and
Michael.

This PR adds connection tracking using eBPF. This feature is not enabled by default.
For now, you can enable it by launching scope with the following command:

```
sudo ./scope launch --probe.ebpf.connections=true
```

This patch allows scope to get notified of every connection event,
without relying on the parsing of /proc/$pid/net/tcp{,6} and
/proc/$pid/fd/*, and therefore improve performance.

We vendor https://github.com/iovisor/gobpf in Scope to load the
pre-compiled ebpf program and https://github.com/weaveworks/tcptracer-bpf
to guess the offsets of the structures we need in the kernel. In this
way we don't need a different pre-compiled ebpf object file per kernel.
The pre-compiled ebpf program is included in the vendoring of
tcptracer-bpf.

The ebpf program uses kprobes/kretprobes on the following kernel functions:
- tcp_v4_connect
- tcp_v6_connect
- tcp_set_state
- inet_csk_accept
- tcp_close

It generates "connect", "accept" and "close" events containing the
connection tuple but also pid and netns.
Note: the IPv6 events are not supported in Scope and thus not passed on.

probe/endpoint/ebpf.go maintains the list of connections. Similarly to
conntrack, it also keeps the dead connections for one iteration in order
to report short-lived connections.

The code for parsing /proc/$pid/net/tcp{,6} and /proc/$pid/fd/* is still
there and still used at start-up because eBPF only brings us the events
and not the initial state. However, the /proc parsing for the initial
state is now done in foreground instead of background, via
newForegroundReader().

NAT resolution on connections from eBPF works in the same way as it did
on connections from /proc: by using conntrack. One of the two conntrack
instances is only started to get the initial state and then it is
stopped since eBPF detects short-lived connections.

The Scope Docker image size comparison:
- weaveworks/scope in current master:  22 MB (compressed),  68 MB
  (uncompressed)
- weaveworks/scope with this patchset: 23 MB (compressed), 69 MB
  (uncompressed)

Fixes #1168 (walking /proc to obtain connections is very expensive)

Fixes #1260 (Short-lived connections not tracked for containers in
shared networking namespaces)

Fixes #1962 (Port ebpf tracker to Go)

Fixes #1961 (Remove runtime kernel header dependency from ebpf tracker)
2017-03-08 22:11:12 +01:00

286 lines
10 KiB
Bash
Executable File

#!/bin/sh
set -eu
ARGS="$*"
SCRIPT_VERSION="(unreleased version)"
if [ "$SCRIPT_VERSION" = "(unreleased version)" ]; then
IMAGE_VERSION=latest
else
IMAGE_VERSION="$SCRIPT_VERSION"
fi
IMAGE_VERSION=${VERSION:-$IMAGE_VERSION}
SCOPE_IMAGE_NAME=weaveworks/scope
SCOPE_IMAGE="$SCOPE_IMAGE_NAME:$IMAGE_VERSION"
# Careful: it's easy to operate on (e.g. stop) the wrong scope instance
# when SCOPE{_APP,}_CONTAINER_NAME values differ between runs. Handle
# with care.
SCOPE_CONTAINER_NAME="${SCOPE_CONTAINER_NAME:-weavescope}"
SCOPE_APP_CONTAINER_NAME="${SCOPE_APP_CONTAINER_NAME:-weavescope-app}"
IP_REGEXP="[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}"
IP_ADDR_CMD="find /sys/class/net -type l | xargs -n1 basename | grep -vE 'docker|veth|lo' | \
xargs -n1 ip addr show | grep inet | awk '{ print \$2 }' | grep -oE '$IP_REGEXP'"
WEAVESCOPE_DOCKER_ARGS=${WEAVESCOPE_DOCKER_ARGS:-}
# When docker daemon is running with User Namespace enabled, this tool will run into errors:
# "Privileged mode is incompatible with user namespaces" for `docker run --privileged`
# "Cannot share the host's network namespace when user namespaces are enabled" for `docker run --net=host`
# To avoid above errors, use `--userns=host` option to let container use host User Namespace.
# This option(saved in $USERNS_HOST) will be inserted ONLY IF docker support `--userns` option.
USERNS_HOST=""
docker run --help | grep -q -- --userns && USERNS_HOST="--userns=host"
usage() {
name=$(basename "$0")
cat >&2 <<-EOF
Usage:
$name launch {OPTIONS} {PEERS} - Launch Scope
$name stop - Stop Scope
$name command - Print the docker command used to start Scope
$name help - Print usage info
$name version - Print version info
PEERS are of the form HOST[:PORT]
HOST may be an ip or hostname.
PORT defaults to 4040.
Launch options:
EOF
# shellcheck disable=SC2086
docker run --rm -e CHECKPOINT_DISABLE --entrypoint=/home/weave/scope \
$WEAVESCOPE_DOCKER_ARGS "$SCOPE_IMAGE" -h >&2
}
usage_and_die() {
usage
exit 1
}
[ $# -gt 0 ] || usage_and_die
COMMAND=$1
shift 1
check_docker_access() {
# Extract socket path
DOCKER_SOCK_FILE=""
if [ -z "${DOCKER_HOST+x}" ]; then
DOCKER_SOCK_FILE="/var/run/docker.sock"
else
WITHOUT_PREFIX="${DOCKER_HOST#unix://}"
if [ "$WITHOUT_PREFIX" != "$DOCKER_HOST" ]; then
DOCKER_SOCK_FILE="$WITHOUT_PREFIX"
fi
fi
# shellcheck disable=SC2166
if [ \( -n "$DOCKER_SOCK_FILE" \) -a \( ! -w "$DOCKER_SOCK_FILE" \) ]; then
echo "ERROR: cannot write to docker socket: $DOCKER_SOCK_FILE" >&2
echo "change socket permissions or try using sudo" >&2
exit 1
fi
}
# - The image embeds the weave script & Docker 1.13.1 client (mimicking a 1.10 client)
# - Weave needs 1.10.0 now (image pulling changes)
MIN_DOCKER_VERSION=1.10.0
check_docker_version() {
if ! DOCKER_VERSION=$(docker -v | sed -n 's%^Docker version \([0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}\).*$%\1%p') \
|| [ -z "$DOCKER_VERSION" ]; then
echo "ERROR: Unable to parse docker version" >&2
exit 1
fi
DOCKER_VERSION_MAJOR=$(echo "$DOCKER_VERSION" | cut -d. -f 1)
DOCKER_VERSION_MINOR=$(echo "$DOCKER_VERSION" | cut -d. -f 2)
DOCKER_VERSION_PATCH=$(echo "$DOCKER_VERSION" | cut -d. -f 3)
MIN_DOCKER_VERSION_MAJOR=$(echo "$MIN_DOCKER_VERSION" | cut -d. -f 1)
MIN_DOCKER_VERSION_MINOR=$(echo "$MIN_DOCKER_VERSION" | cut -d. -f 2)
MIN_DOCKER_VERSION_PATCH=$(echo "$MIN_DOCKER_VERSION" | cut -d. -f 3)
# shellcheck disable=SC2166
if [ \( "$DOCKER_VERSION_MAJOR" -lt "$MIN_DOCKER_VERSION_MAJOR" \) -o \
\( "$DOCKER_VERSION_MAJOR" -eq "$MIN_DOCKER_VERSION_MAJOR" -a \
\( "$DOCKER_VERSION_MINOR" -lt "$MIN_DOCKER_VERSION_MINOR" -o \
\( "$DOCKER_VERSION_MINOR" -eq "$MIN_DOCKER_VERSION_MINOR" -a \
\( "$DOCKER_VERSION_PATCH" -lt "$MIN_DOCKER_VERSION_PATCH" \) \) \) \) ]; then
echo "ERROR: scope requires Docker version $MIN_DOCKER_VERSION or later; you are running $DOCKER_VERSION" >&2
exit 1
fi
}
check_probe_only() {
echo "${ARGS}" | grep -q -E "\-\-no\-app|\-\-service\-token|\-\-probe\-only"
}
check_docker_for_mac() {
[ "$(uname)" = "Darwin" ] \
&& [ -S /var/run/docker.sock ] \
&& [ ! "${DOCKER_HOST+x}" = x ] \
&& [ "${HOME+x}" = x ] \
&& [ -d "${HOME}/Library/Containers/com.docker.docker/Data/database" ]
}
# Check that a container named $1 with image $2 is not running
check_not_running() {
case $(docker inspect --format='{{.State.Running}} {{.Config.Image}}' "$1" 2>/dev/null) in
"true $2")
echo "$1 is already running." >&2
exit 1
;;
"true $2:"*)
echo "$1 is already running." >&2
exit 1
;;
"false $2")
docker rm "$1" >/dev/null
;;
"false $2:"*)
docker rm "$1" >/dev/null
;;
true*)
echo "Found another running container named '$1'. Aborting." >&2
exit 1
;;
false*)
echo "Found another container named '$1'. Aborting." >&2
exit 1
;;
esac
}
create_plugins_dir() {
# Docker for Mac (as of beta18) looks for this path on VM first, and when it doesn't
# find it there, it assumes the user referes to the path on Mac OS. Firstly, in most
# cases user won't have /var/run/scope/plugins on their Mac OS filesystem, and
# secondly Docker for Mac would have to be configured to share this path. The result
# of this "feature" is an error message: "The path /var/run/scope/plugins
# is not shared from OS X and does not belong to the system."
# In any case, creating /var/run/scope/plugins on Mac OS would not work, as domain
# sockets do not cross VM boundaries. We need this directory to exits on the VM.
docker run --rm --entrypoint=/bin/sh \
-v /var/run:/var/run \
"$SCOPE_IMAGE" -c "mkdir -p /var/run/scope/plugins"
}
launch_command() {
# shellcheck disable=SC2086
echo docker run --privileged $USERNS_HOST -d --name="$SCOPE_CONTAINER_NAME" --net=host --pid=host \
-v /var/run/docker.sock:/var/run/docker.sock \
-v /var/run/scope/plugins:/var/run/scope/plugins \
-v /sys/kernel/debug:/sys/kernel/debug \
-e CHECKPOINT_DISABLE \
$WEAVESCOPE_DOCKER_ARGS "$SCOPE_IMAGE" --probe.docker=true
}
launch_docker4mac_app_command() {
# shellcheck disable=SC2086
echo docker run -d --name="$SCOPE_APP_CONTAINER_NAME" \
-e CHECKPOINT_DISABLE \
-p 0.0.0.0:4040:4040 \
$WEAVESCOPE_DOCKER_ARGS "$SCOPE_IMAGE" --no-probe
}
launch() {
check_not_running "$SCOPE_CONTAINER_NAME" "$SCOPE_IMAGE_NAME"
docker rm -f "$SCOPE_CONTAINER_NAME" >/dev/null 2>&1 || true
$(launch_command) "$@"
echo "Scope probe started"
}
print_app_endpoints() {
echo "Weave Scope is reachable at the following URL(s):" >&2
for ip in "$@"; do
echo " * http://$ip:4040/" >&2
done
}
check_docker_access
check_docker_version
case "$COMMAND" in
command)
# Most systems should have printf, but the %q specifier isn't mandated by posix
# and can't be guaranteed. Since this is mainly a cosmetic output and the alternative
# is not making any attempt to do escaping at all, we might as well try.
# shellcheck disable=SC2039
quoted=$(printf '%q ' "$@" 2>/dev/null || true)
# printf %q behaves oddly with zero args (it acts as though it recieved one empty arg)
# so we ignore that case.
if [ -z "$quoted" ] || [ $# -eq 0 ]; then
quoted="$*"
fi
echo "$(launch_command) $quoted"
;;
version)
# shellcheck disable=SC2086
docker run --rm -e CHECKPOINT_DISABLE --entrypoint=/home/weave/scope \
$WEAVESCOPE_DOCKER_ARGS "$SCOPE_IMAGE" --mode=version
;;
-h | help | -help | --help)
usage
;;
launch)
# Do a dry run of scope in the foreground, so it can parse args etc
# avoiding the entrypoint script in the process.
# shellcheck disable=SC2086
docker run --rm -e CHECKPOINT_DISABLE --entrypoint=/home/weave/scope $WEAVESCOPE_DOCKER_ARGS "$SCOPE_IMAGE" --dry-run "$@"
if check_docker_for_mac; then
create_plugins_dir
if check_probe_only; then
launch "$@"
exit
fi
# Docker for Mac (as of beta9) does not ship vmnet driver and
# thereby only access container ports via a tunnel, preventing
# access to host ports of the VM.
# - https://github.com/weaveworks/scope/issues/1411
# - https://forums.docker.com/t/ports-in-host-network-namespace-are-not-accessible/10789
check_not_running "$SCOPE_APP_CONTAINER_NAME" "$SCOPE_IMAGE_NAME"
check_not_running "$SCOPE_CONTAINER_NAME" "$SCOPE_IMAGE_NAME"
docker rm -f "$SCOPE_APP_CONTAINER_NAME" >/dev/null 2>&1 || true
CONTAINER=$($(launch_docker4mac_app_command) "$@")
echo "Scope probe started"
app_ip=$(docker inspect -f '{{.NetworkSettings.IPAddress}}' "${CONTAINER}")
docker rm -f "$SCOPE_CONTAINER_NAME" >/dev/null 2>&1 || true
# shellcheck disable=SC2091
CONTAINER=$($(launch_command --no-app "$@" "${app_ip}:4040"))
print_app_endpoints "localhost"
exit
fi
launch "$@"
if ! check_probe_only; then
# shellcheck disable=SC2086
IP_ADDRS=$(docker run --rm $USERNS_HOST --net=host --entrypoint /bin/sh "$SCOPE_IMAGE" -c "$IP_ADDR_CMD")
# shellcheck disable=SC2086
print_app_endpoints $IP_ADDRS
fi
;;
stop)
[ $# -eq 0 ] || usage_and_die
if docker inspect "$SCOPE_CONTAINER_NAME" >/dev/null 2>&1; then
docker stop "$SCOPE_CONTAINER_NAME" >/dev/null
fi
if check_docker_for_mac; then
if docker inspect "$SCOPE_APP_CONTAINER_NAME" >/dev/null 2>&1; then
docker stop "$SCOPE_APP_CONTAINER_NAME" >/dev/null
fi
fi
;;
*)
echo "Unknown scope command '$COMMAND'" >&2
usage_and_die
;;
esac