Files
weave-scope/probe/endpoint/procspy/proc_linux.go
Alban Crequy d715ccc391 ebpf: handle fd_install events from tcptracer-bpf
Since https://github.com/weaveworks/tcptracer-bpf/pull/39, tcptracer-bpf
can generate "fd_install" events when a process installs a new file
descriptor in its fd table. Those events must be requested explicitly
on a per-pid basis with tracer.AddFdInstallWatcher(pid).

This is useful to know about "accept" events that would otherwise be
missed because kretprobes are not triggered for functions that were
called before the installation of the kretprobe.

This patch finds all the processes that are currently blocked on an
accept() syscall during the EbpfTracker initialization.
feedInitialConnections() will use tracer.AddFdInstallWatcher() to
subscribe to fd_install events. When an fd_install event is received,
synthesise an accept event with the connection tuple and the network
namespace (from /proc).
2017-05-19 14:49:38 +02:00

275 lines
7.0 KiB
Go

package procspy
// /proc-based implementation.
import (
"bytes"
"fmt"
"path/filepath"
"strconv"
"syscall"
"time"
log "github.com/Sirupsen/logrus"
"github.com/armon/go-metrics"
"github.com/weaveworks/common/fs"
"github.com/weaveworks/scope/common/marshal"
"github.com/weaveworks/scope/probe/process"
)
// Package-level state used by the /proc-based walker.
var (
// procRoot is the directory under which per-PID proc entries are looked up;
// it can be replaced with SetProcRoot.
procRoot = "/proc"
// namespaceKey is the metrics key under which walk reports the number of
// network namespaces it grouped processes into.
namespaceKey = []string{"procspy", "namespaces"}
// netNamespacePathSuffix caches the value computed by
// getNetNamespacePathSuffix; the empty string means "not computed yet".
netNamespacePathSuffix = ""
)
// pidWalker groups the processes reported by a process.Walker by network
// namespace and records which process owns each socket inode, pacing its work
// with tickc and aborting when stopc is closed.
type pidWalker struct {
walker process.Walker // Source of the processes to inspect
tickc <-chan time.Time // Rate-limit clock. Sets the pace when traversing namespaces and /proc/PID/fd/* files.
stopc chan struct{} // Abort walk
fdBlockSize uint64 // Maximum number of /proc/PID/fd/* files to stat() per tick
}
// newPidWalker builds a pidWalker around the given process walker. tickc paces
// the traversal and fdBlockSize bounds how many fd entries are examined
// between ticks. A fresh stop channel is created for every walker.
func newPidWalker(walker process.Walker, tickc <-chan time.Time, fdBlockSize uint64) pidWalker {
	return pidWalker{
		walker:      walker,
		tickc:       tickc,
		stopc:       make(chan struct{}),
		fdBlockSize: fdBlockSize,
	}
}
// SetProcRoot overrides the directory that is treated as the root of the proc
// filesystem (initially "/proc"). All subsequent per-PID lookups in this file
// are resolved relative to root.
func SetProcRoot(root string) {
	procRoot = root
}
// getKernelVersion returns the major and minor numbers of the running kernel
// as reported by uname(2). It returns an error when uname fails or when the
// release string does not start with "<major>.<minor>".
func getKernelVersion() (major, minor int, err error) {
	var uts syscall.Utsname
	if unameErr := syscall.Uname(&uts); unameErr != nil {
		return 0, 0, unameErr
	}

	// Kernel release strings are not guaranteed to be semver, so only the
	// leading "<major>.<minor>" pair is parsed.
	release := marshal.FromUtsname(uts.Release)
	parsed, scanErr := fmt.Sscanf(release, "%d.%d", &major, &minor)
	if scanErr != nil || parsed != 2 {
		return 0, 0, fmt.Errorf("Malformed version: %s", release)
	}
	return major, minor, nil
}
// getNetNamespacePathSuffix returns the path, relative to /proc/PID, of the
// file whose inode is used to identify the network namespace of a process.
// The result is computed once and cached in netNamespacePathSuffix.
func getNetNamespacePathSuffix() string {
	// With Linux 3.8 or later the network namespace of a process can be
	// determined by the inode of /proc/PID/ns/net. Before that, any file
	// under /proc/PID/net/ could be used, but that is not documented and may
	// break in newer kernels.
	const (
		post38Path = "ns/net"
		pre38Path  = "net/dev"
	)

	if cached := netNamespacePathSuffix; cached != "" {
		return cached
	}

	// Default to the modern path; it is also the fallback when the kernel
	// version cannot be determined.
	suffix := post38Path
	major, minor, err := getKernelVersion()
	switch {
	case err != nil:
		log.Errorf("getNamespacePathSuffix: cannot get kernel version: %s", err)
	case major < 3, major == 3 && minor < 8:
		suffix = pre38Path
	}

	netNamespacePathSuffix = suffix
	return suffix
}
// ReadTCPFiles appends the contents of the "net/tcp" and "net/tcp6" proc files
// of the given pid to buf, in that order. Both files are always attempted.
// It returns the sum of the two counts reported by readFile together with the
// first error encountered (the tcp error takes precedence over the tcp6 one).
func ReadTCPFiles(pid int, buf *bytes.Buffer) (int64, error) {
	pidDir := strconv.Itoa(pid)

	// The "tcp6" file is needed even for tcp4 connections because of
	// IPv4-mapped IPv6 addresses.
	n4, err4 := readFile(filepath.Join(procRoot, pidDir, "/net/tcp"), buf)
	n6, err6 := readFile(filepath.Join(procRoot, pidDir, "/net/tcp6"), buf)

	total := n4 + n6
	if err4 == nil {
		return total, err6
	}
	return total, err4
}
// readProcessConnections reads the connections of a group of processes living
// in the same network namespace into buf. /proc/PID/net/tcp{,6} is identical
// for every process of a namespace, so the processes are tried in order and
// the first successful read wins.
//
// It reports whether any bytes were read. When no process could be read, it
// returns false together with the error of the last attempt (nil if
// namespaceProcs is empty).
func readProcessConnections(buf *bytes.Buffer, namespaceProcs []*process.Process) (bool, error) {
	var lastErr error
	for _, candidate := range namespaceProcs {
		n, err := ReadTCPFiles(candidate.PID, buf)
		if err == nil {
			// One successful read is enough for the whole namespace.
			return n > 0, nil
		}
		// Remember the failure and try the next process.
		lastErr = err
	}
	return false, lastErr
}
// walkNamespace does the work of walk for a single network namespace. It reads
// the namespace's connections into buf and then records, in sockets, which
// process owns each socket inode found under /proc/PID/fd for the processes
// in namespaceProcs.
//
// Once more than fdBlockSize fd entries have been examined, it waits for the
// next tick before starting on another process, and reads the connections
// again at that point. It returns nil when stopped through stopc, and the
// error from readProcessConnections (possibly nil) when no connections could
// be read.
func (w pidWalker) walkNamespace(namespaceID uint64, buf *bytes.Buffer, sockets map[uint64]*Proc, namespaceProcs []*process.Process) error {
	if ok, err := readProcessConnections(buf, namespaceProcs); !ok || err != nil {
		return err
	}

	var (
		stat      syscall.Stat_t
		fdsInTick uint64
	)
	for idx := range namespaceProcs {
		current := namespaceProcs[idx]

		if fdsInTick > w.fdBlockSize {
			// The file descriptor budget for this tick is spent: wait for
			// the next tick, or give up if the walk was stopped.
			select {
			case <-w.stopc:
				return nil // abort
			case <-w.tickc:
			}
			fdsInTick = 0

			// Read the connections again to avoid the race between
			// /net/tcp{,6} and /proc/PID/fd/*.
			if ok, err := readProcessConnections(buf, namespaceProcs[idx:]); !ok || err != nil {
				return err
			}
		}

		fdDir := filepath.Join(procRoot, strconv.Itoa(current.PID), "fd")
		names, listErr := fs.ReadDirNames(fdDir)
		if listErr != nil {
			// The process is gone by now, or we don't have access.
			continue
		}

		// owner is created lazily so that processes without sockets do not
		// produce garbage.
		var owner *Proc
		for _, name := range names {
			fdsInTick++

			// fs.Stat fills a caller-provided struct, which saves garbage.
			if statErr := fs.Stat(filepath.Join(fdDir, name), &stat); statErr != nil {
				continue
			}

			// Only sockets are of interest.
			isSocket := stat.Mode&syscall.S_IFMT == syscall.S_IFSOCK
			if !isSocket {
				continue
			}

			if owner == nil {
				owner = &Proc{
					PID:            uint(current.PID),
					Name:           current.Name,
					NetNamespaceID: namespaceID,
				}
			}
			sockets[stat.Ino] = owner
		}
	}
	return nil
}
// ReadNetnsFromPID returns the inode that identifies the network namespace of
// the specified pid. The inode is obtained by stat-ing the file named by
// getNetNamespacePathSuffix under the pid's proc directory; the stat error is
// returned unchanged on failure.
func ReadNetnsFromPID(pid int) (uint64, error) {
	nsPath := filepath.Join(procRoot, strconv.Itoa(pid), getNetNamespacePathSuffix())

	var st syscall.Stat_t
	err := fs.Stat(nsPath, &st)
	if err != nil {
		return 0, err
	}
	return st.Ino, nil
}
// walk walks over all the processes reported by the underlying process walker.
// It reads /proc/PID/net/tcp{,6} (appending to buf) for each network namespace
// and checks whether the ./fd/* files of each process in that namespace are
// sockets. It returns a map from socket ID (inode) to the process owning the
// socket.
//
// One tick of the rate-limit clock is consumed per namespace. If the walker is
// stopped, the remaining namespaces are skipped and the sockets collected so
// far are returned. The returned error is always nil.
func (w pidWalker) walk(buf *bytes.Buffer) (map[uint64]*Proc, error) {
	var (
		sockets    = map[uint64]*Proc{}              // map socket inode -> process
		namespaces = map[uint64][]*process.Process{} // map network namespace id -> processes
	)

	// We do two process traversals: One to group processes by namespace and
	// another one to obtain their connections.
	//
	// The first traversal is needed to allow obtaining the connections on a
	// per-namespace basis. This is done to minimize the race condition
	// between reading /net/tcp{,6} of each namespace and /proc/PID/fd/* for
	// the processes living in that namespace.
	w.walker.Walk(func(p, _ process.Process) {
		namespaceID, err := ReadNetnsFromPID(p.PID)
		if err != nil {
			// The namespace of this process cannot be determined; skip it.
			return
		}
		// p is a parameter of this callback, so every invocation has its own
		// copy and taking its address is safe.
		namespaces[namespaceID] = append(namespaces[namespaceID], &p)
	})

namespaceLoop:
	for namespaceID, procs := range namespaces {
		select {
		case <-w.tickc:
			// Best effort: a namespace that cannot be read must not prevent
			// the other namespaces from being walked, so the error is
			// deliberately dropped.
			_ = w.walkNamespace(namespaceID, buf, sockets, procs)
		case <-w.stopc:
			// A bare break would only leave the select statement and the loop
			// would carry on with the next namespace; the label is required
			// to actually abort the walk.
			break namespaceLoop
		}
	}

	metrics.SetGauge(namespaceKey, float32(len(namespaces)))
	return sockets, nil
}
// stop aborts any walk in progress by closing the stop channel. The channel
// is closed unconditionally, so stop must be called at most once per walker.
func (w pidWalker) stop() {
	close(w.stopc)
}
// readFile appends the whole content of the named file to buf. It returns the
// number of bytes read, or -1 and the error if the file cannot be opened.
func readFile(filename string, buf *bytes.Buffer) (int64, error) {
	src, openErr := fs.Open(filename)
	if openErr != nil {
		return -1, openErr
	}
	defer src.Close()

	return buf.ReadFrom(src)
}