Merge pull request #905 from weaveworks/reduce-proc-race

Minimize race between proc's net/tcp{,6} and fd/*
2026-05-06 01:08:03 +00:00 · 2016-02-03 18:12:59 +00:00
parent 2ed39682ab 3605407519
commit 6e4e5429bc
2 changed files with 82 additions and 30 deletions
--- a/probe/endpoint/procspy/proc_internal_test.go
+++ b/probe/endpoint/procspy/proc_internal_test.go
@@ -40,6 +40,9 @@ var mockFS = fs.Dir("",
   0: 00000000:A6C0 00000000:0000 01 00000000:00000000 00:00000000 00000000   105        0 5107 1 ffff8800a6aaf040 100 0 0 10 2d
 `,
 				},
+				fs.File{
+					FName: "tcp6",
+				},
 			),
 			fs.File{
 				FName:     "stat",
--- a/probe/endpoint/procspy/proc_linux.go
+++ b/probe/endpoint/procspy/proc_linux.go
@@ -68,42 +68,49 @@ func getNetNamespacePathSuffix() string {
 	return netNamespacePathSuffix
 }

-// walkProcPid walks over all numerical (PID) /proc entries, and sees if their
-// ./fd/* files are symlink to sockets. Returns a map from socket ID (inode)
-// to PID. Will return an error if /proc isn't there.
-func walkProcPid(buf *bytes.Buffer, walker process.Walker) (map[uint64]*Proc, error) {
-	var (
-		res        = map[uint64]*Proc{} // map socket inode -> process
-		namespaces = map[uint64]bool{}  // map namespace id -> has connections
-		statT      syscall.Stat_t
-	)
+// walkNamespacePid does the work of walkProcPid for a single namespace
+func walkNamespacePid(buf *bytes.Buffer, sockets map[uint64]*Proc, namespaceProcs []*process.Process) {

-	walker.Walk(func(p, _ process.Process) {
+	// Read the connections for the namespace, which are found (identically) in
+	// /proc/PID/net/tcp{,6} for any of the processes in the namespace.
+	var tcpSuccess bool
+	for _, p := range namespaceProcs {
+		dirName := strconv.Itoa(p.PID)
+
+		read, errRead := readFile(filepath.Join(procRoot, dirName, "/net/tcp"), buf)
+		read6, errRead6 := readFile(filepath.Join(procRoot, dirName, "/net/tcp6"), buf)
+
+		if errRead != nil || errRead6 != nil {
+			// try next process
+			continue
+		}
+
+		if read+read6 == 0 {
+			// No connections, don't bother reading /fd/*
+			return
+		}
+
+		tcpSuccess = true
+		break
+	}
+
+	if !tcpSuccess {
+		// There's no point in reading /fd/*
+		return
+	}
+
+	// Get the sockets for all the processes in the namespace
+	var statT syscall.Stat_t
+	for _, p := range namespaceProcs {
 		dirName := strconv.Itoa(p.PID)
 		fdBase := filepath.Join(procRoot, dirName, "fd")

-		// Read network namespace, and if we haven't seen it before,
-		// read /proc/<pid>/net/tcp
-		netNamespacePath := filepath.Join(procRoot, dirName, getNetNamespacePathSuffix())
-		if err := fs.Stat(netNamespacePath, &statT); err != nil {
-			return
-		}
-		hasConns, ok := namespaces[statT.Ino]
-		if !ok {
-			read, _ := readFile(filepath.Join(procRoot, dirName, "/net/tcp"), buf)
-			read6, _ := readFile(filepath.Join(procRoot, dirName, "/net/tcp6"), buf)
-			hasConns = read+read6 > 0
-			namespaces[statT.Ino] = hasConns
-		}
-		if !hasConns {
-			return
-		}
-
 		fds, err := fs.ReadDirNames(fdBase)
 		if err != nil {
 			// Process is be gone by now, or we don't have access.
-			return
+			continue
 		}
+
 		var proc *Proc
 		for _, fd := range fds {
 			// Direct use of syscall.Stat() to save garbage.
@@ -111,22 +118,64 @@ func walkProcPid(buf *bytes.Buffer, walker process.Walker) (map[uint64]*Proc, er
 			if err != nil {
 				continue
 			}
+
 			// We want sockets only.
 			if statT.Mode&syscall.S_IFMT != syscall.S_IFSOCK {
 				continue
 			}
+
+			// Initialize proc lazily to avoid creating unnecessary
+			// garbage
 			if proc == nil {
 				proc = &Proc{
 					PID:  uint(p.PID),
 					Name: p.Name,
 				}
 			}
-			res[statT.Ino] = proc
+
+			sockets[statT.Ino] = proc
 		}
+
+	}
+}
+
+// walkProcPid walks over all numerical (PID) /proc entries. It reads
+// /proc/PID/net/tcp{,6} for each namespace and sees if the ./fd/* files of each
+// process in that namespace are symlinks to sockets. Returns a map from socket
+// ID (inode) to PID.
+func walkProcPid(buf *bytes.Buffer, walker process.Walker) (map[uint64]*Proc, error) {
+	var (
+		sockets    = map[uint64]*Proc{}              // map socket inode -> process
+		namespaces = map[uint64][]*process.Process{} // map network namespace id -> processes
+		statT      syscall.Stat_t
+	)
+
+	// We do two process traversals: One to group processes by namespace and
+	// another one to obtain their connections.
+	//
+	// The first traversal is needed to allow obtaining the connections on a
+	// per-namespace basis. This is done to minimize the race condition
+	// between reading /net/tcp{,6} of each namespace and /proc/PID/fd/* for
+	// the processes living in that namespace.
+
+	walker.Walk(func(p, _ process.Process) {
+		dirName := strconv.Itoa(p.PID)
+
+		netNamespacePath := filepath.Join(procRoot, dirName, getNetNamespacePathSuffix())
+		if err := fs.Stat(netNamespacePath, &statT); err != nil {
+			return
+		}
+
+		namespaceID := statT.Ino
+		namespaces[namespaceID] = append(namespaces[namespaceID], &p)
 	})

+	for _, procs := range namespaces {
+		walkNamespacePid(buf, sockets, procs)
+	}
+
 	metrics.SetGauge(namespaceKey, float32(len(namespaces)))
-	return res, nil
+	return sockets, nil
 }

 // readFile reads an arbitrary file into a buffer. It's a variable so it can