diff --git a/probe/endpoint/connection_tracker.go b/probe/endpoint/connection_tracker.go index 9e753180d..6b14de207 100644 --- a/probe/endpoint/connection_tracker.go +++ b/probe/endpoint/connection_tracker.go @@ -88,8 +88,18 @@ func (t *connectionTracker) ReportConnections(rpt *report.Report) { hostNodeID := report.MakeHostNodeID(t.conf.HostID) if t.ebpfTracker != nil { - t.performEbpfTrack(rpt, hostNodeID) - return + if !t.ebpfTracker.isDead() { + t.performEbpfTrack(rpt, hostNodeID) + return + } + log.Warnf("ebpf tracker died, gently falling back to proc scanning") + if t.conf.WalkProc && t.conf.Scanner == nil { + t.conf.Scanner = procspy.NewConnectionScanner(t.conf.ProcessCache) + } + if t.flowWalker == nil { + t.flowWalker = newConntrackFlowWalker(t.conf.UseConntrack, t.conf.ProcRoot, t.conf.BufferSize, "--any-nat") + } + t.ebpfTracker = nil } // seenTuples contains information about connections seen by conntrack and it will be passed to the /proc parser diff --git a/probe/endpoint/ebpf.go b/probe/endpoint/ebpf.go index 07ce10f05..69d7bad3a 100644 --- a/probe/endpoint/ebpf.go +++ b/probe/endpoint/ebpf.go @@ -25,6 +25,7 @@ type eventTracker interface { walkConnections(f func(ebpfConnection)) feedInitialConnections(ci procspy.ConnIter, seenTuples map[string]fourTuple, hostNodeID string) isReadyToHandleConnections() bool + isDead() bool stop() } @@ -99,7 +100,13 @@ var lastTimestampV4 uint64 func tcpEventCbV4(e tracer.TcpV4) { if lastTimestampV4 > e.Timestamp { - log.Errorf("ERROR: late event!\n") + // A kernel bug can cause the timestamps to be wrong (e.g. on Ubuntu with Linux 4.4.0-47.68) + // Upgrading the kernel will fix the problem. For further info see: + // https://github.com/iovisor/bcc/issues/790#issuecomment-263704235 + // https://github.com/weaveworks/scope/issues/2334 + log.Errorf("tcp tracer received event with timestamp %v even though the last timestamp was %v. Stopping the eBPF tracker.", e.Timestamp, lastTimestampV4) + ebpfTracker.dead = true + ebpfTracker.stop() } lastTimestampV4 = e.Timestamp @@ -197,6 +204,10 @@ func (t *EbpfTracker) isReadyToHandleConnections() bool { return t.readyToHandleConnections } +func (t *EbpfTracker) isDead() bool { + return t.dead +} + func (t *EbpfTracker) stop() { // TODO: implement proper stopping logic // diff --git a/probe/endpoint/ebpf_test.go b/probe/endpoint/ebpf_test.go index e813ec2e8..55e0d2f99 100644 --- a/probe/endpoint/ebpf_test.go +++ b/probe/endpoint/ebpf_test.go @@ -179,6 +179,62 @@ func TestWalkConnections(t *testing.T) { cnt++ }) if cnt != 2 { - t.Errorf("walkConnetions found %v instead of 2 connections", cnt) + t.Errorf("walkConnections found %v instead of 2 connections", cnt) + } +} + +func TestInvalidTimeStampDead(t *testing.T) { + var ( + cnt int + ClientPid uint32 = 43 + ServerIP = net.IP("127.0.0.1") + ClientIP = net.IP("127.0.0.2") + ServerPort uint16 = 12345 + ClientPort uint16 = 6789 + NetNS uint32 = 123456789 + event = tracer.TcpV4{ + CPU: 0, + Type: tracer.EventConnect, + Pid: ClientPid, + Comm: "cmd", + SAddr: ClientIP, + DAddr: ServerIP, + SPort: ClientPort, + DPort: ServerPort, + NetNS: NetNS, + } + ) + mockEbpfTracker := &EbpfTracker{ + readyToHandleConnections: true, + dead: false, + openConnections: map[string]ebpfConnection{}, + } + ebpfTracker = mockEbpfTracker + event.Timestamp = 0 + tcpEventCbV4(event) + event2 := event + event2.SPort = 1 + event2.Timestamp = 2 + tcpEventCbV4(event2) + mockEbpfTracker.walkConnections(func(e ebpfConnection) { + cnt++ + }) + if cnt != 2 { + t.Errorf("walkConnections found %v instead of 2 connections", cnt) + } + if mockEbpfTracker.isDead() { + t.Errorf("expected ebpfTracker to be alive after events with valid order") + } + cnt = 0 + event.Timestamp = 1 + tcpEventCbV4(event) + mockEbpfTracker.walkConnections(func(e ebpfConnection) { + cnt++ + }) + if cnt != 2 { + t.Errorf("walkConnections found %v instead of 2 connections", cnt) + } + if !mockEbpfTracker.isDead() { + t.Errorf("expected ebpfTracker to be set to dead after events with wrong order") } } diff --git a/probe/endpoint/procspy/reader_linux.go b/probe/endpoint/procspy/reader_linux.go index 359ca08b5..92b43ca3c 100644 --- a/probe/endpoint/procspy/reader_linux.go +++ b/probe/endpoint/procspy/reader_linux.go @@ -51,11 +51,13 @@ func (br *backgroundReader) getWalkedProcPid(buf *bytes.Buffer) (map[uint64]*Pro br.mtx.Lock() defer br.mtx.Unlock() + var err error // Don't access latestBuf directly but create a reader. In this way, // the buffer will not be empty in the next call of getWalkedProcPid // and it can be copied again. - _, err := io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes())) - + if br.latestBuf != nil { + _, err = io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes())) + } return br.latestSockets, err }