Merge pull request #2336 from kinvolk/schu/timestamp-fallback

Fall back to proc when ebpf timestamps are wrong
This commit is contained in:
Alfonso Acosta
2017-03-21 10:35:22 +01:00
committed by GitHub
4 changed files with 85 additions and 6 deletions

View File

@@ -88,8 +88,18 @@ func (t *connectionTracker) ReportConnections(rpt *report.Report) {
hostNodeID := report.MakeHostNodeID(t.conf.HostID)
if t.ebpfTracker != nil {
t.performEbpfTrack(rpt, hostNodeID)
return
if !t.ebpfTracker.isDead() {
t.performEbpfTrack(rpt, hostNodeID)
return
}
log.Warnf("ebpf tracker died, gently falling back to proc scanning")
if t.conf.WalkProc && t.conf.Scanner == nil {
t.conf.Scanner = procspy.NewConnectionScanner(t.conf.ProcessCache)
}
if t.flowWalker == nil {
t.flowWalker = newConntrackFlowWalker(t.conf.UseConntrack, t.conf.ProcRoot, t.conf.BufferSize, "--any-nat")
}
t.ebpfTracker = nil
}
// seenTuples contains information about connections seen by conntrack and it will be passed to the /proc parser

View File

@@ -25,6 +25,7 @@ type eventTracker interface {
walkConnections(f func(ebpfConnection))
feedInitialConnections(ci procspy.ConnIter, seenTuples map[string]fourTuple, hostNodeID string)
isReadyToHandleConnections() bool
isDead() bool
stop()
}
@@ -99,7 +100,13 @@ var lastTimestampV4 uint64
func tcpEventCbV4(e tracer.TcpV4) {
if lastTimestampV4 > e.Timestamp {
log.Errorf("ERROR: late event!\n")
// A kernel bug can cause the timestamps to be wrong (e.g. on Ubuntu with Linux 4.4.0-47.68)
// Upgrading the kernel will fix the problem. For further info see:
// https://github.com/iovisor/bcc/issues/790#issuecomment-263704235
// https://github.com/weaveworks/scope/issues/2334
log.Errorf("tcp tracer received event with timestamp %v even though the last timestamp was %v. Stopping the eBPF tracker.", e.Timestamp, lastTimestampV4)
ebpfTracker.dead = true
ebpfTracker.stop()
}
lastTimestampV4 = e.Timestamp
@@ -197,6 +204,10 @@ func (t *EbpfTracker) isReadyToHandleConnections() bool {
return t.readyToHandleConnections
}
// isDead reports whether the tracker has been marked dead. tcpEventCbV4 sets
// the flag when it receives an event whose timestamp is older than the
// previously seen one.
func (t *EbpfTracker) isDead() bool {
return t.dead
}
func (t *EbpfTracker) stop() {
// TODO: implement proper stopping logic
//

View File

@@ -179,6 +179,62 @@ func TestWalkConnections(t *testing.T) {
cnt++
})
if cnt != 2 {
t.Errorf("walkConnetions found %v instead of 2 connections", cnt)
t.Errorf("walkConnections found %v instead of 2 connections", cnt)
}
}
// TestInvalidTimeStampDead checks that the eBPF tracker stays alive while
// events arrive with increasing timestamps, and that it is marked dead as soon
// as an event carries a timestamp older than the previous one.
//
// NOTE: tcpEventCbV4 compares each event against the package-level
// lastTimestampV4, so the first event below (timestamp 0) is only treated as
// in-order if no earlier caller left that variable above 0.
func TestInvalidTimeStampDead(t *testing.T) {
	var (
		clientPID uint32 = 43
		// net.ParseIP is required here: net.IP("127.0.0.1") would merely
		// reinterpret the string's ASCII bytes as a (malformed) address.
		serverIP          = net.ParseIP("127.0.0.1")
		clientIP          = net.ParseIP("127.0.0.2")
		serverPort uint16 = 12345
		clientPort uint16 = 6789
		netNS      uint32 = 123456789
		event             = tracer.TcpV4{
			CPU:   0,
			Type:  tracer.EventConnect,
			Pid:   clientPID,
			Comm:  "cmd",
			SAddr: clientIP,
			DAddr: serverIP,
			SPort: clientPort,
			DPort: serverPort,
			NetNS: netNS,
		}
	)

	mockEbpfTracker := &EbpfTracker{
		readyToHandleConnections: true,
		dead:                     false,
		openConnections:          map[string]ebpfConnection{},
	}
	// tcpEventCbV4 operates on the package-level tracker, so install the mock there.
	ebpfTracker = mockEbpfTracker

	// countConnections returns how many connections the tracker currently reports.
	countConnections := func() int {
		cnt := 0
		mockEbpfTracker.walkConnections(func(e ebpfConnection) {
			cnt++
		})
		return cnt
	}

	// Two connect events in chronological order (timestamps 0 and 2) that
	// differ only in their source port.
	event.Timestamp = 0
	tcpEventCbV4(event)
	event2 := event
	event2.SPort = 1
	event2.Timestamp = 2
	tcpEventCbV4(event2)

	if cnt := countConnections(); cnt != 2 {
		t.Errorf("walkConnections found %v instead of 2 connections", cnt)
	}
	if mockEbpfTracker.isDead() {
		t.Errorf("expected ebpfTracker to be alive after events with valid order")
	}

	// Replay the first event with timestamp 1, which is older than the last
	// seen timestamp (2): the tracker must now be flagged as dead.
	event.Timestamp = 1
	tcpEventCbV4(event)

	if cnt := countConnections(); cnt != 2 {
		t.Errorf("walkConnections found %v instead of 2 connections", cnt)
	}
	if !mockEbpfTracker.isDead() {
		t.Errorf("expected ebpfTracker to be set to dead after events with wrong order")
	}
}

View File

@@ -51,11 +51,13 @@ func (br *backgroundReader) getWalkedProcPid(buf *bytes.Buffer) (map[uint64]*Pro
br.mtx.Lock()
defer br.mtx.Unlock()
var err error
// Don't access latestBuf directly but create a reader. In this way,
// the buffer will not be empty in the next call of getWalkedProcPid
// and it can be copied again.
_, err := io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes()))
if br.latestBuf != nil {
_, err = io.Copy(buf, bytes.NewReader(br.latestBuf.Bytes()))
}
return br.latestSockets, err
}