Add arbitrary daemon log support

This commit is contained in:
Random-Liu
2017-02-03 03:06:44 -08:00
parent 5e563930c0
commit 27cc831408
10 changed files with 272 additions and 105 deletions

View File

@@ -0,0 +1,14 @@
{
"plugin": "syslog",
"pluginConfig": {
"timestamp": "^time=\"(\\S*)\"",
"message": "msg=\"([^\n]*)\"",
"timestampFormat": "2006-01-02T15:04:05.999999999-07:00"
},
"logPath": "/var/log/docker.log",
"lookback": "5m",
"bufferSize": 10,
"source": "docker-monitor",
"conditions": [],
"rules": []
}

View File

@@ -0,0 +1,12 @@
{
"plugin": "journald",
"pluginConfig": {
"source": "docker"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,
"source": "docker-monitor",
"conditions": [],
"rules": []
}

View File

@@ -0,0 +1,58 @@
{
"plugin": "syslog",
"pluginConfig": {
"timestamp": "^.{15}",
"message": "kernel: \\[.*\\] (.*)",
"timestampFormat": "Jan _2 15:04:05"
},
"logPath": "/var/log/kern.log",
"lookback": "5m",
"bufferSize": 10,
"source": "kernel-monitor",
"conditions": [
{
"type": "KernelDeadlock",
"reason": "KernelHasNoDeadlock",
"message": "kernel has no deadlock"
}
],
"rules": [
{
"type": "temporary",
"reason": "OOMKilling",
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB"
},
{
"type": "temporary",
"reason": "TaskHung",
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "temporary",
"reason": "UnregisterNetDevice",
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "AUFSUmountHung",
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "DockerHung",
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
}
]
}

View File

@@ -1,5 +1,8 @@
{
"plugin": "journald",
"pluginConfig": {
"source": "kernel"
},
"logPath": "/var/log/journal",
"lookback": "5m",
"bufferSize": 10,

View File

@@ -114,8 +114,13 @@ func (j *journaldWatcher) watchLoop() {
}
}
// defaultJournalLogPath is the default path of journal log.
const defaultJournalLogPath = "/var/log/journal"
const (
// defaultJournalLogPath is the default path of journal log.
defaultJournalLogPath = "/var/log/journal"
// configSourceKey is the key of source configuration in the plugin configuration.
configSourceKey = "source"
)
// getJournal returns a journal client.
func getJournal(cfg types.WatcherConfig) (*sdjournal.Journal, error) {
@@ -140,14 +145,18 @@ func getJournal(cfg types.WatcherConfig) (*sdjournal.Journal, error) {
if err != nil {
return nil, fmt.Errorf("failed to lookback %q: %v", since, err)
}
// TODO(random-liu): Make this configurable to support parsing other logs.
kernelMatch := sdjournal.Match{
Field: sdjournal.SD_JOURNAL_FIELD_TRANSPORT,
Value: "kernel",
// Empty source is not allowed and treated as an error.
source := cfg.PluginConfig[configSourceKey]
if source == "" {
return nil, fmt.Errorf("failed to filter journal log, empty source is not allowed")
}
err = journal.AddMatch(kernelMatch.String())
match := sdjournal.Match{
Field: sdjournal.SD_JOURNAL_FIELD_SYSLOG_IDENTIFIER,
Value: source,
}
err = journal.AddMatch(match.String())
if err != nil {
return nil, fmt.Errorf("failed to add log filter %#v: %v", kernelMatch, err)
return nil, fmt.Errorf("failed to add log filter %#v: %v", match, err)
}
return journal, nil
}

View File

@@ -17,86 +17,88 @@ package syslog
import (
"fmt"
"io"
"os"
"strings"
"regexp"
"time"
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
"github.com/google/cadvisor/utils/tail"
"github.com/golang/glog"
)
// translate translates the log line into internal type.
func translate(line string) (*kerntypes.KernelLog, error) {
timestamp, message, err := parseLine(line)
if err != nil {
return nil, err
// translator translates log line into internal log type based on user defined
// regular expression.
type translator struct {
timestampRegexp *regexp.Regexp
messageRegexp *regexp.Regexp
timestampFormat string
}
const (
// NOTE that we support submatch for both timestamp and message regular expressions. When
// there are multiple matches returned by submatch, only **the last** is used.
// timestampKey is the key of timestamp regular expression in the plugin configuration.
timestampKey = "timestamp"
// messageKey is the key of message regular expression in the plugin configuration.
messageKey = "message"
// timestampFormatKey is the key of timestamp format string in the plugin configuration.
timestampFormatKey = "timestampFormat"
)
func newTranslatorOrDie(pluginConfig map[string]string) *translator {
if err := validatePluginConfig(pluginConfig); err != nil {
glog.Errorf("Failed to validate plugin configuration %+v: %v", pluginConfig, err)
}
return &translator{
timestampRegexp: regexp.MustCompile(pluginConfig[timestampKey]),
messageRegexp: regexp.MustCompile(pluginConfig[messageKey]),
timestampFormat: pluginConfig[timestampFormatKey],
}
}
// translate translates the log line into internal type.
func (t *translator) translate(line string) (*kerntypes.KernelLog, error) {
// Parse timestamp.
matches := t.timestampRegexp.FindStringSubmatch(line)
if len(matches) == 0 {
return nil, fmt.Errorf("no timestamp found in line %q with regular expression %v", line, t.timestampRegexp)
}
timestamp, err := time.ParseInLocation(t.timestampFormat, matches[len(matches)-1], time.Local)
if err != nil {
return nil, fmt.Errorf("failed to parse timestamp %q: %v", matches[len(matches)-1], err)
}
// Formalize the timestmap.
timestamp = formalizeTimestamp(timestamp)
// Parse message.
matches = t.messageRegexp.FindStringSubmatch(line)
if len(matches) == 0 {
return nil, fmt.Errorf("no message found in line %q with regular expression %v", line, t.messageRegexp)
}
message := matches[len(matches)-1]
return &kerntypes.KernelLog{
Timestamp: timestamp,
Message: message,
}, nil
}
const (
// timestampLen is the length of timestamp in syslog logging format.
timestampLen = 15
// messagePrefix is the character before real message.
messagePrefix = "]"
)
// parseLine parses one log line into timestamp and message.
func parseLine(line string) (time.Time, string, error) {
// Trim the spaces to make sure timestamp could be found
line = strings.TrimSpace(line)
if len(line) < timestampLen {
return time.Time{}, "", fmt.Errorf("the line is too short: %q", line)
// validatePluginConfig validates whether the plugin configuration.
func validatePluginConfig(cfg map[string]string) error {
if cfg[timestampKey] == "" {
return fmt.Errorf("unexpected empty timestamp regular expression")
}
// Example line: Jan 1 00:00:00 hostname kernel: [0.000000] component: log message
now := time.Now()
// There is no time zone information in kernel log timestamp, apply the current time
// zone.
timestamp, err := time.ParseInLocation(time.Stamp, line[:timestampLen], time.Local)
if err != nil {
return time.Time{}, "", fmt.Errorf("error parsing timestamp in line %q: %v", line, err)
if cfg[messageKey] == "" {
return fmt.Errorf("unexpected empty message regular expression")
}
// There is no year information in kernel log timestamp, apply the current year.
// This could go wrong during looking back phase after kernel monitor is started,
// and the old logs are generated in old year.
timestamp = timestamp.AddDate(now.Year(), 0, 0)
loc := strings.Index(line, messagePrefix)
if loc == -1 {
return timestamp, "", fmt.Errorf("can't find message prefix %q in line %q", messagePrefix, line)
if cfg[timestampFormatKey] == "" {
return fmt.Errorf("unexpected empty timestamp format string")
}
message := strings.Trim(line[loc+1:], " ")
return timestamp, message, nil
return nil
}
// defaultKernelLogPath the default path of syslog kernel log.
const defaultKernelLogPath = "/var/log/kern.log"
// getLogReader returns log reader for syslog log. Note that getLogReader doesn't look back
// to the rolled out logs.
func getLogReader(path string) (io.ReadCloser, error) {
if path == "" {
path = defaultKernelLogPath
// formalizeTimestamp formalizes the timestamp. We need this because some log doesn't contain full
// timestamp, e.g. syslog.
func formalizeTimestamp(t time.Time) time.Time {
if t.Year() == 0 {
t = t.AddDate(time.Now().Year(), 0, 0)
}
// To handle log rotation, tail will not report error immediately if
// the file doesn't exist. So we check file existence frist.
// This could go wrong during mid-rotation. It should recover after
// several restart when the log file is created again. The chance
// is slim but we should still fix this in the future.
// TODO(random-liu): Handle log missing during rotation.
_, err := os.Stat(path)
if err != nil {
return nil, fmt.Errorf("failed to stat the file %q: %v", path, err)
}
tail, err := tail.NewTail(path)
if err != nil {
return nil, fmt.Errorf("failed to tail the file %q: %v", path, err)
}
return tail, nil
return t
}

View File

@@ -21,6 +21,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
)
@@ -28,12 +29,18 @@ import (
func TestTranslate(t *testing.T) {
year := time.Now().Year()
testCases := []struct {
input string
err bool
log *kerntypes.KernelLog
config map[string]string
input string
err bool
log *kerntypes.KernelLog
}{
{
input: "May 1 12:23:45 hostname kernel: [0.000000] component: log message",
// missing year and timezone
// "timestamp": "^.{15}",
// "message": "kernel \\[.*\\] (.*)",
// "timestampFormat": "Jan _2 15:04:05",
config: getTestPluginConfig(),
input: "May 1 12:23:45 hostname kernel: [0.000000] component: log message",
log: &kerntypes.KernelLog{
Timestamp: time.Date(year, time.May, 1, 12, 23, 45, 0, time.Local),
Message: "component: log message",
@@ -41,7 +48,8 @@ func TestTranslate(t *testing.T) {
},
{
// no log message
input: "May 21 12:23:45 hostname kernel: [9.999999]",
config: getTestPluginConfig(),
input: "May 21 12:23:45 hostname kernel: [9.999999] ",
log: &kerntypes.KernelLog{
Timestamp: time.Date(year, time.May, 21, 12, 23, 45, 0, time.Local),
Message: "",
@@ -49,18 +57,36 @@ func TestTranslate(t *testing.T) {
},
{
// the right square bracket is missing
input: "May 21 12:23:45 hostname kernel: [9.999999 component: log message",
err: true,
config: getTestPluginConfig(),
input: "May 21 12:23:45 hostname kernel: [9.999999 component: log message",
err: true,
},
{
// contains full timestamp
config: map[string]string{
"timestamp": "^time=\"(\\S*)\"",
"message": "msg=\"([^\n]*)\"",
"timestampFormat": "2006-01-02T15:04:05.999999999-07:00",
},
input: `time="2017-02-01T17:58:34.999999999-08:00" level=error msg="test log line1\n test log line2"`,
log: &kerntypes.KernelLog{
Timestamp: time.Date(2017, 2, 1, 17, 58, 34, 999999999, time.FixedZone("PST", -8*3600)),
Message: `test log line1\n test log line2`,
},
},
}
for c, test := range testCases {
t.Logf("TestCase #%d: %#v", c+1, test)
log, err := translate(test.input)
if (err != nil) != test.err {
t.Errorf("case %d: error assertion failed, got log: %+v, error: %v", c+1, log, err)
continue
trans := newTranslatorOrDie(test.config)
log, err := trans.translate(test.input)
if !test.err {
require.NoError(t, err)
// Use RFC3339Nano to make it easier for comparison.
assert.Equal(t, test.log.Timestamp.Format(time.RFC3339Nano), log.Timestamp.Format(time.RFC3339Nano))
assert.Equal(t, test.log.Message, log.Message)
} else {
require.Error(t, err)
}
assert.Equal(t, test.log, log)
}
}

View File

@@ -19,12 +19,16 @@ package syslog
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"strings"
"syscall"
"time"
utilclock "code.cloudfoundry.org/clock"
"github.com/golang/glog"
"github.com/google/cadvisor/utils/tail"
"k8s.io/node-problem-detector/pkg/kernelmonitor/logwatchers/types"
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
@@ -32,13 +36,14 @@ import (
)
type syslogWatcher struct {
cfg types.WatcherConfig
reader *bufio.Reader
closer io.Closer
logCh chan *kerntypes.KernelLog
uptime time.Time
tomb *util.Tomb
clock utilclock.Clock
cfg types.WatcherConfig
reader *bufio.Reader
closer io.Closer
translator *translator
logCh chan *kerntypes.KernelLog
uptime time.Time
tomb *util.Tomb
clock utilclock.Clock
}
// NewSyslogWatcherOrDie creates a new kernel log watcher. The function panics
@@ -49,9 +54,10 @@ func NewSyslogWatcherOrDie(cfg types.WatcherConfig) types.LogWatcher {
glog.Fatalf("Failed to get system info: %v", err)
}
return &syslogWatcher{
cfg: cfg,
uptime: time.Now().Add(time.Duration(-info.Uptime * int64(time.Second))),
tomb: util.NewTomb(),
cfg: cfg,
translator: newTranslatorOrDie(cfg.PluginConfig),
uptime: time.Now().Add(time.Duration(-info.Uptime * int64(time.Second))),
tomb: util.NewTomb(),
// A capacity 1000 buffer should be enough
logCh: make(chan *kerntypes.KernelLog, 1000),
clock: utilclock.NewClock(),
@@ -116,7 +122,7 @@ func (s *syslogWatcher) watchLoop() {
}
line = buffer.String()
buffer.Reset()
log, err := translate(line)
log, err := s.translator.translate(strings.TrimSuffix(line, "\n"))
if err != nil {
glog.Warningf("Unable to parse line: %q, %v", line, err)
continue
@@ -128,3 +134,26 @@ func (s *syslogWatcher) watchLoop() {
s.logCh <- log
}
}
// getLogReader returns log reader for syslog log. Note that getLogReader doesn't look back
// to the rolled out logs.
func getLogReader(path string) (io.ReadCloser, error) {
if path == "" {
return nil, fmt.Errorf("unexpected empty log path")
}
// To handle log rotation, tail will not report error immediately if
// the file doesn't exist. So we check file existence first.
// This could go wrong during mid-rotation. It should recover after
// several restart when the log file is created again. The chance
// is slim but we should still fix this in the future.
// TODO(random-liu): Handle log missing during rotation.
_, err := os.Stat(path)
if err != nil {
return nil, fmt.Errorf("failed to stat the file %q: %v", path, err)
}
tail, err := tail.NewTail(path)
if err != nil {
return nil, fmt.Errorf("failed to tail the file %q: %v", path, err)
}
return tail, nil
}

View File

@@ -29,6 +29,16 @@ import (
"github.com/stretchr/testify/assert"
)
// getTestPluginConfig returns a plugin config for test. Use configuration for
// kernel log in test.
func getTestPluginConfig() map[string]string {
return map[string]string{
"timestamp": "^.{15}",
"message": "kernel: \\[.*\\] (.*)",
"timestampFormat": "Jan _2 15:04:05",
}
}
func TestWatch(t *testing.T) {
// now is a fake time
now := time.Date(time.Now().Year(), time.January, 2, 3, 4, 5, 0, time.Local)
@@ -42,8 +52,8 @@ func TestWatch(t *testing.T) {
{
// The start point is at the head of the log file.
log: `Jan 2 03:04:05 kernel: [0.000000] 1
Jan 2 03:04:06 kernel: [1.000000] 2
Jan 2 03:04:07 kernel: [2.000000] 3
Jan 2 03:04:06 kernel: [1.000000] 2
Jan 2 03:04:07 kernel: [2.000000] 3
`,
lookback: "0",
logs: []kerntypes.KernelLog{
@@ -64,8 +74,8 @@ func TestWatch(t *testing.T) {
{
// The start point is in the middle of the log file.
log: `Jan 2 03:04:04 kernel: [0.000000] 1
Jan 2 03:04:05 kernel: [1.000000] 2
Jan 2 03:04:06 kernel: [2.000000] 3
Jan 2 03:04:05 kernel: [1.000000] 2
Jan 2 03:04:06 kernel: [2.000000] 3
`,
lookback: "0",
logs: []kerntypes.KernelLog{
@@ -82,8 +92,8 @@ func TestWatch(t *testing.T) {
{
// The start point is at the end of the log file, but we look back.
log: `Jan 2 03:04:03 kernel: [0.000000] 1
Jan 2 03:04:04 kernel: [1.000000] 2
Jan 2 03:04:05 kernel: [2.000000] 3
Jan 2 03:04:04 kernel: [1.000000] 2
Jan 2 03:04:05 kernel: [2.000000] 3
`,
lookback: "1s",
logs: []kerntypes.KernelLog{
@@ -101,8 +111,8 @@ func TestWatch(t *testing.T) {
// The start point is at the end of the log file, we look back, but
// system rebooted at in the middle of the log file.
log: `Jan 2 03:04:03 kernel: [0.000000] 1
Jan 2 03:04:04 kernel: [1.000000] 2
Jan 2 03:04:05 kernel: [2.000000] 3
Jan 2 03:04:04 kernel: [1.000000] 2
Jan 2 03:04:05 kernel: [2.000000] 3
`,
uptime: time.Date(time.Now().Year(), time.January, 2, 3, 4, 4, 0, time.Local),
lookback: "2s",
@@ -130,9 +140,10 @@ func TestWatch(t *testing.T) {
assert.NoError(t, err)
w := NewSyslogWatcherOrDie(types.WatcherConfig{
Plugin: "syslog",
LogPath: f.Name(),
Lookback: test.lookback,
Plugin: "syslog",
PluginConfig: getTestPluginConfig(),
LogPath: f.Name(),
Lookback: test.lookback,
})
// Set the uptime.
w.(*syslogWatcher).uptime = test.uptime

View File

@@ -33,6 +33,9 @@ type WatcherConfig struct {
// Plugin is the name of plugin which is currently used.
// Currently supported: syslog, journald.
Plugin string `json:"plugin, omitempty"`
// PluginConfig is a key/value configuration of a plugin. Valid configurations
// are defined in different log watcher plugin.
PluginConfig map[string]string `json:"pluginConfig, omitempty"`
// LogPath is the path to the log
LogPath string `json:"logPath, omitempty"`
// Lookback is the time kernel watcher looks up