mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-01 17:20:25 +00:00
Report metrics from system-log-monitor
This commit is contained in:
@@ -86,3 +86,30 @@ field in the configuration file is the log path. You can always configure
|
||||
System log monitor uses [Log Watcher](./logwatchers/types/log_watcher.go) to
|
||||
support different log management tools. It is easy to implement a new log
|
||||
watcher.
|
||||
|
||||
## Metrics Reporting
|
||||
|
||||
By setting the boolean `metricsReporting` at top level, you can choose to enable or disable
|
||||
metrics reporting of System Log Monitor. If you omit the field, it will be set to `true` by
|
||||
default.
|
||||
|
||||
Temporary problems will be reported as counter metrics, as in the example below:
|
||||
|
||||
```
|
||||
# HELP problem_counter Number of times a specific type of problem have occurred.
|
||||
# TYPE problem_counter counter
|
||||
problem_counter{reason="TaskHung"} 2
|
||||
```
|
||||
|
||||
Permanent problems will be reported as both gauge metrics and counter metrics, as in the
|
||||
example below:
|
||||
|
||||
```
|
||||
# HELP problem_counter Number of times a specific type of problem have occurred.
|
||||
# TYPE problem_counter counter
|
||||
problem_counter{reason="DockerHung"} 1
|
||||
# HELP problem_gauge Whether a specific type of problem is affecting the node or not.
|
||||
# TYPE problem_gauge gauge
|
||||
problem_gauge{type="KernelDeadlock",reason="DockerHung"} 1
|
||||
```
|
||||
|
||||
|
||||
@@ -24,6 +24,12 @@ import (
|
||||
"k8s.io/node-problem-detector/pkg/types"
|
||||
)
|
||||
|
||||
// Default values that ApplyDefaultConfiguration uses for settings left unset.
var (
|
||||
// defaultBufferSize is used when BufferSize is zero.
defaultBufferSize = 10
|
||||
// defaultLookback is used when WatcherConfig.Lookback is empty.
defaultLookback = "0"
|
||||
// defaultEnableMetricsReporting is used when `metricsReporting` is omitted
// from the configuration (EnableMetricsReporting is nil).
defaultEnableMetricsReporting = true
|
||||
)
|
||||
|
||||
// MonitorConfig is the configuration of log monitor.
|
||||
type MonitorConfig struct {
|
||||
// WatcherConfig is the configuration of log watcher.
|
||||
@@ -36,15 +42,20 @@ type MonitorConfig struct {
|
||||
DefaultConditions []types.Condition `json:"conditions"`
|
||||
// Rules are the rules log monitor will follow to parse the log file.
|
||||
Rules []systemlogtypes.Rule `json:"rules"`
|
||||
// EnableMetricsReporting describes whether to report problems as metrics or not.
|
||||
EnableMetricsReporting *bool `json:"metricsReporting,omitempty"`
|
||||
}
|
||||
|
||||
// ApplyConfiguration applies default configurations.
|
||||
func (mc *MonitorConfig) ApplyDefaultConfiguration() {
|
||||
if mc.BufferSize == 0 {
|
||||
mc.BufferSize = 10
|
||||
mc.BufferSize = defaultBufferSize
|
||||
}
|
||||
if mc.EnableMetricsReporting == nil {
|
||||
mc.EnableMetricsReporting = &defaultEnableMetricsReporting
|
||||
}
|
||||
if mc.WatcherConfig.Lookback == "" {
|
||||
mc.WatcherConfig.Lookback = "0"
|
||||
mc.WatcherConfig.Lookback = defaultLookback
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ import (
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/problemdaemon"
|
||||
"k8s.io/node-problem-detector/pkg/problemmetrics"
|
||||
"k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers"
|
||||
watchertypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/types"
|
||||
logtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
|
||||
@@ -55,9 +56,8 @@ type logMonitor struct {
|
||||
|
||||
// NewLogMonitorOrDie creates a new LogMonitor, panic if error occurs.
|
||||
func NewLogMonitorOrDie(configPath string) types.Monitor {
|
||||
l := &logMonitor{
|
||||
tomb: tomb.NewTomb(),
|
||||
}
|
||||
l := &logMonitor{tomb: tomb.NewTomb()}
|
||||
|
||||
f, err := ioutil.ReadFile(configPath)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to read configuration file %q: %v", configPath, err)
|
||||
@@ -73,13 +73,36 @@ func NewLogMonitorOrDie(configPath string) types.Monitor {
|
||||
glog.Fatalf("Failed to validate matching rules %+v: %v", l.config.Rules, err)
|
||||
}
|
||||
glog.Infof("Finish parsing log monitor config file: %+v", l.config)
|
||||
|
||||
l.watcher = logwatchers.GetLogWatcherOrDie(l.config.WatcherConfig)
|
||||
l.buffer = NewLogBuffer(l.config.BufferSize)
|
||||
// A 1000 size channel should be big enough.
|
||||
l.output = make(chan *types.Status, 1000)
|
||||
|
||||
if *l.config.EnableMetricsReporting {
|
||||
initializeProblemMetricsOrDie(l.config.Rules)
|
||||
}
|
||||
return l
|
||||
}
|
||||
|
||||
// initializeProblemMetricsOrDie creates problem metrics for all problems and set the value to 0,
|
||||
// panic if error occurs.
|
||||
func initializeProblemMetricsOrDie(rules []systemlogtypes.Rule) {
|
||||
for _, rule := range rules {
|
||||
if rule.Type == types.Perm {
|
||||
err := problemmetrics.GlobalProblemMetricsManager.SetProblemGauge(rule.Condition, rule.Reason, false)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to initialize problem gauge metrics for problem %q, reason %q: %v",
|
||||
rule.Condition, rule.Reason, err)
|
||||
}
|
||||
}
|
||||
err := problemmetrics.GlobalProblemMetricsManager.IncrementProblemCounter(rule.Reason, 0)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to initialize problem counter metrics for %q: %v", rule.Reason, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *logMonitor) Start() (<-chan *types.Status, error) {
|
||||
glog.Info("Start log monitor")
|
||||
var err error
|
||||
@@ -142,6 +165,12 @@ func (l *logMonitor) generateStatus(logs []*logtypes.Log, rule systemlogtypes.Ru
|
||||
Reason: rule.Reason,
|
||||
Message: message,
|
||||
})
|
||||
if *l.config.EnableMetricsReporting {
|
||||
err := problemmetrics.GlobalProblemMetricsManager.IncrementProblemCounter(rule.Reason, 1)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to update problem counter metrics for %q: %v", rule.Reason, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// For permanent error changes the condition
|
||||
for i := range l.conditions {
|
||||
@@ -159,6 +188,18 @@ func (l *logMonitor) generateStatus(logs []*logtypes.Log, rule systemlogtypes.Ru
|
||||
rule.Reason,
|
||||
timestamp,
|
||||
))
|
||||
|
||||
if *l.config.EnableMetricsReporting {
|
||||
err := problemmetrics.GlobalProblemMetricsManager.SetProblemGauge(rule.Condition, rule.Reason, true)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to update problem gauge metrics for problem %q, reason %q: %v",
|
||||
rule.Condition, rule.Reason, err)
|
||||
}
|
||||
err = problemmetrics.GlobalProblemMetricsManager.IncrementProblemCounter(rule.Reason, 1)
|
||||
if err != nil {
|
||||
glog.Errorf("Failed to update problem counter metrics for %q: %v", rule.Reason, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
condition.Status = types.True
|
||||
condition.Reason = rule.Reason
|
||||
@@ -166,6 +207,7 @@ func (l *logMonitor) generateStatus(logs []*logtypes.Log, rule systemlogtypes.Ru
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &types.Status{
|
||||
Source: l.config.Source,
|
||||
// TODO(random-liu): Aggregate events and conditions and then do periodically report.
|
||||
|
||||
@@ -24,9 +24,11 @@ import (
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/problemdaemon"
|
||||
"k8s.io/node-problem-detector/pkg/problemmetrics"
|
||||
logtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
|
||||
"k8s.io/node-problem-detector/pkg/types"
|
||||
"k8s.io/node-problem-detector/pkg/util"
|
||||
"k8s.io/node-problem-detector/pkg/util/metrics"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -41,7 +43,7 @@ func TestRegistration(t *testing.T) {
|
||||
"System log monitor failed to register itself as a problem daemon.")
|
||||
}
|
||||
|
||||
func TestGenerateStatus(t *testing.T) {
|
||||
func TestGenerateStatusForConditions(t *testing.T) {
|
||||
initConditions := []types.Condition{
|
||||
{
|
||||
Type: testConditionA,
|
||||
@@ -141,9 +143,558 @@ func TestGenerateStatus(t *testing.T) {
|
||||
// during the test.
|
||||
conditions: append([]types.Condition{}, initConditions...),
|
||||
}
|
||||
(&l.config).ApplyDefaultConfiguration()
|
||||
got := l.generateStatus(logs, test.rule)
|
||||
if !reflect.DeepEqual(&test.expected, got) {
|
||||
t.Errorf("case %d: expected status %+v, got %+v", c+1, test.expected, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateStatusForMetrics(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
conditions []types.Condition
|
||||
triggeredRules []logtypes.Rule
|
||||
expectedMetrics []metrics.Int64MetricRepresentation
|
||||
}{
|
||||
{
|
||||
name: "one temporary problem that has not happened",
|
||||
conditions: []types.Condition{},
|
||||
triggeredRules: []logtypes.Rule{},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{},
|
||||
},
|
||||
{
|
||||
name: "one temporary problem happened once",
|
||||
conditions: []types.Condition{},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one temporary problem happened twice",
|
||||
conditions: []types.Condition{},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two different temporary problems happened",
|
||||
conditions: []types.Condition{},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem that is happening",
|
||||
conditions: []types.Condition{
|
||||
{
|
||||
Type: "ConditionA",
|
||||
Status: types.False,
|
||||
},
|
||||
},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem observed twice with same reason",
|
||||
conditions: []types.Condition{
|
||||
{
|
||||
Type: "ConditionA",
|
||||
Status: types.False,
|
||||
},
|
||||
},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem observed twice with different reasons",
|
||||
conditions: []types.Condition{
|
||||
{
|
||||
Type: "ConditionA",
|
||||
Status: types.False,
|
||||
},
|
||||
},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason bar"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two permanent problem observed once each",
|
||||
conditions: []types.Condition{
|
||||
{
|
||||
Type: "ConditionA",
|
||||
Status: types.False,
|
||||
},
|
||||
{
|
||||
Type: "ConditionB",
|
||||
Status: types.False,
|
||||
},
|
||||
},
|
||||
triggeredRules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionB",
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionB", "reason": "problem reason bar"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 1,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
l := &logMonitor{}
|
||||
l.conditions = test.conditions
|
||||
(&l.config).ApplyDefaultConfiguration()
|
||||
|
||||
originalGlobalProblemMetricsManager := problemmetrics.GlobalProblemMetricsManager
|
||||
defer func() {
|
||||
problemmetrics.GlobalProblemMetricsManager = originalGlobalProblemMetricsManager
|
||||
}()
|
||||
|
||||
fakePMM, fakeProblemCounter, fakeProblemGauge := problemmetrics.NewProblemMetricsManagerStub()
|
||||
problemmetrics.GlobalProblemMetricsManager = fakePMM
|
||||
|
||||
for _, rule := range test.triggeredRules {
|
||||
l.generateStatus([]*logtypes.Log{{}}, rule)
|
||||
}
|
||||
|
||||
gotMetrics := append(fakeProblemCounter.ListMetrics(), fakeProblemGauge.ListMetrics()...)
|
||||
|
||||
assert.ElementsMatch(t, test.expectedMetrics, gotMetrics,
|
||||
"expected metrics: %+v, got: %+v", test.expectedMetrics, gotMetrics)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestInitializeProblemMetricsOrDie(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
rules []logtypes.Rule
|
||||
expectedMetrics []metrics.Int64MetricRepresentation
|
||||
}{
|
||||
{
|
||||
name: "no problem type at all",
|
||||
rules: []logtypes.Rule{},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{},
|
||||
},
|
||||
{
|
||||
name: "one type of temporary problem",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one type of permanent problem",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "duplicate temporary problem types",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple temporary problem types",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple permanent problem types with same condition",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple permanent problem types with different conditions",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionB",
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionB", "reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "duplicate permanent problem types",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "mixture of temporary and permanent problem types",
|
||||
rules: []logtypes.Rule{
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason hello",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionA",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionB",
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Perm,
|
||||
Condition: "ConditionB",
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason foo",
|
||||
},
|
||||
{
|
||||
Type: types.Temp,
|
||||
Reason: "problem reason bar",
|
||||
},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason hello"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionA", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionB", "reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ConditionB", "reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason hello"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "problem reason bar"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
l := &logMonitor{}
|
||||
(&l.config).ApplyDefaultConfiguration()
|
||||
|
||||
originalGlobalProblemMetricsManager := problemmetrics.GlobalProblemMetricsManager
|
||||
defer func() {
|
||||
problemmetrics.GlobalProblemMetricsManager = originalGlobalProblemMetricsManager
|
||||
}()
|
||||
|
||||
fakePMM, fakeProblemCounter, fakeProblemGauge := problemmetrics.NewProblemMetricsManagerStub()
|
||||
problemmetrics.GlobalProblemMetricsManager = fakePMM
|
||||
|
||||
initializeProblemMetricsOrDie(test.rules)
|
||||
|
||||
gotMetrics := append(fakeProblemCounter.ListMetrics(), fakeProblemGauge.ListMetrics()...)
|
||||
|
||||
assert.ElementsMatch(t, test.expectedMetrics, gotMetrics,
|
||||
"expected metrics: %+v, got: %+v", test.expectedMetrics, gotMetrics)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,8 +17,9 @@ limitations under the License.
|
||||
package types
|
||||
|
||||
import (
|
||||
"k8s.io/node-problem-detector/pkg/types"
|
||||
"time"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/types"
|
||||
)
|
||||
|
||||
// Log is the log item returned by translator. It's very easy to extend this
|
||||
|
||||
Reference in New Issue
Block a user