Files
node-problem-detector/pkg/systemlogmonitor/log_monitor_test.go
2025-09-08 19:10:28 +00:00

739 lines
18 KiB
Go

/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package systemlogmonitor
import (
"reflect"
"testing"
"time"
"github.com/stretchr/testify/assert"
"k8s.io/node-problem-detector/pkg/problemdaemon"
"k8s.io/node-problem-detector/pkg/problemmetrics"
systemlogtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
"k8s.io/node-problem-detector/pkg/types"
"k8s.io/node-problem-detector/pkg/util"
"k8s.io/node-problem-detector/pkg/util/metrics"
)
// Identifiers shared by the test fixtures in this file.
const (
	// testSource is the Source value given to the monitor config and expected
	// back in generated statuses.
	testSource = "TestSource"

	// testConditionA and testConditionB are the condition types used to build
	// the initial conditions and the permanent rules under test.
	testConditionA = "TestConditionA"
	testConditionB = "TestConditionB"
)
func TestRegistration(t *testing.T) {
assert.NotPanics(t,
func() { problemdaemon.GetProblemDaemonHandlerOrDie("system-log-monitor") },
"System log monitor failed to register itself as a problem daemon.")
}
// TestGenerateStatusForConditions feeds generateStatus two log entries and a
// single rule, starting from a fixed pair of initial conditions, and compares
// the returned status against the expected one.
//
// Every case runs as a named subtest so that a failure identifies the scenario
// by name and individual cases can be selected with -run.
func TestGenerateStatusForConditions(t *testing.T) {
	initConditions := []types.Condition{
		{
			Type:       testConditionA,
			Status:     types.True,
			Transition: time.Unix(500, 500),
			Reason:     "initial reason",
		},
		{
			Type:       testConditionB,
			Status:     types.False,
			Transition: time.Unix(500, 500),
		},
	}
	logs := []*systemlogtypes.Log{
		{
			Timestamp: time.Unix(1000, 1000),
			Message:   "test message 1",
		},
		{
			Timestamp: time.Unix(2000, 2000),
			Message:   "test message 2",
		},
	}
	testCases := []struct {
		name     string
		rule     systemlogtypes.Rule
		expected types.Status
	}{
		// Do not need Pattern because we don't do pattern match in this test
		{
			name: "permanent rule with a new reason updates the condition",
			rule: systemlogtypes.Rule{
				Type:      types.Perm,
				Condition: testConditionA,
				Reason:    "test reason",
			},
			expected: types.Status{
				Source: testSource,
				Events: []types.Event{util.GenerateConditionChangeEvent(
					testConditionA,
					types.True,
					"test reason",
					"test message 1\ntest message 2",
					time.Unix(1000, 1000),
				)},
				Conditions: []types.Condition{
					{
						Type:       testConditionA,
						Status:     types.True,
						Transition: time.Unix(1000, 1000),
						Reason:     "test reason",
						Message:    "test message 1\ntest message 2",
					},
					initConditions[1],
				},
			},
		},
		// Should not update transition time when status and reason are not changed.
		{
			name: "permanent rule with unchanged status and reason keeps transition time",
			rule: systemlogtypes.Rule{
				Type:      types.Perm,
				Condition: testConditionA,
				Reason:    "initial reason",
			},
			expected: types.Status{
				Source: testSource,
				Conditions: []types.Condition{
					{
						Type:       testConditionA,
						Status:     types.True,
						Transition: time.Unix(500, 500),
						Reason:     "initial reason",
					},
					initConditions[1],
				},
			},
		},
		{
			name: "temporary rule emits an event and leaves conditions as they were",
			rule: systemlogtypes.Rule{
				Type:   types.Temp,
				Reason: "test reason",
			},
			expected: types.Status{
				Source: testSource,
				Events: []types.Event{{
					Severity:  types.Warn,
					Timestamp: time.Unix(1000, 1000),
					Reason:    "test reason",
					Message:   "test message 1\ntest message 2",
				}},
				Conditions: initConditions,
			},
		},
	}
	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			l := &logMonitor{
				config: MonitorConfig{
					Source: testSource,
				},
				// Copy the init conditions to make sure it's not changed
				// during the test.
				conditions: append([]types.Condition{}, initConditions...),
			}
			(&l.config).ApplyDefaultConfiguration()
			got := l.generateStatus(logs, test.rule)
			if !reflect.DeepEqual(&test.expected, got) {
				t.Errorf("expected status %+v, got %+v", test.expected, got)
			}
		})
	}
}
// TestGenerateStatusForMetrics calls generateStatus once per triggered rule
// against a stubbed global problem metrics manager, then checks that the
// counter and gauge metrics listed by the stubs match the expected set,
// ignoring order.
func TestGenerateStatusForMetrics(t *testing.T) {
	const (
		counterName = "problem_counter"
		gaugeName   = "problem_gauge"
		conditionA  = "ConditionA"
		conditionB  = "ConditionB"
		reasonFoo   = "problem reason foo"
		reasonBar   = "problem reason bar"
	)
	// counterLabels builds the label set expected on a problem_counter entry.
	counterLabels := func(reason string) map[string]string {
		return map[string]string{"reason": reason}
	}
	// gaugeLabels builds the label set expected on a problem_gauge entry.
	gaugeLabels := func(condition, reason string) map[string]string {
		return map[string]string{"type": condition, "reason": reason}
	}

	cases := []struct {
		name            string
		conditions      []types.Condition
		triggeredRules  []systemlogtypes.Rule
		expectedMetrics []metrics.Int64MetricRepresentation
	}{
		{
			name:            "one temporary problem that has not happened",
			conditions:      []types.Condition{},
			triggeredRules:  []systemlogtypes.Rule{},
			expectedMetrics: []metrics.Int64MetricRepresentation{},
		},
		{
			name:       "one temporary problem happened once",
			conditions: []types.Condition{},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
			},
		},
		{
			name:       "one temporary problem happened twice",
			conditions: []types.Condition{},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Temp, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 2},
			},
		},
		{
			name:       "two different temporary problems happened",
			conditions: []types.Condition{},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Temp, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 1},
			},
		},
		{
			name: "one permanent problem that is happening",
			conditions: []types.Condition{
				{Type: conditionA, Status: types.False},
			},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
			},
		},
		{
			name: "one permanent problem observed twice with same reason",
			conditions: []types.Condition{
				{Type: conditionA, Status: types.False},
			},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
			},
		},
		{
			name: "one permanent problem observed twice with different reasons",
			conditions: []types.Condition{
				{Type: conditionA, Status: types.False},
			},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionA, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonBar), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 1},
			},
		},
		{
			name: "two permanent problem observed once each",
			conditions: []types.Condition{
				{Type: conditionA, Status: types.False},
				{Type: conditionB, Status: types.False},
			},
			triggeredRules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionB, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 1},
				{Name: gaugeName, Labels: gaugeLabels(conditionB, reasonBar), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 1},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 1},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			monitor := &logMonitor{conditions: tc.conditions}
			monitor.config.ApplyDefaultConfiguration()

			// Replace the global metrics manager with a stub for the duration
			// of this subtest and put the previous one back afterwards.
			savedManager := problemmetrics.GlobalProblemMetricsManager
			defer func() {
				problemmetrics.GlobalProblemMetricsManager = savedManager
			}()
			stubManager, counterStub, gaugeStub := problemmetrics.NewProblemMetricsManagerStub()
			problemmetrics.GlobalProblemMetricsManager = stubManager

			// Each rule is triggered with a single empty log entry.
			for _, rule := range tc.triggeredRules {
				monitor.generateStatus([]*systemlogtypes.Log{{}}, rule)
			}

			gotMetrics := append(counterStub.ListMetrics(), gaugeStub.ListMetrics()...)
			assert.ElementsMatch(t, tc.expectedMetrics, gotMetrics,
				"expected metrics: %+v, got: %+v", tc.expectedMetrics, gotMetrics)
		})
	}
}
// TestInitializeProblemMetricsOrDie passes a list of rules to
// initializeProblemMetricsOrDie while a stubbed global problem metrics manager
// is installed, then checks that the counter and gauge metrics listed by the
// stubs match the expected zero-valued set, ignoring order.
func TestInitializeProblemMetricsOrDie(t *testing.T) {
	const (
		counterName = "problem_counter"
		gaugeName   = "problem_gauge"
		conditionA  = "ConditionA"
		conditionB  = "ConditionB"
		reasonFoo   = "problem reason foo"
		reasonBar   = "problem reason bar"
		reasonHello = "problem reason hello"
	)
	// counterLabels builds the label set expected on a problem_counter entry.
	counterLabels := func(reason string) map[string]string {
		return map[string]string{"reason": reason}
	}
	// gaugeLabels builds the label set expected on a problem_gauge entry.
	gaugeLabels := func(condition, reason string) map[string]string {
		return map[string]string{"type": condition, "reason": reason}
	}

	cases := []struct {
		name            string
		rules           []systemlogtypes.Rule
		expectedMetrics []metrics.Int64MetricRepresentation
	}{
		{
			name:            "no problem type at all",
			rules:           []systemlogtypes.Rule{},
			expectedMetrics: []metrics.Int64MetricRepresentation{},
		},
		{
			name: "one type of temporary problem",
			rules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
			},
		},
		{
			name: "one type of permanent problem",
			rules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
			},
		},
		{
			name: "duplicate temporary problem types",
			rules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Temp, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
			},
		},
		{
			name: "multiple temporary problem types",
			rules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Temp, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 0},
			},
		},
		{
			name: "multiple permanent problem types with same condition",
			rules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionA, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonBar), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 0},
			},
		},
		{
			name: "multiple permanent problem types with different conditions",
			rules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionB, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionB, reasonBar), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 0},
			},
		},
		{
			name: "duplicate permanent problem types",
			rules: []systemlogtypes.Rule{
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
			},
		},
		{
			name: "mixture of temporary and permanent problem types",
			rules: []systemlogtypes.Rule{
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionA, Reason: reasonHello},
				{Type: types.Perm, Condition: conditionA, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionB, Reason: reasonFoo},
				{Type: types.Perm, Condition: conditionB, Reason: reasonBar},
				{Type: types.Temp, Reason: reasonFoo},
				{Type: types.Temp, Reason: reasonBar},
			},
			expectedMetrics: []metrics.Int64MetricRepresentation{
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonHello), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionA, reasonFoo), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionB, reasonFoo), Value: 0},
				{Name: gaugeName, Labels: gaugeLabels(conditionB, reasonBar), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonHello), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonFoo), Value: 0},
				{Name: counterName, Labels: counterLabels(reasonBar), Value: 0},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// NOTE(review): this monitor is never handed to the function under
			// test; kept in case ApplyDefaultConfiguration matters here — confirm.
			monitor := &logMonitor{}
			monitor.config.ApplyDefaultConfiguration()

			// Replace the global metrics manager with a stub for the duration
			// of this subtest and put the previous one back afterwards.
			savedManager := problemmetrics.GlobalProblemMetricsManager
			defer func() {
				problemmetrics.GlobalProblemMetricsManager = savedManager
			}()
			stubManager, counterStub, gaugeStub := problemmetrics.NewProblemMetricsManagerStub()
			problemmetrics.GlobalProblemMetricsManager = stubManager

			initializeProblemMetricsOrDie(tc.rules)

			gotMetrics := append(counterStub.ListMetrics(), gaugeStub.ListMetrics()...)
			assert.ElementsMatch(t, tc.expectedMetrics, gotMetrics,
				"expected metrics: %+v, got: %+v", tc.expectedMetrics, gotMetrics)
		})
	}
}
// TestGenerateMessage checks the string generateMessage returns for a pair of
// log entries, both with an empty suffix and with a non-empty suffix.
func TestGenerateMessage(t *testing.T) {
	cases := []struct {
		name                          string
		logs                          []*systemlogtypes.Log
		patternGeneratedMessageSuffix string
		want                          string
	}{
		{
			name: "No rule message",
			logs: []*systemlogtypes.Log{
				{Message: "First log message"},
				{Message: "Second log message"},
			},
			patternGeneratedMessageSuffix: "",
			want:                          "First log message\nSecond log message",
		},
		{
			name: "With rule message",
			logs: []*systemlogtypes.Log{
				{Message: "First log message"},
				{Message: "Second log message"},
			},
			patternGeneratedMessageSuffix: "refer www.foo.com/docs for playbook on how to fix the issue",
			want:                          "First log message\nSecond log message; refer www.foo.com/docs for playbook on how to fix the issue",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := generateMessage(tc.logs, tc.patternGeneratedMessageSuffix); got != tc.want {
				t.Errorf("generateMessage() = %v, want %v", got, tc.want)
			}
		})
	}
}