mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-03 02:00:36 +00:00
Report metrics from system-log-monitor
This commit is contained in:
114
pkg/problemmetrics/problem_metrics.go
Normal file
114
pkg/problemmetrics/problem_metrics.go
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package problemmetrics
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/util/metrics"
|
||||
)
|
||||
|
||||
// GlobalProblemMetricsManager is a singleton of ProblemMetricsManager,
|
||||
// which should be used to manage all problem-converted metrics across all
|
||||
// problem daemons.
|
||||
var GlobalProblemMetricsManager *ProblemMetricsManager
|
||||
|
||||
func init() {
|
||||
GlobalProblemMetricsManager = NewProblemMetricsManagerOrDie()
|
||||
}
|
||||
|
||||
// ProblemMetricsManager manages problem-converted metrics.
|
||||
// ProblemMetricsManager is thread-safe.
|
||||
type ProblemMetricsManager struct {
|
||||
problemCounter metrics.Int64MetricInterface
|
||||
problemGauge metrics.Int64MetricInterface
|
||||
problemTypeToReason map[string]string
|
||||
problemTypeToReasonMutex sync.Mutex
|
||||
}
|
||||
|
||||
func NewProblemMetricsManagerOrDie() *ProblemMetricsManager {
|
||||
pmm := ProblemMetricsManager{}
|
||||
|
||||
var err error
|
||||
pmm.problemCounter, err = metrics.NewInt64Metric(
|
||||
"problem_counter",
|
||||
"Number of times a specific type of problem have occurred.",
|
||||
"1",
|
||||
metrics.Sum,
|
||||
[]string{"reason"})
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to create problem_counter metric: %v", err)
|
||||
}
|
||||
|
||||
pmm.problemGauge, err = metrics.NewInt64Metric(
|
||||
"problem_gauge",
|
||||
"Whether a specific type of problem is affecting the node or not.",
|
||||
"1",
|
||||
metrics.LastValue,
|
||||
[]string{"type", "reason"})
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed to create problem_gauge metric: %v", err)
|
||||
}
|
||||
|
||||
pmm.problemTypeToReason = make(map[string]string)
|
||||
|
||||
return &pmm
|
||||
}
|
||||
|
||||
// IncrementProblemCounter increments the value of a problem counter.
|
||||
func (pmm *ProblemMetricsManager) IncrementProblemCounter(reason string, count int64) error {
|
||||
if pmm.problemCounter == nil {
|
||||
return errors.New("problem counter is being incremented before initialized.")
|
||||
}
|
||||
|
||||
return pmm.problemCounter.Record(map[string]string{"reason": reason}, count)
|
||||
}
|
||||
|
||||
// SetProblemGauge sets the value of a problem gauge.
|
||||
func (pmm *ProblemMetricsManager) SetProblemGauge(problemType string, reason string, value bool) error {
|
||||
if pmm.problemGauge == nil {
|
||||
return errors.New("problem gauge is being set before initialized.")
|
||||
}
|
||||
|
||||
pmm.problemTypeToReasonMutex.Lock()
|
||||
defer pmm.problemTypeToReasonMutex.Unlock()
|
||||
|
||||
// We clear the last reason, because the expected behavior is that at any point of time,
|
||||
// for each type of permanent problem, there should be at most one reason got set to 1.
|
||||
// This behavior is consistent with the behavior of node condition in Kubernetes.
|
||||
// However, problemGauges with different "type" and "reason" are considered as different
|
||||
// metrics in Prometheus. So we need to clear the previous metrics explicitly.
|
||||
if lastReason, ok := pmm.problemTypeToReason[problemType]; ok {
|
||||
err := pmm.problemGauge.Record(map[string]string{"type": problemType, "reason": lastReason}, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to clear previous reason %q for type %q: %v",
|
||||
problemType, lastReason, err)
|
||||
}
|
||||
}
|
||||
|
||||
pmm.problemTypeToReason[problemType] = reason
|
||||
|
||||
var valueInt int64
|
||||
if value {
|
||||
valueInt = 1
|
||||
}
|
||||
return pmm.problemGauge.Record(map[string]string{"type": problemType, "reason": reason}, valueInt)
|
||||
}
|
||||
35
pkg/problemmetrics/problem_metrics_stub.go
Normal file
35
pkg/problemmetrics/problem_metrics_stub.go
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package problemmetrics
|
||||
|
||||
import (
|
||||
"k8s.io/node-problem-detector/pkg/util/metrics"
|
||||
)
|
||||
|
||||
// NewProblemMetricsManagerStub creates a ProblemMetricsManager stubbed by fake metrics.
|
||||
// The stubbed ProblemMetricsManager and fake metrics are returned.
|
||||
func NewProblemMetricsManagerStub() (*ProblemMetricsManager, *metrics.FakeInt64Metric, *metrics.FakeInt64Metric) {
|
||||
fakeProblemCounter := metrics.NewFakeInt64Metric("problem_counter", metrics.Sum, []string{"reason"})
|
||||
fakeProblemGauge := metrics.NewFakeInt64Metric("problem_gauge", metrics.LastValue, []string{"type", "reason"})
|
||||
|
||||
pmm := ProblemMetricsManager{}
|
||||
pmm.problemCounter = metrics.Int64MetricInterface(fakeProblemCounter)
|
||||
pmm.problemGauge = metrics.Int64MetricInterface(fakeProblemGauge)
|
||||
pmm.problemTypeToReason = make(map[string]string)
|
||||
|
||||
return &pmm, fakeProblemCounter, fakeProblemGauge
|
||||
}
|
||||
277
pkg/problemmetrics/problem_metrics_test.go
Normal file
277
pkg/problemmetrics/problem_metrics_test.go
Normal file
@@ -0,0 +1,277 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package problemmetrics
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"k8s.io/node-problem-detector/pkg/util/metrics"
|
||||
)
|
||||
|
||||
func TestNewProblem(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
reasons []string
|
||||
counts []int64
|
||||
expectedMetrics []metrics.Int64MetricRepresentation
|
||||
}{
|
||||
{
|
||||
name: "no problem at all",
|
||||
reasons: []string{},
|
||||
counts: []int64{},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{},
|
||||
},
|
||||
{
|
||||
name: "one problem happened",
|
||||
reasons: []string{"foo"},
|
||||
counts: []int64{1},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "foo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one problem happened twice",
|
||||
reasons: []string{"foo", "foo"},
|
||||
counts: []int64{1, 1},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "foo"},
|
||||
Value: 2,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two problem happened various times",
|
||||
reasons: []string{"foo", "bar", "foo"},
|
||||
counts: []int64{1, 1, 1},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "foo"},
|
||||
Value: 2,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "bar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two problem initialized",
|
||||
reasons: []string{"foo", "bar"},
|
||||
counts: []int64{0, 0},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "foo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "bar"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two problem first initialized, then happened various times",
|
||||
reasons: []string{"foo", "bar", "foo", "bar", "foo"},
|
||||
counts: []int64{0, 0, 1, 1, 1},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "foo"},
|
||||
Value: 2,
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "bar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
pmm, fakeProblemCounter, fakeProblemGauge := NewProblemMetricsManagerStub()
|
||||
|
||||
for idx, reason := range test.reasons {
|
||||
pmm.IncrementProblemCounter(reason, test.counts[idx])
|
||||
}
|
||||
|
||||
gotMetrics := append(fakeProblemCounter.ListMetrics(), fakeProblemGauge.ListMetrics()...)
|
||||
assert.ElementsMatch(t, test.expectedMetrics, gotMetrics,
|
||||
"expected metrics: %+v, got: %+v", test.expectedMetrics, gotMetrics)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSetProblemGauge(t *testing.T) {
|
||||
type argumentType struct {
|
||||
problemType string
|
||||
reason string
|
||||
value bool
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
arguments []argumentType
|
||||
expectedMetrics []metrics.Int64MetricRepresentation
|
||||
}{
|
||||
{
|
||||
name: "no permanent problem at all",
|
||||
arguments: []argumentType{},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem was set once",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem was set twice with same reason",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem was set twice with different reasons",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
{"ProblemTypeA", "ReasonBar", true},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonBar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem was set then cleared",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
{"ProblemTypeA", "", false},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": ""},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 0,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "one permanent problem was set, cleared, and set again",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
{"ProblemTypeA", "", false},
|
||||
{"ProblemTypeA", "ReasonBar", true},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": ""},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonBar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "two permanent problems were set and one of them got cleared",
|
||||
arguments: []argumentType{
|
||||
{"ProblemTypeA", "ReasonFoo", true},
|
||||
{"ProblemTypeB", "ReasonBar", true},
|
||||
{"ProblemTypeA", "", false},
|
||||
},
|
||||
expectedMetrics: []metrics.Int64MetricRepresentation{
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": ""},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeA", "reason": "ReasonFoo"},
|
||||
Value: 0,
|
||||
},
|
||||
{
|
||||
Name: "problem_gauge",
|
||||
Labels: map[string]string{"type": "ProblemTypeB", "reason": "ReasonBar"},
|
||||
Value: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
pmm, fakeProblemCounter, fakeProblemGauge := NewProblemMetricsManagerStub()
|
||||
|
||||
for _, argument := range test.arguments {
|
||||
pmm.SetProblemGauge(argument.problemType, argument.reason, argument.value)
|
||||
}
|
||||
|
||||
gotMetrics := append(fakeProblemCounter.ListMetrics(), fakeProblemGauge.ListMetrics()...)
|
||||
assert.ElementsMatch(t, test.expectedMetrics, gotMetrics,
|
||||
"expected metrics: %+v, got: %+v", test.expectedMetrics, gotMetrics)
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user