mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-04-21 01:46:42 +00:00
Add e2e test for NPD
The first test is a very simple test. It installs NPD on a VM, and then verifies that NPD reports metric host_uptime in Prometheus format.
This commit is contained in:
@@ -21,17 +21,6 @@ import (
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// Int64MetricRepresentation represents a snapshot of an int64 metrics.
|
||||
// This is used for inspecting fake metrics.
|
||||
type Int64MetricRepresentation struct {
|
||||
// Name is the metric name.
|
||||
Name string
|
||||
// Labels contains all metric labels in key-value pair format.
|
||||
Labels map[string]string
|
||||
// Value is the value of the metric.
|
||||
Value int64
|
||||
}
|
||||
|
||||
// Int64MetricInterface is used to create test double for Int64Metric.
|
||||
type Int64MetricInterface interface {
|
||||
// Record records a measurement for the metric, with provided tags as metric labels.
|
||||
|
||||
@@ -18,8 +18,11 @@ package metrics
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
pcm "github.com/prometheus/client_model/go"
|
||||
"github.com/prometheus/common/expfmt"
|
||||
"go.opencensus.io/stats"
|
||||
"go.opencensus.io/stats/view"
|
||||
"go.opencensus.io/tag"
|
||||
@@ -34,12 +37,6 @@ func init() {
|
||||
tagMapMutex.Unlock()
|
||||
}
|
||||
|
||||
// Int64Metric represents an int64 metric.
|
||||
type Int64Metric struct {
|
||||
name string
|
||||
measure *stats.Int64Measure
|
||||
}
|
||||
|
||||
// Aggregation defines how measurements should be aggregated into data points.
|
||||
type Aggregation string
|
||||
|
||||
@@ -50,6 +47,23 @@ const (
|
||||
Sum Aggregation = "Sum"
|
||||
)
|
||||
|
||||
// Int64MetricRepresentation captures a point-in-time snapshot of an int64
// metric. It exists so that tests and callers can inspect metric internals.
type Int64MetricRepresentation struct {
	// Name holds the metric's name.
	Name string
	// Labels maps every metric label key to its value.
	Labels map[string]string
	// Value holds the recorded value of the metric.
	Value int64
}
|
||||
|
||||
// Int64Metric represents an int64 metric.
|
||||
type Int64Metric struct {
|
||||
name string
|
||||
measure *stats.Int64Measure
|
||||
}
|
||||
|
||||
// NewInt64Metric create a Int64Metric metric, returns nil when name is empty.
|
||||
func NewInt64Metric(name string, description string, unit string, aggregation Aggregation, tagNames []string) (*Int64Metric, error) {
|
||||
if name == "" {
|
||||
@@ -106,6 +120,17 @@ func (metric *Int64Metric) Record(tags map[string]string, measurement int64) err
|
||||
metric.measure.M(measurement))
|
||||
}
|
||||
|
||||
// Float64MetricRepresentation captures a point-in-time snapshot of a float64
// metric. It exists so that tests and callers can inspect metric internals.
type Float64MetricRepresentation struct {
	// Name holds the metric's name.
	Name string
	// Labels maps every metric label key to its value.
	Labels map[string]string
	// Value holds the recorded value of the metric.
	Value float64
}
|
||||
|
||||
// Float64Metric represents an float64 metric.
|
||||
type Float64Metric struct {
|
||||
name string
|
||||
@@ -187,3 +212,66 @@ func getTagKeysFromNames(tagNames []string) ([]tag.Key, error) {
|
||||
}
|
||||
return tagKeys, nil
|
||||
}
|
||||
|
||||
// ParsePrometheusMetrics parses Prometheus formatted metrics into metrics under Float64MetricRepresentation.
|
||||
//
|
||||
// Note: Prometheus's go library stores all counter/gauge-typed metric values under float64.
|
||||
func ParsePrometheusMetrics(metricsText string) ([]Float64MetricRepresentation, error) {
|
||||
var metrics []Float64MetricRepresentation
|
||||
|
||||
var textParser expfmt.TextParser
|
||||
metricFamilies, err := textParser.TextToMetricFamilies(strings.NewReader(metricsText))
|
||||
if err != nil {
|
||||
return metrics, err
|
||||
}
|
||||
|
||||
for _, metricFamily := range metricFamilies {
|
||||
for _, metric := range metricFamily.Metric {
|
||||
labels := make(map[string]string)
|
||||
for _, labelPair := range metric.Label {
|
||||
labels[*labelPair.Name] = *labelPair.Value
|
||||
}
|
||||
|
||||
var value float64
|
||||
if *metricFamily.Type == pcm.MetricType_COUNTER {
|
||||
value = *metric.Counter.Value
|
||||
} else if *metricFamily.Type == pcm.MetricType_GAUGE {
|
||||
value = *metric.Gauge.Value
|
||||
} else {
|
||||
return metrics, fmt.Errorf("unexpected MetricType %s for metric %s",
|
||||
pcm.MetricType_name[int32(*metricFamily.Type)], *metricFamily.Name)
|
||||
}
|
||||
|
||||
metrics = append(metrics, Float64MetricRepresentation{*metricFamily.Name, labels, value})
|
||||
}
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
// GetFloat64Metric finds the metric matching provided name and labels.
|
||||
// When strictLabelMatching is set to true, the founded metric labels are identical to the provided labels;
|
||||
// when strictLabelMatching is set to false, the founded metric labels are a superset of the provided labels.
|
||||
func GetFloat64Metric(metrics []Float64MetricRepresentation, name string, labels map[string]string,
|
||||
strictLabelMatching bool) (Float64MetricRepresentation, error) {
|
||||
for _, metric := range metrics {
|
||||
if metric.Name != name {
|
||||
continue
|
||||
}
|
||||
if strictLabelMatching && len(metric.Labels) != len(labels) {
|
||||
continue
|
||||
}
|
||||
sameLabels := true
|
||||
for key, value := range labels {
|
||||
if metric.Labels[key] != value {
|
||||
sameLabels = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if !sameLabels {
|
||||
continue
|
||||
}
|
||||
return metric, nil
|
||||
}
|
||||
return Float64MetricRepresentation{}, fmt.Errorf("no matching metric found")
|
||||
}
|
||||
|
||||
152
pkg/util/metrics/helpers_test.go
Normal file
152
pkg/util/metrics/helpers_test.go
Normal file
@@ -0,0 +1,152 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestPrometheusMetricsParsingAndMatching verifies the behavior of ParsePrometheusMetrics() and GetFloat64Metric().
|
||||
func TestPrometheusMetricsParsingAndMatching(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
metricsTextPath string
|
||||
expectedMetrics []Float64MetricRepresentation
|
||||
notExpectedMetrics []Float64MetricRepresentation
|
||||
strictLabelMatching bool
|
||||
}{
|
||||
{
|
||||
name: "Relaxed label matching",
|
||||
metricsTextPath: "testdata/sample_metrics.txt",
|
||||
expectedMetrics: []Float64MetricRepresentation{
|
||||
// Metric with no label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
// Metric with partial label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"kernel_version": "4.14.127+"},
|
||||
},
|
||||
{
|
||||
Name: "disk_avg_queue_len",
|
||||
Labels: map[string]string{"device": "sda1"},
|
||||
},
|
||||
{
|
||||
Name: "disk_avg_queue_len",
|
||||
Labels: map[string]string{"device": "sda8"},
|
||||
},
|
||||
},
|
||||
notExpectedMetrics: []Float64MetricRepresentation{
|
||||
// Metric with non-existant label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"non-existant-version": "0.0.1"},
|
||||
},
|
||||
// Metric with incorrect label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"kernel_version": "mismatched-version"},
|
||||
},
|
||||
// Non-exsistant metric.
|
||||
{
|
||||
Name: "host_downtime",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
},
|
||||
strictLabelMatching: false,
|
||||
},
|
||||
{
|
||||
name: "Strict label matching",
|
||||
metricsTextPath: "testdata/sample_metrics.txt",
|
||||
expectedMetrics: []Float64MetricRepresentation{
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"kernel_version": "4.14.127+", "os_version": "cos 73-11647.217.0"},
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "DockerHung"},
|
||||
},
|
||||
{
|
||||
Name: "problem_counter",
|
||||
Labels: map[string]string{"reason": "OOMKilling"},
|
||||
},
|
||||
},
|
||||
notExpectedMetrics: []Float64MetricRepresentation{
|
||||
// Metric with incomplete label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"kernel_version": "4.14.127+"},
|
||||
},
|
||||
// Metric with missing label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
// Metric with non-existant label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"non-existant-version": "0.0.1"},
|
||||
},
|
||||
// Metric with incorrect label.
|
||||
{
|
||||
Name: "host_uptime",
|
||||
Labels: map[string]string{"kernel_version": "mismatched-version"},
|
||||
},
|
||||
// Non-exsistant metric.
|
||||
{
|
||||
Name: "host_downtime",
|
||||
Labels: map[string]string{},
|
||||
},
|
||||
},
|
||||
strictLabelMatching: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
b, err := ioutil.ReadFile(test.metricsTextPath)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error reading file %s: %v", test.metricsTextPath, err)
|
||||
}
|
||||
metricsText := string(b)
|
||||
|
||||
metrics, err := ParsePrometheusMetrics(metricsText)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error parsing NPD metrics: %v\nMetrics text: %s\n", err, metricsText)
|
||||
}
|
||||
|
||||
for _, expectedMetric := range test.expectedMetrics {
|
||||
_, err = GetFloat64Metric(metrics, expectedMetric.Name, expectedMetric.Labels, test.strictLabelMatching)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to find metric %v in these metrics %v.\nMetrics text: %s\n",
|
||||
expectedMetric, metrics, metricsText)
|
||||
}
|
||||
}
|
||||
|
||||
for _, notExpectedMetric := range test.notExpectedMetrics {
|
||||
_, err = GetFloat64Metric(metrics, notExpectedMetric.Name, notExpectedMetric.Labels, test.strictLabelMatching)
|
||||
if err == nil {
|
||||
t.Errorf("Unexpected metric %v found in these metrics %v.\nMetrics text: %s\n",
|
||||
notExpectedMetric, metrics, metricsText)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
45
pkg/util/metrics/testdata/sample_metrics.txt
vendored
Normal file
45
pkg/util/metrics/testdata/sample_metrics.txt
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
# HELP disk_avg_queue_len The average queue length on the disk
|
||||
# TYPE disk_avg_queue_len gauge
|
||||
disk_avg_queue_len{device="sda"} 3.388908266480642
|
||||
disk_avg_queue_len{device="sda1"} 6.53953488372093
|
||||
disk_avg_queue_len{device="sda8"} 3.404255319148936
|
||||
# HELP disk_io_time The IO time spent on the disk
|
||||
# TYPE disk_io_time gauge
|
||||
disk_io_time{device="sda"} 8601
|
||||
disk_io_time{device="sda1"} 430
|
||||
disk_io_time{device="sda8"} 47
|
||||
# HELP disk_weighted_io The weighted IO on the disk
|
||||
# TYPE disk_weighted_io gauge
|
||||
disk_weighted_io{device="sda"} 29148
|
||||
disk_weighted_io{device="sda1"} 2812
|
||||
disk_weighted_io{device="sda8"} 160
|
||||
# HELP host_uptime The uptime of the operating system
|
||||
# TYPE host_uptime gauge
|
||||
host_uptime{kernel_version="4.14.127+",os_version="cos 73-11647.217.0"} 81
|
||||
# HELP problem_counter Number of times a specific type of problem have occurred.
|
||||
# TYPE problem_counter counter
|
||||
problem_counter{reason="AUFSUmountHung"} 0
|
||||
problem_counter{reason="ContainerdStart"} 1
|
||||
problem_counter{reason="CorruptDockerImage"} 0
|
||||
problem_counter{reason="CorruptDockerOverlay2"} 0
|
||||
problem_counter{reason="DockerHung"} 0
|
||||
problem_counter{reason="DockerStart"} 1
|
||||
problem_counter{reason="FilesystemIsReadOnly"} 0
|
||||
problem_counter{reason="FrequentContainerdRestart"} 0
|
||||
problem_counter{reason="FrequentDockerRestart"} 0
|
||||
problem_counter{reason="FrequentKubeletRestart"} 0
|
||||
problem_counter{reason="KernelOops"} 0
|
||||
problem_counter{reason="KubeletStart"} 0
|
||||
problem_counter{reason="OOMKilling"} 0
|
||||
problem_counter{reason="TaskHung"} 0
|
||||
problem_counter{reason="UnregisterNetDevice"} 0
|
||||
# HELP problem_gauge Whether a specific type of problem is affecting the node or not.
|
||||
# TYPE problem_gauge gauge
|
||||
problem_gauge{reason="AUFSUmountHung",type="KernelDeadlock"} 0
|
||||
problem_gauge{reason="CorruptDockerOverlay2",type="CorruptDockerOverlay2"} 0
|
||||
problem_gauge{reason="DockerHung",type="KernelDeadlock"} 0
|
||||
problem_gauge{reason="FilesystemIsReadOnly",type="ReadonlyFilesystem"} 0
|
||||
problem_gauge{reason="FrequentContainerdRestart",type="FrequentContainerdRestart"} 0
|
||||
problem_gauge{reason="FrequentDockerRestart",type="FrequentDockerRestart"} 0
|
||||
problem_gauge{reason="FrequentKubeletRestart",type="FrequentKubeletRestart"} 0
|
||||
problem_gauge{reason="UnregisterNetDevice",type="FrequentUnregisterNetDevice"} 0
|
||||
Reference in New Issue
Block a user