Add e2e test for NPD

The first test is very simple: it installs NPD on a VM and then
verifies that NPD reports the host_uptime metric in Prometheus format.
This commit is contained in:
Xuewei Zhang
2019-08-13 17:34:33 -07:00
parent db2dbd1eb2
commit f9b5e60a43
15 changed files with 1051 additions and 20 deletions

View File

@@ -21,17 +21,6 @@ import (
"reflect"
)
// Int64MetricRepresentation is a point-in-time view of a single int64 metric.
// It exists so that fake metrics can be inspected.
type Int64MetricRepresentation struct {
	// Name holds the name of the metric.
	Name string

	// Labels holds every label of the metric as key-value pairs.
	Labels map[string]string

	// Value holds the value of the metric.
	Value int64
}
// Int64MetricInterface is used to create test double for Int64Metric.
type Int64MetricInterface interface {
// Record records a measurement for the metric, with provided tags as metric labels.

View File

@@ -18,8 +18,11 @@ package metrics
import (
"context"
"fmt"
"strings"
"sync"
pcm "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
@@ -34,12 +37,6 @@ func init() {
tagMapMutex.Unlock()
}
// Int64Metric is a metric whose measurements are int64 values.
type Int64Metric struct {
	// name is the name of the metric.
	name string

	// measure is the underlying measure that measurements are recorded against.
	measure *stats.Int64Measure
}
// Aggregation defines how measurements should be aggregated into data points.
// Supported values are declared as string constants of this type (for example Sum).
type Aggregation string
@@ -50,6 +47,23 @@ const (
Sum Aggregation = "Sum"
)
// Int64MetricRepresentation is a point-in-time view of a single int64 metric.
// It exists so that metric internals can be inspected.
type Int64MetricRepresentation struct {
	// Name holds the name of the metric.
	Name string

	// Labels holds every label of the metric as key-value pairs.
	Labels map[string]string

	// Value holds the value of the metric.
	Value int64
}
// Int64Metric is a metric whose measurements are int64 values.
type Int64Metric struct {
	// name is the name of the metric.
	name string

	// measure is the underlying measure that measurements are recorded against.
	measure *stats.Int64Measure
}
// NewInt64Metric creates an Int64Metric metric, returns nil when name is empty.
func NewInt64Metric(name string, description string, unit string, aggregation Aggregation, tagNames []string) (*Int64Metric, error) {
if name == "" {
@@ -106,6 +120,17 @@ func (metric *Int64Metric) Record(tags map[string]string, measurement int64) err
metric.measure.M(measurement))
}
// Float64MetricRepresentation is a point-in-time view of a single float64 metric.
// It exists so that metric internals can be inspected.
type Float64MetricRepresentation struct {
	// Name holds the name of the metric.
	Name string

	// Labels holds every label of the metric as key-value pairs.
	Labels map[string]string

	// Value holds the value of the metric.
	Value float64
}
// Float64Metric represents a float64 metric.
type Float64Metric struct {
name string
@@ -187,3 +212,66 @@ func getTagKeysFromNames(tagNames []string) ([]tag.Key, error) {
}
return tagKeys, nil
}
// ParsePrometheusMetrics parses Prometheus formatted metrics into metrics under Float64MetricRepresentation.
//
// One Float64MetricRepresentation is produced per sample of every parsed metric
// family. Only counter- and gauge-typed families are supported; encountering any
// other type aborts parsing and returns a nil slice together with an error.
//
// Note: Prometheus's go library stores all counter/gauge-typed metric values under float64.
func ParsePrometheusMetrics(metricsText string) ([]Float64MetricRepresentation, error) {
	var textParser expfmt.TextParser
	metricFamilies, err := textParser.TextToMetricFamilies(strings.NewReader(metricsText))
	if err != nil {
		return nil, err
	}

	var metrics []Float64MetricRepresentation
	for _, metricFamily := range metricFamilies {
		metricType := metricFamily.GetType()
		for _, metric := range metricFamily.GetMetric() {
			// The generated getters are nil-safe, so an unset optional field
			// yields its zero value instead of a nil-pointer panic.
			labelPairs := metric.GetLabel()
			labels := make(map[string]string, len(labelPairs))
			for _, labelPair := range labelPairs {
				labels[labelPair.GetName()] = labelPair.GetValue()
			}

			var value float64
			switch metricType {
			case pcm.MetricType_COUNTER:
				value = metric.GetCounter().GetValue()
			case pcm.MetricType_GAUGE:
				value = metric.GetGauge().GetValue()
			default:
				// Do not hand back a partially populated slice alongside an error.
				return nil, fmt.Errorf("unexpected MetricType %s for metric %s",
					metricType, metricFamily.GetName())
			}

			metrics = append(metrics, Float64MetricRepresentation{
				Name:   metricFamily.GetName(),
				Labels: labels,
				Value:  value,
			})
		}
	}
	return metrics, nil
}
// GetFloat64Metric finds the first metric in metrics matching the provided name and labels.
//
// When strictLabelMatching is set to true, the found metric's labels are identical to the provided labels;
// when strictLabelMatching is set to false, the found metric's labels are a superset of the provided labels.
//
// A provided label only matches when the metric actually carries that key with
// the same value; a label that is absent from the metric never matches, even
// when the provided value is the empty string.
//
// An error is returned when no metric matches.
func GetFloat64Metric(metrics []Float64MetricRepresentation, name string, labels map[string]string,
	strictLabelMatching bool) (Float64MetricRepresentation, error) {
	for _, metric := range metrics {
		if metric.Name != name {
			continue
		}
		// With equal label counts, "superset" below is equivalent to "identical".
		if strictLabelMatching && len(metric.Labels) != len(labels) {
			continue
		}
		if hasAllLabels(metric.Labels, labels) {
			return metric, nil
		}
	}
	return Float64MetricRepresentation{}, fmt.Errorf("no matching metric found")
}

// hasAllLabels reports whether every key-value pair in want is present in have.
func hasAllLabels(have, want map[string]string) bool {
	for key, value := range want {
		// Use the comma-ok form so that a missing key is not confused with an
		// existing key whose value is "".
		if got, ok := have[key]; !ok || got != value {
			return false
		}
	}
	return true
}

View File

@@ -0,0 +1,152 @@
/*
Copyright 2019 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"io/ioutil"
"testing"
)
// TestPrometheusMetricsParsingAndMatching verifies the behavior of ParsePrometheusMetrics() and GetFloat64Metric().
//
// Each case parses a metrics text fixture and then checks that every entry in
// expectedMetrics can be found and that no entry in notExpectedMetrics can be
// found, using the case's label matching mode. Only Name and Labels of the
// expected/not-expected entries are used for the lookup.
func TestPrometheusMetricsParsingAndMatching(t *testing.T) {
	testCases := []struct {
		name                string
		metricsTextPath     string
		expectedMetrics     []Float64MetricRepresentation
		notExpectedMetrics  []Float64MetricRepresentation
		strictLabelMatching bool
	}{
		{
			name:            "Relaxed label matching",
			metricsTextPath: "testdata/sample_metrics.txt",
			expectedMetrics: []Float64MetricRepresentation{
				// Metric with no label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{},
				},
				// Metric with partial label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"kernel_version": "4.14.127+"},
				},
				{
					Name:   "disk_avg_queue_len",
					Labels: map[string]string{"device": "sda1"},
				},
				{
					Name:   "disk_avg_queue_len",
					Labels: map[string]string{"device": "sda8"},
				},
			},
			notExpectedMetrics: []Float64MetricRepresentation{
				// Metric with non-existent label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"non-existant-version": "0.0.1"},
				},
				// Metric with incorrect label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"kernel_version": "mismatched-version"},
				},
				// Non-existent metric.
				{
					Name:   "host_downtime",
					Labels: map[string]string{},
				},
			},
			strictLabelMatching: false,
		},
		{
			name:            "Strict label matching",
			metricsTextPath: "testdata/sample_metrics.txt",
			expectedMetrics: []Float64MetricRepresentation{
				{
					Name:   "host_uptime",
					Labels: map[string]string{"kernel_version": "4.14.127+", "os_version": "cos 73-11647.217.0"},
				},
				{
					Name:   "problem_counter",
					Labels: map[string]string{"reason": "DockerHung"},
				},
				{
					Name:   "problem_counter",
					Labels: map[string]string{"reason": "OOMKilling"},
				},
			},
			notExpectedMetrics: []Float64MetricRepresentation{
				// Metric with incomplete label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"kernel_version": "4.14.127+"},
				},
				// Metric with missing label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{},
				},
				// Metric with non-existent label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"non-existant-version": "0.0.1"},
				},
				// Metric with incorrect label.
				{
					Name:   "host_uptime",
					Labels: map[string]string{"kernel_version": "mismatched-version"},
				},
				// Non-existent metric.
				{
					Name:   "host_downtime",
					Labels: map[string]string{},
				},
			},
			strictLabelMatching: true,
		},
	}

	for _, test := range testCases {
		t.Run(test.name, func(t *testing.T) {
			b, err := ioutil.ReadFile(test.metricsTextPath)
			if err != nil {
				// Without the fixture nothing below is meaningful, so stop this case here.
				t.Fatalf("Unexpected error reading file %s: %v", test.metricsTextPath, err)
			}
			metricsText := string(b)

			metrics, err := ParsePrometheusMetrics(metricsText)
			if err != nil {
				// Lookups against unparsed metrics would only produce misleading follow-up failures.
				t.Fatalf("Unexpected error parsing NPD metrics: %v\nMetrics text: %s\n", err, metricsText)
			}

			for _, expectedMetric := range test.expectedMetrics {
				_, err = GetFloat64Metric(metrics, expectedMetric.Name, expectedMetric.Labels, test.strictLabelMatching)
				if err != nil {
					t.Errorf("Failed to find metric %v in these metrics %v.\nMetrics text: %s\n",
						expectedMetric, metrics, metricsText)
				}
			}

			for _, notExpectedMetric := range test.notExpectedMetrics {
				_, err = GetFloat64Metric(metrics, notExpectedMetric.Name, notExpectedMetric.Labels, test.strictLabelMatching)
				if err == nil {
					t.Errorf("Unexpected metric %v found in these metrics %v.\nMetrics text: %s\n",
						notExpectedMetric, metrics, metricsText)
				}
			}
		})
	}
}

View File

@@ -0,0 +1,45 @@
# HELP disk_avg_queue_len The average queue length on the disk
# TYPE disk_avg_queue_len gauge
disk_avg_queue_len{device="sda"} 3.388908266480642
disk_avg_queue_len{device="sda1"} 6.53953488372093
disk_avg_queue_len{device="sda8"} 3.404255319148936
# HELP disk_io_time The IO time spent on the disk
# TYPE disk_io_time gauge
disk_io_time{device="sda"} 8601
disk_io_time{device="sda1"} 430
disk_io_time{device="sda8"} 47
# HELP disk_weighted_io The weighted IO on the disk
# TYPE disk_weighted_io gauge
disk_weighted_io{device="sda"} 29148
disk_weighted_io{device="sda1"} 2812
disk_weighted_io{device="sda8"} 160
# HELP host_uptime The uptime of the operating system
# TYPE host_uptime gauge
host_uptime{kernel_version="4.14.127+",os_version="cos 73-11647.217.0"} 81
# HELP problem_counter Number of times a specific type of problem have occurred.
# TYPE problem_counter counter
problem_counter{reason="AUFSUmountHung"} 0
problem_counter{reason="ContainerdStart"} 1
problem_counter{reason="CorruptDockerImage"} 0
problem_counter{reason="CorruptDockerOverlay2"} 0
problem_counter{reason="DockerHung"} 0
problem_counter{reason="DockerStart"} 1
problem_counter{reason="FilesystemIsReadOnly"} 0
problem_counter{reason="FrequentContainerdRestart"} 0
problem_counter{reason="FrequentDockerRestart"} 0
problem_counter{reason="FrequentKubeletRestart"} 0
problem_counter{reason="KernelOops"} 0
problem_counter{reason="KubeletStart"} 0
problem_counter{reason="OOMKilling"} 0
problem_counter{reason="TaskHung"} 0
problem_counter{reason="UnregisterNetDevice"} 0
# HELP problem_gauge Whether a specific type of problem is affecting the node or not.
# TYPE problem_gauge gauge
problem_gauge{reason="AUFSUmountHung",type="KernelDeadlock"} 0
problem_gauge{reason="CorruptDockerOverlay2",type="CorruptDockerOverlay2"} 0
problem_gauge{reason="DockerHung",type="KernelDeadlock"} 0
problem_gauge{reason="FilesystemIsReadOnly",type="ReadonlyFilesystem"} 0
problem_gauge{reason="FrequentContainerdRestart",type="FrequentContainerdRestart"} 0
problem_gauge{reason="FrequentDockerRestart",type="FrequentDockerRestart"} 0
problem_gauge{reason="FrequentKubeletRestart",type="FrequentKubeletRestart"} 0
problem_gauge{reason="UnregisterNetDevice",type="FrequentUnregisterNetDevice"} 0