mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-04 18:53:51 +00:00
160 lines
5.7 KiB
Go
160 lines
5.7 KiB
Go
/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
|
|
|
|
package npd
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"path"
|
|
"strings"
|
|
"time"
|
|
|
|
"k8s.io/node-problem-detector/pkg/util/metrics"
|
|
"k8s.io/node-problem-detector/test/e2e/lib/gce"
|
|
|
|
"github.com/avast/retry-go"
|
|
)
|
|
|
|
// SetupNPD installs NPD from the test tarball onto the provided GCE instance.
|
|
//
|
|
// Here is how it works:
|
|
// 1. SetupNPD will SCP the NPD build tarball onto the VM.
|
|
// 2. SetupNPD will extract the tarball in the VM, to expose the test/e2e-install.sh on the VM.
|
|
// 3. SetupNPD will then call the e2e-install.sh script, and feed the NPD build tarball as input.
|
|
// 4. Finally, the e2e-install.sh script will do the heavy lifting of installing NPD (setting up
|
|
// binary/config directories, setting up systemd config file, etc).
|
|
func SetupNPD(ins gce.Instance, npdBuildTar string) error {
|
|
tmpDirCmd := ins.RunCommand("mktemp -d")
|
|
if tmpDirCmd.SSHError != nil || tmpDirCmd.Code != 0 {
|
|
return fmt.Errorf("error creating temporary directory to hold NPD tarball: %v", tmpDirCmd)
|
|
}
|
|
|
|
tmpDir := strings.TrimSuffix(tmpDirCmd.Stdout, "\n")
|
|
npdTarVMPath := tmpDir + "/npd.tar.gz"
|
|
npdExtractDir := tmpDir + "/npd"
|
|
|
|
err := ins.PushFile(npdBuildTar, npdTarVMPath)
|
|
if err != nil {
|
|
return fmt.Errorf("error pushing local NPD build tarball %s to VM at %s: %v", npdBuildTar, npdTarVMPath, err)
|
|
}
|
|
|
|
mkdirCmd := ins.RunCommand(fmt.Sprintf("mkdir -p %s", npdExtractDir))
|
|
if mkdirCmd.SSHError != nil || mkdirCmd.Code != 0 {
|
|
return fmt.Errorf("error creating directory to extract NPD tarball into: %v", mkdirCmd)
|
|
}
|
|
|
|
extractCmd := ins.RunCommand(fmt.Sprintf("tar -xf %s --directory %s", npdTarVMPath, npdExtractDir))
|
|
if extractCmd.SSHError != nil || extractCmd.Code != 0 {
|
|
return fmt.Errorf("error extracting NPD build tarball: %v", extractCmd)
|
|
}
|
|
|
|
installCmd := ins.RunCommand(fmt.Sprintf("sudo bash %s/test/e2e-install.sh -t %s install", npdExtractDir, npdTarVMPath))
|
|
if installCmd.SSHError != nil || installCmd.Code != 0 {
|
|
return fmt.Errorf("error installing NPD: %v", installCmd)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// FetchNPDMetrics fetches and parses metrics reported by NPD on the provided GCE instance.
|
|
func FetchNPDMetrics(ins gce.Instance) ([]metrics.Float64MetricRepresentation, error) {
|
|
var npdMetrics []metrics.Float64MetricRepresentation
|
|
var err error
|
|
|
|
curlCmd := ins.RunCommand("curl http://localhost:20257/metrics")
|
|
if curlCmd.SSHError != nil || curlCmd.Code != 0 {
|
|
return npdMetrics, fmt.Errorf("error fetching NPD metrics: %v", curlCmd)
|
|
}
|
|
|
|
npdMetrics, err = metrics.ParsePrometheusMetrics(curlCmd.Stdout)
|
|
if err != nil {
|
|
return npdMetrics, fmt.Errorf("error parsing NPD metrics: %v", err)
|
|
}
|
|
|
|
return npdMetrics, nil
|
|
}
|
|
|
|
// FetchNPDMetric fetches and parses a specific metric reported by NPD on the provided GCE instance.
|
|
func FetchNPDMetric(ins gce.Instance, metricName string, labels map[string]string) (float64, error) {
|
|
gotMetrics, err := FetchNPDMetrics(ins)
|
|
if err != nil {
|
|
return 0.0, err
|
|
}
|
|
metric, err := metrics.GetFloat64Metric(gotMetrics, metricName, labels, true)
|
|
if err != nil {
|
|
return 0.0, fmt.Errorf("Failed to find %s metric with label %v: %v.\nHere is all NPD exported metrics: %v",
|
|
metricName, labels, err, gotMetrics)
|
|
}
|
|
return metric.Value, nil
|
|
}
|
|
|
|
// WaitForNPD waits for NPD to become ready by waiting for expected metrics.
|
|
func WaitForNPD(ins gce.Instance, metricNames []string, timeoutSeconds uint) error {
|
|
verifyMetricExist := func() error {
|
|
gotMetrics, err := FetchNPDMetrics(ins)
|
|
if err != nil {
|
|
return fmt.Errorf("Error fetching NPD metrics: %v", err)
|
|
}
|
|
for _, metricName := range metricNames {
|
|
_, err = metrics.GetFloat64Metric(gotMetrics, metricName, map[string]string{}, false)
|
|
if err != nil {
|
|
return fmt.Errorf("Failed to find metric %s: %v.\nHere is all NPD exported metrics: %v",
|
|
metricName, err, gotMetrics)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Wait for NPD to be ready for a maximum of 120 seconds.
|
|
return retry.Do(verifyMetricExist,
|
|
retry.Delay(10*time.Second),
|
|
retry.Attempts(timeoutSeconds/10),
|
|
retry.DelayType(retry.FixedDelay))
|
|
}
|
|
|
|
// SaveTestArtifacts saves debugging data from NPD.
|
|
func SaveTestArtifacts(ins gce.Instance, artifactDirectory string, testID int) []error {
|
|
var errs []error
|
|
|
|
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
|
|
"curl http://localhost:20257/metrics", "node-problem-detector-metrics"); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
|
|
"sudo journalctl -u node-problem-detector.service", "node-problem-detector"); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
if err := saveCommandResultAsArtifact(ins, artifactDirectory, testID,
|
|
"sudo journalctl -k", "kernel-logs"); err != nil {
|
|
errs = append(errs, err)
|
|
}
|
|
|
|
return errs
|
|
}
|
|
|
|
func saveCommandResultAsArtifact(ins gce.Instance, artifactDirectory string, testID int, command string, artifactPrefix string) error {
|
|
artifactPath := path.Join(artifactDirectory, fmt.Sprintf("%v-%02d.txt", artifactPrefix, testID))
|
|
result := ins.RunCommand(command)
|
|
if result.SSHError != nil || result.Code != 0 {
|
|
return fmt.Errorf("Error running command: %v\n", result)
|
|
}
|
|
if err := ioutil.WriteFile(artifactPath, []byte(result.Stdout), 0644); err != nil {
|
|
return fmt.Errorf("Error writing artifact to %v: %v\n", artifactPath, err)
|
|
}
|
|
return nil
|
|
}
|