mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-02-14 09:59:56 +00:00
286 lines
7.6 KiB
Go
286 lines
7.6 KiB
Go
/*
|
|
Copyright 2017 The Kubernetes Authors All rights reserved.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package plugin
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os/exec"
|
|
"strings"
|
|
"sync"
|
|
"syscall"
|
|
"time"
|
|
|
|
"k8s.io/klog/v2"
|
|
|
|
cpmtypes "k8s.io/node-problem-detector/pkg/custompluginmonitor/types"
|
|
"k8s.io/node-problem-detector/pkg/util"
|
|
"k8s.io/node-problem-detector/pkg/util/tomb"
|
|
)
|
|
|
|
// maxCustomPluginBufferBytes is the max bytes that a custom plugin is allowed to
|
|
// send to stdout/stderr. Any bytes exceeding this value will be truncated.
|
|
const maxCustomPluginBufferBytes = 1024 * 4
|
|
|
|
type Plugin struct {
|
|
config cpmtypes.CustomPluginConfig
|
|
syncChan chan struct{}
|
|
resultChan chan cpmtypes.Result
|
|
tomb *tomb.Tomb
|
|
sync.WaitGroup
|
|
}
|
|
|
|
func NewPlugin(config cpmtypes.CustomPluginConfig) *Plugin {
|
|
return &Plugin{
|
|
config: config,
|
|
syncChan: make(chan struct{}, *config.PluginGlobalConfig.Concurrency),
|
|
// A 1000 size channel should be big enough.
|
|
resultChan: make(chan cpmtypes.Result, 1000),
|
|
tomb: tomb.NewTomb(),
|
|
}
|
|
}
|
|
|
|
func (p *Plugin) GetResultChan() <-chan cpmtypes.Result {
|
|
return p.resultChan
|
|
}
|
|
|
|
func (p *Plugin) Run() {
|
|
defer func() {
|
|
klog.Info("Stopping plugin execution")
|
|
close(p.resultChan)
|
|
p.tomb.Done()
|
|
}()
|
|
|
|
runTicker := time.NewTicker(*p.config.PluginGlobalConfig.InvokeInterval)
|
|
defer runTicker.Stop()
|
|
|
|
// on boot run once
|
|
select {
|
|
case <-p.tomb.Stopping():
|
|
return
|
|
default:
|
|
p.runRules()
|
|
}
|
|
|
|
// run every InvokeInterval
|
|
for {
|
|
select {
|
|
case <-runTicker.C:
|
|
p.runRules()
|
|
case <-p.tomb.Stopping():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// run each rule in parallel and wait for them to complete
|
|
func (p *Plugin) runRules() {
|
|
klog.V(3).Info("Start to run custom plugins")
|
|
|
|
for _, rule := range p.config.Rules {
|
|
// syncChan limits concurrent goroutines to configured PluginGlobalConfig.Concurrency value
|
|
p.syncChan <- struct{}{}
|
|
p.Add(1)
|
|
go func(rule *cpmtypes.CustomRule) {
|
|
defer p.Done()
|
|
defer func() {
|
|
<-p.syncChan
|
|
}()
|
|
|
|
start := time.Now()
|
|
exitStatus, message := p.run(*rule)
|
|
level := klog.Level(3)
|
|
if exitStatus != 0 {
|
|
level = klog.Level(2)
|
|
}
|
|
|
|
klog.V(level).Infof("Rule: %+v. Start time: %v. End time: %v. Duration: %v", rule, start, time.Now(), time.Since(start))
|
|
|
|
result := cpmtypes.Result{
|
|
Rule: rule,
|
|
ExitStatus: exitStatus,
|
|
Message: message,
|
|
}
|
|
|
|
// pipes result into resultChan which customPluginMonitor instance generates status from
|
|
p.resultChan <- result
|
|
|
|
// Let the result be logged at a higher verbosity level. If there is a change in status it is logged later.
|
|
klog.V(level).Infof("Add check result %+v for rule %+v", result, rule)
|
|
}(rule)
|
|
}
|
|
|
|
p.Wait()
|
|
klog.V(3).Info("Finish running custom plugins")
|
|
}
|
|
|
|
// readFromReader reads the maxBytes from the reader and drains the rest.
|
|
func readFromReader(reader io.ReadCloser, maxBytes int64) ([]byte, error) {
|
|
limitReader := io.LimitReader(reader, maxBytes)
|
|
data, err := io.ReadAll(limitReader)
|
|
if err != nil {
|
|
return []byte{}, err
|
|
}
|
|
// Drain the reader
|
|
if _, err := io.Copy(io.Discard, reader); err != nil {
|
|
return []byte{}, err
|
|
}
|
|
return data, nil
|
|
}
|
|
|
|
func (p *Plugin) run(rule cpmtypes.CustomRule) (exitStatus cpmtypes.Status, output string) {
|
|
var ctx context.Context
|
|
var cancel context.CancelFunc
|
|
|
|
if rule.Timeout != nil && *rule.Timeout < *p.config.PluginGlobalConfig.Timeout {
|
|
ctx, cancel = context.WithTimeout(context.Background(), *rule.Timeout)
|
|
} else {
|
|
ctx, cancel = context.WithTimeout(context.Background(), *p.config.PluginGlobalConfig.Timeout)
|
|
}
|
|
defer cancel()
|
|
|
|
cmd := util.Exec(ctx, rule.Path, rule.Args...)
|
|
|
|
stdoutPipe, err := cmd.StdoutPipe()
|
|
if err != nil {
|
|
klog.Errorf("Error creating stdout pipe for plugin %q: error - %v", rule.Path, err)
|
|
return cpmtypes.Unknown, "Error creating stdout pipe for plugin. Please check the error log"
|
|
}
|
|
stderrPipe, err := cmd.StderrPipe()
|
|
if err != nil {
|
|
klog.Errorf("Error creating stderr pipe for plugin %q: error - %v", rule.Path, err)
|
|
return cpmtypes.Unknown, "Error creating stderr pipe for plugin. Please check the error log"
|
|
}
|
|
if err := cmd.Start(); err != nil {
|
|
klog.Errorf("Error in starting plugin %q: error - %v", rule.Path, err)
|
|
return cpmtypes.Unknown, "Error in starting plugin. Please check the error log"
|
|
}
|
|
|
|
waitChan := make(chan struct{})
|
|
defer close(waitChan)
|
|
|
|
var m sync.Mutex
|
|
timeout := false
|
|
|
|
go func() {
|
|
select {
|
|
case <-ctx.Done():
|
|
if ctx.Err() == context.Canceled {
|
|
return
|
|
}
|
|
klog.Errorf("Error in running plugin timeout %q", rule.Path)
|
|
if cmd.Process == nil || cmd.Process.Pid == 0 {
|
|
klog.Errorf("Error in cmd.Process check %q", rule.Path)
|
|
break
|
|
}
|
|
|
|
m.Lock()
|
|
timeout = true
|
|
m.Unlock()
|
|
|
|
err := util.Kill(cmd)
|
|
if err != nil {
|
|
klog.Errorf("Error in kill process %d, %v", cmd.Process.Pid, err)
|
|
}
|
|
case <-waitChan:
|
|
return
|
|
}
|
|
}()
|
|
|
|
var (
|
|
wg sync.WaitGroup
|
|
stdout []byte
|
|
stderr []byte
|
|
stdoutErr error
|
|
stderrErr error
|
|
)
|
|
|
|
wg.Add(2)
|
|
go func() {
|
|
defer wg.Done()
|
|
stdout, stdoutErr = readFromReader(stdoutPipe, maxCustomPluginBufferBytes)
|
|
}()
|
|
go func() {
|
|
defer wg.Done()
|
|
stderr, stderrErr = readFromReader(stderrPipe, maxCustomPluginBufferBytes)
|
|
}()
|
|
// This will wait for the reads to complete. If the execution times out, the pipes
|
|
// will be closed and the wait group unblocks.
|
|
wg.Wait()
|
|
|
|
if stdoutErr != nil {
|
|
klog.Errorf("Error reading stdout for plugin %q: error - %v", rule.Path, err)
|
|
return cpmtypes.Unknown, "Error reading stdout for plugin. Please check the error log"
|
|
}
|
|
|
|
if stderrErr != nil {
|
|
klog.Errorf("Error reading stderr for plugin %q: error - %v", rule.Path, err)
|
|
return cpmtypes.Unknown, "Error reading stderr for plugin. Please check the error log"
|
|
}
|
|
|
|
if err := cmd.Wait(); err != nil {
|
|
if _, ok := err.(*exec.ExitError); !ok {
|
|
klog.Errorf("Error in waiting for plugin %q: error - %v. output - %q", rule.Path, err, string(stdout))
|
|
return cpmtypes.Unknown, "Error in waiting for plugin. Please check the error log"
|
|
}
|
|
}
|
|
|
|
// trim suffix useless bytes
|
|
output = string(stdout)
|
|
output = strings.TrimSpace(output)
|
|
|
|
m.Lock()
|
|
cmdKilled := timeout
|
|
m.Unlock()
|
|
|
|
if cmdKilled {
|
|
output = fmt.Sprintf("Timeout when running plugin %q: state - %s. output - %q", rule.Path, cmd.ProcessState.String(), output)
|
|
}
|
|
|
|
// cut at position max_output_length if stdout is longer than max_output_length bytes
|
|
if len(output) > *p.config.PluginGlobalConfig.MaxOutputLength {
|
|
output = output[:*p.config.PluginGlobalConfig.MaxOutputLength]
|
|
}
|
|
|
|
exitCode := cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
|
|
switch exitCode {
|
|
case 0:
|
|
logPluginStderr(rule, string(stderr), 3)
|
|
return cpmtypes.OK, output
|
|
case 1:
|
|
logPluginStderr(rule, string(stderr), 0)
|
|
return cpmtypes.NonOK, output
|
|
default:
|
|
logPluginStderr(rule, string(stderr), 0)
|
|
return cpmtypes.Unknown, output
|
|
}
|
|
}
|
|
|
|
// Stop the plugin.
|
|
func (p *Plugin) Stop() {
|
|
p.tomb.Stop()
|
|
klog.Info("Stop plugin execution")
|
|
}
|
|
|
|
func logPluginStderr(rule cpmtypes.CustomRule, logs string, logLevel klog.Level) {
|
|
if len(logs) != 0 {
|
|
klog.V(logLevel).Infof("Start logs from plugin %+v \n %s", rule, logs)
|
|
klog.V(logLevel).Infof("End logs from plugin %+v", rule)
|
|
}
|
|
}
|