diff --git a/cmd/nodeproblemdetector/node_problem_detector.go b/cmd/nodeproblemdetector/node_problem_detector.go index 95593dd7..07185386 100644 --- a/cmd/nodeproblemdetector/node_problem_detector.go +++ b/cmd/nodeproblemdetector/node_problem_detector.go @@ -22,8 +22,8 @@ import ( "github.com/golang/glog" "github.com/spf13/pflag" - "k8s.io/node-problem-detector/cmd/options" _ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/problemdaemonplugins" + "k8s.io/node-problem-detector/cmd/options" "k8s.io/node-problem-detector/pkg/exporters/k8sexporter" "k8s.io/node-problem-detector/pkg/exporters/prometheusexporter" "k8s.io/node-problem-detector/pkg/problemdaemon" diff --git a/pkg/problemdetector/problem_detector.go b/pkg/problemdetector/problem_detector.go index 27e12336..0bc809e1 100644 --- a/pkg/problemdetector/problem_detector.go +++ b/pkg/problemdetector/problem_detector.go @@ -47,18 +47,20 @@ func NewProblemDetector(monitors []types.Monitor, exporters []types.Exporter) Pr func (p *problemDetector) Run() error { // Start the log monitors one by one. var chans []<-chan *types.Status + failureCount := 0 for _, m := range p.monitors { ch, err := m.Start() if err != nil { // Do not return error and keep on trying the following config files. glog.Errorf("Failed to start problem daemon %v: %v", m, err) + failureCount += 1 continue } if ch != nil { chans = append(chans, ch) } } - if len(chans) == 0 { + if len(p.monitors) == failureCount { return fmt.Errorf("no problem daemon is successfully setup") } ch := groupChannel(chans) diff --git a/pkg/types/types.go b/pkg/types/types.go index a06150be..b981b282 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -99,12 +99,13 @@ const ( Perm Type = "permanent" ) -// Monitor monitors log and custom plugins and reports node problem condition and event according to -// the rules. +// Monitor monitors the system and reports problems and metrics according to the rules. 
type Monitor interface { - // Start starts the log monitor. + // Start starts the monitor. + // The returned Status channel is used to report problems. If the Monitor reports no problems + // (i.e. it only reports metrics), Start should return a nil channel instead. Start() (<-chan *Status, error) - // Stop stops the log monitor. + // Stop stops the monitor. Stop() }