mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-02-14 18:09:57 +00:00
Support waiting for kube-apiserver to be ready with timeout during NPD startup
This commit is contained in:
@@ -20,6 +20,7 @@ import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"net/url"
|
||||
|
||||
@@ -49,6 +50,12 @@ type NodeProblemDetectorOptions struct {
|
||||
EnableK8sExporter bool
|
||||
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
|
||||
ApiServerOverride string
|
||||
// APIServerWaitTimeout is the timeout on waiting for kube-apiserver to be
|
||||
// ready.
|
||||
APIServerWaitTimeout time.Duration
|
||||
// APIServerWaitInterval is the interval between the checks on the
|
||||
// readiness of kube-apiserver.
|
||||
APIServerWaitInterval time.Duration
|
||||
|
||||
// prometheusExporter options
|
||||
// PrometheusServerPort is the port to bind the Prometheus scrape endpoint. Use 0 to disable.
|
||||
@@ -96,6 +103,8 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.BoolVar(&npdo.EnableK8sExporter, "enable-k8s-exporter", true, "Enables reporting to Kubernetes API server.")
|
||||
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
|
||||
"", "Custom URI used to connect to Kubernetes ApiServer. This is ignored if --enable-k8s-exporter is false.")
|
||||
fs.DurationVar(&npdo.APIServerWaitTimeout, "apiserver-wait-timeout", time.Duration(5)*time.Minute, "The timeout on waiting for kube-apiserver to be ready. This is ignored if --enable-k8s-exporter is false.")
|
||||
fs.DurationVar(&npdo.APIServerWaitInterval, "apiserver-wait-interval", time.Duration(5)*time.Second, "The interval between the checks on the readiness of kube-apiserver. This is ignored if --enable-k8s-exporter is false.")
|
||||
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")
|
||||
fs.StringVar(&npdo.HostnameOverride, "hostname-override",
|
||||
"", "Custom node name used to override hostname")
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
"github.com/golang/glog"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/clock"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
|
||||
"k8s.io/node-problem-detector/cmd/options"
|
||||
"k8s.io/node-problem-detector/pkg/exporters/k8sexporter/condition"
|
||||
@@ -38,13 +39,23 @@ type k8sExporter struct {
|
||||
conditionManager condition.ConditionManager
|
||||
}
|
||||
|
||||
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting, panics if error occurs.
|
||||
// NewExporterOrDie creates a exporter for Kubernetes apiserver exporting,
|
||||
// panics if error occurs.
|
||||
//
|
||||
// Note that this function may be blocked (until a timeout occurs) before
|
||||
// kube-apiserver becomes ready.
|
||||
func NewExporterOrDie(npdo *options.NodeProblemDetectorOptions) types.Exporter {
|
||||
if !npdo.EnableK8sExporter {
|
||||
return nil
|
||||
}
|
||||
|
||||
c := problemclient.NewClientOrDie(npdo)
|
||||
|
||||
glog.Infof("Waiting for kube-apiserver to be ready (timeout %v)...", npdo.APIServerWaitTimeout)
|
||||
if err := waitForAPIServerReadyWithTimeout(c, npdo); err != nil {
|
||||
glog.Warningf("kube-apiserver did not become ready: timed out on waiting for kube-apiserver to return the node object: %v", err)
|
||||
}
|
||||
|
||||
ke := k8sExporter{
|
||||
client: c,
|
||||
conditionManager: condition.NewConditionManager(c, clock.RealClock{}),
|
||||
@@ -91,3 +102,14 @@ func (ke *k8sExporter) startHTTPReporting(npdo *options.NodeProblemDetectorOptio
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func waitForAPIServerReadyWithTimeout(c problemclient.Client, npdo *options.NodeProblemDetectorOptions) error {
|
||||
return wait.PollImmediate(npdo.APIServerWaitInterval, npdo.APIServerWaitTimeout, func() (done bool, err error) {
|
||||
// If NPD can get the node object from kube-apiserver, the server is
|
||||
// ready and the RBAC permission is set correctly.
|
||||
if _, err := c.GetNode(); err == nil {
|
||||
return true, nil
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
"reflect"
|
||||
"sync"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
)
|
||||
|
||||
// FakeProblemClient is a fake problem client for debug.
|
||||
@@ -92,3 +92,7 @@ func (f *FakeProblemClient) GetConditions(types []v1.NodeConditionType) ([]*v1.N
|
||||
// Eventf does nothing now.
|
||||
func (f *FakeProblemClient) Eventf(eventType string, source, reason, messageFmt string, args ...interface{}) {
|
||||
}
|
||||
|
||||
func (f *FakeProblemClient) GetNode() (*v1.Node, error) {
|
||||
return nil, fmt.Errorf("GetNode() not implemented")
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ import (
|
||||
typedcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||
"k8s.io/kubernetes/pkg/api/legacyscheme"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
"k8s.io/apimachinery/pkg/util/clock"
|
||||
@@ -47,6 +47,9 @@ type Client interface {
|
||||
SetConditions(conditions []v1.NodeCondition) error
|
||||
// Eventf reports the event.
|
||||
Eventf(eventType string, source, reason, messageFmt string, args ...interface{})
|
||||
// GetNode returns the Node object of the node on which the
|
||||
// node-problem-detector runs.
|
||||
GetNode() (*v1.Node, error)
|
||||
}
|
||||
|
||||
type nodeProblemClient struct {
|
||||
@@ -79,7 +82,7 @@ func NewClientOrDie(npdo *options.NodeProblemDetectorOptions) Client {
|
||||
}
|
||||
|
||||
func (c *nodeProblemClient) GetConditions(conditionTypes []v1.NodeConditionType) ([]*v1.NodeCondition, error) {
|
||||
node, err := c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
|
||||
node, err := c.GetNode()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -116,6 +119,10 @@ func (c *nodeProblemClient) Eventf(eventType, source, reason, messageFmt string,
|
||||
recorder.Eventf(c.nodeRef, eventType, reason, messageFmt, args...)
|
||||
}
|
||||
|
||||
func (c *nodeProblemClient) GetNode() (*v1.Node, error) {
|
||||
return c.client.Nodes().Get(c.nodeName, metav1.GetOptions{})
|
||||
}
|
||||
|
||||
// generatePatch generates condition patch
|
||||
func generatePatch(conditions []v1.NodeCondition) ([]byte, error) {
|
||||
raw, err := json.Marshal(&conditions)
|
||||
|
||||
Reference in New Issue
Block a user