diff --git a/README.md b/README.md index 47641a7c..5e500e96 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,7 @@ For example, to run without auth, use the following config: http://APISERVER_IP:APISERVER_PORT?inClusterConfig=false ``` Refer [heapster docs](https://github.com/kubernetes/heapster/blob/1e40b0f4b5eeb3f02e11ee22c2b6fda36b6e6ea1/docs/source-configuration.md#kubernetes) for a complete list of available options. +* `-hostname-override`: A custom node name that node-problem-detector uses when updating conditions and emitting events. node-problem-detector takes the node name from `hostname-override` first, then from the `NODE_NAME` environment variable, and finally falls back to `os.Hostname`. ## Build Image Run `make` in the top directory. It will: diff --git a/cmd/node_problem_detector.go b/cmd/node_problem_detector.go index 4203140e..ac061954 100644 --- a/cmd/node_problem_detector.go +++ b/cmd/node_problem_detector.go @@ -26,6 +26,7 @@ import ( "k8s.io/node-problem-detector/pkg/version" "github.com/golang/glog" + "fmt" ) // TODO: Move flags to options directory. @@ -33,6 +34,7 @@ var ( kernelMonitorConfigPath = flag.String("kernel-monitor", "/config/kernel-monitor.json", "The path to the kernel monitor config file") apiServerOverride = flag.String("apiserver-override", "", "Custom URI used to connect to Kubernetes ApiServer") printVersion = flag.Bool("version", false, "Print version information and quit") + hostnameOverride = flag.String("hostname-override", "", "Custom node name used to override hostname") ) func validateCmdParams() { @@ -41,6 +43,36 @@ func validateCmdParams() { } } +func getNodeNameOrDie() string { + var nodeName string + + // Check hostname override first for customized node name. + if *hostnameOverride != "" { + return *hostnameOverride + } + + // Get node name from environment variable NODE_NAME + // By default, assume that the NODE_NAME env should have been set with + // downward api or user defined exported environment variable. 
We prefer it because sometimes + // the hostname returned by os.Hostname is not right because: + // 1. User may override the hostname. + // 2. For some cloud providers, os.Hostname is different from the real hostname. + nodeName = os.Getenv("NODE_NAME") + if nodeName != "" { + return nodeName + } + + // For backward compatibility. If the env is not set, get the hostname + // from os.Hostname(). This may not work for all configurations and + // environments. + nodeName, err := os.Hostname() + if err != nil { + panic(fmt.Sprintf("Failed to get host name: %v", err)) + } + + return nodeName +} + func main() { flag.Parse() validateCmdParams() @@ -50,7 +82,9 @@ func main() { os.Exit(0) } + nodeName := getNodeNameOrDie() + k := kernelmonitor.NewKernelMonitorOrDie(*kernelMonitorConfigPath) - p := problemdetector.NewProblemDetector(k, *apiServerOverride) + p := problemdetector.NewProblemDetector(k, *apiServerOverride, nodeName) p.Run() } diff --git a/pkg/problemclient/problem_client.go b/pkg/problemclient/problem_client.go index 2045c8ad..94add12d 100644 --- a/pkg/problemclient/problem_client.go +++ b/pkg/problemclient/problem_client.go @@ -20,7 +20,6 @@ import ( "encoding/json" "fmt" "net/url" - "os" "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/unversioned" @@ -51,7 +50,7 @@ type nodeProblemClient struct { } // NewClientOrDie creates a new problem client, panics if error occurs. -func NewClientOrDie(apiServerOverride string) Client { +func NewClientOrDie(apiServerOverride, nodeName string) Client { c := &nodeProblemClient{clock: clock.RealClock{}} // we have checked it is a valid URI after command line argument is parsed.:) @@ -64,23 +63,7 @@ func NewClientOrDie(apiServerOverride string) Client { // TODO(random-liu): Set QPS Limit c.client = client.NewOrDie(cfg) - // Get node name from environment variable NODE_NAME - // By default, assume that the NODE_NAME env should have been set with - // downward api. 
We prefer it because sometimes the hostname returned - // by os.Hostname is not right because: - // 1. User may override the hostname. - // 2. For some cloud providers, os.Hostname is different from the real hostname. - c.nodeName = os.Getenv("NODE_NAME") - if c.nodeName == "" { - // For backward compatibility. If the env is not set, get the hostname - // from os.Hostname(). This may not work for all configurations and - // environments. - var err error - c.nodeName, err = os.Hostname() - if err != nil { - panic("empty node name") - } - } + c.nodeName = nodeName c.nodeRef = getNodeRef(c.nodeName) c.recorders = make(map[string]record.EventRecorder) return c diff --git a/pkg/problemdetector/problem_detector.go b/pkg/problemdetector/problem_detector.go index a70d46fe..6d8a709e 100644 --- a/pkg/problemdetector/problem_detector.go +++ b/pkg/problemdetector/problem_detector.go @@ -41,8 +41,8 @@ type problemDetector struct { // NewProblemDetector creates the problem detector. Currently we just directly passed in the problem daemons, but // in the future we may want to let the problem daemons register themselves. -func NewProblemDetector(monitor kernelmonitor.KernelMonitor, apiServerOverride string) ProblemDetector { - client := problemclient.NewClientOrDie(apiServerOverride) +func NewProblemDetector(monitor kernelmonitor.KernelMonitor, apiServerOverride, nodeName string) ProblemDetector { + client := problemclient.NewClientOrDie(apiServerOverride, nodeName) return &problemDetector{ client: client, conditionManager: condition.NewConditionManager(client, clock.RealClock{}),