mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-27 13:57:12 +00:00
Merge pull request #552 from mcshooter/detectKubeProxyServiceProblem
Add healthChecker functionality for kube-proxy service
This commit is contained in:
@@ -47,7 +47,7 @@ type HealthCheckerOptions struct {
|
||||
// AddFlags adds health checker command line options to pflag.
|
||||
func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.StringVar(&hco.Component, "component", types.KubeletComponent,
|
||||
"The component to check health for. Supports kubelet, docker and cri")
|
||||
"The component to check health for. Supports kubelet, docker, kube-proxy, and cri")
|
||||
// Deprecated: kept for backward compatibility in Linux environments. Going forward, "service" will be used instead of "systemd-service".
|
||||
if runtime.GOOS == "linux" {
|
||||
fs.MarkDeprecated("systemd-service", "please use --service flag instead")
|
||||
@@ -73,8 +73,9 @@ func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
|
||||
// Returns error if invalid, nil otherwise.
|
||||
func (hco *HealthCheckerOptions) IsValid() error {
|
||||
// Make sure the component specified is valid.
|
||||
if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent && hco.Component != types.CRIComponent {
|
||||
return fmt.Errorf("the component specified is not supported. Supported components are : <kubelet/docker/cri>")
|
||||
if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent &&
|
||||
hco.Component != types.CRIComponent && hco.Component != types.KubeProxyComponent {
|
||||
return fmt.Errorf("the component specified is not supported. Supported components are : <kubelet/docker/cri/kube-proxy>")
|
||||
}
|
||||
// Make sure the service is specified if repair is enabled.
|
||||
if hco.EnableRepair && hco.Service == "" {
|
||||
|
||||
34
config/windows-health-checker-kubeproxy.json
Normal file
34
config/windows-health-checker-kubeproxy.json
Normal file
@@ -0,0 +1,34 @@
|
||||
{
|
||||
"plugin": "custom",
|
||||
"pluginConfig": {
|
||||
"invoke_interval": "10s",
|
||||
"timeout": "3m",
|
||||
"max_output_length": 80,
|
||||
"concurrency": 1
|
||||
},
|
||||
"source": "health-checker",
|
||||
"metricsReporting": true,
|
||||
"conditions": [
|
||||
{
|
||||
"type": "KubeProxyUnhealthy",
|
||||
"reason": "KubeProxyIsHealthy",
|
||||
"message": "kube-proxy on the node is functioning properly"
|
||||
}
|
||||
],
|
||||
"rules": [
|
||||
{
|
||||
"type": "permanent",
|
||||
"condition": "KubeProxyUnhealthy",
|
||||
"reason": "KubeProxyUnhealthy",
|
||||
"path": "C:\\etc\\kubernetes\\node\\bin\\health-checker.exe",
|
||||
"args": [
|
||||
"--component=kube-proxy",
|
||||
"--enable-repair=true",
|
||||
"--cooldown-time=1m",
|
||||
"--health-check-timeout=10s"
|
||||
],
|
||||
"timeout": "3m"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -68,14 +68,9 @@ func getRepairFunc(hco *options.HealthCheckerOptions) func() {
|
||||
func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error) {
|
||||
switch hco.Component {
|
||||
case types.KubeletComponent:
|
||||
return func() (bool, error) {
|
||||
httpClient := http.Client{Timeout: hco.HealthCheckTimeout}
|
||||
response, err := httpClient.Get(types.KubeletHealthCheckEndpoint)
|
||||
if err != nil || response.StatusCode != http.StatusOK {
|
||||
return false, nil
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
return healthCheckEndpointOKFunc(types.KubeletHealthCheckEndpoint, hco.HealthCheckTimeout)
|
||||
case types.KubeProxyComponent:
|
||||
return healthCheckEndpointOKFunc(types.KubeProxyHealthCheckEndpoint, hco.HealthCheckTimeout)
|
||||
case types.DockerComponent:
|
||||
return func() (bool, error) {
|
||||
if _, err := execCommand("docker.exe", "ps"); err != nil {
|
||||
@@ -94,6 +89,18 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
|
||||
return nil
|
||||
}
|
||||
|
||||
// healthCheckEndpointOKFunc returns a function that checks the status of an HTTP endpoint.
//
// The returned function issues a GET request to endpoint using a client whose
// overall timeout is timeout. It reports true only when the request succeeds
// and the response status is 200 OK. A request error or any other status is
// reported as (false, nil): the endpoint is treated as unhealthy and the
// underlying error is deliberately not propagated to the caller.
func healthCheckEndpointOKFunc(endpoint string, timeout time.Duration) func() (bool, error) {
	return func() (bool, error) {
		httpClient := http.Client{Timeout: timeout}
		response, err := httpClient.Get(endpoint)
		if err != nil {
			// On error the response carries no body that needs closing.
			return false, nil
		}
		// The body must be closed on every path (healthy or not); otherwise
		// each health check leaks the underlying connection.
		defer response.Body.Close()
		return response.StatusCode == http.StatusOK, nil
	}
}
|
||||
|
||||
// execCommand creates a new process, executes the command, and returns the (output, error) from command.
|
||||
func execCommand(command string, args ...string) (string, error) {
|
||||
cmd := util.Exec(command, args...)
|
||||
|
||||
@@ -30,12 +30,14 @@ const (
|
||||
CmdTimeout = 10 * time.Second
|
||||
LogParsingTimeLayout = "2006-01-02 15:04:05"
|
||||
|
||||
KubeletComponent = "kubelet"
|
||||
CRIComponent = "cri"
|
||||
DockerComponent = "docker"
|
||||
ContainerdService = "containerd"
|
||||
KubeletComponent = "kubelet"
|
||||
CRIComponent = "cri"
|
||||
DockerComponent = "docker"
|
||||
ContainerdService = "containerd"
|
||||
KubeProxyComponent = "kube-proxy"
|
||||
|
||||
KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
|
||||
KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
|
||||
KubeProxyHealthCheckEndpoint = "http://127.0.0.1:10256/healthz"
|
||||
|
||||
LogPatternFlagSeparator = ":"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user