diff --git a/cmd/healthchecker/options/options.go b/cmd/healthchecker/options/options.go
index f555ca6f..53940182 100644
--- a/cmd/healthchecker/options/options.go
+++ b/cmd/healthchecker/options/options.go
@@ -47,7 +47,7 @@ type HealthCheckerOptions struct {
 // AddFlags adds health checker command line options to pflag.
 func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
 	fs.StringVar(&hco.Component, "component", types.KubeletComponent,
-		"The component to check health for. Supports kubelet, docker and cri")
+		"The component to check health for. Supports kubelet, docker, kube-proxy, and cri")
 	// Deprecated: For backward compatibility on linux environment. Going forward "service" will be used instead of systemd-service
 	if runtime.GOOS == "linux" {
 		fs.MarkDeprecated("systemd-service", "please use --service flag instead")
@@ -73,8 +73,9 @@ func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
 // Returns error if invalid, nil otherwise.
 func (hco *HealthCheckerOptions) IsValid() error {
 	// Make sure the component specified is valid.
-	if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent && hco.Component != types.CRIComponent {
-		return fmt.Errorf("the component specified is not supported. Supported components are : ")
+	if hco.Component != types.KubeletComponent && hco.Component != types.DockerComponent &&
+		hco.Component != types.CRIComponent && hco.Component != types.KubeProxyComponent {
+		return fmt.Errorf("the component specified is not supported. Supported components are : kubelet, docker, cri, kube-proxy")
 	}
 	// Make sure the service is specified if repair is enabled.
 	if hco.EnableRepair && hco.Service == "" {
diff --git a/config/windows-health-checker-kubeproxy.json b/config/windows-health-checker-kubeproxy.json
new file mode 100644
index 00000000..54e2793b
--- /dev/null
+++ b/config/windows-health-checker-kubeproxy.json
@@ -0,0 +1,33 @@
+{
+  "plugin": "custom",
+  "pluginConfig": {
+    "invoke_interval": "10s",
+    "timeout": "3m",
+    "max_output_length": 80,
+    "concurrency": 1
+  },
+  "source": "health-checker",
+  "metricsReporting": true,
+  "conditions": [
+    {
+      "type": "KubeProxyUnhealthy",
+      "reason": "KubeProxyIsHealthy",
+      "message": "kube-proxy on the node is functioning properly"
+    }
+  ],
+  "rules": [
+    {
+      "type": "permanent",
+      "condition": "KubeProxyUnhealthy",
+      "reason": "KubeProxyUnhealthy",
+      "path": "C:\\etc\\kubernetes\\node\\bin\\health-checker.exe",
+      "args": [
+        "--component=kube-proxy",
+        "--enable-repair=true",
+        "--cooldown-time=1m",
+        "--health-check-timeout=10s"
+      ],
+      "timeout": "3m"
+    }
+  ]
+}
diff --git a/pkg/healthchecker/health_checker_windows.go b/pkg/healthchecker/health_checker_windows.go
index 61916ecd..27863df6 100644
--- a/pkg/healthchecker/health_checker_windows.go
+++ b/pkg/healthchecker/health_checker_windows.go
@@ -68,14 +68,9 @@ func getRepairFunc(hco *options.HealthCheckerOptions) func() {
 func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error) {
 	switch hco.Component {
 	case types.KubeletComponent:
-		return func() (bool, error) {
-			httpClient := http.Client{Timeout: hco.HealthCheckTimeout}
-			response, err := httpClient.Get(types.KubeletHealthCheckEndpoint)
-			if err != nil || response.StatusCode != http.StatusOK {
-				return false, nil
-			}
-			return true, nil
-		}
+		return healthCheckEndpointOKFunc(types.KubeletHealthCheckEndpoint, hco.HealthCheckTimeout)
+	case types.KubeProxyComponent:
+		return healthCheckEndpointOKFunc(types.KubeProxyHealthCheckEndpoint, hco.HealthCheckTimeout)
 	case types.DockerComponent:
 		return func() (bool, error) {
 			if _, err := execCommand("docker.exe", "ps"); err != nil {
@@ -94,6 +89,22 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
 	return nil
 }
 
+// healthCheckEndpointOKFunc returns a health check function that reports
+// healthy only when an HTTP GET on endpoint completes within timeout and
+// answers 200 OK. Request failures are reported as unhealthy, not as errors.
+func healthCheckEndpointOKFunc(endpoint string, timeout time.Duration) func() (bool, error) {
+	return func() (bool, error) {
+		httpClient := http.Client{Timeout: timeout}
+		response, err := httpClient.Get(endpoint)
+		if err != nil {
+			return false, nil
+		}
+		// Close the body on every path so the connection is released.
+		defer response.Body.Close()
+		return response.StatusCode == http.StatusOK, nil
+	}
+}
+
 // execCommand creates a new process, executes the command, and returns the (output, error) from command.
 func execCommand(command string, args ...string) (string, error) {
 	cmd := util.Exec(command, args...)
diff --git a/pkg/healthchecker/types/types.go b/pkg/healthchecker/types/types.go
index 65dcaebb..a8585ff8 100644
--- a/pkg/healthchecker/types/types.go
+++ b/pkg/healthchecker/types/types.go
@@ -30,12 +30,14 @@ const (
 	CmdTimeout           = 10 * time.Second
 	LogParsingTimeLayout = "2006-01-02 15:04:05"
 
-	KubeletComponent  = "kubelet"
-	CRIComponent      = "cri"
-	DockerComponent   = "docker"
-	ContainerdService = "containerd"
+	KubeletComponent   = "kubelet"
+	CRIComponent       = "cri"
+	DockerComponent    = "docker"
+	ContainerdService  = "containerd"
+	KubeProxyComponent = "kube-proxy"
 
-	KubeletHealthCheckEndpoint = "http://127.0.0.1:10248/healthz"
+	KubeletHealthCheckEndpoint   = "http://127.0.0.1:10248/healthz"
+	KubeProxyHealthCheckEndpoint = "http://127.0.0.1:10256/healthz"
 
 	LogPatternFlagSeparator = ":"
 )