From 16933f8333edaf09b4e58a16290452c51393be60 Mon Sep 17 00:00:00 2001 From: Hui Chen Date: Mon, 22 Oct 2018 18:05:49 +0800 Subject: [PATCH] update the deployment file and use configmap --- README.md | 4 +- deployment/node-problem-detector-config.yaml | 82 ++++++++++++++++++++ deployment/node-problem-detector.yaml | 9 ++- 3 files changed, 91 insertions(+), 4 deletions(-) create mode 100644 deployment/node-problem-detector-config.yaml diff --git a/README.md b/README.md index 75790b1a..8e8e06f1 100644 --- a/README.md +++ b/README.md @@ -103,9 +103,9 @@ to another registry. ## Start DaemonSet * Edit [node-problem-detector.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector.yaml) to fit your environment: Set `log` volume to your system log directory. (Used by SystemLogMonitor). For **kubernetes <1.9** use [node-problem-detector-old.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-old.yaml) -* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml` * If needed, you can use [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/) -to overwrite the `config/`. +to overwrite the `config/`. Edit [node-problem-detector-config.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-config.yaml) to fit your environment, and create the ConfigMap with `kubectl create -f node-problem-detector-config.yaml`. +* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`. 
## Start Standalone To run node-problem-detector standalone, you should set `inClusterConfig` to `false` and diff --git a/deployment/node-problem-detector-config.yaml b/deployment/node-problem-detector-config.yaml new file mode 100644 index 00000000..84ac4e98 --- /dev/null +++ b/deployment/node-problem-detector-config.yaml @@ -0,0 +1,82 @@ +apiVersion: v1 +data: + kernel-monitor.json: | + { + "plugin": "journald", + "pluginConfig": { + "source": "kernel" + }, + "logPath": "/var/log/journal", + "lookback": "5m", + "bufferSize": 10, + "source": "kernel-monitor", + "conditions": [ + { + "type": "KernelDeadlock", + "reason": "KernelHasNoDeadlock", + "message": "kernel has no deadlock" + } + ], + "rules": [ + { + "type": "temporary", + "reason": "OOMKilling", + "pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*" + }, + { + "type": "temporary", + "reason": "TaskHung", + "pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\." + }, + { + "type": "temporary", + "reason": "UnregisterNetDevice", + "pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+" + }, + { + "type": "temporary", + "reason": "KernelOops", + "pattern": "BUG: unable to handle kernel NULL pointer dereference at .*" + }, + { + "type": "temporary", + "reason": "KernelOops", + "pattern": "divide error: 0000 \\[#\\d+\\] SMP" + }, + { + "type": "permanent", + "condition": "KernelDeadlock", + "reason": "AUFSUmountHung", + "pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\." + }, + { + "type": "permanent", + "condition": "KernelDeadlock", + "reason": "DockerHung", + "pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\." 
+ } + ] + } + docker-monitor.json: | + { + "plugin": "journald", + "pluginConfig": { + "source": "docker" + }, + "logPath": "/var/log/journal", + "lookback": "5m", + "bufferSize": 10, + "source": "docker-monitor", + "conditions": [], + "rules": [ + { + "type": "temporary", + "reason": "CorruptDockerImage", + "pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*" + } + ] + } +kind: ConfigMap +metadata: + name: node-problem-detector-config + namespace: default diff --git a/deployment/node-problem-detector.yaml b/deployment/node-problem-detector.yaml index 52634f18..300d5092 100644 --- a/deployment/node-problem-detector.yaml +++ b/deployment/node-problem-detector.yaml @@ -16,8 +16,8 @@ spec: command: - /node-problem-detector - --logtostderr - - --kernel-monitor=/config/kernel-monitor.json - image: k8s.gcr.io/node-problem-detector:v0.2 + - --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json + image: k8s.gcr.io/node-problem-detector:v0.5.0 imagePullPolicy: Always securityContext: privileged: true @@ -55,3 +55,8 @@ spec: - name: config configMap: name: node-problem-detector-config + items: + - key: kernel-monitor.json + path: kernel-monitor.json + - key: docker-monitor.json + path: docker-monitor.json