mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-03-30 07:22:26 +00:00
Merge pull request #213 from hchenxa/hchenxa
update the deployment file and use configmap
This commit is contained in:
@@ -103,9 +103,9 @@ to another registry.
|
||||
|
||||
## Start DaemonSet
|
||||
* Edit [node-problem-detector.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector.yaml) to fit your environment: set the `log` volume to your system log directory (used by SystemLogMonitor). For **Kubernetes <1.9**, use [node-problem-detector-old.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-old.yaml) instead.
|
||||
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`
|
||||
* If needed, you can use [ConfigMap](https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/)
|
||||
to overwrite the `config/`.
|
||||
to overwrite the `config/`. Edit [node-problem-detector-config.yaml](https://github.com/kubernetes/node-problem-detector/blob/master/deployment/node-problem-detector-config.yaml) to fit your environment, and create the ConfigMap with `kubectl create -f node-problem-detector-config.yaml`.
|
||||
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`.
|
||||
|
||||
## Start Standalone
|
||||
To run node-problem-detector standalone, you should set `inClusterConfig` to `false` and
|
||||
|
||||
82
deployment/node-problem-detector-config.yaml
Normal file
82
deployment/node-problem-detector-config.yaml
Normal file
@@ -0,0 +1,82 @@
|
||||
apiVersion: v1
|
||||
data:
|
||||
kernel-monitor.json: |
|
||||
{
|
||||
"plugin": "journald",
|
||||
"pluginConfig": {
|
||||
"source": "kernel"
|
||||
},
|
||||
"logPath": "/var/log/journal",
|
||||
"lookback": "5m",
|
||||
"bufferSize": 10,
|
||||
"source": "kernel-monitor",
|
||||
"conditions": [
|
||||
{
|
||||
"type": "KernelDeadlock",
|
||||
"reason": "KernelHasNoDeadlock",
|
||||
"message": "kernel has no deadlock"
|
||||
}
|
||||
],
|
||||
"rules": [
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "OOMKilling",
|
||||
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB.*"
|
||||
},
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "TaskHung",
|
||||
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
|
||||
},
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "UnregisterNetDevice",
|
||||
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
|
||||
},
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "KernelOops",
|
||||
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
|
||||
},
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "KernelOops",
|
||||
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
|
||||
},
|
||||
{
|
||||
"type": "permanent",
|
||||
"condition": "KernelDeadlock",
|
||||
"reason": "AUFSUmountHung",
|
||||
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
|
||||
},
|
||||
{
|
||||
"type": "permanent",
|
||||
"condition": "KernelDeadlock",
|
||||
"reason": "DockerHung",
|
||||
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
|
||||
}
|
||||
]
|
||||
}
|
||||
docker-monitor.json: |
|
||||
{
|
||||
"plugin": "journald",
|
||||
"pluginConfig": {
|
||||
"source": "docker"
|
||||
},
|
||||
"logPath": "/var/log/journal",
|
||||
"lookback": "5m",
|
||||
"bufferSize": 10,
|
||||
"source": "docker-monitor",
|
||||
"conditions": [],
|
||||
"rules": [
|
||||
{
|
||||
"type": "temporary",
|
||||
"reason": "CorruptDockerImage",
|
||||
"pattern": "Error trying v2 registry: failed to register layer: rename /var/lib/docker/image/(.+) /var/lib/docker/image/(.+): directory not empty.*"
|
||||
}
|
||||
]
|
||||
}
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: node-problem-detector-config
|
||||
namespace: default
|
||||
@@ -16,8 +16,8 @@ spec:
|
||||
command:
|
||||
- /node-problem-detector
|
||||
- --logtostderr
|
||||
- --kernel-monitor=/config/kernel-monitor.json
|
||||
image: k8s.gcr.io/node-problem-detector:v0.2
|
||||
- --system-log-monitors=/config/kernel-monitor.json,/config/docker-monitor.json
|
||||
image: k8s.gcr.io/node-problem-detector:v0.5.0
|
||||
imagePullPolicy: Always
|
||||
securityContext:
|
||||
privileged: true
|
||||
@@ -55,3 +55,8 @@ spec:
|
||||
- name: config
|
||||
configMap:
|
||||
name: node-problem-detector-config
|
||||
items:
|
||||
- key: kernel-monitor.json
|
||||
path: kernel-monitor.json
|
||||
- key: docker-monitor.json
|
||||
path: docker-monitor.json
|
||||
|
||||
Reference in New Issue
Block a user