mirror of
https://github.com/kubernetes/node-problem-detector.git
synced 2026-02-14 18:09:57 +00:00
Add problem maker to simulate problems for e2e test
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,5 +1,6 @@
|
||||
/bin/
|
||||
/Dockerfile
|
||||
/test/bin/
|
||||
/*.tar.gz
|
||||
ci.env
|
||||
pr.env
|
||||
|
||||
12
Makefile
12
Makefile
@@ -103,6 +103,13 @@ endif
|
||||
-tags "$(BUILD_TAGS)" \
|
||||
./cmd/nodeproblemdetector
|
||||
|
||||
./test/bin/problem-maker: $(PKG_SOURCES)
|
||||
CGO_ENABLED=$(CGO_ENABLED) GOOS=linux GO111MODULE=on go build \
|
||||
-mod vendor \
|
||||
-o test/bin/problem-maker \
|
||||
-tags "$(BUILD_TAGS)" \
|
||||
./test/e2e/problemmaker/problem_maker.go
|
||||
|
||||
Dockerfile: Dockerfile.in
|
||||
sed -e 's|@BASEIMAGE@|$(BASEIMAGE)|g' $< >$@
|
||||
ifneq ($(ENABLE_JOURNALD), 1)
|
||||
@@ -129,8 +136,8 @@ build-binaries: ./bin/node-problem-detector ./bin/log-counter
|
||||
build-container: build-binaries Dockerfile
|
||||
docker build -t $(IMAGE) .
|
||||
|
||||
build-tar: ./bin/node-problem-detector ./bin/log-counter
|
||||
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh
|
||||
build-tar: ./bin/node-problem-detector ./bin/log-counter ./test/bin/problem-maker
|
||||
tar -zcvf $(TARBALL) bin/ config/ test/e2e-install.sh test/bin/problem-maker
|
||||
sha1sum $(TARBALL)
|
||||
md5sum $(TARBALL)
|
||||
|
||||
@@ -156,4 +163,5 @@ push: push-container push-tar
|
||||
clean:
|
||||
rm -f bin/log-counter
|
||||
rm -f bin/node-problem-detector
|
||||
rm -f test/bin/problem-maker
|
||||
rm -f node-problem-detector-*.tar.gz
|
||||
|
||||
20
README.md
20
README.md
@@ -249,6 +249,26 @@ Kubernetes cluster to a healthy state. The following remedy systems exist:
|
||||
[this issue](https://github.com/kubernetes/node-problem-detector/issues/199)
|
||||
for an example production use case for Draino.
|
||||
|
||||
# Testing
|
||||
|
||||
NPD is tested via unit tests, [NPD e2e tests](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md), Kubernetes e2e tests and Kubernetes nodes e2e tests. Prow handles the [pre-submit tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-presubmits.yaml) and [CI tests](https://github.com/kubernetes/test-infra/blob/master/config/jobs/kubernetes/node-problem-detector/node-problem-detector-ci.yaml).
|
||||
|
||||
CI test results can be found below:
|
||||
1. [Unit tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-test)
|
||||
2. [NPD e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-test)
|
||||
3. [Kubernetes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-kubernetes-gce-gci)
|
||||
4. [Kubernetes nodes e2e tests](https://k8s-testgrid.appspot.com/sig-node-node-problem-detector#ci-npd-e2e-node)
|
||||
|
||||
## Running tests
|
||||
|
||||
Unit tests are run via `make test`.
|
||||
|
||||
See [NPD e2e test documentation](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/README.md) for how to set up and run NPD e2e tests.
|
||||
|
||||
## Problem Maker
|
||||
|
||||
[Problem maker](https://github.com/kubernetes/node-problem-detector/blob/master/test/e2e/problemmaker/README.md) is a program used in NPD e2e tests to generate/simulate node problems. It is ONLY intended to be used by NPD e2e tests. Please do NOT run it on your workstation, as it could cause real node problems.
|
||||
|
||||
# Docs
|
||||
|
||||
* [Custom plugin monitor](docs/custom_plugin_monitor.md)
|
||||
|
||||
20
test/e2e/problemmaker/README.md
Normal file
20
test/e2e/problemmaker/README.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# Problem Maker
|
||||
|
||||
Problem maker is a program to generate/simulate various kinds of node problems. It is used in NPD e2e tests to verify NPD's behavior when node problems happen:
|
||||
1. NPD should report the problems correctly.
|
||||
2. NPD should survive the problems as much as possible.
|
||||
|
||||
**Problem maker is NOT intended to be used in any other places. And please do NOT run this directly on your workstation, as it can cause real OS failures.** For example, running `sudo problem-maker --problem Ext4FilesystemError` will cause an ext4 file system error, which could result in the boot disk being mounted as readonly, requiring a reboot to recover from the failure.
|
||||
|
||||
You shouldn't need to run it anyway. If you want to test NPD, it's best to run the NPD e2e tests.
|
||||
|
||||
## Developing/Testing Problem Maker
|
||||
|
||||
If you want to enrich the problems that problem maker can generate, you may want to run it to test the behavior. In that case, the recommended way is to run it in a VM:
|
||||
```
|
||||
sudo problem-maker --help
|
||||
sudo problem-maker --problem DockerHung
|
||||
sudo problem-maker --problem Ext4FilesystemError
|
||||
```
|
||||
|
||||
Problem maker tries to generate real node problems, and can cause real node failures. When we do not have a good way to generate a problem, we instruct problem maker to simulate it by injecting logs. In most (if not all) scenarios, generating real problems is preferred over injecting logs. This is because log patterns can change when the kernel is upgraded, and the NPD e2e tests are supposed to verify whether NPD can correctly understand the tested kernel.
|
||||
45
test/e2e/problemmaker/makers/docker.go
Normal file
45
test/e2e/problemmaker/makers/docker.go
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package makers
|
||||
|
||||
func init() {
|
||||
ProblemGenerators["DockerHung"] = makeDockerHung
|
||||
}
|
||||
|
||||
// makeDockerHung simulates a hung docker task: it hands a canned kernel
// "task blocked for more than 120 seconds" report, including its call trace,
// to writeKernelMessageOrDie.
func makeDockerHung() {
|
||||
const dockerHungPattern = `INFO: task docker:20744 blocked for more than 120 seconds.
|
||||
Tainted: G C 3.16.0-4-amd64 #1
|
||||
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
|
||||
docker D ffff8801a8f2b078 0 20744 1 0x00000000
|
||||
ffff8801a8f2ac20 0000000000000082 0000000000012f00 ffff880057a17fd8
|
||||
0000000000012f00 ffff8801a8f2ac20 ffffffff818bb4a0 ffff880057a17d80
|
||||
ffffffff818bb4a4 ffff8801a8f2ac20 00000000ffffffff ffffffff818bb4a8
|
||||
Call Trace:
|
||||
[<ffffffff81510915>] ? schedule_preempt_disabled+0x25/0x70
|
||||
[<ffffffff815123c3>] ? __mutex_lock_slowpath+0xd3/0x1c0
|
||||
[<ffffffff815124cb>] ? mutex_lock+0x1b/0x2a
|
||||
[<ffffffff814175bc>] ? copy_net_ns+0x6c/0x130
|
||||
[<ffffffff8108bdf4>] ? create_new_namespaces+0xf4/0x180
|
||||
[<ffffffff8108beec>] ? copy_namespaces+0x6c/0x90
|
||||
[<ffffffff810654f6>] ? copy_process.part.25+0x966/0x1c30
|
||||
[<ffffffff81066991>] ? do_fork+0xe1/0x390
|
||||
[<ffffffff811c442c>] ? __alloc_fd+0x7c/0x120
|
||||
[<ffffffff81514079>] ? stub_clone+0x69/0x90
|
||||
[<ffffffff81513d0d>] ? system_call_fast_compare_end+0x10/0x15`
|
||||
|
||||
writeKernelMessageOrDie(dockerHungPattern)
|
||||
}
|
||||
37
test/e2e/problemmaker/makers/filesystem.go
Normal file
37
test/e2e/problemmaker/makers/filesystem.go
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package makers
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
func init() {
|
||||
ProblemGenerators["Ext4FilesystemError"] = makeFilesystemError
|
||||
}
|
||||
|
||||
const ext4ErrorTrigger = "/sys/fs/ext4/sda1/trigger_fs_error"
|
||||
|
||||
func makeFilesystemError() {
|
||||
msg := []byte("fake filesystem error from problem-maker")
|
||||
err := ioutil.WriteFile(ext4ErrorTrigger, msg, 0200)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed writting log to %q: %v", ext4ErrorTrigger, err)
|
||||
}
|
||||
}
|
||||
46
test/e2e/problemmaker/makers/kernel.go
Normal file
46
test/e2e/problemmaker/makers/kernel.go
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package makers
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
)
|
||||
|
||||
func init() {
|
||||
ProblemGenerators["OOMKill"] = makeOOMKill
|
||||
}
|
||||
|
||||
const kmsgPath = "/dev/kmsg"
|
||||
|
||||
func makeOOMKill() {
|
||||
const oomKillPattern = `Memory cgroup out of memory: Kill process 1012 (heapster) score 1035 or sacrifice child
|
||||
Killed process 1012 (heapster) total-vm:327128kB, anon-rss:306328kB, file-rss:11132kB, shmem-rss:12345kB`
|
||||
|
||||
writeKernelMessageOrDie(oomKillPattern)
|
||||
}
|
||||
|
||||
func writeKernelMessageOrDie(msg string) {
|
||||
for _, line := range strings.Split(msg, "\n") {
|
||||
err := ioutil.WriteFile(kmsgPath, []byte(line), 0644)
|
||||
if err != nil {
|
||||
glog.Fatalf("Failed writting to %q: %v", kmsgPath, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
27
test/e2e/problemmaker/makers/register.go
Normal file
27
test/e2e/problemmaker/makers/register.go
Normal file
@@ -0,0 +1,27 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package makers
|
||||
|
||||
// ProblemGenerators maps a problem type name to the function that generates
// that problem. Generators add themselves to it from their init functions.
var ProblemGenerators = make(map[string]func())

// GetProblemTypes returns the names of all registered problem types.
//
// The order of the returned names is unspecified because it follows map
// iteration order. The result is never nil; it is empty when nothing has been
// registered.
func GetProblemTypes() []string {
	// The final length is known up front, so allocate once instead of
	// growing the slice on every append.
	problems := make([]string, 0, len(ProblemGenerators))
	for problem := range ProblemGenerators {
		problems = append(problems, problem)
	}
	return problems
}
|
||||
96
test/e2e/problemmaker/problem_maker.go
Normal file
96
test/e2e/problemmaker/problem_maker.go
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/spf13/pflag"
|
||||
|
||||
"k8s.io/node-problem-detector/test/e2e/problemmaker/makers"
|
||||
)
|
||||
|
||||
func init() {
|
||||
pflag.CommandLine.AddGoFlagSet(flag.CommandLine)
|
||||
}
|
||||
|
||||
type options struct {
|
||||
// Command line options. See flag descriptions for the description
|
||||
Rate float32
|
||||
Duration time.Duration
|
||||
Problem string
|
||||
}
|
||||
|
||||
// AddFlags adds log counter command line options to pflag.
|
||||
func (o *options) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.Float32Var(&o.Rate, "rate", 1.0,
|
||||
"Number of times the problem should be generated per second")
|
||||
fs.DurationVar(&o.Duration, "duration", time.Duration(1)*time.Second,
|
||||
"Duration for problem maker to keep generating problems")
|
||||
|
||||
problems := makers.GetProblemTypes()
|
||||
fs.StringVar(&o.Problem, "problem", "",
|
||||
fmt.Sprintf("The type of problem to be generated. Supported types: %q",
|
||||
strings.Join(problems, ", ")))
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Set glog flag so that it does not log to files.
|
||||
if err := flag.Set("logtostderr", "true"); err != nil {
|
||||
fmt.Printf("Failed to set logtostderr=true: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
o := options{}
|
||||
o.AddFlags(pflag.CommandLine)
|
||||
pflag.Parse()
|
||||
|
||||
if o.Problem == "" {
|
||||
glog.Fatalf("Please specify the type of problem to make using the --problem argument.")
|
||||
}
|
||||
|
||||
problemGenerator, ok := makers.ProblemGenerators[o.Problem]
|
||||
if !ok {
|
||||
glog.Fatalf("Expected to see a problem type of one of %q, but got %q.",
|
||||
makers.GetProblemTypes(), o.Problem)
|
||||
}
|
||||
|
||||
periodMilli := int(1000.0 / o.Rate)
|
||||
ticker := time.NewTicker(time.Duration(periodMilli) * time.Millisecond)
|
||||
defer ticker.Stop()
|
||||
|
||||
done := make(chan bool)
|
||||
go func() {
|
||||
time.Sleep(o.Duration)
|
||||
done <- true
|
||||
}()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
return
|
||||
case <-ticker.C:
|
||||
glog.Infof("Generating problem: %q", o.Problem)
|
||||
problemGenerator()
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user