diff --git a/.github/kind-cluster-1.29.yaml b/.github/kind-cluster-current.yaml similarity index 100% rename from .github/kind-cluster-1.29.yaml rename to .github/kind-cluster-current.yaml diff --git a/.github/kind-cluster-1.30.yaml b/.github/kind-cluster-next.yaml similarity index 100% rename from .github/kind-cluster-1.30.yaml rename to .github/kind-cluster-next.yaml diff --git a/.github/kind-cluster-1.28.yaml b/.github/kind-cluster-previous.yaml similarity index 100% rename from .github/kind-cluster-1.28.yaml rename to .github/kind-cluster-previous.yaml diff --git a/.github/workflows/on-pr.yaml b/.github/workflows/on-pr.yaml index 0c6346c..db73960 100644 --- a/.github/workflows/on-pr.yaml +++ b/.github/workflows/on-pr.yaml @@ -4,8 +4,8 @@ on: push: jobs: - pr-gotest: - name: Run go tests + pr-short-tests: + name: Run short go tests runs-on: ubuntu-latest steps: - name: checkout @@ -16,39 +16,13 @@ jobs: go-version-file: 'go.mod' check-latest: true - name: run tests - run: go test -json ./... > test.json + run: make test - name: Annotate tests if: always() uses: guyarb/golang-test-annoations@v0.8.0 with: test-results: test.json - pr-shellcheck: - name: Lint bash code with shellcheck - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Run ShellCheck - uses: bewuethr/shellcheck-action@v2 - - pr-lint-code: - name: Lint golang code - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Ensure go version - uses: actions/setup-go@v5 - with: - go-version-file: 'go.mod' - check-latest: true - - name: Lint cmd folder - uses: Jerome1337/golint-action@v1.0.3 - with: - golint-path: './cmd/...' - - name: Lint pkg folder - uses: Jerome1337/golint-action@v1.0.3 - with: - golint-path: './pkg/...' 
pr-check-docs-links: name: Check docs for incorrect links @@ -104,16 +78,20 @@ jobs: # (compared to helm charts, manifests cannot easily template changes based on versions) # Helm charts are _trailing_ releases, while manifests are done during development. # This test uses the "command" reboot-method. - e2e-manifests-command: - name: End-to-End test with kured with code and manifests from HEAD (command) + e2e-manifests: + name: End-to-End test with kured with code and manifests from HEAD runs-on: ubuntu-latest strategy: fail-fast: false matrix: - kubernetes: - - "1.28" - - "1.29" - - "1.30" + testname: + - "TestE2EWithCommand" + - "TestE2EWithSignal" + - "TestE2EConcurrentWithCommand" + kubernetes_version: + - "previous" + - "current" + - "next" steps: - uses: actions/checkout@v4 - name: Ensure go version @@ -130,75 +108,24 @@ jobs: - name: Find current tag version run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT id: tags - - name: Build artifacts - run: | - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest - - - name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." 
on gh actions - run: | - sudo bash << EOF - cp /etc/docker/daemon.json /etc/docker/daemon.json.old - echo '{}' > /etc/docker/daemon.json - systemctl restart docker || journalctl --no-pager -n 500 - systemctl status docker - EOF - - # Default name for helm/kind-action kind clusters is "chart-testing" - - name: Create kind cluster with 3 nodes + - name: Install kind uses: helm/kind-action@v1.10.0 with: - config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml - version: v0.14.0 - - - name: Preload previously built images onto kind cluster - run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing - - - name: Do not wait for an hour before detecting the rebootSentinel - run: | - sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml - - - name: Install kured with kubectl - run: | - kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml - - - name: Ensure kured is ready - uses: nick-invision/retry@v3.0.0 - with: - timeout_minutes: 10 - max_attempts: 10 - retry_wait_seconds: 60 - # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size - command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'" - - - name: Create reboot sentinel files - run: | - ./tests/kind/create-reboot-sentinels.sh - - - name: Follow reboot until success - env: - DEBUG: true - run: | - ./tests/kind/follow-coordinated-reboot.sh + install_only: true + version: v0.22.0 + - name: Run specific e2e tests + run: make e2e-test ARGS="-run ^${{ matrix.testname }}/${{ matrix.kubernetes_version }}" - # This ensures the latest code works with the manifests built from tree. 
- # It is useful for two things: - # - Test manifests changes (obviously), ensuring they don't break existing clusters - # - Ensure manifests work with the latest versions even with no manifest change - # (compared to helm charts, manifests cannot easily template changes based on versions) - # Helm charts are _trailing_ releases, while manifests are done during development. - # This test uses the "signal" reboot-method. - e2e-manifests-signal: - name: End-to-End test with kured with code and manifests from HEAD (signal) + e2e-tests-singleversion: + name: End-to-End test targetting a single version of kubernetes runs-on: ubuntu-latest strategy: fail-fast: false matrix: - kubernetes: - - "1.28" - - "1.29" - - "1.30" + testname: + - "TestCordonningIsKept/concurrency1" + - "TestCordonningIsKept/concurrency2" steps: - uses: actions/checkout@v4 - name: Ensure go version @@ -215,140 +142,10 @@ jobs: - name: Find current tag version run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT id: tags - - name: Build artifacts - run: | - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest - - - name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." 
on gh actions - run: | - sudo bash << EOF - cp /etc/docker/daemon.json /etc/docker/daemon.json.old - echo '{}' > /etc/docker/daemon.json - systemctl restart docker || journalctl --no-pager -n 500 - systemctl status docker - EOF - - # Default name for helm/kind-action kind clusters is "chart-testing" - - name: Create kind cluster with 3 nodes + - name: Install kind uses: helm/kind-action@v1.10.0 with: - config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml - version: v0.14.0 - - - name: Preload previously built images onto kind cluster - run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing - - - name: Do not wait for an hour before detecting the rebootSentinel - run: | - sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds-signal.yaml - - - name: Install kured with kubectl - run: | - kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds-signal.yaml - - - name: Ensure kured is ready - uses: nick-invision/retry@v3.0.0 - with: - timeout_minutes: 10 - max_attempts: 10 - retry_wait_seconds: 60 - # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size - command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'" - - - name: Create reboot sentinel files - run: | - ./tests/kind/create-reboot-sentinels.sh - - - name: Follow reboot until success - env: - DEBUG: true - run: | - ./tests/kind/follow-coordinated-reboot.sh - - - - # This ensures the latest code works with the manifests built from tree. - # It is useful for two things: - # - Test manifests changes (obviously), ensuring they don't break existing clusters - # - Ensure manifests work with the latest versions even with no manifest change - # (compared to helm charts, manifests cannot easily template changes based on versions) - # Helm charts are _trailing_ releases, while manifests are done during development. 
- # Concurrency = 2 - e2e-manifests-concurent: - name: End-to-End test with kured with code and manifests from HEAD (concurrent) - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - kubernetes: - - "1.28" - - "1.29" - - "1.30" - steps: - - uses: actions/checkout@v4 - - name: Ensure go version - uses: actions/setup-go@v5 - with: - go-version-file: 'go.mod' - check-latest: true - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - name: Setup GoReleaser - run: make bootstrap-tools - - name: Find current tag version - run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT - id: tags - - name: Build artifacts - run: | - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make image - VERSION="${{ steps.tags.outputs.sha_short }}" DH_ORG="${{ github.repository_owner }}" make manifest - - - name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." 
on gh actions - run: | - sudo bash << EOF - cp /etc/docker/daemon.json /etc/docker/daemon.json.old - echo '{}' > /etc/docker/daemon.json - systemctl restart docker || journalctl --no-pager -n 500 - systemctl status docker - EOF - - # Default name for helm/kind-action kind clusters is "chart-testing" - - name: Create kind cluster with 3 nodes - uses: helm/kind-action@v1.10.0 - with: - config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml - version: v0.14.0 - - - name: Preload previously built images onto kind cluster - run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing - - - name: Do not wait for an hour before detecting the rebootSentinel - run: | - sed -i 's/#\(.*\)--period=1h/\1--period=15s/g' kured-ds.yaml - sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml - - - name: Install kured with kubectl - run: | - kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml - - - name: Ensure kured is ready - uses: nick-invision/retry@v3.0.0 - with: - timeout_minutes: 10 - max_attempts: 10 - retry_wait_seconds: 60 - # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size - command: "kubectl get ds -n kube-system kured | grep -E 'kured.*3.*3.*3.*3.*3'" - - - name: Create reboot sentinel files - run: | - ./tests/kind/create-reboot-sentinels.sh - - - name: Follow reboot until success - env: - DEBUG: true - run: | - ./tests/kind/follow-coordinated-reboot.sh + install_only: true + version: v0.22.0 + - name: Run specific e2e tests + run: make e2e-test ARGS="-run ^${{ matrix.testname }}" \ No newline at end of file diff --git a/.github/workflows/periodics-daily.yaml b/.github/workflows/periodics-daily.yaml index 45b47c7..52d67bc 100644 --- a/.github/workflows/periodics-daily.yaml +++ b/.github/workflows/periodics-daily.yaml @@ -12,7 +12,7 @@ jobs: - name: checkout uses: actions/checkout@v4 - name: run tests - run: go test -json ./... 
> test.json + run: make test - name: Annotate tests if: always() uses: guyarb/golang-test-annoations@v0.8.0 diff --git a/.gitignore b/.gitignore index 0f707f6..e8cd29e 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ vendor build dist .tmp +test.json +tests/kind/testfiles/*.yaml +hack/bin/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2cf2a25..6263e61 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,7 +44,7 @@ All Kured repositories are kept under . To find t ### Kured code - Kured's main code can be found in the [`cmd`](cmd) and [`pkg`](pkg) directories -- Its smoke tests are in the [`tests`](tests) directory +- Its e2e tests are in the [`tests`](tests) directory - We use [GoReleaser to build](.goreleaser.yml). - Every PR and tagged release is tested by [Kind in GitHub workflows](.github/workflows). @@ -75,12 +75,19 @@ efbb0c3: Document version compatibility in release notes Search the git log for inspiration for your cases. -Please update our `.github/workflows` with the new k8s images, starting by -the creation of a `.github/kind-cluster-.yaml`, then updating -our workflows with the new versions. +Please update our `.github/workflows` with the new k8s images. -Once you updated everything, make sure you update the support matrix in -the [installation docs](https://kured.dev/docs/installation/) as well. +For that, run the following: + +`cp .github/kind-cluster-current.yaml .github/kind-cluster-previous.yaml` +`cp .github/kind-cluster-next.yaml .github/kind-cluster-current.yaml` + +Then edit `.github/kind-cluster-next.yaml` to point to the new version. + +This will make the full test matrix updated (the CI and the test code). + +Once your code passes all tests, update the support matrix in +the [installation docs](https://kured.dev/docs/installation/). ### Updating other dependencies @@ -147,6 +154,13 @@ To test your code manually, follow the section Manual testing. 
## Manual (release) testing +### Quick Golang code testing + +Please run `make test` to run only the basic tests. It gives a good +idea of the code behaviour. + +### Manual functional testing + Before `kured` is released, we want to make sure it still works fine on the previous, current and next minor version of Kubernetes (with respect to the `client-go` & `kubectl` dependencies in use). For local testing e.g. @@ -162,11 +176,7 @@ results, if you login to a node and run: sudo touch /var/run/reboot-required ``` -### Example of golang testing - -Please run `make test`. You should have `golint` installed. - -### Example of testing with `minikube` +### Example of functional testing with `minikube` A test-run with `minikube` could look like this: @@ -214,6 +224,15 @@ kind create cluster --config .github/kind-cluster-.yaml ``` +### Example of testing with `kind` and `make` + +A test-run with `kind` and `make` can be done with the following command: + +```cli +# Build kured:dev image, build manifests, and run the "long" go tests +make e2e-test +``` + ## Publishing a new kured release ### Prepare Documentation diff --git a/Makefile b/Makefile index 378032d..ddba7a6 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,25 @@ .DEFAULT: all .PHONY: all clean image minikube-publish manifest test kured-all -TEMPDIR=./.tmp -GORELEASER_CMD=$(TEMPDIR)/goreleaser +HACKDIR=./hack/bin +GORELEASER_CMD=$(HACKDIR)/goreleaser DH_ORG ?= kubereboot VERSION=$(shell git rev-parse --short HEAD) SUDO=$(shell docker info >/dev/null 2>&1 || echo "sudo -E") all: image -$(TEMPDIR): - mkdir -p $(TEMPDIR) +$(HACKDIR): + mkdir -p $(HACKDIR) .PHONY: bootstrap-tools -bootstrap-tools: $(TEMPDIR) - VERSION=v1.24.0 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh - curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v1.0.1 - curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.2.3/cosign-linux-amd64 -o .tmp/cosign - chmod +x .tmp/goreleaser 
.tmp/cosign .tmp/syft +bootstrap-tools: $(HACKDIR) + command -v $(HACKDIR)/goreleaser || VERSION=v1.24.0 TMPDIR=$(HACKDIR) bash hack/installers/goreleaser-install.sh + command -v $(HACKDIR)/syft || curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b $(HACKDIR) v1.0.1 + command -v $(HACKDIR)/cosign || curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.2.3/cosign-linux-amd64 -o $(HACKDIR)/cosign + command -v $(HACKDIR)/shellcheck || (curl -sSfL https://github.com/koalaman/shellcheck/releases/download/stable/shellcheck-stable.linux.x86_64.tar.xz | tar -J -v -x shellcheck-stable/shellcheck && mv shellcheck-stable/shellcheck $(HACKDIR)/shellcheck && rmdir shellcheck-stable) + chmod +x $(HACKDIR)/goreleaser $(HACKDIR)/cosign $(HACKDIR)/syft $(HACKDIR)/shellcheck + # go install honnef.co/go/tools/cmd/staticcheck@latest clean: rm -rf ./dist @@ -35,7 +37,23 @@ kured-release-snapshot: $(GORELEASER_CMD) release --clean --snapshot image: kured - $(SUDO) docker buildx build --load -t ghcr.io/$(DH_ORG)/kured:$(VERSION) . + $(SUDO) docker buildx build --no-cache --load -t ghcr.io/$(DH_ORG)/kured:$(VERSION) . + +dev-image: image + $(SUDO) docker tag ghcr.io/$(DH_ORG)/kured:$(VERSION) kured:dev + +dev-manifest: + # basic e2e scenario + sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' kured-ds.yaml > tests/kind/testfiles/kured-ds.yaml + # signal e2e scenario + sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' kured-ds-signal.yaml > tests/kind/testfiles/kured-ds-signal.yaml + # concurrency e2e scenario + sed -e "s#image: ghcr.io/.*kured.*#image: kured:dev#g" -e 's/#\(.*\)--period=1h/\1--period=20s/g' -e 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml > tests/kind/testfiles/kured-ds-concurrent.yaml + + +e2e-test: dev-manifest dev-image + echo "Running ALL go tests" + go test -count=1 -v --parallel 4 ./... 
$(ARGS) minikube-publish: image $(SUDO) docker save ghcr.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load) @@ -45,10 +63,9 @@ manifest: sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds-signal.yaml echo "Please generate combined manifest if necessary" -test: - echo "Running go tests" - go test ./... - echo "Running golint on pkg" - golint ./pkg/... - echo "Running golint on cmd" - golint ./cmd/... +test: bootstrap-tools + echo "Running short go tests" + go test -test.short -json ./... > test.json + echo "Running shellcheck" + find . -name '*.sh' | xargs -n1 $(HACKDIR)/shellcheck + # Need to add staticcheck to replace golint as golint is deprecated, and staticcheck is the recommendation diff --git a/.github/scripts/goreleaser-install.sh b/hack/installers/goreleaser-install.sh similarity index 100% rename from .github/scripts/goreleaser-install.sh rename to hack/installers/goreleaser-install.sh diff --git a/tests/kind/create-reboot-sentinels.sh b/tests/kind/create-reboot-sentinels.sh deleted file mode 100755 index 51bd127..0000000 --- a/tests/kind/create-reboot-sentinels.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -# USE KUBECTL_CMD to pass context and/or namespaces. -KUBECTL_CMD="${KUBECTL_CMD:-kubectl}" -SENTINEL_FILE="${SENTINEL_FILE:-/var/run/reboot-required}" - -echo "Creating reboot sentinel on worker nodes" - -# To speed up the system, let's not kill the control plane. 
-for nodename in $("$KUBECTL_CMD" get nodes -o name | grep -v control-plane); do - docker exec "${nodename/node\//}" hostname - docker exec "${nodename/node\//}" touch "${SENTINEL_FILE}" -done diff --git a/tests/kind/main_test.go b/tests/kind/main_test.go new file mode 100644 index 0000000..0a13c00 --- /dev/null +++ b/tests/kind/main_test.go @@ -0,0 +1,336 @@ +package kind + +import ( + "bytes" + "fmt" + "math/rand" + "os/exec" + "strconv" + "testing" + "time" +) + +const ( + kuredDevImage string = "kured:dev" +) + +// KindTest describes a kind cluster deployed by a test function, prepared to run a given test scenario. +type KindTest struct { + kindConfigPath string + clusterName string + timeout time.Duration + deployManifests []string + localImages []string + logsDir string + logBuffer bytes.Buffer + testInstance *testing.T // Maybe move this to testing.TB +} + +func (k *KindTest) Write(p []byte) (n int, err error) { + k.testInstance.Helper() + k.logBuffer.Write(p) + return len(p), nil +} + +func (k *KindTest) FlushLog() { + k.testInstance.Helper() + k.testInstance.Log(k.logBuffer.String()) + k.logBuffer.Reset() +} + +func (k *KindTest) RunCmd(cmdDetails ...string) error { + cmd := exec.Command(cmdDetails[0], cmdDetails[1:]...) + // by making KindTest a Writer, we can simply wire k to logs + // writing to k will write to proper logs. + cmd.Stdout = k + cmd.Stderr = k + + err := cmd.Run() + if err != nil { + return err + } + return nil +} + +// Option that can be passed to the NewKindTester function in order to change the configuration +// of the test cluster +type Option func(k *KindTest) + +// Deploy can be passed to NewKindTester to deploy extra components, in addition to the base deployment. +func Deploy(manifest string) Option { + return func(k *KindTest) { + k.deployManifests = append(k.deployManifests, manifest) + } +} + +// ExportLogs can be passed to NewKindTester to specify the folder where the kubernetes logs will be exported after the tests.
+func ExportLogs(folder string) Option { + return func(k *KindTest) { + k.logsDir = folder + } +} + +// Timeout for long-running operations (e.g. deployments, readiness probes...) +func Timeout(t time.Duration) Option { + return func(k *KindTest) { + k.timeout = t + } +} + +// LocalImage can be passed to NewKindTester to allow loading a local Docker image into the cluster +func LocalImage(nameTag string) Option { + return func(k *KindTest) { + k.localImages = append(k.localImages, nameTag) + } +} + +// NewKindTester returns a KindTest for the given cluster name and kind config file, with the given Option instances applied. It does not create the cluster; call Create for that. +func NewKindTester(kindClusterName string, filePath string, t *testing.T, options ...Option) *KindTest { + + k := &KindTest{ + clusterName: kindClusterName, + timeout: 10 * time.Minute, + kindConfigPath: filePath, + testInstance: t, + } + for _, option := range options { + option(k) + } + return k +} + +// Create prepares the kind cluster: it creates the cluster, loads the local images, then applies the manifests. +func (k *KindTest) Create() error { + err := k.RunCmd("kind", "create", "cluster", "--name", k.clusterName, "--config", k.kindConfigPath) + + if err != nil { + return fmt.Errorf("failed to create cluster: %v", err) + } + + for _, img := range k.localImages { + if err := k.RunCmd("kind", "load", "docker-image", "--name", k.clusterName, img); err != nil { + return fmt.Errorf("failed to load image: %v", err) + } + } + for _, mf := range k.deployManifests { + kubectlContext := fmt.Sprintf("kind-%v", k.clusterName) + if err := k.RunCmd("kubectl", "--context", kubectlContext, "apply", "-f", mf); err != nil { + return fmt.Errorf("failed to deploy manifest: %v", err) + } + } + return nil +} + +func (k *KindTest) Destroy() error { + if k.logsDir != "" { + if err := k.RunCmd("kind", "export", "logs", k.logsDir, "--name", k.clusterName); err != nil { + return fmt.Errorf("failed to export logs: %v.
will not teardown", err) + } + } + + if err := k.RunCmd("kind", "delete", "cluster", "--name", k.clusterName); err != nil { + return fmt.Errorf("failed to destroy cluster: %v", err) + } + return nil +} + +func TestE2EWithCommand(t *testing.T) { + t.Parallel() + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + var kindClusterConfigs = []string{ + "previous", + "current", + "next", + } + // Iterate over each Kubernetes version + for _, version := range kindClusterConfigs { + version := version + // Define a subtest for each combination + t.Run(version, func(t *testing.T) { + t.Parallel() // Allow tests to run in parallel + + randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100))) + kindClusterName := fmt.Sprintf("kured-e2e-command-%v-%v", version, randomInt) + kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version) + kindContext := fmt.Sprintf("kind-%v", kindClusterName) + + k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds.yaml")) + defer k.FlushLog() + + err := k.Create() + if err != nil { + t.Fatalf("Error creating cluster %v", err) + } + defer func(k *KindTest) { + err := k.Destroy() + if err != nil { + t.Fatalf("Error destroying cluster %v", err) + } + }(k) + + k.Write([]byte("Now running e2e tests")) + + if err := k.RunCmd("bash", "testfiles/create-reboot-sentinels.sh", kindContext); err != nil { + t.Fatalf("failed to create sentinels: %v", err) + } + + if err := k.RunCmd("bash", "testfiles/follow-coordinated-reboot.sh", kindContext); err != nil { + t.Fatalf("failed to follow reboot: %v", err) + } + }) + } +} + +func TestE2EWithSignal(t *testing.T) { + t.Parallel() + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + var kindClusterConfigs = []string{ + "previous", + "current", + "next", + } + // Iterate over each Kubernetes version + for _, version := range kindClusterConfigs { + version := 
version + // Define a subtest for each combination + t.Run(version, func(t *testing.T) { + t.Parallel() // Allow tests to run in parallel + + randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100))) + kindClusterName := fmt.Sprintf("kured-e2e-signal-%v-%v", version, randomInt) + kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version) + kindContext := fmt.Sprintf("kind-%v", kindClusterName) + + k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds-signal.yaml")) + defer k.FlushLog() + + err := k.Create() + if err != nil { + t.Fatalf("Error creating cluster %v", err) + } + defer func(k *KindTest) { + err := k.Destroy() + if err != nil { + t.Fatalf("Error destroying cluster %v", err) + } + }(k) + + k.Write([]byte("Now running e2e tests")) + + if err := k.RunCmd("bash", "testfiles/create-reboot-sentinels.sh", kindContext); err != nil { + t.Fatalf("failed to create sentinels: %v", err) + } + + if err := k.RunCmd("bash", "testfiles/follow-coordinated-reboot.sh", kindContext); err != nil { + t.Fatalf("failed to follow reboot: %v", err) + } + }) + } +} + +func TestE2EConcurrentWithCommand(t *testing.T) { + t.Parallel() + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + var kindClusterConfigs = []string{ + "previous", + "current", + "next", + } + // Iterate over each Kubernetes version + for _, version := range kindClusterConfigs { + version := version + // Define a subtest for each combination + t.Run(version, func(t *testing.T) { + t.Parallel() // Allow tests to run in parallel + + randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100))) + kindClusterName := fmt.Sprintf("kured-e2e-concurrentcommand-%v-%v", version, randomInt) + kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-%v.yaml", version) + kindContext := fmt.Sprintf("kind-%v", kindClusterName) + + k := NewKindTester(kindClusterName, kindClusterConfigFile, t, 
LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy("testfiles/kured-ds-concurrent.yaml")) + defer k.FlushLog() + + err := k.Create() + if err != nil { + t.Fatalf("Error creating cluster %v", err) + } + defer func(k *KindTest) { + err := k.Destroy() + if err != nil { + t.Fatalf("Error destroying cluster %v", err) + } + }(k) + + k.Write([]byte("Now running e2e tests")) + + if err := k.RunCmd("bash", "testfiles/create-reboot-sentinels.sh", kindContext); err != nil { + t.Fatalf("failed to create sentinels: %v", err) + } + + if err := k.RunCmd("bash", "testfiles/follow-coordinated-reboot.sh", kindContext); err != nil { + t.Fatalf("failed to follow reboot: %v", err) + } + }) + } +} + +func TestCordonningIsKept(t *testing.T) { + t.Parallel() + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + var kindClusterConfigs = []string{ + "concurrency1", + "concurrency2", + } + // Iterate over each concurrency scenario + for _, version := range kindClusterConfigs { + version := version + // Define a subtest for each combination + t.Run(version, func(t *testing.T) { + t.Parallel() // Allow tests to run in parallel + + randomInt := fmt.Sprintf(strconv.Itoa(rand.Intn(100))) + kindClusterName := fmt.Sprintf("kured-e2e-cordon-%v-%v", version, randomInt) + kindClusterConfigFile := fmt.Sprintf("../../.github/kind-cluster-next.yaml") + kindContext := fmt.Sprintf("kind-%v", kindClusterName) + + var manifest string + if version == "concurrency1" { + manifest = fmt.Sprintf("testfiles/kured-ds.yaml") + } else { + manifest = fmt.Sprintf("testfiles/kured-ds-concurrent.yaml") + } + k := NewKindTester(kindClusterName, kindClusterConfigFile, t, LocalImage(kuredDevImage), Deploy("../../kured-rbac.yaml"), Deploy(manifest)) + defer k.FlushLog() + + err := k.Create() + if err != nil { + t.Fatalf("Error creating cluster %v", err) + } + defer func(k *KindTest) { + err := k.Destroy() + if err != nil { + t.Fatalf("Error destroying cluster %v", err) + } + }(k) + +
k.Write([]byte("Now running e2e tests")) + + if err := k.RunCmd("bash", "testfiles/node-stays-as-cordonned.sh", kindContext); err != nil { + t.Fatalf("node did not reboot in time: %v", err) + } + }) + } +} diff --git a/tests/kind/testfiles/create-reboot-sentinels.sh b/tests/kind/testfiles/create-reboot-sentinels.sh new file mode 100755 index 0000000..016f201 --- /dev/null +++ b/tests/kind/testfiles/create-reboot-sentinels.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +kubectl_flags=( ) +[[ "$1" != "" ]] && kubectl_flags=("${kubectl_flags[@]}" --context "$1") + +# To speed up the system, let's not kill the control plane. +for nodename in $(${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" get nodes -o name | grep -v control-plane); do + echo "Creating reboot sentinel on $nodename" + docker exec "${nodename/node\//}" hostname + docker exec "${nodename/node\//}" touch "${SENTINEL_FILE:-/var/run/reboot-required}" +done diff --git a/tests/kind/follow-coordinated-reboot.sh b/tests/kind/testfiles/follow-coordinated-reboot.sh similarity index 75% rename from tests/kind/follow-coordinated-reboot.sh rename to tests/kind/testfiles/follow-coordinated-reboot.sh index 4559ec1..fd25c36 100755 --- a/tests/kind/follow-coordinated-reboot.sh +++ b/tests/kind/testfiles/follow-coordinated-reboot.sh @@ -1,11 +1,14 @@ #!/usr/bin/env bash -NODECOUNT=${NODECOUNT:-2} -KUBECTL_CMD="${KUBECTL_CMD:-kubectl}" +REBOOTCOUNT=${REBOOTCOUNT:-2} # By default we only create two sentinels in create-reboot-sentinels. DEBUG="${DEBUG:-false}" CONTAINER_NAME_FORMAT=${CONTAINER_NAME_FORMAT:-"chart-testing-*"} +kubectl_flags=( ) +[[ "$1" != "" ]] && kubectl_flags=("${kubectl_flags[@]}" --context "$1") + tmp_dir=$(mktemp -d -t kured-XXXX) + function gather_logs_and_cleanup { if [[ -f "$tmp_dir"/node_output ]]; then rm "$tmp_dir"/node_output @@ -18,15 +21,15 @@ function gather_logs_and_cleanup { # This is useful to see if containers have crashed. 
echo "docker ps -a:" docker ps -a - echo "docker journal logs" - journalctl -u docker --no-pager + echo "docker journal logs" + journalctl -u docker --no-pager # This is useful to see if the nodes have _properly_ rebooted. # It should show the reboot/two container starts per node. - for name in $(docker ps -a -f "name=${CONTAINER_NAME_FORMAT}" -q); do + for id in $(docker ps -a -q); do echo "############################################################" - echo "docker logs for container $name:" - docker logs "$name" + echo "docker logs for container $id:" + docker logs "$id" done fi @@ -42,23 +45,18 @@ attempt_num=1 # Get docker info of each of those kind containers. If one has crashed, restart it. set +o errexit -echo "There are $NODECOUNT nodes in the cluster" -until [ ${#was_unschedulable[@]} == "$NODECOUNT" ] && [ ${#has_recovered[@]} == "$NODECOUNT" ] +echo "There are $REBOOTCOUNT nodes total needing reboot in the cluster" +until [ ${#was_unschedulable[@]} == "$REBOOTCOUNT" ] && [ ${#has_recovered[@]} == "$REBOOTCOUNT" ] do echo "${#was_unschedulable[@]} nodes were removed from pool once:" "${!was_unschedulable[@]}" echo "${#has_recovered[@]} nodes removed from the pool are now back:" "${!has_recovered[@]}" - #"$KUBECTL_CMD" logs -n kube-system -l name=kured --ignore-errors > "$tmp_dir"/node_output - #if [[ "$DEBUG" == "true" ]]; then - # echo "Kured pod logs:" - # cat "$tmp_dir"/node_output - #fi - "$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers | grep -v control-plane > "$tmp_dir"/node_output + ${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers | grep -v control-plane > "$tmp_dir"/node_output if [[ "$DEBUG" == "true" ]]; then # This is useful to see if a node gets stuck after drain, and doesn't # come back up. - echo "Result of command $KUBECTL_CMD get nodes ... 
showing unschedulable nodes:" + echo "Result of command kubectl unschedulable nodes:" cat "$tmp_dir"/node_output fi @@ -81,7 +79,7 @@ do done < "$tmp_dir"/node_output - if [[ "${#has_recovered[@]}" == "$NODECOUNT" ]]; then + if [[ "${#has_recovered[@]}" == "$REBOOTCOUNT" ]]; then echo "All nodes recovered." break else diff --git a/tests/kind/testfiles/node-stays-as-cordonned.sh b/tests/kind/testfiles/node-stays-as-cordonned.sh new file mode 100755 index 0000000..bc41345 --- /dev/null +++ b/tests/kind/testfiles/node-stays-as-cordonned.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash + +kubectl_flags=( ) +[[ "$1" != "" ]] && kubectl_flags=("${kubectl_flags[@]}" --context "$1") + +cordon() { + kubectl "${kubectl_flags[@]}" cordon "${precordonned_node}" +} + +create_sentinel() { + docker exec "${precordonned_node}" touch "${SENTINEL_FILE:-/var/run/reboot-required}" + docker exec "${notcordonned_node}" touch "${SENTINEL_FILE:-/var/run/reboot-required}" +} + +check_reboot_required() { + while true; + do + docker exec "${precordonned_node}" stat /var/run/reboot-required > /dev/null && echo "Reboot still required" || return 0 + sleep 3 + done +} + +check_node_back_online_as_cordonned() { + sleep 5 # For safety, wait for 5 seconds, so that the kubectl command succeeds. + # This test might be giving us false positive until we work on reliability of the + # test. 
+ while true; + do + result=$(kubectl "${kubectl_flags[@]}" get node "${precordonned_node}" --no-headers | awk '{print $2;}') + test "${result}" != "Ready,SchedulingDisabled" && echo "Node ${precordonned_node} in state ${result}" || return 0 + sleep 3 + done +} + +check_node_back_online_as_uncordonned() { + while true; + do + result=$(kubectl "${kubectl_flags[@]}" get node "${notcordonned_node}" --no-headers | awk '{print $2;}') + test "${result}" != "Ready" && echo "Node ${notcordonned_node} in state ${result}" || return 0 + sleep 3 + done +} +### Start main + +worker_nodes=$(${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" get nodes -o custom-columns=name:metadata.name --no-headers | grep worker) +precordonned_node=$(echo "$worker_nodes" | head -n 1) +notcordonned_node=$(echo "$worker_nodes" | tail -n 1) + +# Wait for kured to install correctly +sleep 15 +cordon +create_sentinel +check_reboot_required +echo "Node has rebooted, but may take time to come back ready" +check_node_back_online_as_cordonned +check_node_back_online_as_uncordonned +echo "Showing final node state" +${KUBECTL_CMD:-kubectl} "${kubectl_flags[@]}" get nodes +echo "Test successful" \ No newline at end of file