mirror of
https://github.com/kubereboot/kured.git
synced 2026-04-26 19:46:37 +00:00
Compare commits
179 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
774dc69e3b | ||
|
|
9377948f62 | ||
|
|
4d6cac66a6 | ||
|
|
9d4ebfc1f8 | ||
|
|
ad781ad6f0 | ||
|
|
9525ce53a3 | ||
|
|
c5bb9ae478 | ||
|
|
8cf12fa24e | ||
|
|
98fdb334aa | ||
|
|
87eda823e7 | ||
|
|
9788dba4f3 | ||
|
|
777f5b2cce | ||
|
|
055de3a949 | ||
|
|
7bea9d53c1 | ||
|
|
aa5a3f0ba9 | ||
|
|
8230add524 | ||
|
|
10d42b07a5 | ||
|
|
5a59c2f504 | ||
|
|
9c56b28282 | ||
|
|
dd0bce41be | ||
|
|
67c50b27ab | ||
|
|
e2e6e86e0c | ||
|
|
5aaa1e01bc | ||
|
|
00d5b4920a | ||
|
|
28c5332450 | ||
|
|
71b3f1dd7f | ||
|
|
95aee6828c | ||
|
|
66ce93ef09 | ||
|
|
1e76d65d00 | ||
|
|
f802373e0f | ||
|
|
6c34fee96b | ||
|
|
8dfccdbe48 | ||
|
|
db62f4aa0e | ||
|
|
115fea9d2a | ||
|
|
0734e270fa | ||
|
|
08774994ad | ||
|
|
90d2d9a39b | ||
|
|
35a6b8955d | ||
|
|
641c319eb8 | ||
|
|
bee558cd8f | ||
|
|
78064e1d2c | ||
|
|
29560f15b3 | ||
|
|
500a8a1bbb | ||
|
|
9e441ebee6 | ||
|
|
34f0df2605 | ||
|
|
cd7c4f8da3 | ||
|
|
9407c3f8f6 | ||
|
|
da59ebff70 | ||
|
|
d2d21f31c0 | ||
|
|
6191c73a3c | ||
|
|
48d112ba32 | ||
|
|
b12ae4eccd | ||
|
|
50aac294b7 | ||
|
|
c3cb2bbc6c | ||
|
|
67e979c198 | ||
|
|
9be88fb878 | ||
|
|
4fcf6e184b | ||
|
|
aa5c3e7783 | ||
|
|
5ab20e62d2 | ||
|
|
03e8c2116a | ||
|
|
9415f301a2 | ||
|
|
4d4d3982c2 | ||
|
|
84fa914fe6 | ||
|
|
d1e8b1b1a5 | ||
|
|
3487860e06 | ||
|
|
d965e7f67e | ||
|
|
4ab3bf9813 | ||
|
|
7397365c51 | ||
|
|
d771013cde | ||
|
|
195f4f0bee | ||
|
|
15735cd933 | ||
|
|
c44ecff3e5 | ||
|
|
1020e7179a | ||
|
|
5ff221b5b6 | ||
|
|
c7b5520859 | ||
|
|
bbdce6abe5 | ||
|
|
5b11ebcc3a | ||
|
|
8543cf25a2 | ||
|
|
6691996bc0 | ||
|
|
eb4acc69bf | ||
|
|
302578467d | ||
|
|
99e7b71ba4 | ||
|
|
e38d153fe7 | ||
|
|
7f6d4a1846 | ||
|
|
07208ef84b | ||
|
|
d6964180ca | ||
|
|
966698f3c6 | ||
|
|
445310b9b7 | ||
|
|
1eec15b5dd | ||
|
|
238e6993f3 | ||
|
|
1ca0203db2 | ||
|
|
9ddad78071 | ||
|
|
4918203ea9 | ||
|
|
640efa56b8 | ||
|
|
67232f00d9 | ||
|
|
ce32f9dc05 | ||
|
|
d82d295f2d | ||
|
|
580279f419 | ||
|
|
87508eb778 | ||
|
|
7d3b97541d | ||
|
|
93d6a783a1 | ||
|
|
b7494f5f80 | ||
|
|
8e1933cd28 | ||
|
|
96bf7c1add | ||
|
|
178ba93b5a | ||
|
|
f3ed0087d2 | ||
|
|
71a273a14c | ||
|
|
2b36eab0f8 | ||
|
|
aefd901b4e | ||
|
|
91b01b5524 | ||
|
|
f1255bff91 | ||
|
|
22a76f0da2 | ||
|
|
b52a9587f3 | ||
|
|
a6e1cf8191 | ||
|
|
d7576dce0f | ||
|
|
661af3b042 | ||
|
|
eec8ca1f9b | ||
|
|
15356fa26d | ||
|
|
7e3565a565 | ||
|
|
a3bc03b4b9 | ||
|
|
22ce5a2628 | ||
|
|
0f80b70478 | ||
|
|
28be690849 | ||
|
|
84292cc8c3 | ||
|
|
21b54227a7 | ||
|
|
8e3fb55ec4 | ||
|
|
1a6592851e | ||
|
|
bba3b8d83f | ||
|
|
9c6d6a6d82 | ||
|
|
997794eaac | ||
|
|
0763cdd95a | ||
|
|
c004566e97 | ||
|
|
077ef2488e | ||
|
|
06093ab53b | ||
|
|
4d2019c07f | ||
|
|
687aeda813 | ||
|
|
acddd6b675 | ||
|
|
54e7d93902 | ||
|
|
2666b49d01 | ||
|
|
ff1a27ba8b | ||
|
|
38ed636ecf | ||
|
|
8324b09bb9 | ||
|
|
fb8677e7ac | ||
|
|
bdd16d4e01 | ||
|
|
16e6d3c4d3 | ||
|
|
af824bfd6a | ||
|
|
8264a529d6 | ||
|
|
cd25017d67 | ||
|
|
4c1a23a047 | ||
|
|
8f86e1d4f8 | ||
|
|
79e19d84ba | ||
|
|
01396db3d1 | ||
|
|
d3b59b8922 | ||
|
|
eafe2c3d98 | ||
|
|
e4f1c7358c | ||
|
|
348b5b4c96 | ||
|
|
c8a3a6ff9d | ||
|
|
c196d4e97f | ||
|
|
efc98c8813 | ||
|
|
b108aa4d2d | ||
|
|
2ae0a82510 | ||
|
|
f95664156d | ||
|
|
891afda596 | ||
|
|
2b89170417 | ||
|
|
de59c2614d | ||
|
|
2e5cb81b4c | ||
|
|
fde91041d5 | ||
|
|
8a3f486ad9 | ||
|
|
513db7ce8c | ||
|
|
938cbd428c | ||
|
|
fa28b550b2 | ||
|
|
164183e1bc | ||
|
|
7d0499cc0a | ||
|
|
5e32864e0b | ||
|
|
718faf4d31 | ||
|
|
ac9e669b52 | ||
|
|
bab1425e1a | ||
|
|
a3f9796305 | ||
|
|
afac9d435a |
4
.github/dependabot.yml
vendored
4
.github/dependabot.yml
vendored
@@ -15,3 +15,7 @@ updates:
|
||||
- dependency-name: "k8s.io/apimachinery"
|
||||
- dependency-name: "k8s.io/client-go"
|
||||
- dependency-name: "k8s.io/kubectl"
|
||||
- package-ecosystem: "docker"
|
||||
directory: "cmd/kured"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
|
||||
13
.github/kind-cluster-1.20.yaml
vendored
13
.github/kind-cluster-1.20.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.20.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.20.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.20.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.20.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.20.7"
|
||||
13
.github/kind-cluster-1.21.yaml
vendored
13
.github/kind-cluster-1.21.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.21.2
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.21.2
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.21.2
|
||||
- role: worker
|
||||
image: kindest/node:v1.21.2
|
||||
- role: worker
|
||||
image: kindest/node:v1.21.2
|
||||
10
.github/kind-cluster-1.22.yaml
vendored
10
.github/kind-cluster-1.22.yaml
vendored
@@ -2,12 +2,12 @@ kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.22.0
|
||||
image: kindest/node:v1.22.4
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.22.0
|
||||
image: kindest/node:v1.22.4
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.22.0
|
||||
image: kindest/node:v1.22.4
|
||||
- role: worker
|
||||
image: kindest/node:v1.22.0
|
||||
image: kindest/node:v1.22.4
|
||||
- role: worker
|
||||
image: kindest/node:v1.22.0
|
||||
image: kindest/node:v1.22.4
|
||||
|
||||
13
.github/kind-cluster-1.23.yaml
vendored
Normal file
13
.github/kind-cluster-1.23.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.23.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.23.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.23.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.23.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.23.0"
|
||||
13
.github/kind-cluster-1.24.yaml
vendored
Normal file
13
.github/kind-cluster-1.24.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.24.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.24.0"
|
||||
2
.github/workflows/on-main-push-charts.yaml
vendored
2
.github/workflows/on-main-push-charts.yaml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
name: Publish latest chart
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Publish Helm chart
|
||||
uses: stefanprodan/helm-gh-pages@master
|
||||
with:
|
||||
|
||||
41
.github/workflows/on-main-push.yaml
vendored
41
.github/workflows/on-main-push.yaml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
name: "Build, scan, and publish tagged image"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Find go version
|
||||
run: |
|
||||
@@ -19,20 +19,41 @@ jobs:
|
||||
id: awk_gomod
|
||||
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME_WEAVEWORKSKUREDCI }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN_WEAVEWORKSKUREDCI }}
|
||||
|
||||
- name: Build image
|
||||
run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" image
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: weave-ghcr-bot
|
||||
password: ${{ secrets.KURED_WEAVE_GHCR_BOT_TOKEN }}
|
||||
|
||||
- name: Publish image
|
||||
run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" publish-image
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Find current tag version
|
||||
run: echo "::set-output name=sha_short::$(git rev-parse --short HEAD)"
|
||||
id: tags
|
||||
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: .
|
||||
file: cmd/kured/Dockerfile.multi
|
||||
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
|
||||
push: true
|
||||
tags: |
|
||||
docker.io/${{ GITHUB.REPOSITORY }}:main-${{ steps.tags.outputs.sha_short }}
|
||||
ghcr.io/${{ GITHUB.REPOSITORY }}:main-${{ steps.tags.outputs.sha_short }}
|
||||
|
||||
18
.github/workflows/on-pr-charts.yaml
vendored
18
.github/workflows/on-pr-charts.yaml
vendored
@@ -22,22 +22,22 @@ jobs:
|
||||
- install
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
|
||||
- uses: actions/setup-python@v2
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.7
|
||||
|
||||
# Helm is already present in github actions, so do not re-install it
|
||||
- name: Setup chart testing
|
||||
uses: helm/chart-testing-action@v2.1.0
|
||||
uses: helm/chart-testing-action@v2.3.0
|
||||
|
||||
- name: Create default kind cluster
|
||||
uses: helm/kind-action@v1.2.0
|
||||
uses: helm/kind-action@v1.3.0
|
||||
with:
|
||||
version: v0.11.0
|
||||
version: v0.14.0
|
||||
if: ${{ matrix.test-action == 'install' }}
|
||||
|
||||
- name: Run chart tests
|
||||
@@ -49,13 +49,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: test-chart
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create 1 node kind cluster
|
||||
uses: helm/kind-action@v1.2.0
|
||||
uses: helm/kind-action@v1.3.0
|
||||
with:
|
||||
version: v0.11.0
|
||||
version: v0.14.0
|
||||
|
||||
- name: Deploy kured on default namespace with its helm chart
|
||||
run: |
|
||||
@@ -69,7 +69,7 @@ jobs:
|
||||
kubectl describe ds kured
|
||||
|
||||
- name: Test if successful deploy
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
|
||||
74
.github/workflows/on-pr.yaml
vendored
74
.github/workflows/on-pr.yaml
vendored
@@ -9,12 +9,21 @@ jobs:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- name: run tests
|
||||
run: go test -json ./... > test.json
|
||||
- name: Annotate tests
|
||||
if: always()
|
||||
uses: guyarb/golang-test-annoations@v0.5.0
|
||||
uses: guyarb/golang-test-annoations@v0.6.0
|
||||
with:
|
||||
test-results: test.json
|
||||
|
||||
@@ -22,7 +31,7 @@ jobs:
|
||||
name: Lint bash code with shellcheck
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Run ShellCheck
|
||||
uses: bewuethr/shellcheck-action@v2
|
||||
|
||||
@@ -30,16 +39,16 @@ jobs:
|
||||
name: Lint golang code
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- name: Lint cmd folder
|
||||
uses: Jerome1337/golint-action@v1.0.2
|
||||
with:
|
||||
@@ -53,7 +62,7 @@ jobs:
|
||||
name: Check docs for incorrect links
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Link Checker
|
||||
id: lc
|
||||
uses: peter-evans/link-checker@v1
|
||||
@@ -69,18 +78,21 @@ jobs:
|
||||
name: Build image and scan it against known vulnerabilities
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- run: make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
|
||||
- uses: Azure/container-scan@v0
|
||||
env:
|
||||
# See https://github.com/goodwithtech/dockle/issues/188
|
||||
DOCKLE_HOST: "unix:///var/run/docker.sock"
|
||||
with:
|
||||
image-name: docker.io/${{ github.repository_owner }}/kured:${{ github.sha }}
|
||||
|
||||
@@ -97,20 +109,20 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.20"
|
||||
- "1.21"
|
||||
- "1.22"
|
||||
- "1.23"
|
||||
- "1.24"
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
|
||||
@@ -127,10 +139,10 @@ jobs:
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create kind cluster with 5 nodes
|
||||
uses: helm/kind-action@v1.2.0
|
||||
uses: helm/kind-action@v1.3.0
|
||||
with:
|
||||
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
|
||||
version: v0.11.0
|
||||
version: v0.14.0
|
||||
|
||||
- name: Preload previously built images onto kind cluster
|
||||
run: kind load docker-image docker.io/${{ github.repository_owner }}/kured:${{ github.sha }} --name chart-testing
|
||||
@@ -144,7 +156,7 @@ jobs:
|
||||
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
@@ -170,18 +182,18 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.21"
|
||||
- "1.22"
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
|
||||
@@ -198,9 +210,9 @@ jobs:
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create 1 node kind cluster
|
||||
uses: helm/kind-action@v1.2.0
|
||||
uses: helm/kind-action@v1.3.0
|
||||
with:
|
||||
version: v0.11.0
|
||||
version: v0.14.0
|
||||
|
||||
- name: Preload previously built images onto kind cluster
|
||||
run: kind load docker-image docker.io/${{ github.repository_owner }}/kured:${{ github.sha }} --name chart-testing
|
||||
@@ -217,7 +229,7 @@ jobs:
|
||||
kubectl describe ds kured
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
@@ -226,7 +238,7 @@ jobs:
|
||||
command: "kubectl get ds kured | grep -E 'kured.*1.*1.*1.*1.*1' "
|
||||
|
||||
- name: Get metrics (healthy)
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 2
|
||||
max_attempts: 12
|
||||
@@ -238,7 +250,7 @@ jobs:
|
||||
./tests/kind/create-reboot-sentinels.sh
|
||||
|
||||
- name: Get metrics (need reboot)
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 15
|
||||
max_attempts: 10
|
||||
@@ -263,16 +275,16 @@ jobs:
|
||||
# kubernetes:
|
||||
# - "1.20"
|
||||
# steps:
|
||||
# - uses: actions/checkout@v2
|
||||
# - uses: actions/checkout@v3
|
||||
# - name: Find go version
|
||||
# run: |
|
||||
# GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
# echo "::set-output name=version::${GO_VERSION}"
|
||||
# id: awk_gomod
|
||||
# - name: Ensure go version
|
||||
# uses: actions/setup-go@v2
|
||||
# uses: actions/setup-go@v3
|
||||
# with:
|
||||
# go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
# go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
# - name: Build artifacts
|
||||
# run: |
|
||||
# make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
|
||||
@@ -308,7 +320,7 @@ jobs:
|
||||
# kubectl describe ds kured
|
||||
#
|
||||
# - name: Ensure kured is ready
|
||||
# uses: nick-invision/retry@v2.5.0
|
||||
# uses: nick-invision/retry@v2.8.1
|
||||
# with:
|
||||
# timeout_minutes: 10
|
||||
# max_attempts: 10
|
||||
|
||||
40
.github/workflows/on-tag.yaml
vendored
40
.github/workflows/on-tag.yaml
vendored
@@ -12,31 +12,57 @@ jobs:
|
||||
name: "Build, scan, and publish tagged image"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- name: Find current tag version
|
||||
run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}"
|
||||
id: tags
|
||||
- run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ steps.tags.outputs.version }}" image
|
||||
- uses: Azure/container-scan@v0
|
||||
env:
|
||||
# See https://github.com/goodwithtech/dockle/issues/188
|
||||
DOCKLE_HOST: "unix:///var/run/docker.sock"
|
||||
with:
|
||||
image-name: docker.io/${{ github.repository_owner }}/kured:${{ steps.tags.outputs.version }}
|
||||
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME_WEAVEWORKSKUREDCI }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN_WEAVEWORKSKUREDCI }}
|
||||
|
||||
- name: Publish image
|
||||
run: |
|
||||
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ steps.tags.outputs.version }}" publish-image
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: weave-ghcr-bot
|
||||
password: ${{ secrets.KURED_WEAVE_GHCR_BOT_TOKEN }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
id: buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@v3
|
||||
with:
|
||||
context: .
|
||||
file: cmd/kured/Dockerfile.multi
|
||||
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
|
||||
push: true
|
||||
# cache-from: type=registry,ref=user/app:buildcache
|
||||
# cache-to: type=inline
|
||||
tags: |
|
||||
docker.io/${{ GITHUB.REPOSITORY }}:${{ steps.tags.outputs.version }}
|
||||
ghcr.io/${{ GITHUB.REPOSITORY }}:${{ steps.tags.outputs.version }}
|
||||
|
||||
35
.github/workflows/periodics-daily.yaml
vendored
35
.github/workflows/periodics-daily.yaml
vendored
@@ -10,12 +10,12 @@ jobs:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v2
|
||||
uses: actions/checkout@v3
|
||||
- name: run tests
|
||||
run: go test -json ./... > test.json
|
||||
- name: Annotate tests
|
||||
if: always()
|
||||
uses: guyarb/golang-test-annoations@v0.5.0
|
||||
uses: guyarb/golang-test-annoations@v0.6.0
|
||||
with:
|
||||
test-results: test.json
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
# Stale by default waits for 60 days before marking PR/issues as stale, and closes them after 21 days.
|
||||
# Do not expire the first issues that would allow the community to grow.
|
||||
- uses: actions/stale@v4
|
||||
- uses: actions/stale@v5
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: 'This issue was automatically considered stale due to lack of activity. Please update it and/or join our slack channels to promote it, before it automatically closes (in 7 days).'
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
name: Check docs for incorrect links
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Link Checker
|
||||
id: lc
|
||||
uses: peter-evans/link-checker@v1
|
||||
@@ -52,18 +52,21 @@ jobs:
|
||||
name: Build image and scan it against known vulnerabilities
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
- run: make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
|
||||
- uses: Azure/container-scan@v0
|
||||
env:
|
||||
# See https://github.com/goodwithtech/dockle/issues/188
|
||||
DOCKLE_HOST: "unix:///var/run/docker.sock"
|
||||
with:
|
||||
image-name: docker.io/${{ github.repository_owner }}/kured:${{ github.sha }}
|
||||
|
||||
@@ -74,20 +77,20 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.20"
|
||||
- "1.21"
|
||||
- "1.22"
|
||||
- "1.23"
|
||||
- "1.24"
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Find go version
|
||||
run: |
|
||||
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
|
||||
echo "::set-output name=version::${GO_VERSION}"
|
||||
id: awk_gomod
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v2
|
||||
uses: actions/setup-go@v3
|
||||
with:
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}"
|
||||
go-version: "${{ steps.awk_gomod.outputs.version }}.x"
|
||||
|
||||
- name: "Workaround 'Failed to attach 1 to compat systemd cgroup /actions_job/...' on gh actions"
|
||||
run: |
|
||||
@@ -100,10 +103,10 @@ jobs:
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create 5 node kind cluster
|
||||
uses: helm/kind-action@v1.2.0
|
||||
uses: helm/kind-action@v1.3.0
|
||||
with:
|
||||
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
|
||||
version: v0.11.0
|
||||
version: v0.14.0
|
||||
|
||||
- name: Deploy kured on default namespace with its helm chart
|
||||
run: |
|
||||
@@ -114,10 +117,12 @@ jobs:
|
||||
helm install kured ./charts/kured/ --set configuration.period=1m
|
||||
kubectl config set-context kind-chart-testing
|
||||
kubectl get ds --all-namespaces
|
||||
kubectl get nodes -o yaml
|
||||
sleep 5
|
||||
kubectl describe ds kured
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v2.5.0
|
||||
uses: nick-invision/retry@v2.8.1
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
|
||||
3
CODE_OF_CONDUCT.md
Normal file
3
CODE_OF_CONDUCT.md
Normal file
@@ -0,0 +1,3 @@
|
||||
## Kured Community Code of Conduct
|
||||
|
||||
Kured follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).
|
||||
7
Makefile
7
Makefile
@@ -1,5 +1,5 @@
|
||||
.DEFAULT: all
|
||||
.PHONY: all clean image publish-image minikube-publish manifest helm-chart test tests
|
||||
.PHONY: all clean image publish-image minikube-publish manifest helm-chart test tests kured-multi
|
||||
|
||||
DH_ORG=weaveworks
|
||||
VERSION=$(shell git symbolic-ref --short HEAD)-$(shell git rev-parse --short HEAD)
|
||||
@@ -19,17 +19,22 @@ cmd/kured/kured: $(DEPS)
|
||||
cmd/kured/kured: cmd/kured/*.go
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "-X main.version=$(VERSION)" -o $@ cmd/kured/*.go
|
||||
|
||||
kured-multi:
|
||||
CGO_ENABLED=0 go build -ldflags "-X main.version=$(VERSION)" -o cmd/kured/kured cmd/kured/*.go
|
||||
|
||||
build/.image.done: cmd/kured/Dockerfile cmd/kured/kured
|
||||
mkdir -p build
|
||||
cp $^ build
|
||||
$(SUDO) docker build -t docker.io/$(DH_ORG)/kured -f build/Dockerfile ./build
|
||||
$(SUDO) docker tag docker.io/$(DH_ORG)/kured docker.io/$(DH_ORG)/kured:$(VERSION)
|
||||
$(SUDO) docker tag docker.io/$(DH_ORG)/kured ghcr.io/$(DH_ORG)/kured:$(VERSION)
|
||||
touch $@
|
||||
|
||||
image: build/.image.done
|
||||
|
||||
publish-image: image
|
||||
$(SUDO) docker push docker.io/$(DH_ORG)/kured:$(VERSION)
|
||||
$(SUDO) docker push ghcr.io/$(DH_ORG)/kured:$(VERSION)
|
||||
|
||||
minikube-publish: image
|
||||
$(SUDO) docker save docker.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
|
||||
|
||||
140
README.md
140
README.md
@@ -1,29 +1,31 @@
|
||||
|
||||
# kured - Kubernetes Reboot Daemon
|
||||
|
||||
<img src="https://github.com/weaveworks/kured/raw/main/img/logo.png" align="right"/>
|
||||
|
||||
- [Introduction](#introduction)
|
||||
- [Kubernetes & OS Compatibility](#kubernetes--os-compatibility)
|
||||
- [Installation](#installation)
|
||||
- [Configuration](#configuration)
|
||||
- [Reboot Sentinel File & Period](#reboot-sentinel-file--period)
|
||||
- [Setting a schedule](#setting-a-schedule)
|
||||
- [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
|
||||
- [Blocking Reboots via Pods](#blocking-reboots-via-pods)
|
||||
- [Prometheus Metrics](#prometheus-metrics)
|
||||
- [Notifications](#notifications)
|
||||
- [Overriding Lock Configuration](#overriding-lock-configuration)
|
||||
- [Operation](#operation)
|
||||
- [Testing](#testing)
|
||||
- [Disabling Reboots](#disabling-reboots)
|
||||
- [Manual Unlock](#manual-unlock)
|
||||
- [Automatic Unlock](#automatic-unlock)
|
||||
- [Delaying Lock Release](#delaying-lock-release)
|
||||
- [Building](#building)
|
||||
- [Frequently Asked/Anticipated Questions](#frequently-askedanticipated-questions)
|
||||
- [Why is there no `latest` tag on Docker Hub?](#why-is-there-no-latest-tag-on-docker-hub)
|
||||
- [Getting Help](#getting-help)
|
||||
- [kured - Kubernetes Reboot Daemon](#kured---kubernetes-reboot-daemon)
|
||||
- [Introduction](#introduction)
|
||||
- [Kubernetes & OS Compatibility](#kubernetes--os-compatibility)
|
||||
- [Installation](#installation)
|
||||
- [Configuration](#configuration)
|
||||
- [Reboot Sentinel File & Period](#reboot-sentinel-file--period)
|
||||
- [Reboot Sentinel Command](#reboot-sentinel-command)
|
||||
- [Setting a schedule](#setting-a-schedule)
|
||||
- [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
|
||||
- [Blocking Reboots via Pods](#blocking-reboots-via-pods)
|
||||
- [Adding node labels before and after reboots](#adding-node-labels-before-and-after-reboots)
|
||||
- [Prometheus Metrics](#prometheus-metrics)
|
||||
- [Notifications](#notifications)
|
||||
- [Overriding Lock Configuration](#overriding-lock-configuration)
|
||||
- [Operation](#operation)
|
||||
- [Testing](#testing)
|
||||
- [Disabling Reboots](#disabling-reboots)
|
||||
- [Manual Unlock](#manual-unlock)
|
||||
- [Automatic Unlock](#automatic-unlock)
|
||||
- [Delaying Lock Release](#delaying-lock-release)
|
||||
- [Building](#building)
|
||||
- [Frequently Asked/Anticipated Questions](#frequently-askedanticipated-questions)
|
||||
- [Why is there no `latest` tag on Docker Hub?](#why-is-there-no-latest-tag-on-docker-hub)
|
||||
- [Getting Help](#getting-help)
|
||||
|
||||
## Introduction
|
||||
|
||||
@@ -47,8 +49,11 @@ forwards and backwards compatibility of one minor version between client and
|
||||
server:
|
||||
|
||||
| kured | kubectl | k8s.io/client-go | k8s.io/apimachinery | expected kubernetes compatibility |
|
||||
|-------|---------|------------------|---------------------|-----------------------------------|
|
||||
| main | 1.21.4 | v0.21.4 | v0.21.4 | 1.20.x, 1.21.x, 1.22.x |
|
||||
| ----- | ------- | ---------------- | ------------------- | --------------------------------- |
|
||||
| main | 1.23.6 | v0.23.6 | v0.23.6 | 1.22.x, 1.23.x, 1.24.x |
|
||||
| 1.10.1| 1.23.6 | v0.23.6 | v0.23.6 | 1.22.x, 1.23.x, 1.24.x |
|
||||
| 1.9.2 | 1.22.4 | v0.22.4 | v0.22.4 | 1.21.x, 1.22.x, 1.23.x |
|
||||
| 1.8.1 | 1.21.4 | v0.21.4 | v0.21.4 | 1.20.x, 1.21.x, 1.22.x |
|
||||
| 1.7.0 | 1.20.5 | v0.20.5 | v0.20.5 | 1.19.x, 1.20.x, 1.21.x |
|
||||
| 1.6.1 | 1.19.4 | v0.19.4 | v0.19.4 | 1.18.x, 1.19.x, 1.20.x |
|
||||
| 1.5.1 | 1.18.8 | v0.18.8 | v0.18.8 | 1.17.x, 1.18.x, 1.19.x |
|
||||
@@ -83,34 +88,46 @@ edit it in accordance with the following section before application.
|
||||
The following arguments can be passed to kured via the daemonset pod template:
|
||||
|
||||
```console
|
||||
Kubernetes Reboot Daemon
|
||||
|
||||
Usage:
|
||||
kured [flags]
|
||||
|
||||
Flags:
|
||||
--alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts
|
||||
--alert-firing-only bool only consider firing alerts when checking for active alerts
|
||||
--alert-firing-only only consider firing alerts when checking for active alerts
|
||||
--annotate-nodes if set, the annotations 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots
|
||||
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
|
||||
--drain-grace-period int time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
|
||||
--skip-wait-for-delete-timeout int when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
|
||||
--drain-grace-period int time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default -1)
|
||||
--drain-timeout duration timeout after which the drain is aborted (default: 0, infinite time)
|
||||
--ds-name string name of daemonset on which to place lock (default "kured")
|
||||
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
|
||||
--end-time string schedule reboot only before this time of day (default "23:59:59")
|
||||
--force-reboot bool force a reboot even if the drain is still running (default: false)
|
||||
--drain-timeout duration timeout after which the drain is aborted (default: 0, infinite time)
|
||||
--force-reboot force a reboot even if the drain fails or times out
|
||||
-h, --help help for kured
|
||||
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
--lock-release-delay duration hold lock after reboot by this duration (default: 0, disabled)
|
||||
--lock-release-delay duration delay lock release for this duration (default: 0, disabled)
|
||||
--lock-ttl duration expire lock annotation after this duration (default: 0, disabled)
|
||||
--log-format string use text or json log format (default "text")
|
||||
--message-template-drain string message template used to notify about a node being drained (default "Draining node %s")
|
||||
--message-template-reboot string message template used to notify about a node being rebooted (default "Rebooting node %s")
|
||||
--notify-url url for reboot notifications (cannot use with --slack-hook-url flags)
|
||||
--period duration reboot check period (default 1h0m0s)
|
||||
--message-template-uncordon string message template used to notify about a node being successfully uncordoned (default "Node %s rebooted & uncordoned successfully!")
|
||||
--node-id string node name kured runs on, should be passed down from spec.nodeName via KURED_NODE_ID environment variable
|
||||
--notify-url string notify URL for reboot notifications (cannot use with --slack-hook-url flags)
|
||||
--period duration sentinel check period (default 1h0m0s)
|
||||
--post-reboot-node-labels strings labels to add to nodes after uncordoning
|
||||
--pre-reboot-node-labels strings labels to add to nodes before cordoning
|
||||
--prefer-no-schedule-taint string Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting.
|
||||
--prometheus-url string Prometheus instance to probe for active alerts
|
||||
--reboot-command string command to run when a reboot is required by the sentinel (default "/sbin/systemctl reboot")
|
||||
--reboot-command string command to run when a reboot is required (default "/bin/systemctl reboot")
|
||||
--reboot-days strings schedule reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
--reboot-sentinel-command string command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
|
||||
--slack-channel string slack channel for reboot notfications
|
||||
--slack-hook-url string slack hook URL for reboot notfications [deprecated in favor of --notify-url]
|
||||
--slack-username string slack username for reboot notfications (default "kured")
|
||||
--reboot-delay duration delay reboot for this duration (default: 0, disabled)
|
||||
--reboot-sentinel string path to file whose existence triggers the reboot command (default "/var/run/reboot-required")
|
||||
--reboot-sentinel-command string command for which a zero return code will trigger a reboot command
|
||||
--skip-wait-for-delete-timeout int when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node
|
||||
--slack-channel string slack channel for reboot notifications
|
||||
--slack-hook-url string slack hook URL for reboot notifications [deprecated in favor of --notify-url]
|
||||
--slack-username string slack username for reboot notifications (default "kured")
|
||||
--start-time string schedule reboot only after this time of day (default "0:00")
|
||||
--time-zone string use this timezone for schedule inputs (default "UTC")
|
||||
```
|
||||
@@ -123,9 +140,22 @@ values with `--reboot-sentinel` and `--period`. Each replica of the
|
||||
daemon uses a random offset derived from the period on startup so that
|
||||
nodes don't all contend for the lock simultaneously.
|
||||
|
||||
### Reboot Sentinel Command
|
||||
|
||||
Alternatively, a reboot sentinel command can be used. If a reboot
|
||||
sentinel command is used, the reboot sentinel file presence will be
|
||||
ignored.
|
||||
ignored. When the command exits with code `0`, kured will assume
|
||||
that a reboot is required.
|
||||
|
||||
For example, if you're using RHEL or its derivatives, you can
|
||||
set the sentinel command to `sh -c "! needs-restarting --reboothint"`
|
||||
(by default the command will return `1` if a reboot is required,
|
||||
so we wrap it in `sh -c` and add `!` to negate the return value).
|
||||
|
||||
```yaml
|
||||
configuration:
|
||||
rebootSentinelCommand: sh -c "! needs-restarting --reboothint"
|
||||
```
|
||||
|
||||
### Setting a schedule
|
||||
|
||||
@@ -200,6 +230,19 @@ running job or a known temperamental pod on a node will stop it rebooting.
|
||||
> up a RebootRequired alert as described in the next section so that
|
||||
> you can intervene manually if reboots are blocked for too long.
|
||||
|
||||
### Adding node labels before and after reboots
|
||||
|
||||
If you need to add node labels before and after the reboot process, you can use `--pre-reboot-node-labels` and `--post-reboot-node-labels`:
|
||||
|
||||
```console
|
||||
--pre-reboot-node-labels=zalando=notready
|
||||
--post-reboot-node-labels=zalando=ready
|
||||
```
|
||||
|
||||
Labels can be comma-delimited (e.g. `--pre-reboot-node-labels=zalando=notready,thisnode=disabled`) or you can supply the flags multiple times.
|
||||
|
||||
Note that label keys specified by these two flags should match. If they do not match, a warning will be generated.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
Each kured pod exposes a single gauge metric (`:8080/metrics`) that
|
||||
@@ -241,7 +284,7 @@ about draining and rebooting nodes across a list of technologies.
|
||||
|
||||

|
||||
|
||||
Alternatively you can use the `--message-template-drain` and `--message-template-reboot` to customize the text of the message, e.g.
|
||||
Alternatively you can use the `--message-template-drain`, `--message-template-reboot` and `--message-template-uncordon` flags to customize the text of the message, e.g.
|
||||
|
||||
```cli
|
||||
--message-template-drain="Draining node %s part of *my-cluster* in region *xyz*"
|
||||
@@ -250,17 +293,22 @@ Alternatively you can use the `--message-template-drain` and `--message-template
|
||||
Here is the syntax:
|
||||
|
||||
- slack: `slack://tokenA/tokenB/tokenC`
|
||||
(`--slack-hook-url` is deprecated but possible to use)
|
||||
|
||||
(`slack://<USERNAME>@tokenA/tokenB/tokenC` - in case you want to [respect username](https://github.com/weaveworks/kured/issues/482))
|
||||
|
||||
(`--slack-hook-url` is deprecated but possible to use)
|
||||
|
||||
For the new slack App integration, use:\
|
||||
`slack://xoxb:123456789012-1234567890123-4mt0t4l1YL3g1T5L4cK70k3N@<CHANNEL_NAME>?botname=<BOTNAME>`\
|
||||
for more information, [look here](https://containrrr.dev/shoutrrr/v0.5/services/slack/#examples)
|
||||
|
||||
- rocketchat: `rocketchat://[username@]rocketchat-host/token[/channel|@recipient]`
|
||||
|
||||
- teams: `teams://tName/token-a/token-b/token-c`
|
||||
|
||||
> **Attention** as the [format of the url has changed](https://github.com/containrrr/shoutrrr/issues/138) you also have to specify a `tName`
|
||||
- teams: `teams://group@tenant/altId/groupOwner?host=organization.webhook.office.com`
|
||||
|
||||
- Email: `smtp://username:password@host:port/?fromAddress=fromAddress&toAddresses=recipient1[,recipient2,...]`
|
||||
|
||||
More details here: [containrrr.dev/shoutrrr/v0.4/services/overview](https://containrrr.dev/shoutrrr/v0.4/services/overview)
|
||||
More details here: [containrrr.dev/shoutrrr/v0.5/services/overview](https://containrrr.dev/shoutrrr/v0.5/services/overview)
|
||||
|
||||
### Overriding Lock Configuration
|
||||
|
||||
@@ -369,6 +417,6 @@ If you have any questions about, feedback for or problems with `kured`:
|
||||
* Join us in [our monthly meeting](https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#),
|
||||
every fourth Wednesday of the month at 16:00 UTC.
|
||||
|
||||
We follow the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md).
|
||||
We follow the [CNCF Code of Conduct](CODE_OF_CONDUCT.md).
|
||||
|
||||
Your feedback is always welcome!
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
apiVersion: v1
|
||||
appVersion: "1.7.0"
|
||||
appVersion: "1.10.2"
|
||||
description: A Helm chart for kured
|
||||
name: kured
|
||||
version: 2.9.1
|
||||
version: 3.0.1
|
||||
home: https://github.com/weaveworks/kured
|
||||
maintainers:
|
||||
- name: ckotzbauer
|
||||
|
||||
@@ -21,74 +21,108 @@ $ helm delete my-release
|
||||
|
||||
The command removes all the Kubernetes components associated with the chart and deletes the release.
|
||||
|
||||
## Upgrade Notes
|
||||
|
||||
### From 2.x to 3.x
|
||||
|
||||
The Helm chart labels have been realigned to conform with the [standard labels](https://helm.sh/docs/chart_best_practices/labels/#standard-labels) in the current Helm Chart Best Practices guide, so this upgrade will fail unless the DaemonSet is deleted and recreated. The only way that Helm supports delete and recreate is by uninstalling, so please uninstall the Kured Helm chart before installing again with `v3.x`.
|
||||
|
||||
If you use any GitOps tool, please check and understand how to do a reinstall beforehand.
|
||||
|
||||
Suppose you want to enable metrics and use a `ServiceMonitor` with the `kube-prometheus-stack` chart's default `prometheus` instance, starting with a chart that has these values:
|
||||
|
||||
```
|
||||
metrics:
|
||||
create: true
|
||||
labels:
|
||||
release: kube-prometheus-stack
|
||||
```
|
||||
|
||||
A `ServiceMonitor` needs a `release` label to be discovered by the Prometheus Operator with the default configuration of `kube-prometheus-stack`. The prior `v2.x` chart already set a hardcoded `release` label. Chart `v3.x` changes this by applying the best-practice labels, so the user can now decide which `release` label value should be used.
|
||||
|
||||
With this update, it's more readily possible to make use of the Kured chart with `kube-prometheus-stack`'s default `ServiceMonitor` selector configuration.
|
||||
|
||||
## Migrate from stable Helm-Chart
|
||||
|
||||
### From 1.x to 2.x
|
||||
|
||||
The following changes have been made compared to the stable chart:
|
||||
- **[BREAKING CHANGE]** The `autolock` feature was removed. Use `configuration.startTime` and `configuration.endTime` instead.
|
||||
- Role inconsistencies have been fixed (allowed verbs for modifying the `DaemonSet`, apiGroup of `PodSecurityPolicy`)
|
||||
- Added support for affinities.
|
||||
- Configuration of cli-flags can be made through a `configuration` object.
|
||||
- Added optional `Service` and `ServiceMonitor` support for metrics endpoint.
|
||||
|
||||
- The previously static Slack channel, hook URL and username values are now made dynamic using the `tpl` function.
|
||||
|
||||
## Configuration
|
||||
|
||||
| Config | Description | Default |
|
||||
| ------ | ----------- | ------- |
|
||||
| `image.repository` | Image repository | `weaveworks/kured` |
|
||||
| `image.tag` | Image tag | `1.7.0` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
| `image.pullSecrets` | Image pull secrets | `[]` |
|
||||
| `updateStrategy` | Daemonset update strategy | `RollingUpdate` |
|
||||
| `maxUnavailable` | The max pods unavailable during a rolling update | `1` |
|
||||
| `podAnnotations` | Annotations to apply to pods (eg to add Prometheus annotations) | `{}` |
|
||||
| `extraArgs` | Extra arguments to pass to `/usr/bin/kured`. See below. | `{}` |
|
||||
| `extraEnvVars` | Array of environment variables to pass to the daemonset. | `{}` |
|
||||
| `configuration.lockTtl` | cli-parameter `--lock-ttl` | `0` |
|
||||
| `configuration.lockReleaseDelay` | cli-parameter `--lock-release-delay` | `0` |
|
||||
| `configuration.alertFilterRegexp` | cli-parameter `--alert-filter-regexp` | `""` |
|
||||
| `configuration.blockingPodSelector` | Array of selectors for multiple cli-parameters `--blocking-pod-selector` | `[]` |
|
||||
| `configuration.endTime` | cli-parameter `--end-time` | `""` |
|
||||
| `configuration.lockAnnotation` | cli-parameter `--lock-annotation` | `""` |
|
||||
| `configuration.period` | cli-parameter `--period` | `""` |
|
||||
| `configuration.forceReboot` | cli-parameter `--force-reboot` | `false` |
|
||||
| `configuration.drainGracePeriod` | cli-parameter `--drain-grace-period` | `""` |
|
||||
| `configuration.drainTimeout` | cli-parameter `--drain-timeout` | `""` |
|
||||
| `configuration.skipWaitForDeleteTimeout` | cli-parameter `--skip-wait-for-delete-timeout` | `""` |
|
||||
| `configuration.prometheusUrl` | cli-parameter `--prometheus-url` | `""` |
|
||||
| `configuration.rebootDays` | Array of days for multiple cli-parameters `--reboot-days` | `[]` |
|
||||
| `configuration.rebootSentinel` | cli-parameter `--reboot-sentinel` | `""` |
|
||||
| `configuration.rebootCommand` | cli-parameter `--reboot-command` | `""` |
|
||||
| `configuration.slackChannel` | cli-parameter `--slack-channel` | `""` |
|
||||
| `configuration.slackHookUrl` | cli-parameter `--slack-hook-url` | `""` |
|
||||
| `configuration.slackUsername` | cli-parameter `--slack-username` | `""` |
|
||||
| `configuration.notifyUrl` | cli-parameter `--notify-url` | `""` |
|
||||
| `configuration.messageTemplateDrain` | cli-parameter `--message-template-drain` | `""` |
|
||||
| `configuration.messageTemplateReboot` | cli-parameter `--message-template-reboot` | `""` |
|
||||
| `configuration.startTime` | cli-parameter `--start-time` | `""` |
|
||||
| `configuration.timeZone` | cli-parameter `--time-zone` | `""` |
|
||||
| `configuration.annotateNodes` | cli-parameter `--annotate-nodes` | `false` |
|
||||
| `rbac.create` | Create RBAC roles | `true` |
|
||||
| `serviceAccount.create` | Create a service account | `true` |
|
||||
| `serviceAccount.name` | Service account name to create (or use if `serviceAccount.create` is false) | (chart fullname) |
|
||||
| `podSecurityPolicy.create` | Create podSecurityPolicy | `false` |
|
||||
| `resources` | Resources requests and limits. | `{}` |
|
||||
| `metrics.create` | Create a ServiceMonitor for prometheus-operator | `false` |
|
||||
| `metrics.namespace` | The namespace to create the ServiceMonitor in | `""` |
|
||||
| `metrics.labels` | Additional labels for the ServiceMonitor | `{}` |
|
||||
| `metrics.interval` | Interval prometheus should scrape the endpoint | `60s` |
|
||||
| `metrics.scrapeTimeout` | A custom scrapeTimeout for prometheus | `""` |
|
||||
| `service.create` | Create a Service for the metrics endpoint | `false` |
|
||||
| `service.name ` | Service name for the metrics endpoint | `""` |
|
||||
| `service.port` | Port of the service to expose | `8080` |
|
||||
| `service.annotations` | Annotations to apply to the service (eg to add Prometheus annotations) | `{}` |
|
||||
| `podLabels` | Additional labels for pods (e.g. CostCenter=IT) | `{}` |
|
||||
| `priorityClassName` | Priority Class to be used by the pods | `""` |
|
||||
| `tolerations` | Tolerations to apply to the daemonset (eg to allow running on master) | `[{"key": "node-role.kubernetes.io/master", "effect": "NoSchedule"}]`|
|
||||
| `affinity` | Affinity for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
| `nodeSelector` | Node Selector for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
| `volumeMounts` | Maps of volumes mount to mount | `{}` |
|
||||
| `volumes` | Maps of volumes to mount | `{}` |
|
||||
| Config | Description | Default |
|
||||
| ------ | ----------- | ------- |
|
||||
| `image.repository` | Image repository | `weaveworks/kured` |
|
||||
| `image.tag` | Image tag | `1.10.2` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
| `image.pullSecrets` | Image pull secrets | `[]` |
|
||||
| `updateStrategy` | Daemonset update strategy | `RollingUpdate` |
|
||||
| `maxUnavailable` | The max pods unavailable during a rolling update | `1` |
|
||||
| `podAnnotations` | Annotations to apply to pods (eg to add Prometheus annotations) | `{}` |
|
||||
| `dsAnnotations` | Annotations to apply to the kured DaemonSet | `{}` |
|
||||
| `extraArgs` | Extra arguments to pass to `/usr/bin/kured`. See below. | `{}` |
|
||||
| `extraEnvVars` | Array of environment variables to pass to the daemonset. | `{}` |
|
||||
| `configuration.lockTtl` | cli-parameter `--lock-ttl` | `0` |
|
||||
| `configuration.lockReleaseDelay` | cli-parameter `--lock-release-delay` | `0` |
|
||||
| `configuration.alertFilterRegexp` | cli-parameter `--alert-filter-regexp` | `""` |
|
||||
| `configuration.alertFiringOnly` | cli-parameter `--alert-firing-only` | `false` |
|
||||
| `configuration.blockingPodSelector` | Array of selectors for multiple cli-parameters `--blocking-pod-selector` | `[]` |
|
||||
| `configuration.endTime` | cli-parameter `--end-time` | `""` |
|
||||
| `configuration.lockAnnotation` | cli-parameter `--lock-annotation` | `""` |
|
||||
| `configuration.period` | cli-parameter `--period` | `""` |
|
||||
| `configuration.forceReboot` | cli-parameter `--force-reboot` | `false` |
|
||||
| `configuration.drainGracePeriod` | cli-parameter `--drain-grace-period` | `""` |
|
||||
| `configuration.drainTimeout` | cli-parameter `--drain-timeout` | `""` |
|
||||
| `configuration.skipWaitForDeleteTimeout` | cli-parameter `--skip-wait-for-delete-timeout` | `""` |
|
||||
| `configuration.prometheusUrl` | cli-parameter `--prometheus-url` | `""` |
|
||||
| `configuration.rebootDays` | Array of days for multiple cli-parameters `--reboot-days` | `[]` |
|
||||
| `configuration.rebootSentinel` | cli-parameter `--reboot-sentinel` | `""` |
|
||||
| `configuration.rebootSentinelCommand` | cli-parameter `--reboot-sentinel-command` | `""` |
|
||||
| `configuration.rebootCommand` | cli-parameter `--reboot-command` | `""` |
|
||||
| `configuration.rebootDelay` | cli-parameter `--reboot-delay` | `""` |
|
||||
| `configuration.slackChannel` | cli-parameter `--slack-channel`. Passed through `tpl` | `""` |
|
||||
| `configuration.slackHookUrl` | cli-parameter `--slack-hook-url`. Passed through `tpl` | `""` |
|
||||
| `configuration.slackUsername` | cli-parameter `--slack-username`. Passed through `tpl` | `""` |
|
||||
| `configuration.notifyUrl` | cli-parameter `--notify-url` | `""` |
|
||||
| `configuration.messageTemplateDrain` | cli-parameter `--message-template-drain` | `""` |
|
||||
| `configuration.messageTemplateReboot` | cli-parameter `--message-template-reboot` | `""` |
|
||||
| `configuration.messageTemplateUncordon` | cli-parameter `--message-template-uncordon` | `""` |
|
||||
| `configuration.startTime` | cli-parameter `--start-time` | `""` |
|
||||
| `configuration.timeZone` | cli-parameter `--time-zone` | `""` |
|
||||
| `configuration.annotateNodes` | cli-parameter `--annotate-nodes` | `false` |
|
||||
| `configuration.logFormat` | cli-parameter `--log-format` | `"text"` |
|
||||
| `configuration.preferNoScheduleTaint` | Taint name applied during pending node reboot | `""` |
|
||||
| `configuration.preRebootNodeLabels` | Array of key-value-pairs to add to nodes before cordoning for multiple cli-parameters `--pre-reboot-node-labels` | `[]` |
|
||||
| `configuration.postRebootNodeLabels` | Array of key-value-pairs to add to nodes after uncordoning for multiple cli-parameters `--post-reboot-node-labels` | `[]` |
|
||||
| `rbac.create` | Create RBAC roles | `true` |
|
||||
| `serviceAccount.create` | Create a service account | `true` |
|
||||
| `serviceAccount.name` | Service account name to create (or use if `serviceAccount.create` is false) | (chart fullname) |
|
||||
| `podSecurityPolicy.create` | Create podSecurityPolicy | `false` |
|
||||
| `containerSecurityContext.privileged` | Enables `privileged` in container-specific security context | `true` |
|
||||
| `containerSecurityContext.allowPrivilegeEscalation`| Enables `allowPrivilegeEscalation` in container-specific security context. If not set it won't be configured. | |
|
||||
| `resources` | Resources requests and limits. | `{}` |
|
||||
| `metrics.create` | Create a ServiceMonitor for prometheus-operator | `false` |
|
||||
| `metrics.namespace` | The namespace to create the ServiceMonitor in | `""` |
|
||||
| `metrics.labels` | Additional labels for the ServiceMonitor | `{}` |
|
||||
| `metrics.interval` | Interval prometheus should scrape the endpoint | `60s` |
|
||||
| `metrics.scrapeTimeout` | A custom scrapeTimeout for prometheus | `""` |
|
||||
| `service.create` | Create a Service for the metrics endpoint | `false` |
|
||||
| `service.name` | Service name for the metrics endpoint | `""` |
|
||||
| `service.port` | Port of the service to expose | `8080` |
|
||||
| `service.annotations` | Annotations to apply to the service (eg to add Prometheus annotations) | `{}` |
|
||||
| `podLabels` | Additional labels for pods (e.g. CostCenter=IT) | `{}` |
|
||||
| `priorityClassName` | Priority Class to be used by the pods | `""` |
|
||||
| `tolerations` | Tolerations to apply to the daemonset (eg to allow running on master) | `[{"key": "node-role.kubernetes.io/control-plane", "effect": "NoSchedule"}]` for Kubernetes 1.24.0 and greater, otherwise `[{"key": "node-role.kubernetes.io/master", "effect": "NoSchedule"}]`|
|
||||
| `affinity` | Affinity for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
| `nodeSelector` | Node Selector for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
| `volumeMounts` | Maps of volume mounts to mount | `{}` |
|
||||
| `volumes` | Maps of volumes to mount | `{}` |
|
||||
See https://github.com/weaveworks/kured#configuration for values (not contained in the `configuration` object) for `extraArgs`. Note that
|
||||
```yaml
|
||||
extraArgs:
|
||||
|
||||
@@ -57,16 +57,16 @@ Return the appropriate apiVersion for podsecuritypolicy.
|
||||
Returns a set of labels applied to each resource.
|
||||
*/}}
|
||||
{{- define "kured.labels" -}}
|
||||
app: {{ template "kured.name" . }}
|
||||
chart: {{ template "kured.chart" . }}
|
||||
release: {{ .Release.Name }}
|
||||
heritage: {{ .Release.Service }}
|
||||
app.kubernetes.io/name: {{ template "kured.name" . }}
|
||||
helm.sh/chart: {{ template "kured.chart" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Returns a set of matchLabels applied.
|
||||
*/}}
|
||||
{{- define "kured.matchLabels" -}}
|
||||
app: {{ template "kured.name" . }}
|
||||
release: {{ .Release.Name }}
|
||||
app.kubernetes.io/name: {{ template "kured.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end -}}
|
||||
|
||||
@@ -5,6 +5,12 @@ metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
{{- if .Values.dsAnnotations }}
|
||||
annotations:
|
||||
{{- range $key, $value := .Values.dsAnnotations }}
|
||||
{{ $key }}: {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
spec:
|
||||
updateStrategy:
|
||||
type: {{ .Values.updateStrategy }}
|
||||
@@ -44,7 +50,7 @@ spec:
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
securityContext:
|
||||
privileged: true # Give permission to nsenter /proc/1/ns/mnt
|
||||
{{ toYaml .Values.containerSecurityContext | indent 12 }}
|
||||
resources:
|
||||
{{ toYaml .Values.resources | indent 12 }}
|
||||
command:
|
||||
@@ -61,6 +67,9 @@ spec:
|
||||
{{- if .Values.configuration.alertFilterRegexp }}
|
||||
- --alert-filter-regexp={{ .Values.configuration.alertFilterRegexp }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.alertFiringOnly }}
|
||||
- --alert-firing-only={{ .Values.configuration.alertFiringOnly }}
|
||||
{{- end }}
|
||||
{{- range .Values.configuration.blockingPodSelector }}
|
||||
- --blocking-pod-selector={{ . }}
|
||||
{{- end }}
|
||||
@@ -74,7 +83,7 @@ spec:
|
||||
- --period={{ .Values.configuration.period }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.forceReboot }}
|
||||
- --force-reboot
|
||||
- --force-reboot
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.drainGracePeriod }}
|
||||
- --drain-grace-period={{ .Values.configuration.drainGracePeriod }}
|
||||
@@ -91,20 +100,32 @@ spec:
|
||||
{{- range .Values.configuration.rebootDays }}
|
||||
- --reboot-days={{ . }}
|
||||
{{- end }}
|
||||
{{- range .Values.configuration.preRebootNodeLabels }}
|
||||
- --pre-reboot-node-labels={{ . }}
|
||||
{{- end }}
|
||||
{{- range .Values.configuration.postRebootNodeLabels }}
|
||||
- --post-reboot-node-labels={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.rebootSentinel }}
|
||||
- --reboot-sentinel={{ .Values.configuration.rebootSentinel }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.rebootSentinelCommand }}
|
||||
- --reboot-sentinel-command={{ .Values.configuration.rebootSentinelCommand }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.rebootCommand }}
|
||||
- --reboot-command={{ .Values.configuration.rebootCommand }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.rebootDelay }}
|
||||
- --reboot-delay={{ .Values.configuration.rebootDelay }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackChannel }}
|
||||
- --slack-channel={{ .Values.configuration.slackChannel }}
|
||||
- --slack-channel={{ tpl .Values.configuration.slackChannel . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackHookUrl }}
|
||||
- --slack-hook-url={{ .Values.configuration.slackHookUrl }}
|
||||
- --slack-hook-url={{ tpl .Values.configuration.slackHookUrl . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackUsername }}
|
||||
- --slack-username={{ .Values.configuration.slackUsername }}
|
||||
- --slack-username={{ tpl .Values.configuration.slackUsername . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.notifyUrl }}
|
||||
- --notify-url={{ .Values.configuration.notifyUrl }}
|
||||
@@ -115,6 +136,9 @@ spec:
|
||||
{{- if .Values.configuration.messageTemplateReboot }}
|
||||
- --message-template-reboot={{ .Values.configuration.messageTemplateReboot }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.messageTemplateUncordon }}
|
||||
- --message-template-uncordon={{ .Values.configuration.messageTemplateUncordon }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.startTime }}
|
||||
- --start-time={{ .Values.configuration.startTime }}
|
||||
{{- end }}
|
||||
@@ -124,6 +148,12 @@ spec:
|
||||
{{- if .Values.configuration.annotateNodes }}
|
||||
- --annotate-nodes={{ .Values.configuration.annotateNodes }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.preferNoScheduleTaint }}
|
||||
- --prefer-no-schedule-taint={{ .Values.configuration.preferNoScheduleTaint }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.logFormat }}
|
||||
- --log-format={{ .Values.configuration.logFormat }}
|
||||
{{- end }}
|
||||
{{- range $key, $value := .Values.extraArgs }}
|
||||
{{- if $value }}
|
||||
- --{{ $key }}={{ $value }}
|
||||
@@ -148,9 +178,16 @@ spec:
|
||||
{{- if .Values.extraEnvVars }}
|
||||
{{ toYaml .Values.extraEnvVars | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- if .Values.tolerations }}
|
||||
{{- with .Values.tolerations }}
|
||||
{{ toYaml . | indent 8 }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
|
||||
@@ -3,26 +3,32 @@ image:
|
||||
tag: latest
|
||||
|
||||
configuration:
|
||||
# annotationTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
|
||||
# alertFilterRegexp: "" # alert names to ignore when checking for active alerts
|
||||
# blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
|
||||
# endTime: "" # only reboot before this time of day (default "23:59")
|
||||
# lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
period: "1m" # reboot check period (default 1h0m0s)
|
||||
# forceReboot: false # force a reboot even if the drain fails or times out (default: false)
|
||||
# drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
|
||||
# drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
|
||||
# annotationTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
|
||||
# alertFilterRegexp: "" # alert names to ignore when checking for active alerts
|
||||
# alertFiringOnly: false # only consider firing alerts when checking for active alerts
|
||||
# blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
|
||||
# endTime: "" # only reboot before this time of day (default "23:59")
|
||||
# lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
period: "1m" # reboot check period (default 1h0m0s)
|
||||
# forceReboot: false # force a reboot even if the drain fails or times out (default: false)
|
||||
# drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
|
||||
# drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
|
||||
# skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
|
||||
# prometheusUrl: "" # Prometheus instance to probe for active alerts
|
||||
# rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
# rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
# slackChannel: "" # slack channel for reboot notfications
|
||||
# slackHookUrl: "" # slack hook URL for reboot notfications
|
||||
# slackUsername: "" # slack username for reboot notfications (default "kured")
|
||||
# notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
|
||||
# messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
|
||||
# messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooted node %s")
|
||||
# startTime: "" # only reboot after this time of day (default "0:00")
|
||||
# timeZone: "" # time-zone to use (valid zones from "time" golang package)
|
||||
# annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
|
||||
# lockReleaseDelay: "5m" # hold lock after reboot by this amount of time (default 0, disabled)
|
||||
# prometheusUrl: "" # Prometheus instance to probe for active alerts
|
||||
# rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
# rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
# rebootSentinelCommand: "" # command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
|
||||
# slackChannel: "" # slack channel for reboot notifications
|
||||
# slackHookUrl: "" # slack hook URL for reboot notifications
|
||||
# slackUsername: "" # slack username for reboot notifications (default "kured")
|
||||
# notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
|
||||
# messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
|
||||
# messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooting node %s")
|
||||
# messageTemplateUncordon: "" # slack message template when notifying about a node being uncordoned (default "Node %s rebooted & uncordoned successfully!")
|
||||
# startTime: "" # only reboot after this time of day (default "0:00")
|
||||
# timeZone: "" # time-zone to use (valid zones from "time" golang package)
|
||||
# annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
|
||||
# lockReleaseDelay: "5m" # hold lock after reboot by this amount of time (default 0, disabled)
|
||||
# logFormat: "text" # log format specified as text or json, defaults to text
|
||||
# preRebootNodeLabels: [] # labels to add to nodes before cordoning (default [])
|
||||
# postRebootNodeLabels: [] # labels to add to nodes after uncordoning (default [])
|
||||
|
||||
@@ -9,6 +9,7 @@ updateStrategy: RollingUpdate
|
||||
maxUnavailable: 1
|
||||
|
||||
podAnnotations: {}
|
||||
dsAnnotations: {}
|
||||
|
||||
extraArgs: {}
|
||||
|
||||
@@ -22,30 +23,39 @@ extraEnvVars:
|
||||
# value: 123
|
||||
|
||||
configuration:
|
||||
lockTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
|
||||
alertFilterRegexp: "" # alert names to ignore when checking for active alerts
|
||||
blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
|
||||
endTime: "" # only reboot before this time of day (default "23:59")
|
||||
lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
period: "" # reboot check period (default 1h0m0s)
|
||||
forceReboot: false # force a reboot even if the drain fails or times out (default: false)
|
||||
drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
|
||||
drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
|
||||
lockTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
|
||||
alertFilterRegexp: "" # alert names to ignore when checking for active alerts
|
||||
alertFiringOnly: false # only consider firing alerts when checking for active alerts
|
||||
blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
|
||||
endTime: "" # only reboot before this time of day (default "23:59")
|
||||
lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
period: "" # reboot check period (default 1h0m0s)
|
||||
forceReboot: false # force a reboot even if the drain fails or times out (default: false)
|
||||
drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
|
||||
drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
|
||||
skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
|
||||
prometheusUrl: "" # Prometheus instance to probe for active alerts
|
||||
rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
prometheusUrl: "" # Prometheus instance to probe for active alerts
|
||||
rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
rebootSentinelCommand: "" # command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
|
||||
rebootCommand: "/bin/systemctl reboot" # command to run when a reboot is required by the sentinel
|
||||
slackChannel: "" # slack channel for reboot notfications
|
||||
slackHookUrl: "" # slack hook URL for reboot notfications
|
||||
slackUsername: "" # slack username for reboot notfications (default "kured")
|
||||
notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
|
||||
messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
|
||||
messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooted node %s")
|
||||
startTime: "" # only reboot after this time of day (default "0:00")
|
||||
timeZone: "" # time-zone to use (valid zones from "time" golang package)
|
||||
annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
|
||||
lockReleaseDelay: 0 # hold lock after reboot by this amount of time (default 0, disabled)
|
||||
rebootDelay: "" # add a delay after drain finishes but before the reboot command is issued
|
||||
slackChannel: "" # slack channel for reboot notifications
|
||||
slackHookUrl: "" # slack hook URL for reboot notifications
|
||||
slackUsername: "" # slack username for reboot notifications (default "kured")
|
||||
notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
|
||||
messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
|
||||
messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooting node %s")
|
||||
messageTemplateUncordon: "" # slack message template when notifying about a node being uncordoned (default "Node %s rebooted & uncordoned successfully!")
|
||||
startTime: "" # only reboot after this time of day (default "0:00")
|
||||
timeZone: "" # time-zone to use (valid zones from "time" golang package)
|
||||
annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
|
||||
lockReleaseDelay: 0 # hold lock after reboot by this amount of time (default 0, disabled)
|
||||
preferNoScheduleTaint: "" # Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting.
|
||||
logFormat: "text" # log format specified as text or json, defaults to text
|
||||
preRebootNodeLabels: [] # labels to add to nodes before cordoning (default [])
|
||||
postRebootNodeLabels: [] # labels to add to nodes after uncordoning (default [])
|
||||
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
@@ -57,6 +67,10 @@ serviceAccount:
|
||||
podSecurityPolicy:
|
||||
create: false
|
||||
|
||||
containerSecurityContext:
|
||||
privileged: true # Give permission to nsenter /proc/1/ns/mnt
|
||||
# allowPrivilegeEscalation: true # Needed when using defaultAllowPrivilegedEscalation: false in psp
|
||||
|
||||
resources: {}
|
||||
|
||||
metrics:
|
||||
@@ -77,9 +91,7 @@ podLabels: {}
|
||||
|
||||
priorityClassName: ""
|
||||
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM alpine:3.14
|
||||
FROM alpine:3.16.2
|
||||
RUN apk update --no-cache && apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
|
||||
COPY ./kured /usr/bin/kured
|
||||
ENTRYPOINT ["/usr/bin/kured"]
|
||||
|
||||
20
cmd/kured/Dockerfile.multi
Normal file
20
cmd/kured/Dockerfile.multi
Normal file
@@ -0,0 +1,20 @@
|
||||
# Build stage: runs on the build platform and compiles for the target platform.
FROM --platform=$BUILDPLATFORM golang:bullseye AS build

ARG TARGETOS
ARG TARGETARCH
ARG TARGETVARIANT

ENV GOOS=$TARGETOS
ENV GOARCH=$TARGETARCH
# NOTE(review): GOVARIANT is not read by the Go toolchain itself; presumably
# it is consumed by the kured-multi make target - confirm.
ENV GOVARIANT=$TARGETVARIANT

WORKDIR /src
# Copy only the module files first so the dependency download layer stays
# cached until go.mod/go.sum change. With several sources the destination
# must be a directory ending in a slash.
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN make kured-multi

# Runtime stage: minimal image containing only the kured binary.
FROM --platform=$TARGETPLATFORM alpine:3.16.2 AS bin
RUN apk update --no-cache && apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
COPY --from=build /src/cmd/kured/kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]
|
||||
@@ -6,15 +6,20 @@ import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
papi "github.com/prometheus/client_golang/api"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/spf13/pflag"
|
||||
"github.com/spf13/viper"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/types"
|
||||
@@ -30,7 +35,6 @@ import (
|
||||
"github.com/weaveworks/kured/pkg/alerts"
|
||||
"github.com/weaveworks/kured/pkg/daemonsetlock"
|
||||
"github.com/weaveworks/kured/pkg/delaytick"
|
||||
"github.com/weaveworks/kured/pkg/notifications/slack"
|
||||
"github.com/weaveworks/kured/pkg/taints"
|
||||
"github.com/weaveworks/kured/pkg/timewindow"
|
||||
)
|
||||
@@ -62,8 +66,13 @@ var (
|
||||
slackChannel string
|
||||
messageTemplateDrain string
|
||||
messageTemplateReboot string
|
||||
messageTemplateUncordon string
|
||||
podSelectors []string
|
||||
rebootCommand string
|
||||
logFormat string
|
||||
preRebootNodeLabels []string
|
||||
postRebootNodeLabels []string
|
||||
nodeID string
|
||||
|
||||
rebootDays []string
|
||||
rebootStart string
|
||||
@@ -86,6 +95,8 @@ const (
|
||||
KuredRebootInProgressAnnotation string = "weave.works/kured-reboot-in-progress"
|
||||
// KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation
|
||||
KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed"
|
||||
// EnvPrefix The environment variable prefix of all environment variables bound to our command line flags.
|
||||
EnvPrefix = "KURED"
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -93,18 +104,30 @@ func init() {
|
||||
}
|
||||
|
||||
func main() {
|
||||
rootCmd := &cobra.Command{
|
||||
Use: "kured",
|
||||
Short: "Kubernetes Reboot Daemon",
|
||||
PreRun: flagCheck,
|
||||
Run: root}
|
||||
cmd := NewRootCommand()
|
||||
|
||||
if err := cmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
// NewRootCommand construct the Cobra root command
|
||||
func NewRootCommand() *cobra.Command {
|
||||
rootCmd := &cobra.Command{
|
||||
Use: "kured",
|
||||
Short: "Kubernetes Reboot Daemon",
|
||||
PersistentPreRunE: bindViper,
|
||||
PreRun: flagCheck,
|
||||
Run: root}
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&nodeID, "node-id", "",
|
||||
"node name kured runs on, should be passed down from spec.nodeName via KURED_NODE_ID environment variable")
|
||||
rootCmd.PersistentFlags().BoolVar(&forceReboot, "force-reboot", false,
|
||||
"force a reboot even if the drain fails or times out (default: false)")
|
||||
"force a reboot even if the drain fails or times out")
|
||||
rootCmd.PersistentFlags().IntVar(&drainGracePeriod, "drain-grace-period", -1,
|
||||
"time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)")
|
||||
"time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used")
|
||||
rootCmd.PersistentFlags().IntVar(&skipWaitForDeleteTimeoutSeconds, "skip-wait-for-delete-timeout", 0,
|
||||
"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)")
|
||||
"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node")
|
||||
rootCmd.PersistentFlags().DurationVar(&drainTimeout, "drain-timeout", 0,
|
||||
"timeout after which the drain is aborted (default: 0, infinite time)")
|
||||
rootCmd.PersistentFlags().DurationVar(&rebootDelay, "reboot-delay", 0,
|
||||
@@ -126,7 +149,7 @@ func main() {
|
||||
rootCmd.PersistentFlags().Var(®expValue{&alertFilter}, "alert-filter-regexp",
|
||||
"alert names to ignore when checking for active alerts")
|
||||
rootCmd.PersistentFlags().BoolVar(&alertFiringOnly, "alert-firing-only", false,
|
||||
"only consider firing alerts when checking for active alerts (default: false)")
|
||||
"only consider firing alerts when checking for active alerts")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinelFile, "reboot-sentinel", "/var/run/reboot-required",
|
||||
"path to file whose existence triggers the reboot command")
|
||||
rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaintName, "prefer-no-schedule-taint", "",
|
||||
@@ -137,13 +160,15 @@ func main() {
|
||||
"command to run when a reboot is required")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
|
||||
"slack hook URL for notifications")
|
||||
"slack hook URL for reboot notifications [deprecated in favor of --notify-url]")
|
||||
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
|
||||
"slack username for notifications")
|
||||
"slack username for reboot notifications")
|
||||
rootCmd.PersistentFlags().StringVar(&slackChannel, "slack-channel", "",
|
||||
"slack channel for reboot notfications")
|
||||
"slack channel for reboot notifications")
|
||||
rootCmd.PersistentFlags().StringVar(¬ifyURL, "notify-url", "",
|
||||
"notify URL for reboot notfications")
|
||||
"notify URL for reboot notifications (cannot use with --slack-hook-url flags)")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateUncordon, "message-template-uncordon", "Node %s rebooted & uncordoned successfully!",
|
||||
"message template used to notify about a node being successfully uncordoned")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateDrain, "message-template-drain", "Draining node %s",
|
||||
"message template used to notify about a node being drained")
|
||||
rootCmd.PersistentFlags().StringVar(&messageTemplateReboot, "message-template-reboot", "Rebooting node %s",
|
||||
@@ -164,21 +189,96 @@ func main() {
|
||||
rootCmd.PersistentFlags().BoolVar(&annotateNodes, "annotate-nodes", false,
|
||||
"if set, the annotations 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
rootCmd.PersistentFlags().StringVar(&logFormat, "log-format", "text",
|
||||
"use text or json log format")
|
||||
|
||||
rootCmd.PersistentFlags().StringSliceVar(&preRebootNodeLabels, "pre-reboot-node-labels", nil,
|
||||
"labels to add to nodes before cordoning")
|
||||
rootCmd.PersistentFlags().StringSliceVar(&postRebootNodeLabels, "post-reboot-node-labels", nil,
|
||||
"labels to add to nodes after uncordoning")
|
||||
|
||||
return rootCmd
|
||||
}
|
||||
|
||||
// temporary func that checks for deprecated slack-notification-related flags
|
||||
// func that checks for deprecated slack-notification-related flags and node labels that do not match
|
||||
func flagCheck(cmd *cobra.Command, args []string) {
|
||||
if slackHookURL != "" && notifyURL != "" {
|
||||
log.Warnf("Cannot use both --notify-url and --slack-hook-url flags. Kured will use --notify-url flag only...")
|
||||
slackHookURL = ""
|
||||
}
|
||||
if slackChannel != "" || slackHookURL != "" {
|
||||
log.Warnf("slack-* flag(s) are being deprecated. Please use --notify-url flag instead.")
|
||||
if notifyURL != "" {
|
||||
notifyURL = stripQuotes(notifyURL)
|
||||
} else if slackHookURL != "" {
|
||||
slackHookURL = stripQuotes(slackHookURL)
|
||||
log.Warnf("Deprecated flag(s). Please use --notify-url flag instead.")
|
||||
trataURL, err := url.Parse(slackHookURL)
|
||||
if err != nil {
|
||||
log.Warnf("slack-hook-url is not properly formatted... no notification will be sent: %v\n", err)
|
||||
}
|
||||
if len(strings.Split(strings.Trim(trataURL.Path, "/services/"), "/")) != 3 {
|
||||
log.Warnf("slack-hook-url is not properly formatted... no notification will be sent: unexpected number of / in URL\n")
|
||||
} else {
|
||||
notifyURL = fmt.Sprintf("slack://%s", strings.Trim(trataURL.Path, "/services/"))
|
||||
}
|
||||
}
|
||||
var preRebootNodeLabelKeys, postRebootNodeLabelKeys []string
|
||||
for _, label := range preRebootNodeLabels {
|
||||
preRebootNodeLabelKeys = append(preRebootNodeLabelKeys, strings.Split(label, "=")[0])
|
||||
}
|
||||
for _, label := range postRebootNodeLabels {
|
||||
postRebootNodeLabelKeys = append(postRebootNodeLabelKeys, strings.Split(label, "=")[0])
|
||||
}
|
||||
sort.Strings(preRebootNodeLabelKeys)
|
||||
sort.Strings(postRebootNodeLabelKeys)
|
||||
if !reflect.DeepEqual(preRebootNodeLabelKeys, postRebootNodeLabelKeys) {
|
||||
log.Warnf("pre-reboot-node-labels keys and post-reboot-node-labels keys do not match. This may result in unexpected behaviour.")
|
||||
}
|
||||
}
|
||||
|
||||
// stripQuotes removes one pair of matching literal single or double quote
// characters that surround a string. A string consisting solely of a quote
// pair ("" or '') yields the empty string. Strings shorter than two bytes, or
// whose first and last bytes are not the same quote character, are returned
// unchanged.
func stripQuotes(str string) string {
	// Two bytes is the shortest string that can hold an opening and a closing quote.
	if len(str) >= 2 {
		firstChar := str[0]
		lastChar := str[len(str)-1]
		if firstChar == lastChar && (firstChar == '"' || firstChar == '\'') {
			return str[1 : len(str)-1]
		}
	}
	// return the original string if it is not wrapped in a matching pair of quotes
	return str
}
|
||||
|
||||
// bindViper initializes viper and binds command flags with environment variables
|
||||
func bindViper(cmd *cobra.Command, args []string) error {
|
||||
v := viper.New()
|
||||
|
||||
v.SetEnvPrefix(EnvPrefix)
|
||||
v.AutomaticEnv()
|
||||
bindFlags(cmd, v)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// bindFlags binds each cobra flag to its associated viper configuration (environment variable)
|
||||
func bindFlags(cmd *cobra.Command, v *viper.Viper) {
|
||||
cmd.Flags().VisitAll(func(f *pflag.Flag) {
|
||||
// Environment variables can't have dashes in them, so bind them to their equivalent keys with underscores
|
||||
if strings.Contains(f.Name, "-") {
|
||||
v.BindEnv(f.Name, flagToEnvVar(f.Name))
|
||||
}
|
||||
|
||||
// Apply the viper config value to the flag when the flag is not set and viper has a value
|
||||
if !f.Changed && v.IsSet(f.Name) {
|
||||
val := v.Get(f.Name)
|
||||
log.Infof("Binding %s command flag to environment variable: %s", f.Name, flagToEnvVar(f.Name))
|
||||
cmd.Flags().Set(f.Name, fmt.Sprintf("%v", val))
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// flagToEnvVar converts command flag name to equivalent environment variable name
|
||||
func flagToEnvVar(flag string) string {
|
||||
envVarSuffix := strings.ToUpper(strings.ReplaceAll(flag, "-", "_"))
|
||||
return fmt.Sprintf("%s_%s", EnvPrefix, envVarSuffix)
|
||||
}
|
||||
|
||||
// newCommand creates a new Command with stdout/stderr wired to our standard logger
|
||||
@@ -273,7 +373,7 @@ func (pb PrometheusBlockingChecker) isBlocked() bool {
|
||||
}
|
||||
|
||||
func (kb KubernetesBlockingChecker) isBlocked() bool {
|
||||
fieldSelector := fmt.Sprintf("spec.nodeName=%s", kb.nodename)
|
||||
fieldSelector := fmt.Sprintf("spec.nodeName=%s,status.phase!=Succeeded,status.phase!=Failed,status.phase!=Unknown", kb.nodename)
|
||||
for _, labelSelector := range kb.filter {
|
||||
podList, err := kb.client.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{
|
||||
LabelSelector: labelSelector,
|
||||
@@ -348,16 +448,15 @@ func release(lock *daemonsetlock.DaemonSetLock) {
|
||||
}
|
||||
}
|
||||
|
||||
func drain(client *kubernetes.Clientset, node *v1.Node) {
|
||||
func drain(client *kubernetes.Clientset, node *v1.Node) error {
|
||||
nodename := node.GetName()
|
||||
|
||||
if preRebootNodeLabels != nil {
|
||||
updateNodeLabels(client, node, preRebootNodeLabels)
|
||||
}
|
||||
|
||||
log.Infof("Draining node %s", nodename)
|
||||
|
||||
if slackHookURL != "" {
|
||||
if err := slack.NotifyDrain(slackHookURL, slackUsername, slackChannel, messageTemplateDrain, nodename); err != nil {
|
||||
log.Warnf("Error notifying slack: %v", err)
|
||||
}
|
||||
}
|
||||
if notifyURL != "" {
|
||||
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateDrain, nodename)); err != nil {
|
||||
log.Warnf("Error notifying: %v", err)
|
||||
@@ -378,23 +477,18 @@ func drain(client *kubernetes.Clientset, node *v1.Node) {
|
||||
}
|
||||
|
||||
if err := kubectldrain.RunCordonOrUncordon(drainer, node, true); err != nil {
|
||||
if !forceReboot {
|
||||
log.Fatalf("Error cordonning %s: %v", nodename, err)
|
||||
}
|
||||
log.Errorf("Error cordonning %s: %v, continuing with reboot anyway", nodename, err)
|
||||
return
|
||||
log.Errorf("Error cordonning %s: %v", nodename, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err := kubectldrain.RunNodeDrain(drainer, nodename); err != nil {
|
||||
if !forceReboot {
|
||||
log.Fatalf("Error draining %s: %v", nodename, err)
|
||||
}
|
||||
log.Errorf("Error draining %s: %v, continuing with reboot anyway", nodename, err)
|
||||
return
|
||||
log.Errorf("Error draining %s: %v", nodename, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func uncordon(client *kubernetes.Clientset, node *v1.Node) {
|
||||
func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
|
||||
nodename := node.GetName()
|
||||
log.Infof("Uncordoning node %s", nodename)
|
||||
drainer := &kubectldrain.Helper{
|
||||
@@ -405,18 +499,16 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) {
|
||||
}
|
||||
if err := kubectldrain.RunCordonOrUncordon(drainer, node, false); err != nil {
|
||||
log.Fatalf("Error uncordonning %s: %v", nodename, err)
|
||||
return err
|
||||
} else if postRebootNodeLabels != nil {
|
||||
updateNodeLabels(client, node, postRebootNodeLabels)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func invokeReboot(nodeID string, rebootCommand []string) {
|
||||
log.Infof("Running command: %s for node: %s", rebootCommand, nodeID)
|
||||
|
||||
if slackHookURL != "" {
|
||||
if err := slack.NotifyReboot(slackHookURL, slackUsername, slackChannel, messageTemplateReboot, nodeID); err != nil {
|
||||
log.Warnf("Error notifying slack: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if notifyURL != "" {
|
||||
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
|
||||
log.Warnf("Error notifying: %v", err)
|
||||
@@ -444,10 +536,11 @@ type nodeMeta struct {
|
||||
Unschedulable bool `json:"unschedulable"`
|
||||
}
|
||||
|
||||
func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations map[string]string) {
|
||||
func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations map[string]string) error {
|
||||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
log.Fatalf("Error retrieving node object via k8s API: %s", err)
|
||||
log.Errorf("Error retrieving node object via k8s API: %s", err)
|
||||
return err
|
||||
}
|
||||
for k, v := range annotations {
|
||||
node.Annotations[k] = v
|
||||
@@ -456,7 +549,8 @@ func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations
|
||||
|
||||
bytes, err := json.Marshal(node)
|
||||
if err != nil {
|
||||
log.Fatalf("Error marshalling node object into JSON: %v", err)
|
||||
log.Errorf("Error marshalling node object into JSON: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
|
||||
@@ -465,11 +559,13 @@ func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations
|
||||
for k, v := range annotations {
|
||||
annotationsErr += fmt.Sprintf("%s=%s ", k, v)
|
||||
}
|
||||
log.Fatalf("Error adding node annotations %s via k8s API: %v", annotationsErr, err)
|
||||
log.Errorf("Error adding node annotations %s via k8s API: %v", annotationsErr, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) {
|
||||
func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) error {
|
||||
log.Infof("Deleting node %s annotation %s", nodeID, key)
|
||||
|
||||
// JSON Patch takes as path input a JSON Pointer, defined in RFC6901
|
||||
@@ -478,7 +574,39 @@ func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) {
|
||||
patch := []byte(fmt.Sprintf("[{\"op\":\"remove\",\"path\":\"/metadata/annotations/%s\"}]", strings.ReplaceAll(key, "/", "~1")))
|
||||
_, err := client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{})
|
||||
if err != nil {
|
||||
log.Fatalf("Error deleting node annotation %s via k8s API: %v", key, err)
|
||||
log.Errorf("Error deleting node annotation %s via k8s API: %v", key, err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []string) {
|
||||
labelsMap := make(map[string]string)
|
||||
for _, label := range labels {
|
||||
k := strings.Split(label, "=")[0]
|
||||
v := strings.Split(label, "=")[1]
|
||||
labelsMap[k] = v
|
||||
log.Infof("Updating node %s label: %s=%s", node.GetName(), k, v)
|
||||
}
|
||||
|
||||
bytes, err := json.Marshal(map[string]interface{}{
|
||||
"metadata": map[string]interface{}{
|
||||
"labels": labelsMap,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("Error marshalling node object into JSON: %v", err)
|
||||
}
|
||||
|
||||
_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
|
||||
if err != nil {
|
||||
var labelsErr string
|
||||
for _, label := range labels {
|
||||
k := strings.Split(label, "=")[0]
|
||||
v := strings.Split(label, "=")[1]
|
||||
labelsErr += fmt.Sprintf("%s=%s ", k, v)
|
||||
}
|
||||
log.Errorf("Error updating node labels %s via k8s API: %v", labelsErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -496,26 +624,47 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
lock := daemonsetlock.New(client, nodeID, dsNamespace, dsName, lockAnnotation)
|
||||
|
||||
nodeMeta := nodeMeta{}
|
||||
if holding(lock, &nodeMeta) {
|
||||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
log.Fatalf("Error retrieving node object via k8s API: %v", err)
|
||||
}
|
||||
if !nodeMeta.Unschedulable {
|
||||
uncordon(client, node)
|
||||
}
|
||||
// If we're holding the lock we know we've tried, in a prior run, to reboot
|
||||
// So (1) we want to confirm that the reboot succeeded practically ( !rebootRequired() )
|
||||
// And (2) check if we previously annotated the node that it was in the process of being rebooted,
|
||||
// And finally (3) if it has that annotation, to delete it.
|
||||
// This indicates to other node tools running on the cluster that this node may be a candidate for maintenance
|
||||
if annotateNodes && !rebootRequired(sentinelCommand) {
|
||||
if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
|
||||
deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation)
|
||||
source := rand.NewSource(time.Now().UnixNano())
|
||||
tick := delaytick.New(source, 1*time.Minute)
|
||||
for range tick {
|
||||
if holding(lock, &nodeMeta) {
|
||||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
log.Errorf("Error retrieving node object via k8s API: %v", err)
|
||||
continue
|
||||
}
|
||||
if !nodeMeta.Unschedulable {
|
||||
err = uncordon(client, node)
|
||||
if err != nil {
|
||||
log.Errorf("Unable to uncordon %s: %v, will continue to hold lock and retry uncordon", node.GetName(), err)
|
||||
continue
|
||||
} else {
|
||||
if notifyURL != "" {
|
||||
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateUncordon, nodeID)); err != nil {
|
||||
log.Warnf("Error notifying: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// If we're holding the lock we know we've tried, in a prior run, to reboot
|
||||
// So (1) we want to confirm that the reboot succeeded practically ( !rebootRequired() )
|
||||
// And (2) check if we previously annotated the node that it was in the process of being rebooted,
|
||||
// And finally (3) if it has that annotation, to delete it.
|
||||
// This indicates to other node tools running on the cluster that this node may be a candidate for maintenance
|
||||
if annotateNodes && !rebootRequired(sentinelCommand) {
|
||||
if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
|
||||
err := deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
throttle(releaseDelay)
|
||||
release(lock)
|
||||
break
|
||||
} else {
|
||||
break
|
||||
}
|
||||
throttle(releaseDelay)
|
||||
release(lock)
|
||||
}
|
||||
|
||||
preferNoScheduleTaint := taints.New(client, nodeID, preferNoScheduleTaintName, v1.TaintEffectPreferNoSchedule)
|
||||
@@ -531,8 +680,8 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
log.Fatal("Unable to create prometheus client: ", err)
|
||||
}
|
||||
|
||||
source := rand.NewSource(time.Now().UnixNano())
|
||||
tick := delaytick.New(source, period)
|
||||
source = rand.NewSource(time.Now().UnixNano())
|
||||
tick = delaytick.New(source, period)
|
||||
for range tick {
|
||||
if !window.Contains(time.Now()) {
|
||||
// Remove taint outside the reboot time window to allow for normal operation.
|
||||
@@ -574,17 +723,29 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
annotations := map[string]string{KuredRebootInProgressAnnotation: timeNowString}
|
||||
// & annotate this node with a timestamp so that other node maintenance tools know how long it's been since this node has been marked for reboot
|
||||
annotations[KuredMostRecentRebootNeededAnnotation] = timeNowString
|
||||
addNodeAnnotations(client, nodeID, annotations)
|
||||
err := addNodeAnnotations(client, nodeID, annotations)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !acquire(lock, &nodeMeta, TTL) {
|
||||
if !holding(lock, &nodeMeta) && !acquire(lock, &nodeMeta, TTL) {
|
||||
// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
|
||||
preferNoScheduleTaint.Enable()
|
||||
continue
|
||||
}
|
||||
|
||||
drain(client, node)
|
||||
err = drain(client, node)
|
||||
if err != nil {
|
||||
if !forceReboot {
|
||||
log.Errorf("Unable to cordon or drain %s: %v, will release lock and retry cordon and drain before rebooting when lock is next acquired", node.GetName(), err)
|
||||
release(lock)
|
||||
log.Infof("Performing a best-effort uncordon after failed cordon and drain")
|
||||
uncordon(client, node)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if rebootDelay > 0 {
|
||||
log.Infof("Delaying reboot for %v", rebootDelay)
|
||||
@@ -623,9 +784,12 @@ func parseRebootCommand(rebootCommand string) []string {
|
||||
}
|
||||
|
||||
func root(cmd *cobra.Command, args []string) {
|
||||
if logFormat == "json" {
|
||||
log.SetFormatter(&log.JSONFormatter{})
|
||||
}
|
||||
|
||||
log.Infof("Kubernetes Reboot Daemon: %s", version)
|
||||
|
||||
nodeID := os.Getenv("KURED_NODE_ID")
|
||||
if nodeID == "" {
|
||||
log.Fatal("KURED_NODE_ID environment variable required")
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"testing"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/spf13/cobra"
|
||||
"github.com/weaveworks/kured/pkg/alerts"
|
||||
assert "gotest.tools/v3/assert"
|
||||
|
||||
@@ -22,6 +23,90 @@ func (fbc BlockingChecker) isBlocked() bool {
|
||||
var _ RebootBlocker = BlockingChecker{} // Verify that Type implements Interface.
|
||||
var _ RebootBlocker = (*BlockingChecker)(nil) // Verify that *Type implements Interface.
|
||||
|
||||
func Test_flagCheck(t *testing.T) {
|
||||
var cmd *cobra.Command
|
||||
var args []string
|
||||
slackHookURL = "https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
|
||||
expected := "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
|
||||
flagCheck(cmd, args)
|
||||
if notifyURL != expected {
|
||||
t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
|
||||
}
|
||||
|
||||
// validate that surrounding quotes are stripped
|
||||
slackHookURL = "\"https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET\""
|
||||
expected = "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
|
||||
flagCheck(cmd, args)
|
||||
if notifyURL != expected {
|
||||
t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
|
||||
}
|
||||
slackHookURL = "'https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET'"
|
||||
expected = "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
|
||||
flagCheck(cmd, args)
|
||||
if notifyURL != expected {
|
||||
t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
|
||||
}
|
||||
slackHookURL = ""
|
||||
notifyURL = "\"teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com\""
|
||||
expected = "teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com"
|
||||
flagCheck(cmd, args)
|
||||
if notifyURL != expected {
|
||||
t.Errorf("notifyURL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
|
||||
}
|
||||
notifyURL = "'teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com'"
|
||||
expected = "teams://79b4XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX@acd8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX/204cXXXXXXXXXXXXXXXXXXXXXXXXXXXX/a1f8XXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX?host=XXXX.webhook.office.com"
|
||||
flagCheck(cmd, args)
|
||||
if notifyURL != expected {
|
||||
t.Errorf("notifyURL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_stripQuotes(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "string with no surrounding quotes is unchanged",
|
||||
input: "Hello, world!",
|
||||
expected: "Hello, world!",
|
||||
},
|
||||
{
|
||||
name: "string with surrounding double quotes should strip quotes",
|
||||
input: "\"Hello, world!\"",
|
||||
expected: "Hello, world!",
|
||||
},
|
||||
{
|
||||
name: "string with surrounding single quotes should strip quotes",
|
||||
input: "'Hello, world!'",
|
||||
expected: "Hello, world!",
|
||||
},
|
||||
{
|
||||
name: "string with unbalanced surrounding quotes is unchanged",
|
||||
input: "'Hello, world!\"",
|
||||
expected: "'Hello, world!\"",
|
||||
},
|
||||
{
|
||||
name: "string with length of one is unchanged",
|
||||
input: "'",
|
||||
expected: "'",
|
||||
},
|
||||
{
|
||||
name: "string with length of zero is unchanged",
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := stripQuotes(tt.input); !reflect.DeepEqual(got, tt.expected) {
|
||||
t.Errorf("stripQuotes() = %v, expected %v", got, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_rebootBlocked(t *testing.T) {
|
||||
noCheckers := []RebootBlocker{}
|
||||
nonblockingChecker := BlockingChecker{blocking: false}
|
||||
|
||||
108
go.mod
108
go.mod
@@ -1,18 +1,102 @@
|
||||
module github.com/weaveworks/kured
|
||||
|
||||
go 1.16
|
||||
go 1.17
|
||||
|
||||
require (
|
||||
github.com/containrrr/shoutrrr v0.5.1
|
||||
github.com/containrrr/shoutrrr v0.6.1
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
|
||||
github.com/prometheus/client_golang v1.11.0
|
||||
github.com/prometheus/common v0.31.1
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/spf13/cobra v1.2.1
|
||||
github.com/stretchr/testify v1.7.0
|
||||
gotest.tools/v3 v3.0.3
|
||||
k8s.io/api v0.21.4
|
||||
k8s.io/apimachinery v0.21.4
|
||||
k8s.io/client-go v0.21.4
|
||||
k8s.io/kubectl v0.21.4
|
||||
github.com/google/uuid v1.1.5 // indirect
|
||||
github.com/prometheus/client_golang v1.13.0
|
||||
github.com/prometheus/common v0.37.0
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/spf13/cobra v1.5.0
|
||||
github.com/spf13/pflag v1.0.5
|
||||
github.com/spf13/viper v1.12.0
|
||||
github.com/stretchr/testify v1.8.0
|
||||
gotest.tools/v3 v3.3.0
|
||||
k8s.io/api v0.23.6
|
||||
k8s.io/apimachinery v0.23.6
|
||||
k8s.io/client-go v0.23.6
|
||||
k8s.io/kubectl v0.23.6
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
|
||||
github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd // indirect
|
||||
github.com/PuerkitoBio/purell v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||
github.com/chai2010/gettext-go v0.0.0-20160711120539-c6fed771bfd5 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
|
||||
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
|
||||
github.com/fatih/color v1.13.0 // indirect
|
||||
github.com/fsnotify/fsnotify v1.5.4 // indirect
|
||||
github.com/go-errors/errors v1.0.1 // indirect
|
||||
github.com/go-logr/logr v1.2.0 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.5 // indirect
|
||||
github.com/go-openapi/swag v0.19.14 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/google/btree v1.0.1 // indirect
|
||||
github.com/google/go-cmp v0.5.8 // indirect
|
||||
github.com/google/gofuzz v1.1.0 // indirect
|
||||
github.com/googleapis/gnostic v0.5.5 // indirect
|
||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
|
||||
github.com/hashicorp/hcl v1.0.0 // indirect
|
||||
github.com/imdario/mergo v0.3.5 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
|
||||
github.com/magiconair/properties v1.8.6 // indirect
|
||||
github.com/mailru/easyjson v0.7.6 // indirect
|
||||
github.com/mattn/go-colorable v0.1.12 // indirect
|
||||
github.com/mattn/go-isatty v0.0.14 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||
github.com/mitchellh/go-wordwrap v1.0.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/moby/spdystream v0.2.0 // indirect
|
||||
github.com/moby/term v0.0.0-20210610120745-9d4ed1856297 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
|
||||
github.com/pelletier/go-toml v1.9.5 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.0.1 // indirect
|
||||
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_model v0.2.0 // indirect
|
||||
github.com/prometheus/procfs v0.8.0 // indirect
|
||||
github.com/russross/blackfriday v1.5.2 // indirect
|
||||
github.com/spf13/afero v1.8.2 // indirect
|
||||
github.com/spf13/cast v1.5.0 // indirect
|
||||
github.com/spf13/jwalterweatherman v1.1.0 // indirect
|
||||
github.com/subosito/gotenv v1.3.0 // indirect
|
||||
github.com/xlab/treeprint v0.0.0-20181112141820-a009c3971eca // indirect
|
||||
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect
|
||||
golang.org/x/net v0.0.0-20220520000938-2e3eb7b945c2 // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect
|
||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8 // indirect
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.28.1 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/ini.v1 v1.66.4 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/cli-runtime v0.23.6 // indirect
|
||||
k8s.io/component-base v0.23.6 // indirect
|
||||
k8s.io/klog/v2 v2.30.0 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 // indirect
|
||||
k8s.io/utils v0.0.0-20211116205334-6203023598ed // indirect
|
||||
sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect
|
||||
sigs.k8s.io/kustomize/api v0.10.1 // indirect
|
||||
sigs.k8s.io/kustomize/kyaml v0.13.0 // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
|
||||
sigs.k8s.io/yaml v1.2.0 // indirect
|
||||
)
|
||||
|
||||
@@ -23,13 +23,15 @@ spec:
|
||||
spec:
|
||||
serviceAccountName: kured
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
hostPID: true # Facilitate entering the host mount namespace via init
|
||||
restartPolicy: Always
|
||||
containers:
|
||||
- name: kured
|
||||
image: docker.io/weaveworks/kured
|
||||
image: docker.io/weaveworks/kured:1.10.2
|
||||
# If you find yourself here wondering why there is no
|
||||
# :latest tag on Docker Hub, see the FAQ in the README
|
||||
imagePullPolicy: IfNotPresent
|
||||
@@ -64,13 +66,16 @@ spec:
|
||||
# - --slack-channel=alerting
|
||||
# - --notify-url="" # See also shoutrrr url format
|
||||
# - --message-template-drain=Draining node %s
|
||||
# - --message-template-drain=Rebooting node %s
|
||||
# - --message-template-reboot=Rebooting node %s
|
||||
# - --message-template-uncordon=Node %s rebooted & uncordoned successfully!
|
||||
# - --blocking-pod-selector=runtime=long,cost=expensive
|
||||
# - --blocking-pod-selector=name=temperamental
|
||||
# - --blocking-pod-selector=...
|
||||
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
|
||||
# - --reboot-delay=90s
|
||||
# - --start-time=0:00
|
||||
# - --end-time=23:59:59
|
||||
# - --time-zone=UTC
|
||||
# - --annotate-nodes=false
|
||||
# - --lock-release-delay=30m
|
||||
# - --log-format=text
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
package slack
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// httpClient is the package-wide HTTP client; every request made through it
// is limited to a five-second timeout.
var httpClient = &http.Client{Timeout: 5 * time.Second}
|
||||
|
||||
// body is the JSON payload sent in a notification request. Every field is
// tagged omitempty, so empty values are left out of the encoded document.
type body struct {
	// Text is the message content.
	Text string `json:"text,omitempty"`
	// Username is encoded under the "username" key.
	Username string `json:"username,omitempty"`
	// Channel is encoded under the "channel" key.
	Channel string `json:"channel,omitempty"`
}
|
||||
|
||||
func notify(hookURL, username, channel, message string) error {
|
||||
msg := body{
|
||||
Text: message,
|
||||
Username: username,
|
||||
Channel: channel,
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
if err := json.NewEncoder(&buf).Encode(&msg); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
resp, err := httpClient.Post(hookURL, "application/json", &buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||
return fmt.Errorf(resp.Status)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NotifyDrain is the exposed way to notify of a drain event onto a slack chan
|
||||
func NotifyDrain(hookURL, username, channel, messageTemplate, nodeID string) error {
|
||||
return notify(hookURL, username, channel, fmt.Sprintf(messageTemplate, nodeID))
|
||||
}
|
||||
|
||||
// NotifyReboot is the exposed way to notify of a reboot event onto a slack chan
|
||||
func NotifyReboot(hookURL, username, channel, messageTemplate, nodeID string) error {
|
||||
return notify(hookURL, username, channel, fmt.Sprintf(messageTemplate, nodeID))
|
||||
}
|
||||
Reference in New Issue
Block a user