mirror of
https://github.com/kubereboot/kured.git
synced 2026-04-27 03:56:37 +00:00
Compare commits
208 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c4a1e9893b | ||
|
|
1a8718096b | ||
|
|
23a0fcb912 | ||
|
|
9e5e0bb930 | ||
|
|
28f878cf7c | ||
|
|
2efd823e13 | ||
|
|
bc2867f283 | ||
|
|
e3ade9d053 | ||
|
|
0ad395a9f1 | ||
|
|
cdc6e68ae1 | ||
|
|
d9216e9baf | ||
|
|
d0bdc115a7 | ||
|
|
6f5d6cb1f9 | ||
|
|
ebb7ccf96d | ||
|
|
dccf0856c7 | ||
|
|
b37fd26062 | ||
|
|
221d5d222a | ||
|
|
e71df6a94e | ||
|
|
c50793933f | ||
|
|
c99930d4d4 | ||
|
|
39c353fb8f | ||
|
|
3fd1b0d32a | ||
|
|
83fcc8f28f | ||
|
|
cbfafbb6f4 | ||
|
|
238423969c | ||
|
|
ea7d9d83f1 | ||
|
|
1c540d94de | ||
|
|
1151d324fa | ||
|
|
a8cc821de5 | ||
|
|
b057ed9eba | ||
|
|
17badb57df | ||
|
|
99c255074d | ||
|
|
408889e2fa | ||
|
|
ca4e4a7063 | ||
|
|
4c098b202a | ||
|
|
3ab5ad025f | ||
|
|
38889b9cac | ||
|
|
90fc467a00 | ||
|
|
8da66de1a9 | ||
|
|
76954d0d94 | ||
|
|
c78f9948ee | ||
|
|
0bfe7a0208 | ||
|
|
d909286a5d | ||
|
|
87202d8fcf | ||
|
|
fb800aade5 | ||
|
|
5a1c90da48 | ||
|
|
9f78ef3555 | ||
|
|
ebbcabee37 | ||
|
|
a1df379c43 | ||
|
|
21aa783cb4 | ||
|
|
cc96064c26 | ||
|
|
3f8760be48 | ||
|
|
8d32574da1 | ||
|
|
a3d7bc3172 | ||
|
|
f75bd4697a | ||
|
|
e47210e986 | ||
|
|
bd45aa61d5 | ||
|
|
0f1f724c06 | ||
|
|
84a70a035a | ||
|
|
e7825fde9f | ||
|
|
496b61be6b | ||
|
|
5344747b58 | ||
|
|
d51258ffde | ||
|
|
6912a1e14e | ||
|
|
61309b9a73 | ||
|
|
9bfa399adb | ||
|
|
1ef888b79e | ||
|
|
0db448da56 | ||
|
|
06d583fbe6 | ||
|
|
bd1c593694 | ||
|
|
a74ea49a3f | ||
|
|
234f819b07 | ||
|
|
125a74976c | ||
|
|
50ba52c2d8 | ||
|
|
60dd73e69c | ||
|
|
608afb84d6 | ||
|
|
7ba9d8aa0b | ||
|
|
ae4d5679b4 | ||
|
|
53cdf40254 | ||
|
|
49967f701c | ||
|
|
7e6d442fa9 | ||
|
|
a74cf1a37e | ||
|
|
7b1e2ffe02 | ||
|
|
34b3d9c1a3 | ||
|
|
f948902710 | ||
|
|
adbf0bb1dc | ||
|
|
9e4b69f818 | ||
|
|
510b2e7e29 | ||
|
|
2f58a4ee7e | ||
|
|
09b8a6118f | ||
|
|
4ce09a8cb1 | ||
|
|
ec551fb390 | ||
|
|
924799cac8 | ||
|
|
1c5baef1f8 | ||
|
|
328dad5ac1 | ||
|
|
09edf0605e | ||
|
|
500693735e | ||
|
|
7cd5b102bf | ||
|
|
b983f8a612 | ||
|
|
97a2514015 | ||
|
|
cddb6afa39 | ||
|
|
5b13247370 | ||
|
|
de6460b2aa | ||
|
|
60ff3e7051 | ||
|
|
61b96375af | ||
|
|
ccbdbe8d16 | ||
|
|
357e2e3d2b | ||
|
|
d4f4a7b553 | ||
|
|
4e935d18f7 | ||
|
|
55f5c0b0fa | ||
|
|
a9e5098dc8 | ||
|
|
f98f74c2b9 | ||
|
|
acdea520b7 | ||
|
|
ff62aecca6 | ||
|
|
0057783ac8 | ||
|
|
ab15cf14a2 | ||
|
|
614c6e8472 | ||
|
|
63a388bded | ||
|
|
f75a87ae4a | ||
|
|
13ee1b90aa | ||
|
|
2aaa2bb732 | ||
|
|
35c41c2bc6 | ||
|
|
48981e6c71 | ||
|
|
a8bcf5bbfe | ||
|
|
ecec14c773 | ||
|
|
a7f113bdf8 | ||
|
|
ac6f777d60 | ||
|
|
8a2d2f9f2f | ||
|
|
620943eefb | ||
|
|
0df1059d66 | ||
|
|
14d887e9d0 | ||
|
|
8bc66c937d | ||
|
|
3639080851 | ||
|
|
fb51a566da | ||
|
|
9a4b8fdb32 | ||
|
|
3b9b190422 | ||
|
|
f22b1abd17 | ||
|
|
c159b37fcc | ||
|
|
351ca71787 | ||
|
|
16dc5e30d9 | ||
|
|
aa971697ff | ||
|
|
d019e7a50a | ||
|
|
ee81617645 | ||
|
|
d7adcf6e1e | ||
|
|
409ff0a3e6 | ||
|
|
3be3cd46b5 | ||
|
|
e8202c602c | ||
|
|
752176d16b | ||
|
|
d30a71e1d3 | ||
|
|
815df5e1e9 | ||
|
|
77327b3915 | ||
|
|
ec328e33d6 | ||
|
|
54e127c2ad | ||
|
|
1867c3253e | ||
|
|
05a3ff85a3 | ||
|
|
19846c73f2 | ||
|
|
ba62c32cbf | ||
|
|
4c75199b41 | ||
|
|
91eb403942 | ||
|
|
a27c755260 | ||
|
|
2a6d119b3b | ||
|
|
b17224addc | ||
|
|
a2f21ebe49 | ||
|
|
4d2f26f483 | ||
|
|
b358be7617 | ||
|
|
e88434b619 | ||
|
|
1b12e52434 | ||
|
|
64e40a62b0 | ||
|
|
6690396679 | ||
|
|
9acb2450ea | ||
|
|
e1a5b7d705 | ||
|
|
72f52f2c6f | ||
|
|
6df454c0eb | ||
|
|
c09e65eab1 | ||
|
|
a34c994f4b | ||
|
|
60c54bef31 | ||
|
|
8afa302680 | ||
|
|
de42273849 | ||
|
|
d3e2c9af95 | ||
|
|
00648786b7 | ||
|
|
c7f4380847 | ||
|
|
c659c25b94 | ||
|
|
f44ced2d04 | ||
|
|
e7d24bfff0 | ||
|
|
0378c8a8c5 | ||
|
|
2cfeb34c03 | ||
|
|
3bfacca254 | ||
|
|
46e1b9616b | ||
|
|
fe95f17503 | ||
|
|
462a063b6e | ||
|
|
e664de6c6f | ||
|
|
b666474cf1 | ||
|
|
64313f82ef | ||
|
|
59ba53584e | ||
|
|
b2ffc0d154 | ||
|
|
b7edf8b345 | ||
|
|
4e01e607cc | ||
|
|
1929c11297 | ||
|
|
28832f5cfb | ||
|
|
3c79c750e1 | ||
|
|
58afedd842 | ||
|
|
57783966db | ||
|
|
316a0ef4a3 | ||
|
|
7a86e65c69 | ||
|
|
efa0fe808d | ||
|
|
c2f97614dd | ||
|
|
e710e05658 | ||
|
|
4ff3378df5 |
13
.github/kind-cluster-1.24.yaml
vendored
13
.github/kind-cluster-1.24.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.24.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.24.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.24.7"
|
||||
13
.github/kind-cluster-1.25.yaml
vendored
13
.github/kind-cluster-1.25.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.25.3
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.25.3
|
||||
- role: control-plane
|
||||
image: kindest/node:v1.25.3
|
||||
- role: worker
|
||||
image: kindest/node:v1.25.3
|
||||
- role: worker
|
||||
image: kindest/node:v1.25.3
|
||||
13
.github/kind-cluster-1.26.yaml
vendored
13
.github/kind-cluster-1.26.yaml
vendored
@@ -1,13 +0,0 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.26.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.26.0"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.26.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.26.0"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.26.0"
|
||||
13
.github/kind-cluster-1.27.yaml
vendored
Normal file
13
.github/kind-cluster-1.27.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.27.11"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.27.11"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.27.11"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.27.11"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.27.11"
|
||||
13
.github/kind-cluster-1.28.yaml
vendored
Normal file
13
.github/kind-cluster-1.28.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.28.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.28.7"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.28.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.28.7"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.28.7"
|
||||
13
.github/kind-cluster-1.29.yaml
vendored
Normal file
13
.github/kind-cluster-1.29.yaml
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
nodes:
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.29.2"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.29.2"
|
||||
- role: control-plane
|
||||
image: "kindest/node:v1.29.2"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.29.2"
|
||||
- role: worker
|
||||
image: "kindest/node:v1.29.2"
|
||||
17
.github/scripts/goreleaser-install.sh
vendored
17
.github/scripts/goreleaser-install.sh
vendored
@@ -10,28 +10,21 @@ test -z "$VERSION" && {
|
||||
}
|
||||
|
||||
test -z "$TMPDIR" && TMPDIR="$(mktemp -d)"
|
||||
TAR_FILE="$TMPDIR/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
|
||||
# goreleaser uses arm64 instead of aarch64
|
||||
goreleaser_arch=$(uname -m | sed -e 's/aarch64/arm64/g' -e 's/ppc64le/ppc64/' -e 's/armv7l/armv7/' )
|
||||
TAR_FILE="$TMPDIR/${FILE_BASENAME}_$(uname -s)_${goreleaser_arch}.tar.gz"
|
||||
export TAR_FILE
|
||||
|
||||
(
|
||||
echo "Downloading GoReleaser $VERSION..."
|
||||
curl -sfLo "$TAR_FILE" \
|
||||
"$RELEASES_URL/download/$VERSION/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
|
||||
"$RELEASES_URL/download/$VERSION/${FILE_BASENAME}_$(uname -s)_${goreleaser_arch}.tar.gz"
|
||||
cd "$TMPDIR"
|
||||
curl -sfLo "checksums.txt" "$RELEASES_URL/download/$VERSION/checksums.txt"
|
||||
curl -sfLo "checksums.txt.sig" "$RELEASES_URL/download/$VERSION/checksums.txt.sig"
|
||||
echo "Verifying checksums..."
|
||||
sha256sum --ignore-missing --quiet --check checksums.txt
|
||||
if command -v cosign >/dev/null 2>&1; then
|
||||
echo "Verifying signatures..."
|
||||
COSIGN_EXPERIMENTAL=1 cosign verify-blob \
|
||||
--signature checksums.txt.sig \
|
||||
checksums.txt
|
||||
else
|
||||
echo "Could not verify signatures, cosign is not installed."
|
||||
fi
|
||||
)
|
||||
|
||||
tar -xf "$TAR_FILE" -O goreleaser > "$TMPDIR/goreleaser"
|
||||
rm "$TMPDIR/checksums.txt" "$TMPDIR/checksums.txt.sig"
|
||||
rm "$TMPDIR/checksums.txt"
|
||||
rm "$TAR_FILE"
|
||||
|
||||
8
.github/workflows/codeql.yml
vendored
8
.github/workflows/codeql.yml
vendored
@@ -39,11 +39,11 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
|
||||
# Initializes the CodeQL tools for scanning.
|
||||
- name: Initialize CodeQL
|
||||
uses: github/codeql-action/init@v2
|
||||
uses: github/codeql-action/init@v3
|
||||
with:
|
||||
languages: ${{ matrix.language }}
|
||||
# If you wish to specify custom queries, you can do so here or in a config file.
|
||||
@@ -57,7 +57,7 @@ jobs:
|
||||
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
|
||||
# If this step fails, then you should remove it and run the build manually (see below)
|
||||
- name: Autobuild
|
||||
uses: github/codeql-action/autobuild@v2
|
||||
uses: github/codeql-action/autobuild@v3
|
||||
|
||||
# ℹ️ Command-line programs to run using the OS shell.
|
||||
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
|
||||
@@ -70,6 +70,6 @@ jobs:
|
||||
# ./location_of_script_within_repo/buildscript.sh
|
||||
|
||||
- name: Perform CodeQL Analysis
|
||||
uses: github/codeql-action/analyze@v2
|
||||
uses: github/codeql-action/analyze@v3
|
||||
with:
|
||||
category: "/language:${{matrix.language}}"
|
||||
|
||||
23
.github/workflows/on-main-push.yaml
vendored
23
.github/workflows/on-main-push.yaml
vendored
@@ -19,16 +19,16 @@ jobs:
|
||||
contents: write
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
@@ -36,15 +36,15 @@ jobs:
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@507c2f2dc502c992ad446e3d7a5dfbe311567a96
|
||||
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Find current tag version
|
||||
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
|
||||
@@ -57,10 +57,9 @@ jobs:
|
||||
run: make kured-release-snapshot
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
COSIGN_EXPERIMENTAL: 1
|
||||
|
||||
- name: Build image
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
|
||||
@@ -75,11 +74,9 @@ jobs:
|
||||
|
||||
- name: Sign and attest artifacts
|
||||
run: |
|
||||
.tmp/cosign sign -f -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
|
||||
.tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
|
||||
|
||||
.tmp/cosign sign-blob --output-signature kured.sbom.sig --output-certificate kured.sbom.pem kured.sbom
|
||||
.tmp/cosign sign-blob -y --output-signature kured.sbom.sig --output-certificate kured.sbom.pem kured.sbom
|
||||
|
||||
.tmp/cosign attest -f --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
|
||||
.tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
|
||||
.tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
|
||||
env:
|
||||
COSIGN_EXPERIMENTAL: 1
|
||||
|
||||
221
.github/workflows/on-pr.yaml
vendored
221
.github/workflows/on-pr.yaml
vendored
@@ -9,9 +9,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
@@ -19,7 +19,7 @@ jobs:
|
||||
run: go test -json ./... > test.json
|
||||
- name: Annotate tests
|
||||
if: always()
|
||||
uses: guyarb/golang-test-annoations@v0.6.0
|
||||
uses: guyarb/golang-test-annoations@v0.8.0
|
||||
with:
|
||||
test-results: test.json
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
name: Lint bash code with shellcheck
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Run ShellCheck
|
||||
uses: bewuethr/shellcheck-action@v2
|
||||
|
||||
@@ -35,9 +35,9 @@ jobs:
|
||||
name: Lint golang code
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
@@ -54,9 +54,9 @@ jobs:
|
||||
name: Check docs for incorrect links
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Link Checker
|
||||
uses: lycheeverse/lychee-action@9ace499fe66cee282a29eaa628fdac2c72fa087f
|
||||
uses: lycheeverse/lychee-action@c053181aa0c3d17606addfe97a9075a32723548a
|
||||
env:
|
||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||
with:
|
||||
@@ -70,16 +70,16 @@ jobs:
|
||||
name: Build image and scan it against known vulnerabilities
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Find current tag version
|
||||
@@ -88,7 +88,7 @@ jobs:
|
||||
- name: Build image
|
||||
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@1f0aa582c8c8f5f7639610d6d38baddfea4fdcee
|
||||
uses: aquasecurity/trivy-action@062f2592684a31eb3aa050cc61e7ca1451cecd3d
|
||||
with:
|
||||
image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
|
||||
format: 'table'
|
||||
@@ -103,27 +103,28 @@ jobs:
|
||||
# - Ensure manifests work with the latest versions even with no manifest change
|
||||
# (compared to helm charts, manifests cannot easily template changes based on versions)
|
||||
# Helm charts are _trailing_ releases, while manifests are done during development.
|
||||
e2e-manifests:
|
||||
name: End-to-End test with kured with code and manifests from HEAD
|
||||
# This test uses the "command" reboot-method.
|
||||
e2e-manifests-command:
|
||||
name: End-to-End test with kured with code and manifests from HEAD (command)
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.24"
|
||||
- "1.25"
|
||||
- "1.26"
|
||||
- "1.27"
|
||||
- "1.28"
|
||||
- "1.29"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Find current tag version
|
||||
@@ -145,7 +146,7 @@ jobs:
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create kind cluster with 5 nodes
|
||||
uses: helm/kind-action@v1.5.0
|
||||
uses: helm/kind-action@v1.9.0
|
||||
with:
|
||||
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
|
||||
version: v0.14.0
|
||||
@@ -162,7 +163,179 @@ jobs:
|
||||
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v2.8.3
|
||||
uses: nick-invision/retry@v3.0.0
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
retry_wait_seconds: 60
|
||||
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
|
||||
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
|
||||
|
||||
- name: Create reboot sentinel files
|
||||
run: |
|
||||
./tests/kind/create-reboot-sentinels.sh
|
||||
|
||||
- name: Follow reboot until success
|
||||
env:
|
||||
DEBUG: true
|
||||
run: |
|
||||
./tests/kind/follow-coordinated-reboot.sh
|
||||
|
||||
|
||||
# This ensures the latest code works with the manifests built from tree.
|
||||
# It is useful for two things:
|
||||
# - Test manifests changes (obviously), ensuring they don't break existing clusters
|
||||
# - Ensure manifests work with the latest versions even with no manifest change
|
||||
# (compared to helm charts, manifests cannot easily template changes based on versions)
|
||||
# Helm charts are _trailing_ releases, while manifests are done during development.
|
||||
# This test uses the "signal" reboot-method.
|
||||
e2e-manifests-signal:
|
||||
name: End-to-End test with kured with code and manifests from HEAD (signal)
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.27"
|
||||
- "1.28"
|
||||
- "1.29"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Find current tag version
|
||||
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
|
||||
id: tags
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
VERSION="${{ steps.tags.outputs.sha_short }}" make image
|
||||
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
|
||||
|
||||
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
|
||||
run: |
|
||||
sudo bash << EOF
|
||||
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
|
||||
echo '{}' > /etc/docker/daemon.json
|
||||
systemctl restart docker || journalctl --no-pager -n 500
|
||||
systemctl status docker
|
||||
EOF
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create kind cluster with 5 nodes
|
||||
uses: helm/kind-action@v1.9.0
|
||||
with:
|
||||
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
|
||||
version: v0.14.0
|
||||
|
||||
- name: Preload previously built images onto kind cluster
|
||||
run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing
|
||||
|
||||
- name: Do not wait for an hour before detecting the rebootSentinel
|
||||
run: |
|
||||
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds-signal.yaml
|
||||
|
||||
- name: Install kured with kubectl
|
||||
run: |
|
||||
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds-signal.yaml
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v3.0.0
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
retry_wait_seconds: 60
|
||||
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
|
||||
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
|
||||
|
||||
- name: Create reboot sentinel files
|
||||
run: |
|
||||
./tests/kind/create-reboot-sentinels.sh
|
||||
|
||||
- name: Follow reboot until success
|
||||
env:
|
||||
DEBUG: true
|
||||
run: |
|
||||
./tests/kind/follow-coordinated-reboot.sh
|
||||
|
||||
|
||||
|
||||
# This ensures the latest code works with the manifests built from tree.
|
||||
# It is useful for two things:
|
||||
# - Test manifests changes (obviously), ensuring they don't break existing clusters
|
||||
# - Ensure manifests work with the latest versions even with no manifest change
|
||||
# (compared to helm charts, manifests cannot easily template changes based on versions)
|
||||
# Helm charts are _trailing_ releases, while manifests are done during development.
|
||||
# Concurrency = 2
|
||||
e2e-manifests-concurent:
|
||||
name: End-to-End test with kured with code and manifests from HEAD (concurrent)
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
kubernetes:
|
||||
- "1.27"
|
||||
- "1.28"
|
||||
- "1.29"
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Find current tag version
|
||||
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
|
||||
id: tags
|
||||
- name: Build artifacts
|
||||
run: |
|
||||
VERSION="${{ steps.tags.outputs.sha_short }}" make image
|
||||
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
|
||||
|
||||
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
|
||||
run: |
|
||||
sudo bash << EOF
|
||||
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
|
||||
echo '{}' > /etc/docker/daemon.json
|
||||
systemctl restart docker || journalctl --no-pager -n 500
|
||||
systemctl status docker
|
||||
EOF
|
||||
|
||||
# Default name for helm/kind-action kind clusters is "chart-testing"
|
||||
- name: Create kind cluster with 5 nodes
|
||||
uses: helm/kind-action@v1.9.0
|
||||
with:
|
||||
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
|
||||
version: v0.14.0
|
||||
|
||||
- name: Preload previously built images onto kind cluster
|
||||
run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing
|
||||
|
||||
- name: Do not wait for an hour before detecting the rebootSentinel
|
||||
run: |
|
||||
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
|
||||
sed -i 's/#\(.*\)--concurrency=1/\1--concurrency=2/g' kured-ds.yaml
|
||||
|
||||
- name: Install kured with kubectl
|
||||
run: |
|
||||
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
|
||||
|
||||
- name: Ensure kured is ready
|
||||
uses: nick-invision/retry@v3.0.0
|
||||
with:
|
||||
timeout_minutes: 10
|
||||
max_attempts: 10
|
||||
|
||||
27
.github/workflows/on-tag.yaml
vendored
27
.github/workflows/on-tag.yaml
vendored
@@ -21,9 +21,9 @@ jobs:
|
||||
contents: write
|
||||
packages: write
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
@@ -31,18 +31,17 @@ jobs:
|
||||
run: echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
|
||||
id: tags
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Build binaries
|
||||
run: make kured-release-tag
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
COSIGN_EXPERIMENTAL: 1
|
||||
- name: Build single image for scan
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64
|
||||
@@ -52,7 +51,7 @@ jobs:
|
||||
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@1f0aa582c8c8f5f7639610d6d38baddfea4fdcee
|
||||
uses: aquasecurity/trivy-action@062f2592684a31eb3aa050cc61e7ca1451cecd3d
|
||||
with:
|
||||
image-ref: '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}'
|
||||
format: 'table'
|
||||
@@ -62,7 +61,7 @@ jobs:
|
||||
severity: 'CRITICAL,HIGH'
|
||||
|
||||
- name: Login to ghcr.io
|
||||
uses: docker/login-action@v2
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: ${{ env.REGISTRY }}
|
||||
username: ${{ github.actor }}
|
||||
@@ -70,12 +69,12 @@ jobs:
|
||||
|
||||
- name: Extract metadata (tags, labels) for Docker
|
||||
id: meta
|
||||
uses: docker/metadata-action@507c2f2dc502c992ad446e3d7a5dfbe311567a96
|
||||
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
|
||||
with:
|
||||
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
|
||||
|
||||
- name: Build release images
|
||||
uses: docker/build-push-action@v4
|
||||
uses: docker/build-push-action@v5
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
|
||||
@@ -90,11 +89,9 @@ jobs:
|
||||
|
||||
- name: Sign and attest artifacts
|
||||
run: |
|
||||
.tmp/cosign sign -f -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
.tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
|
||||
.tmp/cosign sign-blob --output-signature kured.sbom.sig kured.sbom
|
||||
.tmp/cosign sign-blob -y --output-signature kured.sbom.sig kured.sbom
|
||||
|
||||
.tmp/cosign attest -f --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
.tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
.tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
|
||||
env:
|
||||
COSIGN_EXPERIMENTAL: 1
|
||||
|
||||
20
.github/workflows/periodics-daily.yaml
vendored
20
.github/workflows/periodics-daily.yaml
vendored
@@ -10,12 +10,12 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: checkout
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v4
|
||||
- name: run tests
|
||||
run: go test -json ./... > test.json
|
||||
- name: Annotate tests
|
||||
if: always()
|
||||
uses: guyarb/golang-test-annoations@v0.6.0
|
||||
uses: guyarb/golang-test-annoations@v0.8.0
|
||||
with:
|
||||
test-results: test.json
|
||||
|
||||
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
# Stale by default waits for 60 days before marking PR/issues as stale, and closes them after 21 days.
|
||||
# Do not expire the first issues that would allow the community to grow.
|
||||
- uses: actions/stale@v7
|
||||
- uses: actions/stale@v9
|
||||
with:
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
stale-issue-message: 'This issue was automatically considered stale due to lack of activity. Please update it and/or join our slack channels to promote it, before it automatically closes (in 7 days).'
|
||||
@@ -39,9 +39,9 @@ jobs:
|
||||
name: Check docs for incorrect links
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Link Checker
|
||||
uses: lycheeverse/lychee-action@9ace499fe66cee282a29eaa628fdac2c72fa087f
|
||||
uses: lycheeverse/lychee-action@c053181aa0c3d17606addfe97a9075a32723548a
|
||||
env:
|
||||
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
|
||||
with:
|
||||
@@ -52,16 +52,16 @@ jobs:
|
||||
name: Build image and scan it against known vulnerabilities
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
- name: Ensure go version
|
||||
uses: actions/setup-go@v3
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version-file: 'go.mod'
|
||||
check-latest: true
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v2
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v2
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Setup GoReleaser
|
||||
run: make bootstrap-tools
|
||||
- name: Find current tag version
|
||||
@@ -70,7 +70,7 @@ jobs:
|
||||
- name: Build artifacts
|
||||
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
|
||||
- name: Run Trivy vulnerability scanner
|
||||
uses: aquasecurity/trivy-action@1f0aa582c8c8f5f7639610d6d38baddfea4fdcee
|
||||
uses: aquasecurity/trivy-action@062f2592684a31eb3aa050cc61e7ca1451cecd3d
|
||||
with:
|
||||
image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
|
||||
format: 'table'
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
## Kured Community Code of Conduct
|
||||
# Kured Community Code of Conduct
|
||||
|
||||
Kured follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).
|
||||
|
||||
@@ -208,9 +208,8 @@ kind create cluster --config .github/kind-cluster-<k8s-version>.yaml
|
||||
|
||||
### Prepare Documentation
|
||||
|
||||
Check that `README.md` has an updated compatibility matrix and that the
|
||||
url in the `kubectl` incantation (under "Installation") is updated to the
|
||||
new version you want to release.
|
||||
Check that [compatibility matrix](https://kured.dev/docs/installation/) is updated
|
||||
to the new version you want to release.
|
||||
|
||||
### Create a tag on the repo
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM --platform=$TARGETPLATFORM alpine:3.17.2 as bin
|
||||
FROM --platform=$TARGETPLATFORM alpine:3.19.1 as bin
|
||||
|
||||
ARG TARGETOS
|
||||
ARG TARGETARCH
|
||||
@@ -19,7 +19,7 @@ RUN set -ex \
|
||||
esac \
|
||||
&& cp /dist/kured_${TARGETOS}_${TARGETARCH}${SUFFIX}/kured /dist/kured;
|
||||
|
||||
FROM --platform=$TARGETPLATFORM alpine:3.17.2
|
||||
FROM --platform=$TARGETPLATFORM alpine:3.19.1
|
||||
RUN apk update --no-cache && apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
|
||||
COPY --from=bin /dist/kured /usr/bin/kured
|
||||
ENTRYPOINT ["/usr/bin/kured"]
|
||||
|
||||
@@ -108,5 +108,5 @@ Governance require a 2/3 vote of all Maintainers.
|
||||
|
||||
[maintainers-file]: ./MAINTAINERS
|
||||
[private-list]: cncf-kured-maintainers@lists.cncf.io
|
||||
[meeting-agenda]: https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#
|
||||
[meeting-agenda]: https://docs.google.com/document/d/1AWT8YDdqZY-Se6Y1oAlwtujWLVpNVK2M_F_Vfqw06aI/edit
|
||||
[decision-issues]: https://github.com/kubereboot/kured/labels/decision
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Christian Kotzbauer <christian.kotzbauer@gmail.com> (@ckotzbauer)
|
||||
Daniel Holbach <daniel@weave.works> (@dholbach)
|
||||
Christian Hopf <christian.kotzbauer@gmail.com> (@ckotzbauer)
|
||||
Daniel Holbach <daniel.holbach@gmail.com> (@dholbach)
|
||||
Hidde Beydals <hidde@weave.works> (@hiddeco)
|
||||
Jack Francis <jackfrancis@gmail.com> (@jackfrancis)
|
||||
Jean-Philippe Evrard <open-source@a.spamming.party> (@evrardjp)
|
||||
|
||||
15
Makefile
15
Makefile
@@ -14,25 +14,25 @@ $(TEMPDIR):
|
||||
|
||||
.PHONY: bootstrap-tools
|
||||
bootstrap-tools: $(TEMPDIR)
|
||||
VERSION=v1.11.4 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh
|
||||
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v0.58.0
|
||||
curl -sSfL https://github.com/sigstore/cosign/releases/download/v1.12.1/cosign-linux-amd64 -o .tmp/cosign
|
||||
VERSION=v1.24.0 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh
|
||||
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v1.0.1
|
||||
curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.2.3/cosign-linux-amd64 -o .tmp/cosign
|
||||
chmod +x .tmp/goreleaser .tmp/cosign .tmp/syft
|
||||
|
||||
clean:
|
||||
rm -rf ./dist
|
||||
|
||||
kured:
|
||||
$(GORELEASER_CMD) build --rm-dist --single-target --snapshot
|
||||
$(GORELEASER_CMD) build --clean --single-target --snapshot
|
||||
|
||||
kured-all:
|
||||
$(GORELEASER_CMD) build --rm-dist --snapshot
|
||||
$(GORELEASER_CMD) build --clean --snapshot
|
||||
|
||||
kured-release-tag:
|
||||
$(GORELEASER_CMD) release --rm-dist
|
||||
$(GORELEASER_CMD) release --clean
|
||||
|
||||
kured-release-snapshot:
|
||||
$(GORELEASER_CMD) release --rm-dist --snapshot
|
||||
$(GORELEASER_CMD) release --clean --snapshot
|
||||
|
||||
image: kured
|
||||
$(SUDO) docker buildx build --load -t ghcr.io/$(DH_ORG)/kured:$(VERSION) .
|
||||
@@ -42,6 +42,7 @@ minikube-publish: image
|
||||
|
||||
manifest:
|
||||
sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds.yaml
|
||||
sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds-signal.yaml
|
||||
echo "Please generate combined manifest if necessary"
|
||||
|
||||
test:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
[FOSSA Status](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubereboot%2Fkured?ref=badge_shield)
|
||||
[CLOMonitor](https://clomonitor.io/projects/cncf/kured)
|
||||
|
||||
<img src="https://github.com/kubereboot/website/raw/main/static/img/kured.png" width="200" align="right"/>
|
||||
<img src="https://github.com/kubereboot/website/raw/main/static/img/kured.png" alt="kured logo" width="200" align="right"/>
|
||||
|
||||
- [kured - Kubernetes Reboot Daemon](#kured---kubernetes-reboot-daemon)
|
||||
- [Introduction](#introduction)
|
||||
@@ -45,7 +45,7 @@ If you have any questions about, feedback for or problems with `kured`:
|
||||
- Invite yourself to the <a href="https://slack.cncf.io/" target="_blank">CNCF Slack</a>.
|
||||
- Ask a question on the [#kured](https://cloud-native.slack.com/archives/kured) slack channel.
|
||||
- [File an issue](https://github.com/kubereboot/kured/issues/new).
|
||||
- Join us in [our monthly meeting](https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#),
|
||||
- Join us in [our monthly meeting](https://docs.google.com/document/d/1AWT8YDdqZY-Se6Y1oAlwtujWLVpNVK2M_F_Vfqw06aI/edit),
|
||||
every first Wednesday of the month at 16:00 UTC.
|
||||
- You might want to [join the kured-dev mailing list](https://lists.cncf.io/g/cncf-kured-dev) as well.
|
||||
|
||||
|
||||
@@ -30,13 +30,15 @@ import (
|
||||
"github.com/google/shlex"
|
||||
|
||||
shoutrrr "github.com/containrrr/shoutrrr"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"github.com/kubereboot/kured/pkg/alerts"
|
||||
"github.com/kubereboot/kured/pkg/daemonsetlock"
|
||||
"github.com/kubereboot/kured/pkg/delaytick"
|
||||
"github.com/kubereboot/kured/pkg/reboot"
|
||||
"github.com/kubereboot/kured/pkg/taints"
|
||||
"github.com/kubereboot/kured/pkg/timewindow"
|
||||
"github.com/kubereboot/kured/pkg/util"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -44,10 +46,15 @@ var (
|
||||
|
||||
// Command line flags
|
||||
forceReboot bool
|
||||
drainDelay time.Duration
|
||||
drainTimeout time.Duration
|
||||
rebootDelay time.Duration
|
||||
rebootMethod string
|
||||
period time.Duration
|
||||
metricsHost string
|
||||
metricsPort int
|
||||
drainGracePeriod int
|
||||
drainPodSelector string
|
||||
skipWaitForDeleteTimeoutSeconds int
|
||||
dsNamespace string
|
||||
dsName string
|
||||
@@ -57,6 +64,7 @@ var (
|
||||
prometheusURL string
|
||||
preferNoScheduleTaintName string
|
||||
alertFilter *regexp.Regexp
|
||||
alertFilterMatchOnly bool
|
||||
alertFiringOnly bool
|
||||
rebootSentinelFile string
|
||||
rebootSentinelCommand string
|
||||
@@ -69,10 +77,12 @@ var (
|
||||
messageTemplateUncordon string
|
||||
podSelectors []string
|
||||
rebootCommand string
|
||||
rebootSignal int
|
||||
logFormat string
|
||||
preRebootNodeLabels []string
|
||||
postRebootNodeLabels []string
|
||||
nodeID string
|
||||
concurrency int
|
||||
|
||||
rebootDays []string
|
||||
rebootStart string
|
||||
@@ -97,6 +107,13 @@ const (
|
||||
KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed"
|
||||
// EnvPrefix The environment variable prefix of all environment variables bound to our command line flags.
|
||||
EnvPrefix = "KURED"
|
||||
|
||||
// MethodCommand is used as "--reboot-method" value when rebooting with the configured "--reboot-command"
|
||||
MethodCommand = "command"
|
||||
// MethodSignal is used as "--reboot-method" value when rebooting with a SIGRTMIN+5 signal.
|
||||
MethodSignal = "signal"
|
||||
|
||||
sigTrminPlus5 = 34 + 5
|
||||
)
|
||||
|
||||
func init() {
|
||||
@@ -124,14 +141,24 @@ func NewRootCommand() *cobra.Command {
|
||||
"node name kured runs on, should be passed down from spec.nodeName via KURED_NODE_ID environment variable")
|
||||
rootCmd.PersistentFlags().BoolVar(&forceReboot, "force-reboot", false,
|
||||
"force a reboot even if the drain fails or times out")
|
||||
rootCmd.PersistentFlags().StringVar(&metricsHost, "metrics-host", "",
|
||||
"host where metrics will listen")
|
||||
rootCmd.PersistentFlags().IntVar(&metricsPort, "metrics-port", 8080,
|
||||
"port number where metrics will listen")
|
||||
rootCmd.PersistentFlags().IntVar(&drainGracePeriod, "drain-grace-period", -1,
|
||||
"time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used")
|
||||
rootCmd.PersistentFlags().StringVar(&drainPodSelector, "drain-pod-selector", "",
|
||||
"only drain pods with labels matching the selector (default: '', all pods)")
|
||||
rootCmd.PersistentFlags().IntVar(&skipWaitForDeleteTimeoutSeconds, "skip-wait-for-delete-timeout", 0,
|
||||
"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node")
|
||||
rootCmd.PersistentFlags().DurationVar(&drainDelay, "drain-delay", 0,
|
||||
"delay drain for this duration (default: 0, disabled)")
|
||||
rootCmd.PersistentFlags().DurationVar(&drainTimeout, "drain-timeout", 0,
|
||||
"timeout after which the drain is aborted (default: 0, infinite time)")
|
||||
rootCmd.PersistentFlags().DurationVar(&rebootDelay, "reboot-delay", 0,
|
||||
"delay reboot for this duration (default: 0, disabled)")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootMethod, "reboot-method", "command",
|
||||
"method to use for reboots. Available: command")
|
||||
rootCmd.PersistentFlags().DurationVar(&period, "period", time.Minute*60,
|
||||
"sentinel check period")
|
||||
rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-namespace", "kube-system",
|
||||
@@ -148,6 +175,8 @@ func NewRootCommand() *cobra.Command {
|
||||
"Prometheus instance to probe for active alerts")
|
||||
rootCmd.PersistentFlags().Var(&regexpValue{&alertFilter}, "alert-filter-regexp",
|
||||
"alert names to ignore when checking for active alerts")
|
||||
rootCmd.PersistentFlags().BoolVar(&alertFilterMatchOnly, "alert-filter-match-only", false,
|
||||
"Only block if the alert-filter-regexp matches active alerts")
|
||||
rootCmd.PersistentFlags().BoolVar(&alertFiringOnly, "alert-firing-only", false,
|
||||
"only consider firing alerts when checking for active alerts")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinelFile, "reboot-sentinel", "/var/run/reboot-required",
|
||||
@@ -158,6 +187,10 @@ func NewRootCommand() *cobra.Command {
|
||||
"command for which a zero return code will trigger a reboot command")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootCommand, "reboot-command", "/bin/systemctl reboot",
|
||||
"command to run when a reboot is required")
|
||||
rootCmd.PersistentFlags().IntVar(&concurrency, "concurrency", 1,
|
||||
"amount of nodes to concurrently reboot. Defaults to 1")
|
||||
rootCmd.PersistentFlags().IntVar(&rebootSignal, "reboot-signal", sigTrminPlus5,
|
||||
"signal to use for reboot, SIGRTMIN+5 by default.")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
|
||||
"slack hook URL for reboot notifications [deprecated in favor of --notify-url]")
|
||||
@@ -281,22 +314,6 @@ func flagToEnvVar(flag string) string {
|
||||
return fmt.Sprintf("%s_%s", EnvPrefix, envVarSuffix)
|
||||
}
|
||||
|
||||
// newCommand creates a new Command with stdout/stderr wired to our standard logger
|
||||
func newCommand(name string, arg ...string) *exec.Cmd {
|
||||
cmd := exec.Command(name, arg...)
|
||||
cmd.Stdout = log.NewEntry(log.StandardLogger()).
|
||||
WithField("cmd", cmd.Args[0]).
|
||||
WithField("std", "out").
|
||||
WriterLevel(log.InfoLevel)
|
||||
|
||||
cmd.Stderr = log.NewEntry(log.StandardLogger()).
|
||||
WithField("cmd", cmd.Args[0]).
|
||||
WithField("std", "err").
|
||||
WriterLevel(log.WarnLevel)
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// buildHostCommand writes a new command to run in the host namespace
|
||||
// Rancher based need different pid
|
||||
func buildHostCommand(pid int, command []string) []string {
|
||||
@@ -309,7 +326,8 @@ func buildHostCommand(pid int, command []string) []string {
|
||||
}
|
||||
|
||||
func rebootRequired(sentinelCommand []string) bool {
|
||||
if err := newCommand(sentinelCommand[0], sentinelCommand[1:]...).Run(); err != nil {
|
||||
cmd := util.NewCommand(sentinelCommand[0], sentinelCommand[1:]...)
|
||||
if err := cmd.Run(); err != nil {
|
||||
switch err := err.(type) {
|
||||
case *exec.ExitError:
|
||||
// We assume a non-zero exit code means 'reboot not required', but of course
|
||||
@@ -317,6 +335,9 @@ func rebootRequired(sentinelCommand []string) bool {
|
||||
// went wrong during its execution. In that case, not entering a reboot loop
|
||||
// is the right thing to do, and we are logging stdout/stderr of the command
|
||||
// so it should be obvious what is wrong.
|
||||
if cmd.ProcessState.ExitCode() != 1 {
|
||||
log.Warnf("sentinel command ended with unexpected exit code: %v", cmd.ProcessState.ExitCode())
|
||||
}
|
||||
return false
|
||||
default:
|
||||
// Something was grossly misconfigured, such as the command path being wrong.
|
||||
@@ -342,6 +363,8 @@ type PrometheusBlockingChecker struct {
|
||||
filter *regexp.Regexp
|
||||
// bool to indicate if only firing alerts should be considered
|
||||
firingOnly bool
|
||||
// bool to indicate that we're only blocking on alerts which match the filter
|
||||
filterMatchOnly bool
|
||||
}
|
||||
|
||||
// KubernetesBlockingChecker contains info for connecting
|
||||
@@ -355,8 +378,7 @@ type KubernetesBlockingChecker struct {
|
||||
}
|
||||
|
||||
func (pb PrometheusBlockingChecker) isBlocked() bool {
|
||||
|
||||
alertNames, err := pb.promClient.ActiveAlerts(pb.filter, pb.firingOnly)
|
||||
alertNames, err := pb.promClient.ActiveAlerts(pb.filter, pb.firingOnly, pb.filterMatchOnly)
|
||||
if err != nil {
|
||||
log.Warnf("Reboot blocked: prometheus query error: %v", err)
|
||||
return true
|
||||
@@ -408,8 +430,14 @@ func rebootBlocked(blockers ...RebootBlocker) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
|
||||
holding, err := lock.Test(metadata)
|
||||
func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}, isMultiLock bool) bool {
|
||||
var holding bool
|
||||
var err error
|
||||
if isMultiLock {
|
||||
holding, err = lock.TestMultiple()
|
||||
} else {
|
||||
holding, err = lock.Test(metadata)
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatalf("Error testing lock: %v", err)
|
||||
}
|
||||
@@ -419,8 +447,17 @@ func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
|
||||
return holding
|
||||
}
|
||||
|
||||
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration) bool {
|
||||
holding, holder, err := lock.Acquire(metadata, TTL)
|
||||
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration, maxOwners int) bool {
|
||||
var holding bool
|
||||
var holder string
|
||||
var err error
|
||||
if maxOwners > 1 {
|
||||
var holders []string
|
||||
holding, holders, err = lock.AcquireMultiple(metadata, TTL, maxOwners)
|
||||
holder = strings.Join(holders, ",")
|
||||
} else {
|
||||
holding, holder, err = lock.Acquire(metadata, TTL)
|
||||
}
|
||||
switch {
|
||||
case err != nil:
|
||||
log.Fatalf("Error acquiring lock: %v", err)
|
||||
@@ -441,9 +478,16 @@ func throttle(releaseDelay time.Duration) {
|
||||
}
|
||||
}
|
||||
|
||||
func release(lock *daemonsetlock.DaemonSetLock) {
|
||||
func release(lock *daemonsetlock.DaemonSetLock, isMultiLock bool) {
|
||||
log.Infof("Releasing lock")
|
||||
if err := lock.Release(); err != nil {
|
||||
|
||||
var err error
|
||||
if isMultiLock {
|
||||
err = lock.ReleaseMultiple()
|
||||
} else {
|
||||
err = lock.Release()
|
||||
}
|
||||
if err != nil {
|
||||
log.Fatalf("Error releasing lock: %v", err)
|
||||
}
|
||||
}
|
||||
@@ -455,6 +499,11 @@ func drain(client *kubernetes.Clientset, node *v1.Node) error {
|
||||
updateNodeLabels(client, node, preRebootNodeLabels)
|
||||
}
|
||||
|
||||
if drainDelay > 0 {
|
||||
log.Infof("Delaying drain for %v", drainDelay)
|
||||
time.Sleep(drainDelay)
|
||||
}
|
||||
|
||||
log.Infof("Draining node %s", nodename)
|
||||
|
||||
if notifyURL != "" {
|
||||
@@ -467,6 +516,7 @@ func drain(client *kubernetes.Clientset, node *v1.Node) error {
|
||||
Client: client,
|
||||
Ctx: context.Background(),
|
||||
GracePeriodSeconds: drainGracePeriod,
|
||||
PodSelector: drainPodSelector,
|
||||
SkipWaitForDeleteTimeoutSeconds: skipWaitForDeleteTimeoutSeconds,
|
||||
Force: true,
|
||||
DeleteEmptyDirData: true,
|
||||
@@ -506,20 +556,6 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func invokeReboot(nodeID string, rebootCommand []string) {
|
||||
log.Infof("Running command: %s for node: %s", rebootCommand, nodeID)
|
||||
|
||||
if notifyURL != "" {
|
||||
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
|
||||
log.Warnf("Error notifying: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := newCommand(rebootCommand[0], rebootCommand[1:]...).Run(); err != nil {
|
||||
log.Fatalf("Error invoking reboot command: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func maintainRebootRequiredMetric(nodeID string, sentinelCommand []string) {
|
||||
for {
|
||||
if rebootRequired(sentinelCommand) {
|
||||
@@ -610,7 +646,7 @@ func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []stri
|
||||
}
|
||||
}
|
||||
|
||||
func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
|
||||
func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
|
||||
config, err := rest.InClusterConfig()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
@@ -627,7 +663,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
source := rand.NewSource(time.Now().UnixNano())
|
||||
tick := delaytick.New(source, 1*time.Minute)
|
||||
for range tick {
|
||||
if holding(lock, &nodeMeta) {
|
||||
if holding(lock, &nodeMeta, concurrency > 1) {
|
||||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
log.Errorf("Error retrieving node object via k8s API: %v", err)
|
||||
@@ -660,7 +696,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
}
|
||||
}
|
||||
throttle(releaseDelay)
|
||||
release(lock)
|
||||
release(lock, concurrency > 1)
|
||||
break
|
||||
} else {
|
||||
break
|
||||
@@ -694,19 +730,6 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
preferNoScheduleTaint.Disable()
|
||||
continue
|
||||
}
|
||||
log.Infof("Reboot required")
|
||||
|
||||
var blockCheckers []RebootBlocker
|
||||
if prometheusURL != "" {
|
||||
blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly})
|
||||
}
|
||||
if podSelectors != nil {
|
||||
blockCheckers = append(blockCheckers, KubernetesBlockingChecker{client: client, nodename: nodeID, filter: podSelectors})
|
||||
}
|
||||
|
||||
if rebootBlocked(blockCheckers...) {
|
||||
continue
|
||||
}
|
||||
|
||||
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
@@ -730,7 +753,22 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
}
|
||||
}
|
||||
|
||||
if !holding(lock, &nodeMeta) && !acquire(lock, &nodeMeta, TTL) {
|
||||
var blockCheckers []RebootBlocker
|
||||
if prometheusURL != "" {
|
||||
blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly, filterMatchOnly: alertFilterMatchOnly})
|
||||
}
|
||||
if podSelectors != nil {
|
||||
blockCheckers = append(blockCheckers, KubernetesBlockingChecker{client: client, nodename: nodeID, filter: podSelectors})
|
||||
}
|
||||
|
||||
var rebootRequiredBlockCondition string
|
||||
if rebootBlocked(blockCheckers...) {
|
||||
rebootRequiredBlockCondition = ", but blocked at this time"
|
||||
continue
|
||||
}
|
||||
log.Infof("Reboot required%s", rebootRequiredBlockCondition)
|
||||
|
||||
if !holding(lock, &nodeMeta, concurrency > 1) && !acquire(lock, &nodeMeta, TTL, concurrency) {
|
||||
// Prefer to not schedule pods onto this node to avoid draing the same pod multiple times.
|
||||
preferNoScheduleTaint.Enable()
|
||||
continue
|
||||
@@ -740,7 +778,7 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
if err != nil {
|
||||
if !forceReboot {
|
||||
log.Errorf("Unable to cordon or drain %s: %v, will release lock and retry cordon and drain before rebooting when lock is next acquired", node.GetName(), err)
|
||||
release(lock)
|
||||
release(lock, concurrency > 1)
|
||||
log.Infof("Performing a best-effort uncordon after failed cordon and drain")
|
||||
uncordon(client, node)
|
||||
continue
|
||||
@@ -752,7 +790,13 @@ func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []s
|
||||
time.Sleep(rebootDelay)
|
||||
}
|
||||
|
||||
invokeReboot(nodeID, rebootCommand)
|
||||
if notifyURL != "" {
|
||||
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
|
||||
log.Warnf("Error notifying: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
booter.Reboot()
|
||||
for {
|
||||
log.Infof("Waiting for reboot")
|
||||
time.Sleep(time.Minute)
|
||||
@@ -818,7 +862,14 @@ func root(cmd *cobra.Command, args []string) {
|
||||
log.Infof("Blocking Pod Selectors: %v", podSelectors)
|
||||
log.Infof("Reboot schedule: %v", window)
|
||||
log.Infof("Reboot check command: %s every %v", sentinelCommand, period)
|
||||
log.Infof("Reboot command: %s", restartCommand)
|
||||
log.Infof("Concurrency: %v", concurrency)
|
||||
log.Infof("Reboot method: %s", rebootMethod)
|
||||
if rebootCommand == MethodCommand {
|
||||
log.Infof("Reboot command: %s", restartCommand)
|
||||
} else {
|
||||
log.Infof("Reboot signal: %v", rebootSignal)
|
||||
}
|
||||
|
||||
if annotateNodes {
|
||||
log.Infof("Will annotate nodes during kured reboot operations")
|
||||
}
|
||||
@@ -826,12 +877,26 @@ func root(cmd *cobra.Command, args []string) {
|
||||
// To run those commands as it was the host, we'll use nsenter
|
||||
// Relies on hostPID:true and privileged:true to enter host mount space
|
||||
// PID set to 1, until we have a better discovery mechanism.
|
||||
hostSentinelCommand := buildHostCommand(1, sentinelCommand)
|
||||
hostRestartCommand := buildHostCommand(1, restartCommand)
|
||||
|
||||
go rebootAsRequired(nodeID, hostRestartCommand, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
|
||||
// Only wrap sentinel-command with nsenter, if a custom-command was configured, otherwise use the host-path mount
|
||||
hostSentinelCommand := sentinelCommand
|
||||
if rebootSentinelCommand != "" {
|
||||
hostSentinelCommand = buildHostCommand(1, sentinelCommand)
|
||||
}
|
||||
|
||||
var booter reboot.Reboot
|
||||
if rebootMethod == MethodCommand {
|
||||
booter = reboot.NewCommandReboot(nodeID, hostRestartCommand)
|
||||
} else if rebootMethod == MethodSignal {
|
||||
booter = reboot.NewSignalReboot(nodeID, rebootSignal)
|
||||
} else {
|
||||
log.Fatalf("Invalid reboot-method configured: %s", rebootMethod)
|
||||
}
|
||||
|
||||
go rebootAsRequired(nodeID, booter, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
|
||||
go maintainRebootRequiredMetric(nodeID, hostSentinelCommand)
|
||||
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
log.Fatal(http.ListenAndServe(":8080", nil))
|
||||
log.Fatal(http.ListenAndServe(fmt.Sprintf("%s:%d", metricsHost, metricsPort), nil))
|
||||
}
|
||||
|
||||
128
go.mod
128
go.mod
@@ -1,52 +1,52 @@
|
||||
module github.com/kubereboot/kured
|
||||
|
||||
go 1.19
|
||||
go 1.20
|
||||
|
||||
replace golang.org/x/net => golang.org/x/net v0.7.0
|
||||
replace golang.org/x/net => golang.org/x/net v0.17.0
|
||||
|
||||
replace github.com/emicklei/go-restful/v3 => github.com/emicklei/go-restful/v3 v3.10.2
|
||||
|
||||
require (
|
||||
github.com/containrrr/shoutrrr v0.7.1
|
||||
github.com/containrrr/shoutrrr v0.8.0
|
||||
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
|
||||
github.com/google/uuid v1.3.0 // indirect
|
||||
github.com/prometheus/client_golang v1.14.0
|
||||
github.com/prometheus/common v0.42.0
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/spf13/cobra v1.6.1
|
||||
github.com/google/uuid v1.4.0 // indirect
|
||||
github.com/prometheus/client_golang v1.19.0
|
||||
github.com/prometheus/common v0.48.0
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/spf13/cobra v1.8.0
|
||||
github.com/spf13/pflag v1.0.5
|
||||
github.com/spf13/viper v1.15.0
|
||||
github.com/stretchr/testify v1.8.2
|
||||
gotest.tools/v3 v3.4.0
|
||||
k8s.io/api v0.25.5
|
||||
k8s.io/apimachinery v0.25.5
|
||||
k8s.io/client-go v0.25.5
|
||||
k8s.io/kubectl v0.25.5
|
||||
github.com/spf13/viper v1.18.2
|
||||
github.com/stretchr/testify v1.9.0
|
||||
gotest.tools/v3 v3.5.1
|
||||
k8s.io/api v0.28.8
|
||||
k8s.io/apimachinery v0.28.8
|
||||
k8s.io/client-go v0.28.8
|
||||
k8s.io/kubectl v0.28.8
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect
|
||||
github.com/MakeNowJust/heredoc v1.0.0 // indirect
|
||||
github.com/PuerkitoBio/purell v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/chai2010/gettext-go v1.0.2 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/emicklei/go-restful/v3 v3.8.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
|
||||
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
|
||||
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
|
||||
github.com/fatih/color v1.14.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
||||
github.com/go-errors/errors v1.0.1 // indirect
|
||||
github.com/go-logr/logr v1.2.3 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.5 // indirect
|
||||
github.com/go-openapi/swag v0.19.14 // indirect
|
||||
github.com/fatih/color v1.15.0 // indirect
|
||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||
github.com/go-errors/errors v1.4.2 // indirect
|
||||
github.com/go-logr/logr v1.2.4 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.6 // indirect
|
||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
||||
github.com/go-openapi/swag v0.22.3 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/google/btree v1.0.1 // indirect
|
||||
github.com/google/gnostic v0.5.7-v3refs // indirect
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/google/gofuzz v1.1.0 // indirect
|
||||
github.com/google/gnostic-models v0.6.8 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/google/gofuzz v1.2.0 // indirect
|
||||
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
|
||||
github.com/hashicorp/hcl v1.0.0 // indirect
|
||||
github.com/imdario/mergo v0.3.6 // indirect
|
||||
@@ -55,51 +55,55 @@ require (
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
|
||||
github.com/magiconair/properties v1.8.7 // indirect
|
||||
github.com/mailru/easyjson v0.7.6 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/mattn/go-isatty v0.0.17 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
|
||||
github.com/mitchellh/go-wordwrap v1.0.0 // indirect
|
||||
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/moby/spdystream v0.2.0 // indirect
|
||||
github.com/moby/term v0.0.0-20210619224110-3f7ff695adc6 // indirect
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.0.6 // indirect
|
||||
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
|
||||
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_model v0.3.0 // indirect
|
||||
github.com/prometheus/procfs v0.8.0 // indirect
|
||||
github.com/russross/blackfriday v1.5.2 // indirect
|
||||
github.com/spf13/afero v1.9.3 // indirect
|
||||
github.com/spf13/cast v1.5.0 // indirect
|
||||
github.com/spf13/jwalterweatherman v1.1.0 // indirect
|
||||
github.com/subosito/gotenv v1.4.2 // indirect
|
||||
github.com/xlab/treeprint v1.1.0 // indirect
|
||||
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect
|
||||
golang.org/x/net v0.7.0 // indirect
|
||||
golang.org/x/oauth2 v0.5.0 // indirect
|
||||
golang.org/x/sys v0.5.0 // indirect
|
||||
golang.org/x/term v0.5.0 // indirect
|
||||
golang.org/x/text v0.7.0 // indirect
|
||||
golang.org/x/time v0.1.0 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
github.com/prometheus/client_model v0.5.0 // indirect
|
||||
github.com/prometheus/procfs v0.12.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/sagikazarmark/locafero v0.4.0 // indirect
|
||||
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
|
||||
github.com/sourcegraph/conc v0.3.0 // indirect
|
||||
github.com/spf13/afero v1.11.0 // indirect
|
||||
github.com/spf13/cast v1.6.0 // indirect
|
||||
github.com/subosito/gotenv v1.6.0 // indirect
|
||||
github.com/xlab/treeprint v1.2.0 // indirect
|
||||
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
|
||||
golang.org/x/net v0.20.0 // indirect
|
||||
golang.org/x/oauth2 v0.16.0 // indirect
|
||||
golang.org/x/sync v0.5.0 // indirect
|
||||
golang.org/x/sys v0.16.0 // indirect
|
||||
golang.org/x/term v0.15.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
golang.org/x/time v0.5.0 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/protobuf v1.28.1 // indirect
|
||||
google.golang.org/protobuf v1.33.0 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/ini.v1 v1.67.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
k8s.io/cli-runtime v0.25.5 // indirect
|
||||
k8s.io/component-base v0.25.5 // indirect
|
||||
k8s.io/klog/v2 v2.70.1 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 // indirect
|
||||
k8s.io/utils v0.0.0-20220728103510-ee6ede2d64ed // indirect
|
||||
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
|
||||
sigs.k8s.io/kustomize/api v0.12.1 // indirect
|
||||
sigs.k8s.io/kustomize/kyaml v0.13.9 // indirect
|
||||
k8s.io/cli-runtime v0.28.8 // indirect
|
||||
k8s.io/component-base v0.28.8 // indirect
|
||||
k8s.io/klog/v2 v2.100.1 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20230717233707-2695361300d9 // indirect
|
||||
k8s.io/utils v0.0.0-20230406110748-d93618cff8a2 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
|
||||
sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect
|
||||
sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
|
||||
sigs.k8s.io/yaml v1.2.0 // indirect
|
||||
sigs.k8s.io/yaml v1.3.0 // indirect
|
||||
)
|
||||
|
||||
100
kured-ds-signal.yaml
Normal file
100
kured-ds-signal.yaml
Normal file
@@ -0,0 +1,100 @@
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: kured
|
||||
namespace: kube-system
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: kured # Must match `--ds-name`
|
||||
namespace: kube-system # Must match `--ds-namespace`
|
||||
spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
name: kured
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
name: kured
|
||||
spec:
|
||||
serviceAccountName: kured
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/control-plane
|
||||
effect: NoSchedule
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
hostPID: true # Facilitate entering the host mount namespace via init
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: sentinel
|
||||
hostPath:
|
||||
path: /var/run
|
||||
type: Directory
|
||||
containers:
|
||||
- name: kured
|
||||
# If you find yourself here wondering why there is no
|
||||
# :latest tag on Docker Hub, see the FAQ in the README
|
||||
image: ghcr.io/kubereboot/kured:1.15.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: false # Not required here: the signal reboot method only needs the KILL capability added below, not nsenter
|
||||
readOnlyRootFilesystem: true
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop: ["*"]
|
||||
add: ["CAP_KILL"]
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: metrics
|
||||
env:
|
||||
# Pass in the name of the node on which this pod is scheduled
|
||||
# for use with drain/uncordon operations and lock acquisition
|
||||
- name: KURED_NODE_ID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
volumeMounts:
|
||||
- mountPath: /sentinel
|
||||
name: sentinel
|
||||
readOnly: true
|
||||
command:
|
||||
- /usr/bin/kured
|
||||
- --reboot-sentinel=/sentinel/reboot-required
|
||||
- --reboot-method=signal
|
||||
# - --reboot-signal=39
|
||||
# - --force-reboot=false
|
||||
# - --drain-grace-period=-1
|
||||
# - --skip-wait-for-delete-timeout=0
|
||||
# - --drain-timeout=0
|
||||
# - --period=1h
|
||||
# - --ds-namespace=kube-system
|
||||
# - --ds-name=kured
|
||||
# - --lock-annotation=weave.works/kured-node-lock
|
||||
# - --lock-ttl=0
|
||||
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
|
||||
# - --alert-filter-regexp=^RebootRequired$
|
||||
# - --alert-firing-only=false
|
||||
# - --prefer-no-schedule-taint=""
|
||||
# - --reboot-sentinel-command=""
|
||||
# - --slack-hook-url=https://hooks.slack.com/...
|
||||
# - --slack-username=prod
|
||||
# - --slack-channel=alerting
|
||||
# - --notify-url="" # See also shoutrrr url format
|
||||
# - --message-template-drain=Draining node %s
|
||||
# - --message-template-reboot=Rebooting node %s
|
||||
# - --message-template-uncordon=Node %s rebooted & uncordoned successfully!
|
||||
# - --blocking-pod-selector=runtime=long,cost=expensive
|
||||
# - --blocking-pod-selector=name=temperamental
|
||||
# - --blocking-pod-selector=...
|
||||
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
|
||||
# - --reboot-delay=90s
|
||||
# - --start-time=0:00
|
||||
# - --end-time=23:59:59
|
||||
# - --time-zone=UTC
|
||||
# - --annotate-nodes=false
|
||||
# - --lock-release-delay=30m
|
||||
# - --log-format=text
|
||||
@@ -29,14 +29,20 @@ spec:
|
||||
effect: NoSchedule
|
||||
hostPID: true # Facilitate entering the host mount namespace via init
|
||||
restartPolicy: Always
|
||||
volumes:
|
||||
- name: sentinel
|
||||
hostPath:
|
||||
path: /var/run
|
||||
type: Directory
|
||||
containers:
|
||||
- name: kured
|
||||
# If you find yourself here wondering why there is no
|
||||
# :latest tag on Docker Hub, see the FAQ in the README
|
||||
image: ghcr.io/kubereboot/kured:1.12.2
|
||||
image: ghcr.io/kubereboot/kured:1.15.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: true # Give permission to nsenter /proc/1/ns/mnt
|
||||
readOnlyRootFilesystem: true
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: metrics
|
||||
@@ -47,12 +53,19 @@ spec:
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
volumeMounts:
|
||||
- mountPath: /sentinel
|
||||
name: sentinel
|
||||
readOnly: true
|
||||
command:
|
||||
- /usr/bin/kured
|
||||
- --reboot-sentinel=/sentinel/reboot-required
|
||||
# - --force-reboot=false
|
||||
# - --drain-grace-period=-1
|
||||
# - --skip-wait-for-delete-timeout=0
|
||||
# - --drain-delay=0
|
||||
# - --drain-timeout=0
|
||||
# - --drain-pod-selector=""
|
||||
# - --period=1h
|
||||
# - --ds-namespace=kube-system
|
||||
# - --ds-name=kured
|
||||
@@ -60,10 +73,12 @@ spec:
|
||||
# - --lock-ttl=0
|
||||
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
|
||||
# - --alert-filter-regexp=^RebootRequired$
|
||||
# - --alert-filter-match-only=false
|
||||
# - --alert-firing-only=false
|
||||
# - --reboot-sentinel=/var/run/reboot-required
|
||||
# - --prefer-no-schedule-taint=""
|
||||
# - --reboot-sentinel-command=""
|
||||
# - --reboot-method=command
|
||||
# - --reboot-signal=39
|
||||
# - --slack-hook-url=https://hooks.slack.com/...
|
||||
# - --slack-username=prod
|
||||
# - --slack-channel=alerting
|
||||
@@ -82,3 +97,6 @@ spec:
|
||||
# - --annotate-nodes=false
|
||||
# - --lock-release-delay=30m
|
||||
# - --log-format=text
|
||||
# - --metrics-host=""
|
||||
# - --metrics-port=8080
|
||||
# - --concurrency=1
|
||||
|
||||
@@ -36,7 +36,7 @@ func NewPromClient(conf papi.Config) (*PromClient, error) {
|
||||
// filter by regexp means when the regex finds the alert-name; the alert is excluded from the
|
||||
// block-list and will NOT block rebooting. query by includeLabel means,
|
||||
// if the query finds an alert, it will include it to the block-list and it WILL block rebooting.
|
||||
func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly bool) ([]string, error) {
|
||||
func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly, filterMatchOnly bool) ([]string, error) {
|
||||
|
||||
// get all alerts from prometheus
|
||||
value, _, err := p.api.Query(context.Background(), "ALERTS", time.Now())
|
||||
@@ -49,7 +49,7 @@ func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly bool) ([]str
|
||||
activeAlertSet := make(map[string]bool)
|
||||
for _, sample := range vector {
|
||||
if alertName, isAlert := sample.Metric[model.AlertNameLabel]; isAlert && sample.Value != 0 {
|
||||
if (filter == nil || !filter.MatchString(string(alertName))) && (!firingOnly || sample.Metric["alertstate"] == "firing") {
|
||||
if matchesRegex(filter, string(alertName), filterMatchOnly) && (!firingOnly || sample.Metric["alertstate"] == "firing") {
|
||||
activeAlertSet[string(alertName)] = true
|
||||
}
|
||||
}
|
||||
@@ -67,3 +67,11 @@ func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly bool) ([]str
|
||||
|
||||
return nil, fmt.Errorf("Unexpected value type: %v", value)
|
||||
}
|
||||
|
||||
// matchesRegex decides whether alertName passes the optional name filter.
//
// A nil filter lets every alert through. Otherwise the alert passes when the
// result of matching the regex against its name equals filterMatchOnly: with
// filterMatchOnly=false only non-matching names pass, with
// filterMatchOnly=true only matching names pass.
func matchesRegex(filter *regexp.Regexp, alertName string, filterMatchOnly bool) bool {
	if filter != nil {
		matched := filter.MatchString(alertName)
		return matched == filterMatchOnly
	}
	return true
}
|
||||
|
||||
@@ -45,62 +45,87 @@ func TestActiveAlerts(t *testing.T) {
|
||||
addr := "http://localhost:10001"
|
||||
|
||||
for _, tc := range []struct {
|
||||
it string
|
||||
rFilter string
|
||||
respBody string
|
||||
aName string
|
||||
wantN int
|
||||
firingOnly bool
|
||||
it string
|
||||
rFilter string
|
||||
respBody string
|
||||
aName string
|
||||
wantN int
|
||||
firingOnly bool
|
||||
filterMatchOnly bool
|
||||
}{
|
||||
{
|
||||
it: "should return no active alerts",
|
||||
respBody: responsebody,
|
||||
rFilter: "",
|
||||
wantN: 0,
|
||||
firingOnly: false,
|
||||
it: "should return no active alerts",
|
||||
respBody: responsebody,
|
||||
rFilter: "",
|
||||
wantN: 0,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should return a subset of all alerts",
|
||||
respBody: responsebody,
|
||||
rFilter: "Pod",
|
||||
wantN: 3,
|
||||
firingOnly: false,
|
||||
it: "should return a subset of all alerts",
|
||||
respBody: responsebody,
|
||||
rFilter: "Pod",
|
||||
wantN: 3,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should return all active alerts by regex",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 5,
|
||||
firingOnly: false,
|
||||
it: "should return a subset of all alerts",
|
||||
respBody: responsebody,
|
||||
rFilter: "Gatekeeper",
|
||||
wantN: 1,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: true,
|
||||
},
|
||||
{
|
||||
it: "should return all active alerts by regex filter",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 5,
|
||||
firingOnly: false,
|
||||
it: "should return all active alerts by regex",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 5,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should return only firing alerts if firingOnly is true",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 4,
|
||||
firingOnly: true,
|
||||
it: "should return all active alerts by regex filter",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 5,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should return ScheduledRebootFailing active alerts",
|
||||
respBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
|
||||
aName: "ScheduledRebootFailing",
|
||||
rFilter: "*",
|
||||
wantN: 1,
|
||||
firingOnly: false,
|
||||
it: "should return only firing alerts if firingOnly is true",
|
||||
respBody: responsebody,
|
||||
rFilter: "*",
|
||||
wantN: 4,
|
||||
firingOnly: true,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
|
||||
{
|
||||
it: "should return ScheduledRebootFailing active alerts",
|
||||
respBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
|
||||
aName: "ScheduledRebootFailing",
|
||||
rFilter: "*",
|
||||
wantN: 1,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should not return an active alert if RebootRequired is firing (regex filter)",
|
||||
respBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
|
||||
rFilter: "RebootRequired",
|
||||
wantN: 0,
|
||||
firingOnly: false,
|
||||
it: "should not return an active alert if RebootRequired is firing (regex filter)",
|
||||
respBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
|
||||
rFilter: "RebootRequired",
|
||||
wantN: 0,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: false,
|
||||
},
|
||||
{
|
||||
it: "should not return an active alert if RebootRequired is firing (regex filter)",
|
||||
respBody: `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
|
||||
rFilter: "RebootRequired",
|
||||
wantN: 1,
|
||||
firingOnly: false,
|
||||
filterMatchOnly: true,
|
||||
},
|
||||
} {
|
||||
// Start mockServer
|
||||
@@ -125,7 +150,7 @@ func TestActiveAlerts(t *testing.T) {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
result, err := p.ActiveAlerts(regex, tc.firingOnly)
|
||||
result, err := p.ActiveAlerts(regex, tc.firingOnly, tc.filterMatchOnly)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
@@ -35,6 +35,11 @@ type lockAnnotationValue struct {
|
||||
TTL time.Duration `json:"TTL"`
|
||||
}
|
||||
|
||||
// multiLockAnnotationValue is the JSON document stored in the daemonset lock
// annotation when the lock may be held by several nodes at once.
type multiLockAnnotationValue struct {
|
||||
// MaxOwners is the owner limit that was passed in when the annotation was built.
MaxOwners int `json:"maxOwners"`
|
||||
// LockAnnotations holds one entry per node recorded as a lock holder.
LockAnnotations []lockAnnotationValue `json:"locks"`
|
||||
}
|
||||
|
||||
// New creates a daemonsetLock object containing the necessary data for follow up k8s requests
|
||||
func New(client *kubernetes.Clientset, nodeID, namespace, name, annotation string) *DaemonSetLock {
|
||||
return &DaemonSetLock{client, nodeID, namespace, name, annotation}
|
||||
@@ -84,6 +89,92 @@ func (dsl *DaemonSetLock) Acquire(metadata interface{}, TTL time.Duration) (bool
|
||||
}
|
||||
}
|
||||
|
||||
// AcquireMultiple creates and annotates the daemonset with a multiple owner lock
|
||||
func (dsl *DaemonSetLock) AcquireMultiple(metadata interface{}, TTL time.Duration, maxOwners int) (bool, []string, error) {
|
||||
for {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return false, []string{}, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
annotation := multiLockAnnotationValue{}
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
if exists {
|
||||
if err := json.Unmarshal([]byte(valueString), &annotation); err != nil {
|
||||
return false, []string{}, fmt.Errorf("error getting multi lock: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
lockPossible, newAnnotation := dsl.canAcquireMultiple(annotation, metadata, TTL, maxOwners)
|
||||
if !lockPossible {
|
||||
return false, nodeIDsFromMultiLock(newAnnotation), nil
|
||||
}
|
||||
|
||||
if ds.ObjectMeta.Annotations == nil {
|
||||
ds.ObjectMeta.Annotations = make(map[string]string)
|
||||
}
|
||||
newAnnotationBytes, err := json.Marshal(&newAnnotation)
|
||||
if err != nil {
|
||||
return false, []string{}, fmt.Errorf("error marshalling new annotation lock: %w", err)
|
||||
}
|
||||
ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)
|
||||
|
||||
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.Background(), ds, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
} else {
|
||||
return false, []string{}, fmt.Errorf("error updating daemonset with multi lock: %w", err)
|
||||
}
|
||||
}
|
||||
return true, nodeIDsFromMultiLock(newAnnotation), nil
|
||||
}
|
||||
}
|
||||
|
||||
func nodeIDsFromMultiLock(annotation multiLockAnnotationValue) []string {
|
||||
nodeIDs := make([]string, 0, len(annotation.LockAnnotations))
|
||||
for _, nodeLock := range annotation.LockAnnotations {
|
||||
nodeIDs = append(nodeIDs, nodeLock.NodeID)
|
||||
}
|
||||
return nodeIDs
|
||||
}
|
||||
|
||||
func (dsl *DaemonSetLock) canAcquireMultiple(annotation multiLockAnnotationValue, metadata interface{}, TTL time.Duration, maxOwners int) (bool, multiLockAnnotationValue) {
|
||||
newAnnotation := multiLockAnnotationValue{MaxOwners: maxOwners}
|
||||
freeSpace := false
|
||||
if annotation.LockAnnotations == nil || len(annotation.LockAnnotations) < maxOwners {
|
||||
freeSpace = true
|
||||
newAnnotation.LockAnnotations = annotation.LockAnnotations
|
||||
} else {
|
||||
for _, nodeLock := range annotation.LockAnnotations {
|
||||
if ttlExpired(nodeLock.Created, nodeLock.TTL) {
|
||||
freeSpace = true
|
||||
continue
|
||||
}
|
||||
newAnnotation.LockAnnotations = append(
|
||||
newAnnotation.LockAnnotations,
|
||||
nodeLock,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if freeSpace {
|
||||
newAnnotation.LockAnnotations = append(
|
||||
newAnnotation.LockAnnotations,
|
||||
lockAnnotationValue{
|
||||
NodeID: dsl.nodeID,
|
||||
Metadata: metadata,
|
||||
Created: time.Now().UTC(),
|
||||
TTL: TTL,
|
||||
},
|
||||
)
|
||||
return true, newAnnotation
|
||||
}
|
||||
|
||||
return false, multiLockAnnotationValue{}
|
||||
}
|
||||
|
||||
// Test attempts to check the kured daemonset lock status (existence, expiry) from instantiated DaemonSetLock using client-go
|
||||
func (dsl *DaemonSetLock) Test(metadata interface{}) (bool, error) {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
@@ -106,6 +197,30 @@ func (dsl *DaemonSetLock) Test(metadata interface{}) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// TestMultiple attempts to check the kured daemonset lock status for multi locks
|
||||
func (dsl *DaemonSetLock) TestMultiple() (bool, error) {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
if exists {
|
||||
value := multiLockAnnotationValue{}
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, nodeLock := range value.LockAnnotations {
|
||||
if nodeLock.NodeID == dsl.nodeID && !ttlExpired(nodeLock.Created, nodeLock.TTL) {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Release attempts to remove the lock data from the kured ds annotations using client-go
|
||||
func (dsl *DaemonSetLock) Release() error {
|
||||
for {
|
||||
@@ -144,6 +259,55 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
}
|
||||
}
|
||||
|
||||
// ReleaseMultiple attempts to remove the lock data from the kured ds annotations using client-go
|
||||
func (dsl *DaemonSetLock) ReleaseMultiple() error {
|
||||
for {
|
||||
ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
|
||||
if err != nil {
|
||||
return fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
|
||||
}
|
||||
|
||||
valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
|
||||
modified := false
|
||||
value := multiLockAnnotationValue{}
|
||||
if exists {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for idx, nodeLock := range value.LockAnnotations {
|
||||
if nodeLock.NodeID == dsl.nodeID {
|
||||
value.LockAnnotations = append(value.LockAnnotations[:idx], value.LockAnnotations[idx+1:]...)
|
||||
modified = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !exists || !modified {
|
||||
return fmt.Errorf("Lock not held")
|
||||
}
|
||||
|
||||
newAnnotationBytes, err := json.Marshal(value)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error marshalling new annotation on release: %v", err)
|
||||
}
|
||||
ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)
|
||||
|
||||
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
|
||||
if err != nil {
|
||||
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
|
||||
// Something else updated the resource between us reading and writing - try again soon
|
||||
time.Sleep(time.Second)
|
||||
continue
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// GetDaemonSet returns the named DaemonSet resource from the DaemonSetLock's configured client
|
||||
func (dsl *DaemonSetLock) GetDaemonSet(sleep, timeout time.Duration) (*v1.DaemonSet, error) {
|
||||
var ds *v1.DaemonSet
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
package daemonsetlock
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -26,3 +28,181 @@ func TestTtlExpired(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func multiLockAnnotationsAreEqualByNodes(src, dst multiLockAnnotationValue) bool {
|
||||
srcNodes := []string{}
|
||||
for _, srcLock := range src.LockAnnotations {
|
||||
srcNodes = append(srcNodes, srcLock.NodeID)
|
||||
}
|
||||
sort.Strings(srcNodes)
|
||||
|
||||
dstNodes := []string{}
|
||||
for _, dstLock := range dst.LockAnnotations {
|
||||
dstNodes = append(dstNodes, dstLock.NodeID)
|
||||
}
|
||||
sort.Strings(dstNodes)
|
||||
|
||||
return reflect.DeepEqual(srcNodes, dstNodes)
|
||||
}
|
||||
|
||||
func TestCanAcquireMultiple(t *testing.T) {
|
||||
node1Name := "n1"
|
||||
node2Name := "n2"
|
||||
node3Name := "n3"
|
||||
testCases := []struct {
|
||||
name string
|
||||
daemonSetLock DaemonSetLock
|
||||
maxOwners int
|
||||
current multiLockAnnotationValue
|
||||
desired multiLockAnnotationValue
|
||||
lockPossible bool
|
||||
}{
|
||||
{
|
||||
name: "empty_lock",
|
||||
daemonSetLock: DaemonSetLock{
|
||||
nodeID: node1Name,
|
||||
},
|
||||
maxOwners: 2,
|
||||
current: multiLockAnnotationValue{},
|
||||
desired: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node1Name},
|
||||
},
|
||||
},
|
||||
lockPossible: true,
|
||||
},
|
||||
{
|
||||
name: "partial_lock",
|
||||
daemonSetLock: DaemonSetLock{
|
||||
nodeID: node1Name,
|
||||
},
|
||||
maxOwners: 2,
|
||||
current: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node2Name},
|
||||
},
|
||||
},
|
||||
desired: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node1Name},
|
||||
{NodeID: node2Name},
|
||||
},
|
||||
},
|
||||
lockPossible: true,
|
||||
},
|
||||
{
|
||||
name: "full_lock",
|
||||
daemonSetLock: DaemonSetLock{
|
||||
nodeID: node1Name,
|
||||
},
|
||||
maxOwners: 2,
|
||||
current: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{
|
||||
NodeID: node2Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Minute),
|
||||
TTL: time.Hour,
|
||||
},
|
||||
{
|
||||
NodeID: node3Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Minute),
|
||||
TTL: time.Hour,
|
||||
},
|
||||
},
|
||||
},
|
||||
desired: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node2Name},
|
||||
{NodeID: node3Name},
|
||||
},
|
||||
},
|
||||
lockPossible: false,
|
||||
},
|
||||
{
|
||||
name: "full_with_one_expired_lock",
|
||||
daemonSetLock: DaemonSetLock{
|
||||
nodeID: node1Name,
|
||||
},
|
||||
maxOwners: 2,
|
||||
current: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{
|
||||
NodeID: node2Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Hour),
|
||||
TTL: time.Minute,
|
||||
},
|
||||
{
|
||||
NodeID: node3Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Minute),
|
||||
TTL: time.Hour,
|
||||
},
|
||||
},
|
||||
},
|
||||
desired: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node1Name},
|
||||
{NodeID: node3Name},
|
||||
},
|
||||
},
|
||||
lockPossible: true,
|
||||
},
|
||||
{
|
||||
name: "full_with_all_expired_locks",
|
||||
daemonSetLock: DaemonSetLock{
|
||||
nodeID: node1Name,
|
||||
},
|
||||
maxOwners: 2,
|
||||
current: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{
|
||||
NodeID: node2Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Hour),
|
||||
TTL: time.Minute,
|
||||
},
|
||||
{
|
||||
NodeID: node3Name,
|
||||
Created: time.Now().UTC().Add(-1 * time.Hour),
|
||||
TTL: time.Minute,
|
||||
},
|
||||
},
|
||||
},
|
||||
desired: multiLockAnnotationValue{
|
||||
MaxOwners: 2,
|
||||
LockAnnotations: []lockAnnotationValue{
|
||||
{NodeID: node1Name},
|
||||
},
|
||||
},
|
||||
lockPossible: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
lockPossible, actual := testCase.daemonSetLock.canAcquireMultiple(testCase.current, struct{}{}, time.Minute, testCase.maxOwners)
|
||||
if lockPossible != testCase.lockPossible {
|
||||
t.Fatalf(
|
||||
"unexpected result for lock possible (got %t expected %t new annotation %v",
|
||||
lockPossible,
|
||||
testCase.lockPossible,
|
||||
actual,
|
||||
)
|
||||
}
|
||||
|
||||
if lockPossible && (!multiLockAnnotationsAreEqualByNodes(actual, testCase.desired) || testCase.desired.MaxOwners != actual.MaxOwners) {
|
||||
t.Fatalf(
|
||||
"expected lock %v but got %v",
|
||||
testCase.desired,
|
||||
actual,
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
25
pkg/reboot/command.go
Normal file
25
pkg/reboot/command.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package reboot
|
||||
|
||||
import (
|
||||
"github.com/kubereboot/kured/pkg/util"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// CommandRebootMethod holds context-information for a command reboot.
|
||||
type CommandRebootMethod struct {
|
||||
nodeID string
|
||||
rebootCommand []string
|
||||
}
|
||||
|
||||
// NewCommandReboot creates a new command-rebooter which needs full privileges on the host.
|
||||
func NewCommandReboot(nodeID string, rebootCommand []string) *CommandRebootMethod {
|
||||
return &CommandRebootMethod{nodeID: nodeID, rebootCommand: rebootCommand}
|
||||
}
|
||||
|
||||
// Reboot triggers the command-reboot.
|
||||
func (c *CommandRebootMethod) Reboot() {
|
||||
log.Infof("Running command: %s for node: %s", c.rebootCommand, c.nodeID)
|
||||
if err := util.NewCommand(c.rebootCommand[0], c.rebootCommand[1:]...).Run(); err != nil {
|
||||
log.Fatalf("Error invoking reboot command: %v", err)
|
||||
}
|
||||
}
|
||||
6
pkg/reboot/reboot.go
Normal file
6
pkg/reboot/reboot.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package reboot
|
||||
|
||||
// Reboot is the common interface of the available reboot methods; in this
// package CommandRebootMethod and SignalRebootMethod provide it.
|
||||
type Reboot interface {
|
||||
// Reboot triggers the reboot. It returns nothing, so implementations
// report failures themselves.
Reboot()
|
||||
}
|
||||
34
pkg/reboot/signal.go
Normal file
34
pkg/reboot/signal.go
Normal file
@@ -0,0 +1,34 @@
|
||||
package reboot
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// SignalRebootMethod holds context-information for a signal reboot.
|
||||
type SignalRebootMethod struct {
|
||||
nodeID string
|
||||
signal int
|
||||
}
|
||||
|
||||
// NewSignalReboot creates a new signal-rebooter which can run unprivileged.
|
||||
func NewSignalReboot(nodeID string, signal int) *SignalRebootMethod {
|
||||
return &SignalRebootMethod{nodeID: nodeID, signal: signal}
|
||||
}
|
||||
|
||||
// Reboot triggers the signal-reboot.
|
||||
func (c *SignalRebootMethod) Reboot() {
|
||||
log.Infof("Emit reboot-signal for node: %s", c.nodeID)
|
||||
|
||||
process, err := os.FindProcess(1)
|
||||
if err != nil {
|
||||
log.Fatalf("There was no systemd process found: %v", err)
|
||||
}
|
||||
|
||||
err = process.Signal(syscall.Signal(c.signal))
|
||||
if err != nil {
|
||||
log.Fatalf("Signal of SIGRTMIN+5 failed: %v", err)
|
||||
}
|
||||
}
|
||||
23
pkg/util/util.go
Normal file
23
pkg/util/util.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"os/exec"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewCommand creates a new Command with stdout/stderr wired to our standard logger
|
||||
func NewCommand(name string, arg ...string) *exec.Cmd {
|
||||
cmd := exec.Command(name, arg...)
|
||||
cmd.Stdout = log.NewEntry(log.StandardLogger()).
|
||||
WithField("cmd", cmd.Args[0]).
|
||||
WithField("std", "out").
|
||||
WriterLevel(log.InfoLevel)
|
||||
|
||||
cmd.Stderr = log.NewEntry(log.StandardLogger()).
|
||||
WithField("cmd", cmd.Args[0]).
|
||||
WithField("std", "err").
|
||||
WriterLevel(log.WarnLevel)
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -46,6 +46,12 @@ do
|
||||
echo "${#was_unschedulable[@]} nodes were removed from pool once:" "${!was_unschedulable[@]}"
|
||||
echo "${#has_recovered[@]} nodes removed from the pool are now back:" "${!has_recovered[@]}"
|
||||
|
||||
#"$KUBECTL_CMD" logs -n kube-system -l name=kured --ignore-errors > "$tmp_dir"/node_output
|
||||
#if [[ "$DEBUG" == "true" ]]; then
|
||||
# echo "Kured pod logs:"
|
||||
# cat "$tmp_dir"/node_output
|
||||
#fi
|
||||
|
||||
"$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers > "$tmp_dir"/node_output
|
||||
if [[ "$DEBUG" == "true" ]]; then
|
||||
# This is useful to see if a node gets stuck after drain, and doesn't
|
||||
|
||||
Reference in New Issue
Block a user