Compare commits

..

2 Commits

Author SHA1 Message Date
Christian Kotzbauer
59cbea5e25 feat: add another background
Signed-off-by: Christian Kotzbauer <git@ckotzbauer.de>
2023-08-14 19:08:44 +02:00
Christian Kotzbauer
776c35c1e1 cleanup: use Background context
Signed-off-by: Christian Kotzbauer <git@ckotzbauer.de>
2023-08-14 19:08:23 +02:00
29 changed files with 1305 additions and 707 deletions

13
.github/kind-cluster-1.25.yaml vendored Normal file
View File

@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: kindest/node:v1.25.11
- role: control-plane
image: kindest/node:v1.25.11
- role: control-plane
image: kindest/node:v1.25.11
- role: worker
image: kindest/node:v1.25.11
- role: worker
image: kindest/node:v1.25.11

13
.github/kind-cluster-1.26.yaml vendored Normal file
View File

@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.26.6"
- role: control-plane
image: "kindest/node:v1.26.6"
- role: control-plane
image: "kindest/node:v1.26.6"
- role: worker
image: "kindest/node:v1.26.6"
- role: worker
image: "kindest/node:v1.26.6"

13
.github/kind-cluster-1.27.yaml vendored Normal file
View File

@@ -0,0 +1,13 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.27.3"
- role: control-plane
image: "kindest/node:v1.27.3"
- role: control-plane
image: "kindest/node:v1.27.3"
- role: worker
image: "kindest/node:v1.27.3"
- role: worker
image: "kindest/node:v1.27.3"

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: control-plane
image: "kindest/node:v1.28.9"
- role: worker
image: "kindest/node:v1.28.9"
- role: worker
image: "kindest/node:v1.28.9"

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: control-plane
image: "kindest/node:v1.29.4"
- role: worker
image: "kindest/node:v1.29.4"
- role: worker
image: "kindest/node:v1.29.4"

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: control-plane
image: "kindest/node:v1.30.2"
- role: worker
image: "kindest/node:v1.30.2"
- role: worker
image: "kindest/node:v1.30.2"

View File

@@ -10,21 +10,28 @@ test -z "$VERSION" && {
}
test -z "$TMPDIR" && TMPDIR="$(mktemp -d)"
# goreleaser uses arm64 instead of aarch64
goreleaser_arch=$(uname -m | sed -e 's/aarch64/arm64/g' -e 's/ppc64le/ppc64/' -e 's/armv7l/armv7/' )
TAR_FILE="$TMPDIR/${FILE_BASENAME}_$(uname -s)_${goreleaser_arch}.tar.gz"
TAR_FILE="$TMPDIR/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
export TAR_FILE
(
echo "Downloading GoReleaser $VERSION..."
curl -sfLo "$TAR_FILE" \
"$RELEASES_URL/download/$VERSION/${FILE_BASENAME}_$(uname -s)_${goreleaser_arch}.tar.gz"
"$RELEASES_URL/download/$VERSION/${FILE_BASENAME}_$(uname -s)_$(uname -m).tar.gz"
cd "$TMPDIR"
curl -sfLo "checksums.txt" "$RELEASES_URL/download/$VERSION/checksums.txt"
curl -sfLo "checksums.txt.sig" "$RELEASES_URL/download/$VERSION/checksums.txt.sig"
echo "Verifying checksums..."
sha256sum --ignore-missing --quiet --check checksums.txt
if command -v cosign >/dev/null 2>&1; then
echo "Verifying signatures..."
COSIGN_EXPERIMENTAL=1 cosign verify-blob \
--signature checksums.txt.sig \
checksums.txt
else
echo "Could not verify signatures, cosign is not installed."
fi
)
tar -xf "$TAR_FILE" -O goreleaser > "$TMPDIR/goreleaser"
rm "$TMPDIR/checksums.txt"
rm "$TMPDIR/checksums.txt" "$TMPDIR/checksums.txt.sig"
rm "$TAR_FILE"

View File

@@ -39,11 +39,11 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
uses: actions/checkout@v3
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -57,7 +57,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v3
uses: github/codeql-action/autobuild@v2
# Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
@@ -70,6 +70,6 @@ jobs:
# ./location_of_script_within_repo/buildscript.sh
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"

View File

@@ -19,16 +19,16 @@ jobs:
contents: write
packages: write
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Login to ghcr.io
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -36,15 +36,15 @@ jobs:
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Find current tag version
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
@@ -57,9 +57,10 @@ jobs:
run: make kured-release-snapshot
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COSIGN_EXPERIMENTAL: 1
- name: Build image
uses: docker/build-push-action@v6
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
@@ -74,9 +75,11 @@ jobs:
- name: Sign and attest artifacts
run: |
.tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
.tmp/cosign sign -f -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
.tmp/cosign sign-blob -y --output-signature kured.sbom.sig --output-certificate kured.sbom.pem kured.sbom
.tmp/cosign sign-blob --output-signature kured.sbom.sig --output-certificate kured.sbom.pem kured.sbom
.tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
.tmp/cosign attest -f --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
.tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.sha_short }}
env:
COSIGN_EXPERIMENTAL: 1

View File

@@ -9,9 +9,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
@@ -19,7 +19,7 @@ jobs:
run: go test -json ./... > test.json
- name: Annotate tests
if: always()
uses: guyarb/golang-test-annoations@v0.8.0
uses: guyarb/golang-test-annoations@v0.7.0
with:
test-results: test.json
@@ -27,7 +27,7 @@ jobs:
name: Lint bash code with shellcheck
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Run ShellCheck
uses: bewuethr/shellcheck-action@v2
@@ -35,9 +35,9 @@ jobs:
name: Lint golang code
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
@@ -54,9 +54,9 @@ jobs:
name: Check docs for incorrect links
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Link Checker
uses: lycheeverse/lychee-action@2b973e86fc7b1f6b36a93795fe2c9c6ae1118621
uses: lycheeverse/lychee-action@ec3ed119d4f44ad2673a7232460dc7dff59d2421
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
with:
@@ -70,16 +70,16 @@ jobs:
name: Build image and scan it against known vulnerabilities
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
@@ -88,7 +88,7 @@ jobs:
- name: Build image
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
with:
image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
format: 'table'
@@ -103,28 +103,27 @@ jobs:
# - Ensure manifests work with the latest versions even with no manifest change
# (compared to helm charts, manifests cannot easily template changes based on versions)
# Helm charts are _trailing_ releases, while manifests are done during development.
# This test uses the "command" reboot-method.
e2e-manifests-command:
name: End-to-End test with kured with code and manifests from HEAD (command)
e2e-manifests:
name: End-to-End test with kured with code and manifests from HEAD
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
kubernetes:
- "1.28"
- "1.29"
- "1.30"
- "1.25"
- "1.26"
- "1.27"
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
@@ -146,7 +145,7 @@ jobs:
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
uses: helm/kind-action@v1.10.0
uses: helm/kind-action@v1.8.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.14.0
@@ -163,92 +162,7 @@ jobs:
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
- name: Ensure kured is ready
uses: nick-invision/retry@v3.0.0
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 60
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
- name: Create reboot sentinel files
run: |
./tests/kind/create-reboot-sentinels.sh
- name: Follow reboot until success
env:
DEBUG: true
run: |
./tests/kind/follow-coordinated-reboot.sh
# This ensures the latest code works with the manifests built from tree.
# It is useful for two things:
# - Test manifests changes (obviously), ensuring they don't break existing clusters
# - Ensure manifests work with the latest versions even with no manifest change
# (compared to helm charts, manifests cannot easily template changes based on versions)
# Helm charts are _trailing_ releases, while manifests are done during development.
# This test uses the "signal" reboot-method.
e2e-manifests-signal:
name: End-to-End test with kured with code and manifests from HEAD (signal)
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
kubernetes:
- "1.28"
- "1.29"
- "1.30"
steps:
- uses: actions/checkout@v4
- name: Ensure go version
uses: actions/setup-go@v5
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
id: tags
- name: Build artifacts
run: |
VERSION="${{ steps.tags.outputs.sha_short }}" make image
VERSION="${{ steps.tags.outputs.sha_short }}" make manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
sudo bash << EOF
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
echo '{}' > /etc/docker/daemon.json
systemctl restart docker || journalctl --no-pager -n 500
systemctl status docker
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
uses: helm/kind-action@v1.10.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.14.0
- name: Preload previously built images onto kind cluster
run: kind load docker-image ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }} --name chart-testing
- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds-signal.yaml
- name: Install kured with kubectl
run: |
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds-signal.yaml
- name: Ensure kured is ready
uses: nick-invision/retry@v3.0.0
uses: nick-invision/retry@v2.8.3
with:
timeout_minutes: 10
max_attempts: 10
@@ -282,20 +196,20 @@ jobs:
fail-fast: false
matrix:
kubernetes:
- "1.28"
- "1.29"
- "1.30"
- "1.25"
- "1.26"
- "1.27"
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
@@ -317,7 +231,7 @@ jobs:
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
uses: helm/kind-action@v1.10.0
uses: helm/kind-action@v1.8.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.14.0
@@ -335,7 +249,7 @@ jobs:
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
- name: Ensure kured is ready
uses: nick-invision/retry@v3.0.0
uses: nick-invision/retry@v2.8.3
with:
timeout_minutes: 10
max_attempts: 10

View File

@@ -21,9 +21,9 @@ jobs:
contents: write
packages: write
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
@@ -31,17 +31,18 @@ jobs:
run: echo "version=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
id: tags
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Build binaries
run: make kured-release-tag
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
COSIGN_EXPERIMENTAL: 1
- name: Build single image for scan
uses: docker/build-push-action@v6
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/amd64
@@ -51,7 +52,7 @@ jobs:
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
with:
image-ref: '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}'
format: 'table'
@@ -61,7 +62,7 @@ jobs:
severity: 'CRITICAL,HIGH'
- name: Login to ghcr.io
uses: docker/login-action@v3
uses: docker/login-action@v2
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
@@ -69,12 +70,12 @@ jobs:
- name: Extract metadata (tags, labels) for Docker
id: meta
uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81
uses: docker/metadata-action@818d4b7b91585d195f67373fd9cb0332e31a7175
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
- name: Build release images
uses: docker/build-push-action@v6
uses: docker/build-push-action@v4
with:
context: .
platforms: linux/arm64, linux/amd64, linux/arm/v7, linux/arm/v6, linux/386
@@ -89,9 +90,11 @@ jobs:
- name: Sign and attest artifacts
run: |
.tmp/cosign sign -y -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
.tmp/cosign sign -f -r ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
.tmp/cosign sign-blob -y --output-signature kured.sbom.sig kured.sbom
.tmp/cosign sign-blob --output-signature kured.sbom.sig kured.sbom
.tmp/cosign attest -y --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
.tmp/cosign attest -f --type spdx --predicate kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
.tmp/cosign attach sbom --type spdx --sbom kured.sbom ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.tags.outputs.version }}
env:
COSIGN_EXPERIMENTAL: 1

View File

@@ -10,12 +10,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: checkout
uses: actions/checkout@v4
uses: actions/checkout@v3
- name: run tests
run: go test -json ./... > test.json
- name: Annotate tests
if: always()
uses: guyarb/golang-test-annoations@v0.8.0
uses: guyarb/golang-test-annoations@v0.7.0
with:
test-results: test.json
@@ -25,7 +25,7 @@ jobs:
steps:
# Stale by default waits for 60 days before marking PR/issues as stale, and closes them after 21 days.
# Do not expire the first issues that would allow the community to grow.
- uses: actions/stale@v9
- uses: actions/stale@v8
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
stale-issue-message: 'This issue was automatically considered stale due to lack of activity. Please update it and/or join our slack channels to promote it, before it automatically closes (in 7 days).'
@@ -39,9 +39,9 @@ jobs:
name: Check docs for incorrect links
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Link Checker
uses: lycheeverse/lychee-action@2b973e86fc7b1f6b36a93795fe2c9c6ae1118621
uses: lycheeverse/lychee-action@ec3ed119d4f44ad2673a7232460dc7dff59d2421
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
with:
@@ -52,16 +52,16 @@ jobs:
name: Build image and scan it against known vulnerabilities
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- name: Ensure go version
uses: actions/setup-go@v5
uses: actions/setup-go@v4
with:
go-version-file: 'go.mod'
check-latest: true
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
uses: docker/setup-qemu-action@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
uses: docker/setup-buildx-action@v2
- name: Setup GoReleaser
run: make bootstrap-tools
- name: Find current tag version
@@ -70,7 +70,7 @@ jobs:
- name: Build artifacts
run: VERSION="${{ steps.tags.outputs.sha_short }}" make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@6e7b7d1fd3e4fef0c5fa8cce1229c54b2c9bd0d8
uses: aquasecurity/trivy-action@41f05d9ecffa2ed3f1580af306000f734b733e54
with:
image-ref: 'ghcr.io/${{ github.repository }}:${{ steps.tags.outputs.sha_short }}'
format: 'table'

View File

@@ -1,3 +1,3 @@
# Kured Community Code of Conduct
## Kured Community Code of Conduct
Kured follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md).

View File

@@ -208,8 +208,9 @@ kind create cluster --config .github/kind-cluster-<k8s-version>.yaml
### Prepare Documentation
Check that [compatibility matrix](https://kured.dev/docs/installation/) is updated
to the new version you want to release.
Check that `README.md` has an updated compatibility matrix and that the
url in the `kubectl` incantation (under "Installation") is updated to the
new version you want to release.
### Create a tag on the repo

View File

@@ -1,4 +1,4 @@
FROM --platform=$TARGETPLATFORM alpine:3.20.2 as bin
FROM --platform=$TARGETPLATFORM alpine:3.18.3 as bin
ARG TARGETOS
ARG TARGETARCH
@@ -19,7 +19,7 @@ RUN set -ex \
esac \
&& cp /dist/kured_${TARGETOS}_${TARGETARCH}${SUFFIX}/kured /dist/kured;
FROM --platform=$TARGETPLATFORM alpine:3.20.2
FROM --platform=$TARGETPLATFORM alpine:3.18.3
RUN apk update --no-cache && apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
COPY --from=bin /dist/kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]

View File

@@ -1,4 +1,4 @@
Christian Hopf <christian.kotzbauer@gmail.com> (@ckotzbauer)
Christian Kotzbauer <christian.kotzbauer@gmail.com> (@ckotzbauer)
Daniel Holbach <daniel.holbach@gmail.com> (@dholbach)
Hidde Beydals <hidde@weave.works> (@hiddeco)
Jack Francis <jackfrancis@gmail.com> (@jackfrancis)

View File

@@ -14,25 +14,25 @@ $(TEMPDIR):
.PHONY: bootstrap-tools
bootstrap-tools: $(TEMPDIR)
VERSION=v1.24.0 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v1.0.1
curl -sSfL https://github.com/sigstore/cosign/releases/download/v2.2.3/cosign-linux-amd64 -o .tmp/cosign
VERSION=v1.11.4 TMPDIR=.tmp bash .github/scripts/goreleaser-install.sh
curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh | sh -s -- -b .tmp v0.58.0
curl -sSfL https://github.com/sigstore/cosign/releases/download/v1.12.1/cosign-linux-amd64 -o .tmp/cosign
chmod +x .tmp/goreleaser .tmp/cosign .tmp/syft
clean:
rm -rf ./dist
kured:
$(GORELEASER_CMD) build --clean --single-target --snapshot
$(GORELEASER_CMD) build --rm-dist --single-target --snapshot
kured-all:
$(GORELEASER_CMD) build --clean --snapshot
$(GORELEASER_CMD) build --rm-dist --snapshot
kured-release-tag:
$(GORELEASER_CMD) release --clean
$(GORELEASER_CMD) release --rm-dist
kured-release-snapshot:
$(GORELEASER_CMD) release --clean --snapshot
$(GORELEASER_CMD) release --rm-dist --snapshot
image: kured
$(SUDO) docker buildx build --load -t ghcr.io/$(DH_ORG)/kured:$(VERSION) .
@@ -42,7 +42,6 @@ minikube-publish: image
manifest:
sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds.yaml
sed -i "s#image: ghcr.io/.*kured.*#image: ghcr.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds-signal.yaml
echo "Please generate combined manifest if necessary"
test:

View File

@@ -3,9 +3,8 @@
[![Artifact HUB](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/kured)](https://artifacthub.io/packages/helm/kured/kured)
[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fkubereboot%2Fkured.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubereboot%2Fkured?ref=badge_shield)
[![CLOMonitor](https://img.shields.io/endpoint?url=https://clomonitor.io/api/projects/cncf/kured/badge)](https://clomonitor.io/projects/cncf/kured)
[![OpenSSF Best Practices](https://www.bestpractices.dev/projects/8867/badge)](https://www.bestpractices.dev/projects/8867)
<img src="https://github.com/kubereboot/website/raw/main/static/img/kured.png" alt="kured logo" width="200" align="right"/>
<img src="https://github.com/kubereboot/website/raw/main/static/img/kured.png" width="200" align="right"/>
- [kured - Kubernetes Reboot Daemon](#kured---kubernetes-reboot-daemon)
- [Introduction](#introduction)

View File

@@ -33,10 +33,8 @@ import (
"github.com/kubereboot/kured/pkg/alerts"
"github.com/kubereboot/kured/pkg/daemonsetlock"
"github.com/kubereboot/kured/pkg/delaytick"
"github.com/kubereboot/kured/pkg/reboot"
"github.com/kubereboot/kured/pkg/taints"
"github.com/kubereboot/kured/pkg/timewindow"
"github.com/kubereboot/kured/pkg/util"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
@@ -46,10 +44,8 @@ var (
// Command line flags
forceReboot bool
drainDelay time.Duration
drainTimeout time.Duration
rebootDelay time.Duration
rebootMethod string
period time.Duration
metricsHost string
metricsPort int
@@ -77,7 +73,6 @@ var (
messageTemplateUncordon string
podSelectors []string
rebootCommand string
rebootSignal int
logFormat string
preRebootNodeLabels []string
postRebootNodeLabels []string
@@ -107,13 +102,6 @@ const (
KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed"
// EnvPrefix The environment variable prefix of all environment variables bound to our command line flags.
EnvPrefix = "KURED"
// MethodCommand is used as "--reboot-method" value when rebooting with the configured "--reboot-command"
MethodCommand = "command"
// MethodSignal is used as "--reboot-method" value when rebooting with a SIGRTMIN+5 signal.
MethodSignal = "signal"
sigTrminPlus5 = 34 + 5
)
func init() {
@@ -151,14 +139,10 @@ func NewRootCommand() *cobra.Command {
"only drain pods with labels matching the selector (default: '', all pods)")
rootCmd.PersistentFlags().IntVar(&skipWaitForDeleteTimeoutSeconds, "skip-wait-for-delete-timeout", 0,
"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node")
rootCmd.PersistentFlags().DurationVar(&drainDelay, "drain-delay", 0,
"delay drain for this duration (default: 0, disabled)")
rootCmd.PersistentFlags().DurationVar(&drainTimeout, "drain-timeout", 0,
"timeout after which the drain is aborted (default: 0, infinite time)")
rootCmd.PersistentFlags().DurationVar(&rebootDelay, "reboot-delay", 0,
"delay reboot for this duration (default: 0, disabled)")
rootCmd.PersistentFlags().StringVar(&rebootMethod, "reboot-method", "command",
"method to use for reboots. Available: command")
rootCmd.PersistentFlags().DurationVar(&period, "period", time.Minute*60,
"sentinel check period")
rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-namespace", "kube-system",
@@ -189,8 +173,6 @@ func NewRootCommand() *cobra.Command {
"command to run when a reboot is required")
rootCmd.PersistentFlags().IntVar(&concurrency, "concurrency", 1,
"amount of nodes to concurrently reboot. Defaults to 1")
rootCmd.PersistentFlags().IntVar(&rebootSignal, "reboot-signal", sigTrminPlus5,
"signal to use for reboot, SIGRTMIN+5 by default.")
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notifications [deprecated in favor of --notify-url]")
@@ -314,6 +296,22 @@ func flagToEnvVar(flag string) string {
return fmt.Sprintf("%s_%s", EnvPrefix, envVarSuffix)
}
// newCommand builds an *exec.Cmd for name with the given arguments and routes
// its output through the standard logger: stdout is written at info level and
// stderr at warn level. Every log entry carries the fields "cmd" (the first
// element of the command's argument vector) and "std" ("out" or "err").
func newCommand(name string, arg ...string) *exec.Cmd {
	cmd := exec.Command(name, arg...)

	stdLogger := log.StandardLogger()
	program := cmd.Args[0]

	// One writer per stream so each line can be attributed to stdout or stderr.
	outEntry := log.NewEntry(stdLogger).WithFields(log.Fields{"cmd": program, "std": "out"})
	errEntry := log.NewEntry(stdLogger).WithFields(log.Fields{"cmd": program, "std": "err"})

	cmd.Stdout = outEntry.WriterLevel(log.InfoLevel)
	cmd.Stderr = errEntry.WriterLevel(log.WarnLevel)

	return cmd
}
// buildHostCommand writes a new command to run in the host namespace
// Rancher based need different pid
func buildHostCommand(pid int, command []string) []string {
@@ -326,7 +324,7 @@ func buildHostCommand(pid int, command []string) []string {
}
func rebootRequired(sentinelCommand []string) bool {
cmd := util.NewCommand(sentinelCommand[0], sentinelCommand[1:]...)
cmd := newCommand(sentinelCommand[0], sentinelCommand[1:]...)
if err := cmd.Run(); err != nil {
switch err := err.(type) {
case *exec.ExitError:
@@ -397,7 +395,7 @@ func (pb PrometheusBlockingChecker) isBlocked() bool {
func (kb KubernetesBlockingChecker) isBlocked() bool {
fieldSelector := fmt.Sprintf("spec.nodeName=%s,status.phase!=Succeeded,status.phase!=Failed,status.phase!=Unknown", kb.nodename)
for _, labelSelector := range kb.filter {
podList, err := kb.client.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{
podList, err := kb.client.CoreV1().Pods("").List(context.Background(), metav1.ListOptions{
LabelSelector: labelSelector,
FieldSelector: fieldSelector,
Limit: 10})
@@ -499,11 +497,6 @@ func drain(client *kubernetes.Clientset, node *v1.Node) error {
updateNodeLabels(client, node, preRebootNodeLabels)
}
if drainDelay > 0 {
log.Infof("Delaying drain for %v", drainDelay)
time.Sleep(drainDelay)
}
log.Infof("Draining node %s", nodename)
if notifyURL != "" {
@@ -556,6 +549,20 @@ func uncordon(client *kubernetes.Clientset, node *v1.Node) error {
return nil
}
// invokeReboot logs the reboot command for nodeID, sends a notification via
// shoutrrr when notifyURL is set, and then runs rebootCommand (first element
// is the program, the rest are its arguments).
//
// A failed notification is only logged as a warning; a failed reboot command
// is reported through log.Fatalf.
func invokeReboot(nodeID string, rebootCommand []string) {
	log.Infof("Running command: %s for node: %s", rebootCommand, nodeID)

	if notifyURL != "" {
		message := fmt.Sprintf(messageTemplateReboot, nodeID)
		if notifyErr := shoutrrr.Send(notifyURL, message); notifyErr != nil {
			log.Warnf("Error notifying: %v", notifyErr)
		}
	}

	program, args := rebootCommand[0], rebootCommand[1:]
	runErr := newCommand(program, args...).Run()
	if runErr != nil {
		log.Fatalf("Error invoking reboot command: %v", runErr)
	}
}
func maintainRebootRequiredMetric(nodeID string, sentinelCommand []string) {
for {
if rebootRequired(sentinelCommand) {
@@ -573,7 +580,7 @@ type nodeMeta struct {
}
func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations map[string]string) error {
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
node, err := client.CoreV1().Nodes().Get(context.Background(), nodeID, metav1.GetOptions{})
if err != nil {
log.Errorf("Error retrieving node object via k8s API: %s", err)
return err
@@ -589,7 +596,7 @@ func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations
return err
}
_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
_, err = client.CoreV1().Nodes().Patch(context.Background(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
if err != nil {
var annotationsErr string
for k, v := range annotations {
@@ -608,7 +615,7 @@ func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) erro
// So we replace all instances of "/" with "~1" as per:
// https://tools.ietf.org/html/rfc6901#section-3
patch := []byte(fmt.Sprintf("[{\"op\":\"remove\",\"path\":\"/metadata/annotations/%s\"}]", strings.ReplaceAll(key, "/", "~1")))
_, err := client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{})
_, err := client.CoreV1().Nodes().Patch(context.Background(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{})
if err != nil {
log.Errorf("Error deleting node annotation %s via k8s API: %v", key, err)
return err
@@ -634,7 +641,7 @@ func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []stri
log.Fatalf("Error marshalling node object into JSON: %v", err)
}
_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
_, err = client.CoreV1().Nodes().Patch(context.Background(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
if err != nil {
var labelsErr string
for _, label := range labels {
@@ -646,7 +653,7 @@ func updateNodeLabels(client *kubernetes.Clientset, node *v1.Node, labels []stri
}
}
func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
config, err := rest.InClusterConfig()
if err != nil {
log.Fatal(err)
@@ -664,7 +671,7 @@ func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []str
tick := delaytick.New(source, 1*time.Minute)
for range tick {
if holding(lock, &nodeMeta, concurrency > 1) {
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
node, err := client.CoreV1().Nodes().Get(context.Background(), nodeID, metav1.GetOptions{})
if err != nil {
log.Errorf("Error retrieving node object via k8s API: %v", err)
continue
@@ -730,8 +737,9 @@ func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []str
preferNoScheduleTaint.Disable()
continue
}
log.Infof("Reboot required")
node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
node, err := client.CoreV1().Nodes().Get(context.Background(), nodeID, metav1.GetOptions{})
if err != nil {
log.Fatalf("Error retrieving node object via k8s API: %v", err)
}
@@ -753,6 +761,12 @@ func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []str
}
}
if !holding(lock, &nodeMeta, concurrency > 1) && !acquire(lock, &nodeMeta, TTL, concurrency) {
// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
preferNoScheduleTaint.Enable()
continue
}
var blockCheckers []RebootBlocker
if prometheusURL != "" {
blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly, filterMatchOnly: alertFilterMatchOnly})
@@ -761,16 +775,7 @@ func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []str
blockCheckers = append(blockCheckers, KubernetesBlockingChecker{client: client, nodename: nodeID, filter: podSelectors})
}
var rebootRequiredBlockCondition string
if rebootBlocked(blockCheckers...) {
rebootRequiredBlockCondition = ", but blocked at this time"
continue
}
log.Infof("Reboot required%s", rebootRequiredBlockCondition)
if !holding(lock, &nodeMeta, concurrency > 1) && !acquire(lock, &nodeMeta, TTL, concurrency) {
// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
preferNoScheduleTaint.Enable()
continue
}
@@ -790,13 +795,7 @@ func rebootAsRequired(nodeID string, booter reboot.Reboot, sentinelCommand []str
time.Sleep(rebootDelay)
}
if notifyURL != "" {
if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
log.Warnf("Error notifying: %v", err)
}
}
booter.Reboot()
invokeReboot(nodeID, rebootCommand)
for {
log.Infof("Waiting for reboot")
time.Sleep(time.Minute)
@@ -863,13 +862,7 @@ func root(cmd *cobra.Command, args []string) {
log.Infof("Reboot schedule: %v", window)
log.Infof("Reboot check command: %s every %v", sentinelCommand, period)
log.Infof("Concurrency: %v", concurrency)
log.Infof("Reboot method: %s", rebootMethod)
if rebootCommand == MethodCommand {
log.Infof("Reboot command: %s", restartCommand)
} else {
log.Infof("Reboot signal: %v", rebootSignal)
}
log.Infof("Reboot command: %s", restartCommand)
if annotateNodes {
log.Infof("Will annotate nodes during kured reboot operations")
}
@@ -877,24 +870,10 @@ func root(cmd *cobra.Command, args []string) {
// To run those commands as if it were the host, we'll use nsenter
// Relies on hostPID:true and privileged:true to enter host mount space
// PID set to 1, until we have a better discovery mechanism.
hostSentinelCommand := buildHostCommand(1, sentinelCommand)
hostRestartCommand := buildHostCommand(1, restartCommand)
// Only wrap sentinel-command with nsenter, if a custom-command was configured, otherwise use the host-path mount
hostSentinelCommand := sentinelCommand
if rebootSentinelCommand != "" {
hostSentinelCommand = buildHostCommand(1, sentinelCommand)
}
var booter reboot.Reboot
if rebootMethod == MethodCommand {
booter = reboot.NewCommandReboot(nodeID, hostRestartCommand)
} else if rebootMethod == MethodSignal {
booter = reboot.NewSignalReboot(nodeID, rebootSignal)
} else {
log.Fatalf("Invalid reboot-method configured: %s", rebootMethod)
}
go rebootAsRequired(nodeID, booter, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
go rebootAsRequired(nodeID, hostRestartCommand, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
go maintainRebootRequiredMetric(nodeID, hostSentinelCommand)
http.Handle("/metrics", promhttp.Handler())

121
go.mod
View File

@@ -1,27 +1,27 @@
module github.com/kubereboot/kured
go 1.21
go 1.19
replace golang.org/x/net => golang.org/x/net v0.23.0
replace golang.org/x/net => golang.org/x/net v0.7.0
replace github.com/emicklei/go-restful/v3 => github.com/emicklei/go-restful/v3 v3.10.2
require (
github.com/containrrr/shoutrrr v0.8.0
github.com/containrrr/shoutrrr v0.7.1
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/google/uuid v1.4.0 // indirect
github.com/prometheus/client_golang v1.19.1
github.com/prometheus/common v0.55.0
github.com/google/uuid v1.3.0 // indirect
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/common v0.44.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.8.1
github.com/spf13/cobra v1.7.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.19.0
github.com/stretchr/testify v1.9.0
gotest.tools/v3 v3.5.1
k8s.io/api v0.29.7
k8s.io/apimachinery v0.29.7
k8s.io/client-go v0.29.7
k8s.io/kubectl v0.29.7
github.com/spf13/viper v1.16.0
github.com/stretchr/testify v1.8.4
gotest.tools/v3 v3.5.0
k8s.io/api v0.26.7
k8s.io/apimachinery v0.26.7
k8s.io/client-go v0.26.7
k8s.io/kubectl v0.26.7
)
require (
@@ -30,24 +30,23 @@ require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/chai2010/gettext-go v1.0.2 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect
github.com/fatih/color v1.15.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-errors/errors v1.4.2 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.3 // indirect
github.com/fatih/color v1.14.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/go-errors/errors v1.0.1 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/google/btree v1.0.1 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
@@ -56,55 +55,51 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/mitchellh/go-wordwrap v1.0.1 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/mitchellh/go-wordwrap v1.0.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/moby/spdystream v0.2.0 // indirect
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect
github.com/moby/term v0.0.0-20220808134915-39b0c02b01ae // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pelletier/go-toml/v2 v2.2.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.8 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect
github.com/spf13/afero v1.11.0 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/xlab/treeprint v1.2.0 // indirect
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/protobuf v1.34.2 // indirect
github.com/spf13/afero v1.9.5 // indirect
github.com/spf13/cast v1.5.1 // indirect
github.com/spf13/jwalterweatherman v1.1.0 // indirect
github.com/subosito/gotenv v1.4.2 // indirect
github.com/xlab/treeprint v1.1.0 // indirect
go.starlark.net v0.0.0-20200306205701-8dd3e2ee1dd5 // indirect
golang.org/x/net v0.10.0 // indirect
golang.org/x/oauth2 v0.8.0 // indirect
golang.org/x/sys v0.8.0 // indirect
golang.org/x/term v0.6.0 // indirect
golang.org/x/text v0.9.0 // indirect
golang.org/x/time v0.1.0 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/cli-runtime v0.29.7 // indirect
k8s.io/component-base v0.29.7 // indirect
k8s.io/klog/v2 v2.110.1 // indirect
k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect
k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect
sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
k8s.io/cli-runtime v0.26.7 // indirect
k8s.io/component-base v0.26.7 // indirect
k8s.io/klog/v2 v2.80.1 // indirect
k8s.io/kube-openapi v0.0.0-20221012153701-172d655c2280 // indirect
k8s.io/utils v0.0.0-20221107191617-1a15be271d1d // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/kustomize/api v0.12.1 // indirect
sigs.k8s.io/kustomize/kyaml v0.13.9 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.3.0 // indirect
)

1199
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -1,100 +0,0 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kured
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kured # Must match `--ds-name`
namespace: kube-system # Must match `--ds-namespace`
spec:
selector:
matchLabels:
name: kured
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: kured
spec:
serviceAccountName: kured
tolerations:
- key: node-role.kubernetes.io/control-plane
effect: NoSchedule
- key: node-role.kubernetes.io/master
effect: NoSchedule
hostPID: true # Facilitate entering the host mount namespace via init
restartPolicy: Always
volumes:
- name: sentinel
hostPath:
path: /var/run
type: Directory
containers:
- name: kured
# If you find yourself here wondering why there is no
# :latest tag on Docker Hub, see the FAQ in the README
image: ghcr.io/kubereboot/kured:1.16.0
imagePullPolicy: IfNotPresent
securityContext:
privileged: false # Give permission to nsenter /proc/1/ns/mnt
readOnlyRootFilesystem: true
allowPrivilegeEscalation: false
capabilities:
drop: ["*"]
add: ["CAP_KILL"]
ports:
- containerPort: 8080
name: metrics
env:
# Pass in the name of the node on which this pod is scheduled
# for use with drain/uncordon operations and lock acquisition
- name: KURED_NODE_ID
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- mountPath: /sentinel
name: sentinel
readOnly: true
command:
- /usr/bin/kured
- --reboot-sentinel=/sentinel/reboot-required
- --reboot-method=signal
# - --reboot-signal=39
# - --force-reboot=false
# - --drain-grace-period=-1
# - --skip-wait-for-delete-timeout=0
# - --drain-timeout=0
# - --period=1h
# - --ds-namespace=kube-system
# - --ds-name=kured
# - --lock-annotation=weave.works/kured-node-lock
# - --lock-ttl=0
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
# - --alert-filter-regexp=^RebootRequired$
# - --alert-firing-only=false
# - --prefer-no-schedule-taint=""
# - --reboot-sentinel-command=""
# - --slack-hook-url=https://hooks.slack.com/...
# - --slack-username=prod
# - --slack-channel=alerting
# - --notify-url="" # See also shoutrrr url format
# - --message-template-drain=Draining node %s
# - --message-template-reboot=Rebooting node %s
# - --message-template-uncordon=Node %s rebooted & uncordoned successfully!
# - --blocking-pod-selector=runtime=long,cost=expensive
# - --blocking-pod-selector=name=temperamental
# - --blocking-pod-selector=...
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
# - --reboot-delay=90s
# - --start-time=0:00
# - --end-time=23:59:59
# - --time-zone=UTC
# - --annotate-nodes=false
# - --lock-release-delay=30m
# - --log-format=text

View File

@@ -29,16 +29,11 @@ spec:
effect: NoSchedule
hostPID: true # Facilitate entering the host mount namespace via init
restartPolicy: Always
volumes:
- name: sentinel
hostPath:
path: /var/run
type: Directory
containers:
- name: kured
# If you find yourself here wondering why there is no
# :latest tag on Docker Hub, see the FAQ in the README
image: ghcr.io/kubereboot/kured:1.16.0
image: ghcr.io/kubereboot/kured:1.13.2
imagePullPolicy: IfNotPresent
securityContext:
privileged: true # Give permission to nsenter /proc/1/ns/mnt
@@ -53,19 +48,12 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
volumeMounts:
- mountPath: /sentinel
name: sentinel
readOnly: true
command:
- /usr/bin/kured
- --reboot-sentinel=/sentinel/reboot-required
# - --force-reboot=false
# - --drain-grace-period=-1
# - --skip-wait-for-delete-timeout=0
# - --drain-delay=0
# - --drain-timeout=0
# - --drain-pod-selector=""
# - --period=1h
# - --ds-namespace=kube-system
# - --ds-name=kured
@@ -75,10 +63,9 @@ spec:
# - --alert-filter-regexp=^RebootRequired$
# - --alert-filter-match-only=false
# - --alert-firing-only=false
# - --reboot-sentinel=/var/run/reboot-required
# - --prefer-no-schedule-taint=""
# - --reboot-sentinel-command=""
# - --reboot-method=command
# - --reboot-signal=39
# - --slack-hook-url=https://hooks.slack.com/...
# - --slack-username=prod
# - --slack-channel=alerting

View File

@@ -75,7 +75,7 @@ func (dsl *DaemonSetLock) Acquire(metadata interface{}, TTL time.Duration) (bool
}
ds.ObjectMeta.Annotations[dsl.annotation] = string(valueBytes)
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.Background(), ds, metav1.UpdateOptions{})
if err != nil {
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
// Something else updated the resource between us reading and writing - try again soon
@@ -245,7 +245,7 @@ func (dsl *DaemonSetLock) Release() error {
delete(ds.ObjectMeta.Annotations, dsl.annotation)
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.Background(), ds, metav1.UpdateOptions{})
if err != nil {
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
// Something else updated the resource between us reading and writing - try again soon
@@ -294,7 +294,7 @@ func (dsl *DaemonSetLock) ReleaseMultiple() error {
}
ds.ObjectMeta.Annotations[dsl.annotation] = string(newAnnotationBytes)
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
_, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.Background(), ds, metav1.UpdateOptions{})
if err != nil {
if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
// Something else updated the resource between us reading and writing - try again soon

View File

@@ -1,25 +0,0 @@
package reboot
import (
"github.com/kubereboot/kured/pkg/util"
log "github.com/sirupsen/logrus"
)
// CommandRebootMethod holds context-information for a command reboot:
// the node being rebooted and the command that performs the reboot.
type CommandRebootMethod struct {
// nodeID names the node; Reboot only uses it in its log message.
nodeID string
// rebootCommand is the command line to execute: element 0 is the program,
// the remaining elements are its arguments.
rebootCommand []string
}
// NewCommandReboot builds a command-based rebooter for the node nodeID that
// will execute rebootCommand (program followed by its arguments).
// This reboot method needs full privileges on the host.
func NewCommandReboot(nodeID string, rebootCommand []string) *CommandRebootMethod {
	method := new(CommandRebootMethod)
	method.nodeID = nodeID
	method.rebootCommand = rebootCommand
	return method
}
// Reboot triggers the command-reboot by running the configured reboot command.
//
// It returns nothing: a missing command or a failing command is reported
// through log.Fatalf.
func (c *CommandRebootMethod) Reboot() {
	// Guard against an empty command so the failure is a clear log message
	// rather than an index-out-of-range panic on rebootCommand[0].
	if len(c.rebootCommand) == 0 {
		log.Fatalf("Error invoking reboot command: no reboot command configured for node: %s", c.nodeID)
	}

	log.Infof("Running command: %s for node: %s", c.rebootCommand, c.nodeID)
	if err := util.NewCommand(c.rebootCommand[0], c.rebootCommand[1:]...).Run(); err != nil {
		log.Fatalf("Error invoking reboot command: %v", err)
	}
}

View File

@@ -1,6 +0,0 @@
package reboot
// Reboot interface defines the Reboot function to be implemented.
// Each reboot method (for example a command-based or a signal-based one)
// provides its own implementation.
type Reboot interface {
// Reboot triggers the reboot of the node. It takes no arguments and
// returns nothing, so implementations must report failures themselves.
Reboot()
}

View File

@@ -1,34 +0,0 @@
package reboot
import (
"os"
"syscall"
log "github.com/sirupsen/logrus"
)
// SignalRebootMethod holds context-information for a signal reboot:
// the node being rebooted and the signal used to request the reboot.
type SignalRebootMethod struct {
// nodeID names the node; Reboot only uses it in its log message.
nodeID string
// signal is the numeric signal that Reboot converts with syscall.Signal
// and sends to the process with PID 1.
signal int
}
// NewSignalReboot builds a signal-based rebooter for the node nodeID that
// will emit the given signal number. This reboot method can run unprivileged.
func NewSignalReboot(nodeID string, signal int) *SignalRebootMethod {
	method := new(SignalRebootMethod)
	method.nodeID = nodeID
	method.signal = signal
	return method
}
// Reboot triggers the signal-reboot by sending the configured signal to the
// process with PID 1.
//
// It returns nothing: a failure to look up PID 1 or to deliver the signal is
// reported through log.Fatalf.
func (c *SignalRebootMethod) Reboot() {
	log.Infof("Emit reboot-signal for node: %s", c.nodeID)

	process, err := os.FindProcess(1)
	if err != nil {
		log.Fatalf("There was no systemd process found: %v", err)
	}

	// The signal number is configurable, so report the one that was actually
	// sent instead of a fixed signal name.
	if err := process.Signal(syscall.Signal(c.signal)); err != nil {
		log.Fatalf("Signal %d failed: %v", c.signal, err)
	}
}

View File

@@ -65,7 +65,7 @@ func (t *Taint) Disable() {
}
func taintExists(client *kubernetes.Clientset, nodeID, taintName string) (bool, int, *v1.Node) {
updatedNode, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
updatedNode, err := client.CoreV1().Nodes().Get(context.Background(), nodeID, metav1.GetOptions{})
if err != nil || updatedNode == nil {
log.Fatalf("Error reading node %s: %v", nodeID, err)
}
@@ -153,7 +153,7 @@ func preferNoSchedule(client *kubernetes.Clientset, nodeID, taintName string, ef
log.Fatalf("Error encoding taint patch for node %s: %v", nodeID, err)
}
_, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
_, err = client.CoreV1().Nodes().Patch(context.Background(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
if err != nil {
log.Fatalf("Error patching taint for node %s: %v", nodeID, err)
}

View File

@@ -1,23 +0,0 @@
package util
import (
"os/exec"
log "github.com/sirupsen/logrus"
)
// NewCommand builds an *exec.Cmd for name and arg whose stdout and stderr are
// forwarded to the standard logger: stdout at info level, stderr at warn
// level. Both streams are tagged with the fields "cmd" (the command's first
// argument) and "std" ("out" or "err").
func NewCommand(name string, arg ...string) *exec.Cmd {
	cmd := exec.Command(name, arg...)

	// Shared base entry; WithField returns a new entry, so each stream gets
	// its own independently tagged logger.
	tagged := log.NewEntry(log.StandardLogger()).WithField("cmd", cmd.Args[0])

	cmd.Stdout = tagged.WithField("std", "out").WriterLevel(log.InfoLevel)
	cmd.Stderr = tagged.WithField("std", "err").WriterLevel(log.WarnLevel)

	return cmd
}