Compare commits

..

48 Commits

Author SHA1 Message Date
Daniel Holbach
ca56c02e2e Merge pull request #646 from evrardjp/clarify_for_redirected_users
Display new URL for redirected users
2022-09-20 15:01:59 +02:00
Jean-Philippe Evrard
c0092171f4 Display new URL for redirected users
To add clarity, we should publish, on our old page, the
information about how to use the new helm charts.

Signed-off-by: Jean-Philippe Evrard <open-source@a.spamming.party>
2022-09-20 14:59:34 +02:00
dholbach
9f0583ba71 Publish kured-3.0.1.tgz 2022-08-20 09:11:27 +00:00
ckotzbauer
c3cca29970 Publish kured-3.0.0.tgz 2022-07-31 13:51:38 +00:00
ckotzbauer
08ba855e41 Publish kured-2.17.0.tgz 2022-07-01 15:44:53 +00:00
jackfrancis
c5d21d4e03 Publish kured-2.16.0.tgz 2022-06-29 12:50:23 +00:00
ckotzbauer
f0815759b2 Publish kured-2.15.0.tgz 2022-06-08 17:32:33 +00:00
ckotzbauer
8b983e2507 Publish kured-2.14.2.tgz 2022-05-25 04:51:50 +00:00
ckotzbauer
090b33b726 Publish kured-2.14.1.tgz 2022-05-12 06:57:59 +00:00
jackfrancis
1bc4d46483 Publish kured-2.14.0.tgz 2022-05-06 19:42:06 +00:00
ckotzbauer
14636ee333 Publish kured-2.13.0.tgz 2022-04-02 15:26:54 +00:00
ckotzbauer
aba74cb73b Publish kured-2.12.1.tgz 2022-03-29 10:07:10 +00:00
ckotzbauer
40c99fbd76 Publish kured-2.12.0.tgz 2022-03-16 10:49:00 +00:00
ckotzbauer
18e4566504 Publish kured-2.11.2.tgz 2022-01-12 06:25:36 +00:00
ckotzbauer
55c66a4751 Publish kured-2.11.1.tgz 2022-01-06 18:13:28 +00:00
ckotzbauer
fdcc8438ac Publish kured-2.11.0.tgz 2021-12-17 13:15:05 +00:00
dholbach
09c6ac3a13 Publish kured-2.10.2.tgz 2021-12-06 14:04:27 +00:00
evrardjp
bd5fd3312a Publish kured-2.10.1.tgz 2021-11-27 10:19:18 +00:00
ckotzbauer
20f61e3a13 Publish kured-2.10.0.tgz 2021-10-08 14:02:19 +00:00
ckotzbauer
dc746f5f88 Publish kured-2.9.1.tgz 2021-09-15 16:46:01 +00:00
ckotzbauer
ec79ea66d9 Publish kured-2.9.0.tgz 2021-08-06 07:39:04 +00:00
ckotzbauer
72913ee233 Publish kured-2.8.0.tgz 2021-07-26 11:19:41 +00:00
dholbach
4b1506e15d Publish kured-2.7.1.tgz 2021-07-16 07:55:58 +00:00
ckotzbauer
6f7abae29b Publish kured-2.7.0.tgz 2021-06-17 16:14:33 +00:00
ckotzbauer
9db0ef7a38 Publish kured-2.6.0.tgz 2021-05-20 11:56:16 +00:00
ckotzbauer
f13943b929 Publish kured-2.5.0.tgz 2021-05-19 17:10:18 +00:00
dholbach
db4510d21a Publish kured-2.4.3.tgz 2021-04-14 08:11:51 +00:00
dholbach
cccf89601c Publish kured-2.4.2.tgz 2021-04-06 13:01:16 +00:00
evrardjp
606cc3b935 Publish kured-2.4.1.tgz 2021-04-02 08:06:31 +00:00
dholbach
491b55acb1 Publish kured-2.4.0.tgz 2021-03-11 11:05:05 +00:00
dholbach
091028f331 Publish kured-2.3.2.tgz 2021-02-08 15:05:49 +00:00
dholbach
df0d58e3ae Publish kured-2.3.1.tgz 2021-01-11 15:39:49 +00:00
dholbach
54dfa59722 Publish kured-2.3.0.tgz 2021-01-11 14:19:17 +00:00
dholbach
5fae235d6a Publish kured-2.2.4.tgz 2021-01-11 13:54:21 +00:00
Daniel Holbach
20bc76497d Merge pull request #265 from evrardjp/remove-circle-ci
Remove circle ci configuration from gh pages
2020-12-07 16:41:41 +01:00
Jean-Philippe Evrard
ed9e8f2b35 Remove circle ci configuration from gh pages
We removed Circle CI from this repo, as we can do everything
from github actions. There is no point in keeping this
configuration here. Removing.
2020-12-07 16:37:43 +01:00
dholbach
a59b47e75f Publish kured-2.2.1.tgz 2020-11-24 14:40:37 +00:00
dholbach
feaf366ac0 Publish kured-2.2.0.tgz 2020-09-01 13:54:50 +00:00
dholbach
972bab5e60 Publish kured-2.1.1.tgz 2020-08-05 09:35:40 +00:00
dholbach
2575ab4bed Publish kured-2.0.3.tgz 2020-07-01 09:30:45 +00:00
dholbach
250e1f0f58 Publish kured-2.0.1.tgz 2020-06-30 16:32:52 +00:00
Daniel Holbach
554cf53b7b Merge pull request #160 from dholbach/add-gh-page-readme
add README explaining how to install kured from chart
2020-06-30 17:37:43 +02:00
Daniel Holbach
a415ae856f add README explaining how to install kured from chart 2020-06-30 17:37:09 +02:00
Daniel Holbach
622c1c6082 Merge pull request #159 from dholbach/filter-out-gh-pages
Don't run Circle CI on gh-pages branch
2020-06-30 13:27:17 +02:00
Daniel Holbach
8ed3e7991d Don't run Circle CI on gh-pages branch
Follow the lead of
	https://github.com/weaveworks/flagger/blob/gh-pages/.circleci/config.yml
2020-06-30 11:28:14 +02:00
Daniel Holbach
18e1a4537d add a README.md file 2020-06-30 11:17:11 +02:00
dholbach
c4287dc22b Publish kured-2.0.0.tgz 2020-06-30 08:48:58 +00:00
Daniel Holbach
bc43dacf4a Create empty gh-pages branch
We will use Github pages for publishing our helm chart via
	https://github.com/stefanprodan/helm-gh-pages
2020-06-17 12:11:14 +02:00
96 changed files with 697 additions and 5689 deletions

7
.github/ct.yaml vendored
View File

@@ -1,7 +0,0 @@
# See https://github.com/helm/chart-testing#configuration
remote: origin
target-branch: main
chart-dirs:
- charts
chart-repos: []
helm-extra-args: --timeout 600s

View File

@@ -1,21 +0,0 @@
version: 2
updates:
# Maintain dependencies for GitHub Actions
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
# Maintain dependencies for gomod
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "daily"
ignore:
- dependency-name: "k8s.io/api"
- dependency-name: "k8s.io/apimachinery"
- dependency-name: "k8s.io/client-go"
- dependency-name: "k8s.io/kubectl"
- package-ecosystem: "docker"
directory: "cmd/kured"
schedule:
interval: "daily"

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: kindest/node:v1.21.2
- role: control-plane
image: kindest/node:v1.21.2
- role: control-plane
image: kindest/node:v1.21.2
- role: worker
image: kindest/node:v1.21.2
- role: worker
image: kindest/node:v1.21.2

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: kindest/node:v1.22.4
- role: control-plane
image: kindest/node:v1.22.4
- role: control-plane
image: kindest/node:v1.22.4
- role: worker
image: kindest/node:v1.22.4
- role: worker
image: kindest/node:v1.22.4

View File

@@ -1,13 +0,0 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
image: "kindest/node:v1.23.0"
- role: control-plane
image: "kindest/node:v1.23.0"
- role: control-plane
image: "kindest/node:v1.23.0"
- role: worker
image: "kindest/node:v1.23.0"
- role: worker
image: "kindest/node:v1.23.0"

View File

@@ -1,19 +0,0 @@
name: Publish helm chart
on:
push:
branches:
- "main"
paths:
- "charts/**"
jobs:
publish-helm-chart:
name: Publish latest chart
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Publish Helm chart
uses: stefanprodan/helm-gh-pages@master
with:
token: ${{ secrets.GITHUB_TOKEN }}
charts_dir: charts

View File

@@ -1,45 +0,0 @@
# We publish every merged commit in the form of an image
# named kured:<branch>-<short tag>
name: Push image of latest main
on:
push:
branches:
- main
jobs:
tag-scan-and-push-final-image:
name: "Build, scan, and publish tagged image"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME_WEAVEWORKSKUREDCI }}
password: ${{ secrets.DOCKERHUB_TOKEN_WEAVEWORKSKUREDCI }}
- name: Login to ghcr.io
uses: docker/login-action@v1
with:
registry: ghcr.io
username: weave-ghcr-bot
password: ${{ secrets.KURED_WEAVE_GHCR_BOT_TOKEN }}
- name: Build image
run: |
make DH_ORG="${{ github.repository_owner }}" image
- name: Publish image
run: |
make DH_ORG="${{ github.repository_owner }}" publish-image

View File

@@ -1,78 +0,0 @@
#This is just extra testing, for lint check, and basic installation
#Those can fail earlier than functional tests (shorter tests)
# and give developer feedback soon if they didn't test themselves
name: PR - charts
on:
pull_request:
paths:
- "charts/**"
jobs:
# We create two jobs (with a matrix) instead of one to make those parallel.
# We don't need to conditionally check if something has changed, due to github actions
# tackling that for us.
# Fail-fast ensures that if one of those matrix job fail, the other one gets cancelled.
test-chart:
name: Test helm chart changes
runs-on: ubuntu-latest
strategy:
fail-fast: true
matrix:
test-action:
- lint
- install
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: "0"
- uses: actions/setup-python@v2
with:
python-version: 3.7
# Helm is already present in github actions, so do not re-install it
- name: Setup chart testing
uses: helm/chart-testing-action@v2.1.0
- name: Create default kind cluster
uses: helm/kind-action@v1.2.0
with:
version: v0.11.0
if: ${{ matrix.test-action == 'install' }}
- name: Run chart tests
run: ct ${{ matrix.test-action }} --config .github/ct.yaml
# This doesn't re-use the ct actions, due to many limitations (auto tear down, no real testing)
deploy-chart:
name: Functional test of helm chart in its current state (needs published image of the helm chart)
runs-on: ubuntu-latest
needs: test-chart
steps:
- uses: actions/checkout@v2
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create 1 node kind cluster
uses: helm/kind-action@v1.2.0
with:
version: v0.11.0
- name: Deploy kured on default namespace with its helm chart
run: |
# Documented in official helm doc to live on the edge
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
# Refresh bins
hash -r
helm install kured ./charts/kured/ --set configuration.period=1m --wait
kubectl config set-context kind-chart-testing
kubectl get ds --all-namespaces
kubectl describe ds kured
- name: Test if successful deploy
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 10
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
command: "kubectl get ds kured | grep -E 'kured.*1.*1.*1.*1.*1'"

View File

@@ -1,336 +0,0 @@
name: PR
on:
pull_request:
push:
jobs:
pr-gotest:
name: Run go tests
runs-on: ubuntu-18.04
steps:
- name: checkout
uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: run tests
run: go test -json ./... > test.json
- name: Annotate tests
if: always()
uses: guyarb/golang-test-annoations@v0.5.0
with:
test-results: test.json
pr-shellcheck:
name: Lint bash code with shellcheck
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Run ShellCheck
uses: bewuethr/shellcheck-action@v2
pr-lint-code:
name: Lint golang code
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: Lint cmd folder
uses: Jerome1337/golint-action@v1.0.2
with:
golint-path: './cmd/...'
- name: Lint pkg folder
uses: Jerome1337/golint-action@v1.0.2
with:
golint-path: './pkg/...'
pr-check-docs-links:
name: Check docs for incorrect links
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Link Checker
id: lc
uses: peter-evans/link-checker@v1
with:
args: -r *.md *.yaml */*/*.go -x .cluster.local
- name: Fail if there were link errors
run: exit ${{ steps.lc.outputs.exit_code }}
# This should not be made a mandatory test
# It is only used to make us aware of any potential security failure, that
# should trigger a bump of the image in build/.
pr-vuln-scan:
name: Build image and scan it against known vulnerabilities
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- run: make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
- uses: Azure/container-scan@v0
with:
image-name: docker.io/${{ github.repository_owner }}/kured:${{ github.sha }}
# This ensures the latest code works with the manifests built from tree.
# It is useful for two things:
# - Test manifests changes (obviously), ensuring they don't break existing clusters
# - Ensure manifests work with the latest versions even with no manifest change
# (compared to helm charts, manifests cannot easily template changes based on versions)
# Helm charts are _trailing_ releases, while manifests are done during development.
e2e-manifests:
name: End-to-End test with kured with code and manifests from HEAD
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
kubernetes:
- "1.21"
- "1.22"
- "1.23"
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: Build artifacts
run: |
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" manifest
- name: Workaround "Failed to attach 1 to compat systemd cgroup /actions_job/..." on gh actions
run: |
sudo bash << EOF
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
echo '{}' > /etc/docker/daemon.json
systemctl restart docker || journalctl --no-pager -n 500
systemctl status docker
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create kind cluster with 5 nodes
uses: helm/kind-action@v1.2.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.11.0
- name: Preload previously built images onto kind cluster
run: kind load docker-image docker.io/${{ github.repository_owner }}/kured:${{ github.sha }} --name chart-testing
- name: Do not wait for an hour before detecting the rebootSentinel
run: |
sed -i 's/#\(.*\)--period=1h/\1--period=30s/g' kured-ds.yaml
- name: Install kured with kubectl
run: |
kubectl apply -f kured-rbac.yaml && kubectl apply -f kured-ds.yaml
- name: Ensure kured is ready
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 60
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = to cluster_size
command: "kubectl get ds -n kube-system kured | grep -E 'kured.*5.*5.*5.*5.*5'"
- name: Create reboot sentinel files
run: |
./tests/kind/create-reboot-sentinels.sh
- name: Follow reboot until success
env:
DEBUG: true
run: |
./tests/kind/follow-coordinated-reboot.sh
scenario-prom-helm:
name: Test prometheus with latest code from HEAD (=overrides image of the helm chart)
runs-on: ubuntu-latest
# only build with oldest and newest supported, it should be good enough.
strategy:
fail-fast: false
matrix:
kubernetes:
- "1.21"
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: Build artifacts
run: |
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" helm-chart
- name: Workaround 'Failed to attach 1 to compat systemd cgroup /actions_job/...' on gh actions
run: |
sudo bash << EOF
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
echo '{}' > /etc/docker/daemon.json
systemctl restart docker || journalctl --no-pager -n 500
systemctl status docker
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create 1 node kind cluster
uses: helm/kind-action@v1.2.0
with:
version: v0.11.0
- name: Preload previously built images onto kind cluster
run: kind load docker-image docker.io/${{ github.repository_owner }}/kured:${{ github.sha }} --name chart-testing
- name: Deploy kured on default namespace with its helm chart
run: |
# Documented in official helm doc to live on the edge
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
# Refresh bins
hash -r
helm install kured ./charts/kured/ --wait --values ./charts/kured/ci/prometheus-values.yaml
kubectl config set-context kind-chart-testing
kubectl get ds --all-namespaces
kubectl describe ds kured
- name: Ensure kured is ready
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 60
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE
command: "kubectl get ds kured | grep -E 'kured.*1.*1.*1.*1.*1' "
- name: Get metrics (healthy)
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 2
max_attempts: 12
retry_wait_seconds: 5
command: "./tests/kind/test-metrics.sh 0"
- name: Create reboot sentinel files
run: |
./tests/kind/create-reboot-sentinels.sh
- name: Get metrics (need reboot)
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 15
max_attempts: 10
retry_wait_seconds: 60
command: "./tests/kind/test-metrics.sh 1"
# TEMPLATE Scenario testing.
# Note: keep in mind that the helm chart's appVersion is overriden to test your HEAD of the branch,
# if you `make helm-chart`.
# This will allow you to test properly your scenario and not use an existing image which will not
# contain your feature.
# scenario-<REPLACETHIS>-helm:
# #example: Testing <REPLACETHIS> with helm chart and code from HEAD"
# name: "<REPLACETHIS>"
# runs-on: ubuntu-latest
# strategy:
# fail-fast: false
# # You can define your own kubernetes versions. For example if your helm chart change should behave differently with different kubernetes versions.
# matrix:
# kubernetes:
# - "1.20"
# steps:
# - uses: actions/checkout@v2
# - name: Find go version
# run: |
# GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
# echo "::set-output name=version::${GO_VERSION}"
# id: awk_gomod
# - name: Ensure go version
# uses: actions/setup-go@v2
# with:
# go-version: "${{ steps.awk_gomod.outputs.version }}"
# - name: Build artifacts
# run: |
# make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
# make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" helm-chart
#
# - name: "Workaround 'Failed to attach 1 to compat systemd cgroup /actions_job/...' on gh actions"
# run: |
# sudo bash << EOF
# cp /etc/docker/daemon.json /etc/docker/daemon.json.old
# echo '{}' > /etc/docker/daemon.json
# systemctl restart docker || journalctl --no-pager -n 500
# systemctl status docker
# EOF
#
# # Default name for helm/kind-action kind clusters is "chart-testing"
# - name: Create 5 node kind cluster
# uses: helm/kind-action@master
# with:
# config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
#
# - name: Preload previously built images onto kind cluster
# run: kind load docker-image docker.io/${{ github.repository_owner }}/kured:${{ github.sha }} --name chart-testing
#
# - name: Deploy kured on default namespace with its helm chart
# run: |
# # Documented in official helm doc to live on the edge
# curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
# # Refresh bins
# hash -r
# helm install kured ./charts/kured/ --wait --values ./charts/kured/ci/<REPLACETHIS>-values.yaml
# kubectl config set-context kind-chart-testing
# kubectl get ds --all-namespaces
# kubectl describe ds kured
#
# - name: Ensure kured is ready
# uses: nick-invision/retry@v2.6.0
# with:
# timeout_minutes: 10
# max_attempts: 10
# retry_wait_seconds: 60
# # DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = 5
# command: "kubectl get ds kured | grep -E 'kured.*5.*5.*5.*5.*5' "
#
# - name: Create reboot sentinel files
# run: |
# ./tests/kind/create-reboot-sentinels.sh
#
# - name: Test <REPLACETHIS>
# env:
# DEBUG: true
# run: |
# <TODO>

View File

@@ -1,49 +0,0 @@
# when we add a tag to the repo, we should publish the kured image to a public repository
# if it's safe.
# It doesn't mean it's ready for release, but at least it's getting us started.
# The next step is to have a PR with the helm chart, to bump the version of the image used
name: Tag repo
on:
push:
tags:
- "*"
jobs:
tag-scan-and-push-final-image:
name: "Build, scan, and publish tagged image"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: Find current tag version
run: echo "::set-output name=version::${GITHUB_REF#refs/tags/}"
id: tags
- run: |
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ steps.tags.outputs.version }}" image
- uses: Azure/container-scan@v0
with:
image-name: docker.io/${{ github.repository_owner }}/kured:${{ steps.tags.outputs.version }}
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME_WEAVEWORKSKUREDCI }}
password: ${{ secrets.DOCKERHUB_TOKEN_WEAVEWORKSKUREDCI }}
- name: Login to ghcr.io
uses: docker/login-action@v1
with:
registry: ghcr.io
username: weave-ghcr-bot
password: ${{ secrets.KURED_WEAVE_GHCR_BOT_TOKEN }}
- name: Publish image
run: |
make DH_ORG="${{ github.repository_owner }}" VERSION="${{ steps.tags.outputs.version }}" publish-image

View File

@@ -1,136 +0,0 @@
name: Daily jobs
on:
schedule:
- cron: "30 1 * * *"
jobs:
periodics-gotest:
name: Run go tests
runs-on: ubuntu-18.04
steps:
- name: checkout
uses: actions/checkout@v2
- name: run tests
run: go test -json ./... > test.json
- name: Annotate tests
if: always()
uses: guyarb/golang-test-annoations@v0.5.0
with:
test-results: test.json
periodics-mark-stale:
name: Mark stale issues and PRs
runs-on: ubuntu-latest
steps:
# Stale by default waits for 60 days before marking PR/issues as stale, and closes them after 21 days.
# Do not expire the first issues that would allow the community to grow.
- uses: actions/stale@v4
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
stale-issue-message: 'This issue was automatically considered stale due to lack of activity. Please update it and/or join our slack channels to promote it, before it automatically closes (in 7 days).'
stale-pr-message: 'This PR was automatically considered stale due to lack of activity. Please refresh it and/or join our slack channels to highlight it, before it automatically closes (in 7 days).'
stale-issue-label: 'no-issue-activity'
stale-pr-label: 'no-pr-activity'
exempt-issue-labels: 'good first issue,keep'
days-before-close: 21
check-docs-links:
name: Check docs for incorrect links
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Link Checker
id: lc
uses: peter-evans/link-checker@v1
with:
args: -r *.md *.yaml */*/*.go -x .cluster.local
- name: Fail if there were link errors
run: exit ${{ steps.lc.outputs.exit_code }}
vuln-scan:
name: Build image and scan it against known vulnerabilities
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- run: make DH_ORG="${{ github.repository_owner }}" VERSION="${{ github.sha }}" image
- uses: Azure/container-scan@v0
with:
image-name: docker.io/${{ github.repository_owner }}/kured:${{ github.sha }}
deploy-helm:
name: Ensure our currently released helm chart works on all kubernetes versions
runs-on: ubuntu-latest
# only build with oldest and newest supported, it should be good enough.
strategy:
matrix:
kubernetes:
- "1.21"
- "1.22"
- "1.23"
steps:
- uses: actions/checkout@v2
- name: Find go version
run: |
GO_VERSION=$(awk '/^go/ {print $2};' go.mod)
echo "::set-output name=version::${GO_VERSION}"
id: awk_gomod
- name: Ensure go version
uses: actions/setup-go@v2
with:
go-version: "${{ steps.awk_gomod.outputs.version }}"
- name: "Workaround 'Failed to attach 1 to compat systemd cgroup /actions_job/...' on gh actions"
run: |
sudo bash << EOF
cp /etc/docker/daemon.json /etc/docker/daemon.json.old
echo '{}' > /etc/docker/daemon.json
systemctl restart docker || journalctl --no-pager -n 500
systemctl status docker
EOF
# Default name for helm/kind-action kind clusters is "chart-testing"
- name: Create 5 node kind cluster
uses: helm/kind-action@v1.2.0
with:
config: .github/kind-cluster-${{ matrix.kubernetes }}.yaml
version: v0.11.0
- name: Deploy kured on default namespace with its helm chart
run: |
# Documented in official helm doc to live on the edge
curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
# Refresh bins
hash -r
helm install kured ./charts/kured/ --set configuration.period=1m
kubectl config set-context kind-chart-testing
kubectl get ds --all-namespaces
kubectl describe ds kured
- name: Ensure kured is ready
uses: nick-invision/retry@v2.6.0
with:
timeout_minutes: 10
max_attempts: 10
retry_wait_seconds: 60
# DESIRED CURRENT READY UP-TO-DATE AVAILABLE should all be = 5
command: "kubectl get ds kured | grep -E 'kured.*5.*5.*5.*5.*5' "
- name: Create reboot sentinel files
run: |
./tests/kind/create-reboot-sentinels.sh
- name: Follow reboot until success
env:
DEBUG: true
run: |
./tests/kind/follow-coordinated-reboot.sh

3
.gitignore vendored
View File

@@ -1,3 +0,0 @@
cmd/kured/kured
vendor
build

View File

@@ -1,235 +0,0 @@
# Developing `kured`
We love contributions to `kured`, no matter if you are [helping out on
Slack][slack], reporting or triaging [issues][issues] or contributing code
to `kured`.
In any case, it will make sense to familiarise yourself with the main
[README][readme] to understand the different features and options, which is
helpful for testing. The "building" section in particular makes sense if
you are planning to contribute code.
[slack]: README.md#getting-help
[issues]: https://github.com/weaveworks/kured/issues
[readme]: README.md
## Regular development activities
### Updating k8s support
Whenever we want to update e.g. the `kubectl` or `client-go` dependencies,
some RBAC changes might be necessary too.
This is what it took to support Kubernetes 1.14:
<https://github.com/weaveworks/kured/pull/75>
That the process can be more involved that that can be seen in
<https://github.com/weaveworks/kured/commits/support-k8s-1.10>
Please update our .github/workflows with the new k8s images, starting by
the creation of a .github/kind-cluster-<version>.yaml, then updating
our workflows with the new versions.
Once you updated everything, make sure you update the support matrix on
the main [README][readme] as well.
### Updating other dependencies
Dependabot proposes changes in our go.mod/go.sum.
Some of those changes are covered by CI testing, some are not.
Please make sure to test those not covered by CI (mostly the integration
with other tools) manually before merging.
### Review periodic jobs
We run periodic jobs (see also Automated testing section of this documentation).
Those should be monitored for failures.
If a failure happen in periodics, something terribly wrong must have happened
(or github is failing at the creation of a kind cluster). Please monitor those
failures carefully.
### Introducing new features
When you introduce a new feature, the kured team expects you to have tested
your change thoroughly. If possible, include all the necessary testing in your change.
If your change involves a user facing change (change in flags of kured for example),
please include expose your new feature in our default manifest (`kured-ds.yaml`),
as a comment.
Do not update the helm chart directly.
Helm charts and our release manifests (see below) are our stable interfaces.
Any user facing changes will therefore have to wait for a while before being
exposed to our users.
This also means that when you expose a new feature, you should create another PR
for your changes in `charts/` to make your feature available for our next kured version.
In this change, you can directly bump the appVersion to the next minor version.
(for example, if current appVersion is 1.6.x, make sure you update your appVersion
to 1.7.0). It allows us to have an easy view of what we land each release.
Do not hesitate to increase the test coverage for your feature, whether it's unit
testing to full functional testing (even using helm charts)
### Increasing test coverage
We are welcoming any change to increase our test coverage.
See also our github issues for the label `testing`.
### Updating helm charts
Helm charts are continuously published. Any change in `charts/` will be immediately
pushed in production.
## Automated testing
Our CI is covered by github actions.
You can see their contents in .github/workflows.
We currently run:
- go tests and lint
- shellcheck
- a check for dead links in our docs
- a security check against our base image (alpine)
- a deep functional test using our manifests on all supported k8s versions
- basic deployment using our helm chart on any chart change
Changes in helm charts are not functionally tested on PRs. We assume that
the PRs to implement the feature are properly tested by our users and
contributors before merge.
To test your code manually, follow the section Manual testing.
## Manual (release) testing
Before `kured` is released, we want to make sure it still works fine on the
previous, current and next minor version of Kubernetes (with respect to the
`client-go` & `kubectl` dependencies in use). For local testing e.g.
`minikube` or `kind` can be sufficient. This will allow you to catch issues
that might not have been tested in our CI, like integration with other tools,
or your specific use case.
Deploy kured in your test scenario, make sure you pass the right `image`,
update the e.g. `period` and `reboot-days` options, so you get immediate
results, if you login to a node and run:
```console
sudo touch /var/run/reboot-required
```
### Example of golang testing
Please run `make test`. You should have golint installed.
### Example of testing with `minikube`
A test-run with `minikube` could look like this:
```console
# start minikube
minikube start --vm-driver kvm2 --kubernetes-version <k8s-release>
# build kured image and publish to registry accessible by minikube
make image minikube-publish
# edit kured-ds.yaml to
# - point to new image
# - change e.g. period and reboot-days option for immediate results
minikube kubectl -- apply -f kured-rbac.yaml
minikube kubectl -- apply -f kured-ds.yaml
minikube kubectl -- logs daemonset.apps/kured -n kube-system -f
# Alternatively use helm to install the chart
# edit values-local.yaml to change any chart parameters
helm install kured ./charts/kured --namespace kube-system -f ./charts/kured/values.minikube.yaml
# In separate terminal
minikube ssh
sudo touch /var/run/reboot-required
minikube logs -f
```
Now check for the 'Commanding reboot' message and minikube going down.
Unfortunately as of today, you are going to run into
<https://github.com/kubernetes/minikube/issues/2874>. This means that
minikube won't come back easily. You will need to start minikube again.
Then you can check for the lock release.
If all the tests ran well, kured maintainers can reach out to the Weaveworks
team to get an upcoming `kured` release tested in the Dev environment for
real life testing.
### Example of testing with `kind`
A test-run with `kind` could look like this:
```console
# create kind cluster
kind create cluster --config .github/kind-cluster-<k8s-version>.yaml
# create reboot required files on pre-defined kind nodes
./tests/kind/create-reboot-sentinels.sh
# check if reboot is working fine
./tests/kind/follow-coordinated-reboot.sh
```
## Publishing a new kured release
### Prepare Documentation
Check that `README.md` has an updated compatibility matrix and that the
url in the `kubectl` incantation (under "Installation") is updated to the
new version you want to release.
### Create a tag on the repo
Before going further, we should freeze the code for a release, by
tagging the code. The Github-Action should start a new job and push
the new image to the registry.
### Create the combined manifest
Now create the `kured-<release>-dockerhub.yaml` for e.g. `1.3.0`:
```sh
VERSION=1.3.0
MANIFEST="kured-$VERSION-dockerhub.yaml"
make DH_ORG="weaveworks" VERSION="${VERSION}" manifest
cat kured-rbac.yaml > "$MANIFEST"
cat kured-ds.yaml >> "$MANIFEST"
```
### Publish release artifacts
Now you can head to the Github UI, use the version number as tag and upload the
`kured-<release>-dockerhub.yaml` file.
Please describe what's new and noteworthy in the release notes, list the PRs
that landed and give a shout-out to everyone who contributed.
Please also note down on which releases the upcoming `kured` release was
tested on. (Check old release notes if you're unsure.)
### Update the Helm chart
You can automatically bump the helm chart's application version
with the latest image tag by running:
```sh
make DH_ORG="weaveworks" VERSION="1.3.0" helm-chart
```
A change in the helm chart requires a bump of the `version`
in `charts/kured/Chart.yaml` (following the versioning rules).
Update it, and issue a PR. Upon merge, that PR will automatically
publish the chart to the gh-pages branch.
When there are open helm-chart PRs which are on hold until the helm-chart has been updated
with the new kured version, they can be merged now (unless a rebase is needed from the contributor).

191
LICENSE
View File

@@ -1,191 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2017 Weaveworks Ltd.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -1,5 +0,0 @@
Christian Kotzbauer <christian.kotzbauer@gmail.com> (@ckotzbauer)
Daniel Holbach <daniel@weave.works> (@dholbach)
Hidde Beydals <hidde@weave.works> (@hiddeco)
Jean-Philippe Evrard <jean-philippe.evrard@suse.com> (@evrardjp)
Jack Francis <jackfrancis@gmail.com> (@jackfrancis)

View File

@@ -1,55 +0,0 @@
# Makefile for building, tagging, publishing, and releasing kured.
.DEFAULT: all
.PHONY: all clean image publish-image minikube-publish manifest helm-chart test tests
# Docker Hub organisation and image version; VERSION defaults to <branch>-<short-sha>.
DH_ORG=weaveworks
VERSION=$(shell git symbolic-ref --short HEAD)-$(shell git rev-parse --short HEAD)
# Prefix docker commands with "sudo -E" when the current user cannot talk to the daemon.
SUDO=$(shell docker info >/dev/null 2>&1 || echo "sudo -E")
all: image
# Remove the built binary and the image build directory.
clean:
rm -f cmd/kured/kured
rm -rf ./build
# godeps lists the non-vendored Go source files a package depends on,
# so the binary is rebuilt whenever any dependency file changes.
godeps=$(shell go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
DEPS=$(call godeps,./cmd/kured)
# Build a static linux/amd64 binary with the version string baked in via -ldflags.
cmd/kured/kured: $(DEPS)
cmd/kured/kured: cmd/kured/*.go
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "-X main.version=$(VERSION)" -o $@ cmd/kured/*.go
# Build the container image and tag it for both Docker Hub and GHCR;
# the touched sentinel file prevents needless rebuilds.
build/.image.done: cmd/kured/Dockerfile cmd/kured/kured
mkdir -p build
cp $^ build
$(SUDO) docker build -t docker.io/$(DH_ORG)/kured -f build/Dockerfile ./build
$(SUDO) docker tag docker.io/$(DH_ORG)/kured docker.io/$(DH_ORG)/kured:$(VERSION)
$(SUDO) docker tag docker.io/$(DH_ORG)/kured ghcr.io/$(DH_ORG)/kured:$(VERSION)
touch $@
image: build/.image.done
# Push the versioned image to both registries.
publish-image: image
$(SUDO) docker push docker.io/$(DH_ORG)/kured:$(VERSION)
$(SUDO) docker push ghcr.io/$(DH_ORG)/kured:$(VERSION)
# Load the image straight into minikube's docker daemon for local testing.
minikube-publish: image
$(SUDO) docker save docker.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
# Point the daemonset manifest at the released image tag.
# NOTE: GNU 'sed -i' syntax — this differs on BSD/macOS sed.
manifest:
sed -i "s#image: docker.io/.*kured.*#image: docker.io/$(DH_ORG)/kured:$(VERSION)#g" kured-ds.yaml
echo "Please generate combined manifest if necessary"
# Update image repository, appVersion, and README version references in the Helm chart;
# the chart's own 'version' field must still be bumped manually.
helm-chart:
sed -i "s#repository:.*/kured#repository: $(DH_ORG)/kured#g" charts/kured/values.yaml
sed -i "s#appVersion:.*#appVersion: \"$(VERSION)\"#g" charts/kured/Chart.yaml
sed -i "s#\`[0-9]*\.[0-9]*\.[0-9]*\`#\`$(VERSION)\`#g" charts/kured/README.md
echo "Please bump version in charts/kured/Chart.yaml"
# Run the Go unit tests and lint pkg/ and cmd/.
test: tests
echo "Running go tests"
go test ./...
echo "Running golint on pkg"
golint ./pkg/...
echo "Running golint on cmd"
golint ./cmd/...

375
README.md
View File

@@ -1,377 +1,20 @@
# Kured Helm Repository
# kured - Kubernetes Reboot Daemon
![Kured](https://raw.githubusercontent.com/kubereboot/kured/main/img/logo.png)
<img src="https://github.com/weaveworks/kured/raw/main/img/logo.png" align="right"/>
Caution! We are currently in the middle of the move to a different github organisation.
Here is the info for the new organisation.
- [Introduction](#introduction)
- [Kubernetes & OS Compatibility](#kubernetes--os-compatibility)
- [Installation](#installation)
- [Configuration](#configuration)
- [Reboot Sentinel File & Period](#reboot-sentinel-file--period)
- [Setting a schedule](#setting-a-schedule)
- [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
- [Blocking Reboots via Pods](#blocking-reboots-via-pods)
- [Prometheus Metrics](#prometheus-metrics)
- [Notifications](#notifications)
- [Overriding Lock Configuration](#overriding-lock-configuration)
- [Operation](#operation)
- [Testing](#testing)
- [Disabling Reboots](#disabling-reboots)
- [Manual Unlock](#manual-unlock)
- [Automatic Unlock](#automatic-unlock)
- [Delaying Lock Release](#delaying-lock-release)
- [Building](#building)
- [Frequently Asked/Anticipated Questions](#frequently-askedanticipated-questions)
- [Why is there no `latest` tag on Docker Hub?](#why-is-there-no-latest-tag-on-docker-hub)
- [Getting Help](#getting-help)
## Introduction
Kured (KUbernetes REboot Daemon) is a Kubernetes daemonset that
performs safe automatic node reboots when the need to do so is
indicated by the package management system of the underlying OS.
* Watches for the presence of a reboot sentinel file e.g. `/var/run/reboot-required`
or the successful run of a sentinel command.
* Utilises a lock in the API server to ensure only one node reboots at
a time
* Optionally defers reboots in the presence of active Prometheus alerts or selected pods
* Cordons & drains worker nodes before reboot, uncordoning them after
## Kubernetes & OS Compatibility
The daemon image contains versions of `k8s.io/client-go` and
`k8s.io/kubectl` (the binary of `kubectl` in older releases) for the purposes of
maintaining the lock and draining worker nodes. Kubernetes aims to provide
forwards and backwards compatibility of one minor version between client and
server:
| kured | kubectl | k8s.io/client-go | k8s.io/apimachinery | expected kubernetes compatibility |
|-------|---------|------------------|---------------------|-----------------------------------|
| main | 1.22.4 | v0.22.4 | v0.22.4 | 1.21.x, 1.22.x, 1.23.x |
| 1.8.1 | 1.21.4 | v0.21.4 | v0.21.4 | 1.20.x, 1.21.x, 1.22.x |
| 1.7.0 | 1.20.5 | v0.20.5 | v0.20.5 | 1.19.x, 1.20.x, 1.21.x |
| 1.6.1 | 1.19.4 | v0.19.4 | v0.19.4 | 1.18.x, 1.19.x, 1.20.x |
| 1.5.1 | 1.18.8 | v0.18.8 | v0.18.8 | 1.17.x, 1.18.x, 1.19.x |
| 1.4.4 | 1.17.7 | v0.17.0 | v0.17.0 | 1.16.x, 1.17.x, 1.18.x |
| 1.3.0 | 1.15.10 | v12.0.0 | release-1.15 | 1.15.x, 1.16.x, 1.17.x |
| 1.2.0 | 1.13.6 | v10.0.0 | release-1.13 | 1.12.x, 1.13.x, 1.14.x |
| 1.1.0 | 1.12.1 | v9.0.0 | release-1.12 | 1.11.x, 1.12.x, 1.13.x |
| 1.0.0 | 1.7.6 | v4.0.0 | release-1.7 | 1.6.x, 1.7.x, 1.8.x |
See the [release notes](https://github.com/weaveworks/kured/releases)
for specific version compatibility information, including which
combinations have been formally tested.
Versions >=1.1.0 enter the host mount namespace to invoke
`systemctl reboot`, so should work on any systemd distribution.
## Installation
To obtain a default installation without Prometheus alerting interlock
or Slack notifications:
Add Kured repository to Helm repos:
```console
latest=$(curl -s https://api.github.com/repos/weaveworks/kured/releases | jq -r .[0].tag_name)
kubectl apply -f "https://github.com/weaveworks/kured/releases/download/$latest/kured-$latest-dockerhub.yaml"
helm repo add kubereboot https://kubereboot.github.io/charts/
```
If you want to customise the installation, download the manifest and
edit it in accordance with the following section before application.
## Configuration
The following arguments can be passed to kured via the daemonset pod template:
## Install Kured
```console
Flags:
--alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts
--alert-firing-only bool only consider firing alerts when checking for active alerts
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
--drain-grace-period int time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
--skip-wait-for-delete-timeout int when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
--ds-name string name of daemonset on which to place lock (default "kured")
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
--end-time string schedule reboot only before this time of day (default "23:59:59")
--force-reboot bool force a reboot even if the drain is still running (default: false)
--drain-timeout duration timeout after which the drain is aborted (default: 0, infinite time)
-h, --help help for kured
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--lock-release-delay duration hold lock after reboot by this duration (default: 0, disabled)
--lock-ttl duration expire lock annotation after this duration (default: 0, disabled)
--message-template-drain string message template used to notify about a node being drained (default "Draining node %s")
--message-template-reboot string message template used to notify about a node being rebooted (default "Rebooting node %s")
--notify-url url for reboot notifications (cannot use with --slack-hook-url flags)
--period duration reboot check period (default 1h0m0s)
--prefer-no-schedule-taint string Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting.
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-command string command to run when a reboot is required by the sentinel (default "/sbin/systemctl reboot")
--reboot-days strings schedule reboot on these days (default [su,mo,tu,we,th,fr,sa])
--reboot-delay duration add a delay after drain finishes but before the reboot command is issued (default 0, no time)
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--reboot-sentinel-command string command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
--slack-channel string slack channel for reboot notifications
--slack-hook-url string slack hook URL for reboot notifications [deprecated in favor of --notify-url]
--slack-username string slack username for reboot notifications (default "kured")
--start-time string schedule reboot only after this time of day (default "0:00")
--time-zone string use this timezone for schedule inputs (default "UTC")
--log-format string log format specified as text or json, defaults to "text"
helm install my-release kubereboot/kured
```
### Reboot Sentinel File & Period
By default kured checks for the existence of
`/var/run/reboot-required` every sixty minutes; you can override these
values with `--reboot-sentinel` and `--period`. Each replica of the
daemon uses a random offset derived from the period on startup so that
nodes don't all contend for the lock simultaneously.
Alternatively, a reboot sentinel command can be used. If a reboot
sentinel command is used, the reboot sentinel file presence will be
ignored.
### Setting a schedule
By default, kured will reboot any time it detects the sentinel, but this
may cause reboots during odd hours. While service disruption does not
normally occur, anything is possible and operators may want to restrict
reboots to predictable schedules. Use `--reboot-days`, `--start-time`,
`--end-time`, and `--time-zone` to set a schedule. For example, business
hours on the west coast USA can be specified with:
```console
--reboot-days=mon,tue,wed,thu,fri
--start-time=9am
--end-time=5pm
--time-zone=America/Los_Angeles
```
Times can be formatted in numerous ways, including `5pm`, `5:00pm`, `17:00`,
and `17`. `--time-zone` represents a Go `time.Location`, and can be `UTC`,
`Local`, or any entry in the standard Linux tz database.
Note that when using smaller time windows, you should consider shortening
the sentinel check period (`--period`).
### Blocking Reboots via Alerts
You may find it desirable to block automatic node reboots when there
are active alerts - you can do so by providing the URL of your
Prometheus server:
```console
--prometheus-url=http://prometheus.monitoring.svc.cluster.local
```
By default the presence of *any* active (pending or firing) alerts
will block reboots, however you can ignore specific alerts:
```console
--alert-filter-regexp=^(RebootRequired|AnotherBenignAlert|...)$
```
You can also only block reboots for firing alerts:
```console
--alert-firing-only=true
```
See the section on Prometheus metrics for an important application of this
filter.
### Blocking Reboots via Pods
You can also block reboots of an _individual node_ when specific pods
are scheduled on it:
```console
--blocking-pod-selector=runtime=long,cost=expensive
```
Since label selector strings use commas to express logical 'and', you can
specify this parameter multiple times for 'or':
```console
--blocking-pod-selector=runtime=long,cost=expensive
--blocking-pod-selector=name=temperamental
```
In this case, the presence of either an (appropriately labelled) expensive long
running job or a known temperamental pod on a node will stop it rebooting.
> Try not to abuse this mechanism - it's better to strive for
> restartability where possible. If you do use it, make sure you set
> up a RebootRequired alert as described in the next section so that
> you can intervene manually if reboots are blocked for too long.
### Prometheus Metrics
Each kured pod exposes a single gauge metric (`:8080/metrics`) that
indicates the presence of the sentinel file:
```console
# HELP kured_reboot_required OS requires reboot due to software updates.
# TYPE kured_reboot_required gauge
kured_reboot_required{node="ip-xxx-xxx-xxx-xxx.ec2.internal"} 0
```
The purpose of this metric is to power an alert which will summon an
operator if the cluster cannot reboot itself automatically for a
prolonged period:
```console
# Alert if a reboot is required for any machines. Acts as a failsafe for the
# reboot daemon, which will not reboot nodes if there are pending alerts save
# this one.
ALERT RebootRequired
IF max(kured_reboot_required) != 0
FOR 24h
LABELS { severity="warning" }
ANNOTATIONS {
summary = "Machine(s) require being rebooted, and the reboot daemon has failed to do so for 24 hours",
impact = "Cluster nodes more vulnerable to security exploits. Eventually, no disk space left.",
description = "Machine(s) require being rebooted, probably due to kernel update.",
}
```
If you choose to employ such an alert and have configured kured to
probe for active alerts before rebooting, be sure to specify
`--alert-filter-regexp=^RebootRequired$` to avoid deadlock!
### Notifications
When you specify a formatted URL using `--notify-url`, kured will notify
about draining and rebooting nodes across a list of technologies.
![Notification](img/slack-notification.png)
Alternatively you can use the `--message-template-drain` and `--message-template-reboot` to customize the text of the message, e.g.
```cli
--message-template-drain="Draining node %s part of *my-cluster* in region *xyz*"
```
Here is the syntax:
- slack: `slack://tokenA/tokenB/tokenC`
(`--slack-hook-url` is deprecated but possible to use)
- rocketchat: `rocketchat://[username@]rocketchat-host/token[/channel|@recipient]`
- teams: `teams://tName/token-a/token-b/token-c`
> **Attention** as the [format of the url has changed](https://github.com/containrrr/shoutrrr/issues/138) you also have to specify a `tName`
- Email: `smtp://username:password@host:port/?fromAddress=fromAddress&toAddresses=recipient1[,recipient2,...]`
More details here: [containrrr.dev/shoutrrr/v0.4/services/overview](https://containrrr.dev/shoutrrr/v0.4/services/overview)
### Overriding Lock Configuration
The `--ds-name` and `--ds-namespace` arguments should match the name and
namespace of the daemonset used to deploy the reboot daemon - the locking is
implemented by means of an annotation on this resource. The defaults match
the daemonset YAML provided in the repository.
Similarly `--lock-annotation` can be used to change the name of the
annotation kured will use to store the lock, but the default is almost
certainly safe.
## Operation
The example commands in this section assume that you have not
overridden the default lock annotation, daemonset name or namespace;
if you have, you will have to adjust the commands accordingly.
### Testing
You can test your configuration by provoking a reboot on a node:
```console
sudo touch /var/run/reboot-required
```
### Disabling Reboots
If you need to temporarily stop kured from rebooting any nodes, you
can take the lock manually:
```console
kubectl -n kube-system annotate ds kured weave.works/kured-node-lock='{"nodeID":"manual"}'
```
Don't forget to release it afterwards!
### Manual Unlock
In exceptional circumstances, such as a node experiencing a permanent
failure whilst rebooting, manual intervention may be required to
remove the cluster lock:
```console
kubectl -n kube-system annotate ds kured weave.works/kured-node-lock-
```
> NB the `-` at the end of the command is important - it instructs
> `kubectl` to remove that annotation entirely.
### Automatic Unlock
In exceptional circumstances (especially when used with cluster-autoscaler) a node
which holds the lock might be killed, and thus its annotation would stay there forever.
Using `--lock-ttl=30m` will allow other nodes to take over if TTL has expired (in this case 30min) and continue reboot process.
### Delaying Lock Release
Using `--lock-release-delay=30m` will cause nodes to hold the lock for the specified time frame (in this case 30min) before it is released and the reboot process continues. This can be used to throttle reboots across the cluster.
## Building
Kured now uses [Go
Modules](https://github.com/golang/go/wiki/Modules), so build
instructions vary depending on where you have checked out the
repository:
**Building outside $GOPATH:**
```console
make
```
**Building inside $GOPATH:**
```console
GO111MODULE=on make
```
You can find the current preferred version of Golang in the [go.mod file](go.mod).
If you are interested in contributing code to kured, please take a look at
our [development][development] docs.
[development]: DEVELOPMENT.md
## Frequently Asked/Anticipated Questions
### Why is there no `latest` tag on Docker Hub?
Use of `latest` for production deployments is bad practice - see
[here](https://kubernetes.io/docs/concepts/configuration/overview) for
details. The manifest on `main` refers to `latest` for local
development testing with minikube only; for production use choose a
versioned manifest from the [release page](https://github.com/weaveworks/kured/releases/).
## Getting Help
If you have any questions about, feedback for or problems with `kured`:
* Invite yourself to the <a href="https://slack.weave.works/" target="_blank">Weave Users Slack</a>.
* Ask a question on the [#kured](https://weave-community.slack.com/messages/kured/) slack channel.
* [File an issue](https://github.com/weaveworks/kured/issues/new).
* Join us in [our monthly meeting](https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#),
every fourth Wednesday of the month at 16:00 UTC.
We follow the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md).
Your feedback is always welcome!
For more details on installing Kured please see the [chart readme](https://github.com/kubereboot/charts/tree/main/charts/kured).

View File

@@ -1,21 +0,0 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj

View File

@@ -1,14 +0,0 @@
apiVersion: v1
appVersion: "1.8.2"
description: A Helm chart for kured
name: kured
version: 2.10.2
home: https://github.com/weaveworks/kured
maintainers:
- name: ckotzbauer
email: christian.kotzbauer@gmail.com
- name: davidkarlsen
email: david@davidkarlsen.com
sources:
- https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png

View File

@@ -1,124 +0,0 @@
# Kured (KUbernetes REboot Daemon)
## Introduction
This chart installs the "Kubernetes Reboot Daemon" using the Helm Package Manager.
## Prerequisites
- Kubernetes 1.9+
## Installing the Chart
To install the chart with the release name `my-release`:
```bash
$ helm repo add kured https://weaveworks.github.io/kured
$ helm install my-release kured/kured
```
## Uninstalling the Chart
To uninstall/delete the `my-release` deployment:
```bash
$ helm delete my-release
```
The command removes all the Kubernetes components associated with the chart and deletes the release.
## Migrate from stable Helm-Chart
The following changes have been made compared to the stable chart:
- **[BREAKING CHANGE]** The `autolock` feature was removed. Use `configuration.startTime` and `configuration.endTime` instead.
- Role inconsistencies have been fixed (allowed verbs for modifying the `DaemonSet`, apiGroup of `PodSecurityPolicy`)
- Added support for affinities.
- Configuration of cli-flags can be made through a `configuration` object.
- Added optional `Service` and `ServiceMonitor` support for metrics endpoint.
## Configuration
| Config | Description | Default |
| ------ | ----------- | ------- |
| `image.repository` | Image repository | `weaveworks/kured` |
| `image.tag` | Image tag | `1.8.2` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
| `image.pullSecrets` | Image pull secrets | `[]` |
| `updateStrategy` | Daemonset update strategy | `RollingUpdate` |
| `maxUnavailable` | The max pods unavailable during a rolling update | `1` |
| `podAnnotations` | Annotations to apply to pods (eg to add Prometheus annotations) | `{}` |
| `extraArgs` | Extra arguments to pass to `/usr/bin/kured`. See below. | `{}` |
| `extraEnvVars` | Array of environment variables to pass to the daemonset. | `{}` |
| `configuration.lockTtl` | cli-parameter `--lock-ttl` | `0` |
| `configuration.lockReleaseDelay` | cli-parameter `--lock-release-delay` | `0` |
| `configuration.alertFilterRegexp` | cli-parameter `--alert-filter-regexp` | `""` |
| `configuration.alertFiringOnly` | cli-parameter `--alert-firing-only` | `false` |
| `configuration.blockingPodSelector` | Array of selectors for multiple cli-parameters `--blocking-pod-selector` | `[]` |
| `configuration.endTime` | cli-parameter `--end-time` | `""` |
| `configuration.lockAnnotation` | cli-parameter `--lock-annotation` | `""` |
| `configuration.period` | cli-parameter `--period` | `""` |
| `configuration.forceReboot` | cli-parameter `--force-reboot` | `false` |
| `configuration.drainGracePeriod` | cli-parameter `--drain-grace-period` | `""` |
| `configuration.drainTimeout` | cli-parameter `--drain-timeout` | `""` |
| `configuration.skipWaitForDeleteTimeout` | cli-parameter `--skip-wait-for-delete-timeout` | `""` |
| `configuration.prometheusUrl` | cli-parameter `--prometheus-url` | `""` |
| `configuration.rebootDays` | Array of days for multiple cli-parameters `--reboot-days` | `[]` |
| `configuration.rebootSentinel` | cli-parameter `--reboot-sentinel` | `""` |
| `configuration.rebootSentinelCommand` | cli-parameter `--reboot-sentinel-command` | `""` |
| `configuration.rebootCommand` | cli-parameter `--reboot-command` | `""` |
| `configuration.rebootDelay` | cli-parameter `--reboot-delay` | `""` |
| `configuration.slackChannel` | cli-parameter `--slack-channel` | `""` |
| `configuration.slackHookUrl` | cli-parameter `--slack-hook-url` | `""` |
| `configuration.slackUsername` | cli-parameter `--slack-username` | `""` |
| `configuration.notifyUrl` | cli-parameter `--notify-url` | `""` |
| `configuration.messageTemplateDrain` | cli-parameter `--message-template-drain` | `""` |
| `configuration.messageTemplateReboot` | cli-parameter `--message-template-reboot` | `""` |
| `configuration.startTime` | cli-parameter `--start-time` | `""` |
| `configuration.timeZone` | cli-parameter `--time-zone` | `""` |
| `configuration.annotateNodes` | cli-parameter `--annotate-nodes` | `false` |
| `configuration.preferNoScheduleTaint` | Taint name applied during pending node reboot | `""` |
| `rbac.create` | Create RBAC roles | `true` |
| `serviceAccount.create` | Create a service account | `true` |
| `serviceAccount.name` | Service account name to create (or use if `serviceAccount.create` is false) | (chart fullname) |
| `podSecurityPolicy.create` | Create podSecurityPolicy | `false` |
| `resources` | Resources requests and limits. | `{}` |
| `metrics.create` | Create a ServiceMonitor for prometheus-operator | `false` |
| `metrics.namespace` | The namespace to create the ServiceMonitor in | `""` |
| `metrics.labels` | Additional labels for the ServiceMonitor | `{}` |
| `metrics.interval` | Interval prometheus should scrape the endpoint | `60s` |
| `metrics.scrapeTimeout` | A custom scrapeTimeout for prometheus | `""` |
| `service.create` | Create a Service for the metrics endpoint | `false` |
| `service.name ` | Service name for the metrics endpoint | `""` |
| `service.port` | Port of the service to expose | `8080` |
| `service.annotations` | Annotations to apply to the service (eg to add Prometheus annotations) | `{}` |
| `podLabels` | Additional labels for pods (e.g. CostCenter=IT) | `{}` |
| `priorityClassName` | Priority Class to be used by the pods | `""` |
| `tolerations` | Tolerations to apply to the daemonset (eg to allow running on master) | `[{"key": "node-role.kubernetes.io/master", "effect": "NoSchedule"}]`|
| `affinity` | Affinity for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
| `nodeSelector` | Node Selector for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
| `volumeMounts` | Maps of volumes mount to mount | `{}` |
| `volumes` | Maps of volumes to mount | `{}` |
See https://github.com/weaveworks/kured#configuration for values (not contained in the `configuration` object) for `extraArgs`. Note that
```yaml
extraArgs:
foo: 1
bar-baz: 2
```
becomes `/usr/bin/kured ... --foo=1 --bar-baz=2`.
## Prometheus Metrics
Kured exposes a single prometheus metric indicating whether a reboot is required or not (see the [kured docs](https://github.com/weaveworks/kured#prometheus-metrics) for details).
#### Prometheus-Operator
```yaml
metrics:
create: true
```
#### Prometheus Annotations
```yaml
service:
annotations:
prometheus.io/scrape: "true"
prometheus.io/path: "/metrics"
prometheus.io/port: "8080"
```

View File

@@ -1,13 +0,0 @@
# This is tested twice:
# Basic install test with chart-testing (on charts PRs)
# Functional testing in PRs (other PRs)
service:
create: true
name: kured-prometheus-endpoint
port: 8080
type: NodePort
nodePort: 30000
# Do not override the configuration: period in this, so that
# We can test prometheus exposed metrics without rebooting.

View File

@@ -1,3 +0,0 @@
Kured will check for /var/run/reboot-required, and reboot nodes when needed.
See https://github.com/weaveworks/kured/ for details.

View File

@@ -1,72 +0,0 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "kured.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "kured.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "kured.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create the name of the service account to use.
When serviceAccount.create is false, fall back to an explicitly named
(pre-existing) account, or to the namespace "default" account.
*/}}
{{- define "kured.serviceAccountName" -}}
{{- if .Values.serviceAccount.create -}}
{{ default (include "kured.fullname" .) .Values.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.serviceAccount.name }}
{{- end -}}
{{- end -}}
{{/*
Return the appropriate apiVersion for podsecuritypolicy.
NOTE(review): .Capabilities.KubeVersion.GitVersion is deprecated in Helm 3
(.Capabilities.KubeVersion.Version is the forward-compatible field) — confirm
the chart's minimum Helm version before changing.
*/}}
{{- define "kured.psp.apiVersion" -}}
{{- if semverCompare "<1.10-0" .Capabilities.KubeVersion.GitVersion -}}
{{- print "extensions/v1beta1" -}}
{{- else -}}
{{- print "policy/v1beta1" -}}
{{- end -}}
{{- end -}}
{{/*
Returns a set of labels applied to each resource.
*/}}
{{- define "kured.labels" -}}
app: {{ template "kured.name" . }}
chart: {{ template "kured.chart" . }}
release: {{ .Release.Name }}
heritage: {{ .Release.Service }}
{{- end -}}
{{/*
Returns a set of matchLabels applied. Kept stable (no chart/heritage) because
selectors are immutable on DaemonSets/Services.
*/}}
{{- define "kured.matchLabels" -}}
app: {{ template "kured.name" . }}
release: {{ .Release.Name }}
{{- end -}}

View File

@@ -1,30 +0,0 @@
{{- if .Values.rbac.create -}}
# Cluster-wide permissions kured needs to cordon/uncordon nodes and to drain
# the pods running on the node it is about to reboot.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ template "kured.fullname" . }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
rules:
# Allow kured to read spec.unschedulable
# Allow kubectl to drain/uncordon
#
# NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
# match https://github.com/kubernetes/kubernetes/blob/v1.19.4/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
#
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "patch"]
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["list","delete","get"]
# DaemonSet read access in both API groups: drain must recognize
# DaemonSet-managed pods so it can skip (not evict) them.
- apiGroups: ["extensions"]
  resources: ["daemonsets"]
  verbs: ["get"]
- apiGroups: ["apps"]
  resources: ["daemonsets"]
  verbs: ["get"]
# Eviction is used instead of plain delete so PodDisruptionBudgets are honored.
- apiGroups: [""]
  resources: ["pods/eviction"]
  verbs: ["create"]
{{- end -}}

View File

@@ -1,16 +0,0 @@
{{- if .Values.rbac.create -}}
# Grants the kured ClusterRole to the kured ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ template "kured.fullname" . }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ template "kured.fullname" . }}
subjects:
- kind: ServiceAccount
  name: {{ template "kured.serviceAccountName" . }}
  namespace: {{ .Release.Namespace }}
{{- end -}}

View File

@@ -1,178 +0,0 @@
# kured DaemonSet: runs one privileged, host-PID pod per node; each pod watches
# its own node's reboot sentinel and coordinates reboots via a lock annotation
# on this DaemonSet. Most cli flags are rendered from .Values.configuration.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: {{ template "kured.fullname" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
spec:
  updateStrategy:
    type: {{ .Values.updateStrategy }}
    {{- if eq .Values.updateStrategy "RollingUpdate"}}
    rollingUpdate:
      maxUnavailable: {{ .Values.maxUnavailable }}
    {{- end}}
  selector:
    matchLabels:
      {{- include "kured.matchLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "kured.labels" . | nindent 8 }}
        {{- if .Values.podLabels }}
        {{- toYaml .Values.podLabels | nindent 8 }}
        {{- end }}
      {{- if .Values.podAnnotations }}
      annotations:
        {{- range $key, $value := .Values.podAnnotations }}
        {{ $key }}: {{ $value | quote }}
        {{- end }}
      {{- end }}
    spec:
      serviceAccountName: {{ template "kured.serviceAccountName" . }}
      # hostPID lets kured nsenter the host's mount namespace to run the
      # reboot/sentinel commands on the node itself.
      hostPID: true
      restartPolicy: Always
      {{- with .Values.image.pullSecrets }}
      imagePullSecrets:
{{ toYaml . | indent 8 }}
      {{- end }}
      {{- if .Values.priorityClassName }}
      priorityClassName: {{ .Values.priorityClassName }}
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          securityContext:
            privileged: true # Give permission to nsenter /proc/1/ns/mnt
          resources:
{{ toYaml .Values.resources | indent 12 }}
          command:
            - /usr/bin/kured
          args:
            # The lock is held as an annotation on this very DaemonSet.
            - --ds-name={{ template "kured.fullname" . }}
            - --ds-namespace={{ .Release.Namespace }}
            {{- if .Values.configuration.lockTtl }}
            - --lock-ttl={{ .Values.configuration.lockTtl }}
            {{- end }}
            {{- if .Values.configuration.lockReleaseDelay }}
            - --lock-release-delay={{ .Values.configuration.lockReleaseDelay }}
            {{- end }}
            {{- if .Values.configuration.alertFilterRegexp }}
            - --alert-filter-regexp={{ .Values.configuration.alertFilterRegexp }}
            {{- end }}
            {{- if .Values.configuration.alertFiringOnly }}
            - --alert-firing-only={{ .Values.configuration.alertFiringOnly }}
            {{- end }}
            {{- range .Values.configuration.blockingPodSelector }}
            - --blocking-pod-selector={{ . }}
            {{- end }}
            {{- if .Values.configuration.endTime }}
            - --end-time={{ .Values.configuration.endTime }}
            {{- end }}
            {{- if .Values.configuration.lockAnnotation }}
            - --lock-annotation={{ .Values.configuration.lockAnnotation }}
            {{- end }}
            {{- if .Values.configuration.period }}
            - --period={{ .Values.configuration.period }}
            {{- end }}
            {{- if .Values.configuration.forceReboot }}
            - --force-reboot
            {{- end }}
            {{- if .Values.configuration.drainGracePeriod }}
            - --drain-grace-period={{ .Values.configuration.drainGracePeriod }}
            {{- end }}
            {{- if .Values.configuration.drainTimeout }}
            - --drain-timeout={{ .Values.configuration.drainTimeout }}
            {{- end }}
            {{- if .Values.configuration.skipWaitForDeleteTimeout }}
            - --skip-wait-for-delete-timeout={{ .Values.configuration.skipWaitForDeleteTimeout }}
            {{- end }}
            {{- if .Values.configuration.prometheusUrl }}
            - --prometheus-url={{ .Values.configuration.prometheusUrl }}
            {{- end }}
            {{- range .Values.configuration.rebootDays }}
            - --reboot-days={{ . }}
            {{- end }}
            {{- if .Values.configuration.rebootSentinel }}
            - --reboot-sentinel={{ .Values.configuration.rebootSentinel }}
            {{- end }}
            {{- if .Values.configuration.rebootSentinelCommand }}
            - --reboot-sentinel-command={{ .Values.configuration.rebootSentinelCommand }}
            {{- end }}
            {{- if .Values.configuration.rebootCommand }}
            - --reboot-command={{ .Values.configuration.rebootCommand }}
            {{- end }}
            {{- if .Values.configuration.rebootDelay }}
            - --reboot-delay={{ .Values.configuration.rebootDelay }}
            {{- end }}
            {{- if .Values.configuration.slackChannel }}
            - --slack-channel={{ .Values.configuration.slackChannel }}
            {{- end }}
            {{- if .Values.configuration.slackHookUrl }}
            - --slack-hook-url={{ .Values.configuration.slackHookUrl }}
            {{- end }}
            {{- if .Values.configuration.slackUsername }}
            - --slack-username={{ .Values.configuration.slackUsername }}
            {{- end }}
            {{- if .Values.configuration.notifyUrl }}
            - --notify-url={{ .Values.configuration.notifyUrl }}
            {{- end }}
            {{- if .Values.configuration.messageTemplateDrain }}
            - --message-template-drain={{ .Values.configuration.messageTemplateDrain }}
            {{- end }}
            {{- if .Values.configuration.messageTemplateReboot }}
            - --message-template-reboot={{ .Values.configuration.messageTemplateReboot }}
            {{- end }}
            {{- if .Values.configuration.startTime }}
            - --start-time={{ .Values.configuration.startTime }}
            {{- end }}
            {{- if .Values.configuration.timeZone }}
            - --time-zone={{ .Values.configuration.timeZone }}
            {{- end }}
            {{- if .Values.configuration.annotateNodes }}
            - --annotate-nodes={{ .Values.configuration.annotateNodes }}
            {{- end }}
            {{- if .Values.configuration.preferNoScheduleTaint }}
            - --prefer-no-schedule-taint={{ .Values.configuration.preferNoScheduleTaint }}
            {{- end }}
            # Arbitrary extra flags: a falsy value renders a bare --key flag.
            {{- range $key, $value := .Values.extraArgs }}
            {{- if $value }}
            - --{{ $key }}={{ $value }}
            {{- else }}
            - --{{ $key }}
            {{- end }}
            {{- end }}
          {{- if .Values.volumeMounts }}
          volumeMounts:
            {{- toYaml .Values.volumeMounts | nindent 12 }}
          {{- end }}
          ports:
            - containerPort: 8080
              name: metrics
          env:
            # Pass in the name of the node on which this pod is scheduled
            # for use with drain/uncordon operations and lock acquisition
            - name: KURED_NODE_ID
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            {{- if .Values.extraEnvVars }}
            {{ toYaml .Values.extraEnvVars | nindent 12 }}
            {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
{{ toYaml . | indent 8 }}
      {{- end }}
      {{- with .Values.nodeSelector }}
      nodeSelector:
{{ toYaml . | indent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
{{ toYaml . | indent 8 }}
      {{- end }}
      {{- if .Values.volumes }}
      volumes:
        {{- toYaml .Values.volumes | nindent 8 }}
      {{- end }}

View File

@@ -1,21 +0,0 @@
{{- if .Values.podSecurityPolicy.create}}
# PSP permitting the privileged, host-PID pod kured needs in order to
# nsenter the host and run reboot commands on it.
apiVersion: {{ template "kured.psp.apiVersion" . }}
kind: PodSecurityPolicy
metadata:
  name: {{ template "kured.fullname" . }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
spec:
  privileged: true
  hostPID: true
  allowedCapabilities: ['*']
  fsGroup:
    rule: RunAsAny
  runAsUser:
    rule: RunAsAny
  seLinux:
    rule: RunAsAny
  supplementalGroups:
    rule: RunAsAny
  volumes: ['*']
{{- end }}

View File

@@ -1,30 +0,0 @@
{{- if .Values.rbac.create -}}
# Namespaced permissions: kured acquires/releases its reboot lock by patching
# an annotation on its own DaemonSet; PSP "use" is granted when PSPs are enabled.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ template "kured.fullname" . }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
rules:
# Allow kured to lock/unlock itself
- apiGroups: ["extensions"]
  resources: ["daemonsets"]
  resourceNames: ["{{ template "kured.fullname" . }}"]
  verbs: ["update", "patch"]
- apiGroups: ["apps"]
  resources: ["daemonsets"]
  resourceNames: ["{{ template "kured.fullname" . }}"]
  verbs: ["update", "patch"]
{{- if .Values.podSecurityPolicy.create }}
# Both API groups are listed so the rule works across Kubernetes versions.
- apiGroups: ["extensions"]
  resources: ["podsecuritypolicies"]
  resourceNames: ["{{ template "kured.fullname" . }}"]
  verbs: ["use"]
- apiGroups: ["policy"]
  resources: ["podsecuritypolicies"]
  resourceNames: ["{{ template "kured.fullname" . }}"]
  verbs: ["use"]
{{- end }}
{{- end -}}

View File

@@ -1,17 +0,0 @@
{{- if .Values.rbac.create -}}
# Grants the namespaced kured Role (DaemonSet lock annotation, optional PSP use)
# to the kured ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  namespace: {{ .Release.Namespace }}
  name: {{ template "kured.fullname" . }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
subjects:
- kind: ServiceAccount
  namespace: {{ .Release.Namespace }}
  name: {{ template "kured.serviceAccountName" . }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ template "kured.fullname" . }}
{{- end -}}

View File

@@ -1,29 +0,0 @@
{{- if or .Values.service.create .Values.metrics.create }}
# Service exposing kured's prometheus metrics endpoint (container port 8080).
# Also rendered when only metrics.create is set, since the ServiceMonitor
# needs a Service to select.
apiVersion: v1
kind: Service
metadata:
  {{- if .Values.service.name }}
  name: {{ .Values.service.name }}
  {{- else }}
  name: {{ template "kured.fullname" . }}
  {{- end }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
  {{- if .Values.service.annotations }}
  annotations:
    {{- range $key, $value := .Values.service.annotations }}
    {{ $key }}: {{ $value | quote }}
    {{- end }}
  {{- end }}
spec:
  type: {{ .Values.service.type }}
  ports:
    - name: metrics
      port: {{ .Values.service.port }}
      targetPort: 8080
      {{- if eq .Values.service.type "NodePort" }}
      nodePort: {{ .Values.service.nodePort }}
      {{- end }}
  selector:
    {{- include "kured.matchLabels" . | nindent 4 }}
{{- end }}

View File

@@ -1,9 +0,0 @@
{{- if .Values.serviceAccount.create -}}
# ServiceAccount the kured DaemonSet pods run as; bound to the chart's
# Role/ClusterRole by the corresponding bindings.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ template "kured.serviceAccountName" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
{{- end -}}

View File

@@ -1,31 +0,0 @@
{{- if .Values.metrics.create }}
# prometheus-operator ServiceMonitor scraping kured's /metrics on port 8080
# via the Service selected by the shared matchLabels.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ template "kured.fullname" . }}
  {{- if .Values.metrics.namespace }}
  namespace: {{ .Values.metrics.namespace }}
  {{- end }}
  labels:
    {{- include "kured.labels" . | nindent 4 }}
    {{- if .Values.metrics.labels }}
    {{- toYaml .Values.metrics.labels | nindent 4 }}
    {{- end }}
spec:
  endpoints:
    - interval: {{ .Values.metrics.interval }}
      {{- if .Values.metrics.scrapeTimeout }}
      scrapeTimeout: {{ .Values.metrics.scrapeTimeout }}
      {{- end }}
      honorLabels: true
      targetPort: 8080
      path: /metrics
      scheme: http
  jobLabel: "{{ .Release.Name }}"
  selector:
    matchLabels:
      {{- include "kured.matchLabels" . | nindent 6 }}
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
{{- end }}

View File

@@ -1,30 +0,0 @@
image:
repository: weaveworks/kured
tag: latest
configuration:
# lockTtl: 0 # force clean lock annotation after this amount of time (default 0, disabled)
# alertFilterRegexp: "" # alert names to ignore when checking for active alerts
# alertFiringOnly: false # only consider firing alerts when checking for active alerts
# blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
# endTime: "" # only reboot before this time of day (default "23:59")
# lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
period: "1m" # reboot check period (default 1h0m0s)
# forceReboot: false # force a reboot even if the drain fails or times out (default: false)
# drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
# drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
# skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
# prometheusUrl: "" # Prometheus instance to probe for active alerts
# rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
# rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
# rebootSentinelCommand: "" # command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
# slackChannel: "" # slack channel for reboot notifications
# slackHookUrl: "" # slack hook URL for reboot notifications
# slackUsername: "" # slack username for reboot notifications (default "kured")
# notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
# messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
# messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooted node %s")
# startTime: "" # only reboot after this time of day (default "0:00")
# timeZone: "" # time-zone to use (valid zones from "time" golang package)
# annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
# lockReleaseDelay: "5m" # hold lock after reboot by this amount of time (default 0, disabled)

View File

@@ -1,94 +0,0 @@
image:
repository: weaveworks/kured
tag: "" # will default to the appVersion in Chart.yaml
pullPolicy: IfNotPresent
pullSecrets: []
updateStrategy: RollingUpdate
# requires RollingUpdate updateStrategy
maxUnavailable: 1
podAnnotations: {}
extraArgs: {}
extraEnvVars:
# - name: slackHookUrl
# valueFrom:
# secretKeyRef:
# name: secret_name
# key: secret_key
# - name: regularEnvVariable
# value: 123
configuration:
lockTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
alertFilterRegexp: "" # alert names to ignore when checking for active alerts
alertFiringOnly: false # only consider firing alerts when checking for active alerts
blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
endTime: "" # only reboot before this time of day (default "23:59")
lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
period: "" # reboot check period (default 1h0m0s)
forceReboot: false # force a reboot even if the drain fails or times out (default: false)
drainGracePeriod: "" # time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)
drainTimeout: "" # timeout after which the drain is aborted (default: 0, infinite time)
skipWaitForDeleteTimeout: "" # when time is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)
prometheusUrl: "" # Prometheus instance to probe for active alerts
rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
rebootSentinelCommand: "" # command for which a successful run signals need to reboot (default ""). If non-empty, sentinel file will be ignored.
rebootCommand: "/bin/systemctl reboot" # command to run when a reboot is required by the sentinel
rebootDelay: "" # add a delay after drain finishes but before the reboot command is issued
slackChannel: "" # slack channel for reboot notifications
slackHookUrl: "" # slack hook URL for reboot notifications
slackUsername: "" # slack username for reboot notifications (default "kured")
notifyUrl: "" # notification URL with the syntax as follows: https://containrrr.dev/shoutrrr/services/overview/
messageTemplateDrain: "" # slack message template when notifying about a node being drained (default "Draining node %s")
messageTemplateReboot: "" # slack message template when notifying about a node being rebooted (default "Rebooted node %s")
startTime: "" # only reboot after this time of day (default "0:00")
timeZone: "" # time-zone to use (valid zones from "time" golang package)
annotateNodes: false # enable 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' node annotations to signify kured reboot operations
lockReleaseDelay: 0 # hold lock after reboot by this amount of time (default 0, disabled)
preferNoScheduleTaint: "" # Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to "weave.works/kured-node-reboot" to enable tainting.
rbac:
create: true
serviceAccount:
create: true
name:
podSecurityPolicy:
create: false
resources: {}
metrics:
create: false
namespace: ""
labels: {}
interval: 60s
scrapeTimeout: ""
service:
create: false
port: 8080
annotations: {}
name: ""
type: ClusterIP
podLabels: {}
priorityClassName: ""
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
affinity: {}
nodeSelector: {}
volumeMounts: []
volumes: []

View File

@@ -1,4 +0,0 @@
FROM alpine:3.15.0
# --no-cache makes apk fetch the package index on the fly, so a separate
# `apk update` pass is redundant; upgrade existing packages, then add the
# CA bundle and timezone data kured needs at runtime (TLS + --time-zone).
RUN apk upgrade --no-cache && apk add --no-cache ca-certificates tzdata
COPY ./kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]

View File

@@ -1,725 +0,0 @@
package main
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"net/http"
"net/url"
"os"
"os/exec"
"regexp"
"strings"
"time"
papi "github.com/prometheus/client_golang/api"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
"github.com/spf13/viper"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
kubectldrain "k8s.io/kubectl/pkg/drain"
"github.com/google/shlex"
shoutrrr "github.com/containrrr/shoutrrr"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/weaveworks/kured/pkg/alerts"
"github.com/weaveworks/kured/pkg/daemonsetlock"
"github.com/weaveworks/kured/pkg/delaytick"
"github.com/weaveworks/kured/pkg/taints"
"github.com/weaveworks/kured/pkg/timewindow"
)
var (
	// version is overridden at build time via -ldflags.
	version = "unreleased"

	// Command line flags
	forceReboot                     bool          // reboot even if drain fails/times out
	drainTimeout                    time.Duration // 0 means wait forever
	rebootDelay                     time.Duration
	period                          time.Duration // sentinel check interval
	drainGracePeriod                int
	skipWaitForDeleteTimeoutSeconds int
	dsNamespace                     string // namespace of the DaemonSet carrying the lock annotation
	dsName                          string // name of the DaemonSet carrying the lock annotation
	lockAnnotation                  string
	lockTTL                         time.Duration
	lockReleaseDelay                time.Duration
	prometheusURL                   string
	preferNoScheduleTaintName       string
	alertFilter                     *regexp.Regexp
	alertFiringOnly                 bool
	rebootSentinelFile              string
	rebootSentinelCommand           string
	notifyURL                       string // shoutrrr notification URL; supersedes the slack* flags
	slackHookURL                    string // deprecated: translated to notifyURL by flagCheck
	slackUsername                   string
	slackChannel                    string
	messageTemplateDrain            string
	messageTemplateReboot           string
	podSelectors                    []string
	rebootCommand                   string
	logFormat                       string
	nodeID                          string // injected via KURED_NODE_ID from spec.nodeName
	rebootDays                      []string
	rebootStart                     string
	rebootEnd                       string
	timezone                        string
	annotateNodes                   bool

	// Metrics
	// rebootRequiredGauge is exported as kured_reboot_required{node=...}.
	rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Subsystem: "kured",
		Name:      "reboot_required",
		Help:      "OS requires reboot due to software updates.",
	}, []string{"node"})
)
// Canonical annotation keys and the environment-variable prefix shared by the
// flag/env binding machinery below.
const (
	// KuredNodeLockAnnotation is the canonical string value for the kured node-lock annotation
	KuredNodeLockAnnotation string = "weave.works/kured-node-lock"
	// KuredRebootInProgressAnnotation is the canonical string value for the kured reboot-in-progress annotation
	KuredRebootInProgressAnnotation string = "weave.works/kured-reboot-in-progress"
	// KuredMostRecentRebootNeededAnnotation is the canonical string value for the kured most-recent-reboot-needed annotation
	KuredMostRecentRebootNeededAnnotation string = "weave.works/kured-most-recent-reboot-needed"
	// EnvPrefix The environment variable prefix of all environment variables bound to our command line flags.
	EnvPrefix = "KURED"
)
// init registers the reboot-required gauge with the default prometheus
// registry so it is served from the /metrics endpoint.
func init() {
	prometheus.MustRegister(rebootRequiredGauge)
}
// main builds the Cobra root command and runs it, exiting fatally on error.
func main() {
	if err := NewRootCommand().Execute(); err != nil {
		log.Fatal(err)
	}
}
// NewRootCommand construct the Cobra root command.
//
// All flags are persistent and bound to KURED_* environment variables via
// bindViper/bindFlags; flagCheck translates the deprecated slack flags into
// --notify-url before the command runs.
func NewRootCommand() *cobra.Command {
	rootCmd := &cobra.Command{
		Use:               "kured",
		Short:             "Kubernetes Reboot Daemon",
		PersistentPreRunE: bindViper,
		PreRun:            flagCheck,
		Run:               root}

	rootCmd.PersistentFlags().StringVar(&nodeID, "node-id", "",
		"node name kured runs on, should be passed down from spec.nodeName via KURED_NODE_ID environment variable")
	rootCmd.PersistentFlags().BoolVar(&forceReboot, "force-reboot", false,
		"force a reboot even if the drain fails or times out (default: false)")
	rootCmd.PersistentFlags().IntVar(&drainGracePeriod, "drain-grace-period", -1,
		"time in seconds given to each pod to terminate gracefully, if negative, the default value specified in the pod will be used (default: -1)")
	rootCmd.PersistentFlags().IntVar(&skipWaitForDeleteTimeoutSeconds, "skip-wait-for-delete-timeout", 0,
		"when seconds is greater than zero, skip waiting for the pods whose deletion timestamp is older than N seconds while draining a node (default: 0)")
	rootCmd.PersistentFlags().DurationVar(&drainTimeout, "drain-timeout", 0,
		"timeout after which the drain is aborted (default: 0, infinite time)")
	rootCmd.PersistentFlags().DurationVar(&rebootDelay, "reboot-delay", 0,
		"delay reboot for this duration (default: 0, disabled)")
	rootCmd.PersistentFlags().DurationVar(&period, "period", time.Minute*60,
		"sentinel check period")
	rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-namespace", "kube-system",
		"namespace containing daemonset on which to place lock")
	rootCmd.PersistentFlags().StringVar(&dsName, "ds-name", "kured",
		"name of daemonset on which to place lock")
	rootCmd.PersistentFlags().StringVar(&lockAnnotation, "lock-annotation", KuredNodeLockAnnotation,
		"annotation in which to record locking node")
	rootCmd.PersistentFlags().DurationVar(&lockTTL, "lock-ttl", 0,
		"expire lock annotation after this duration (default: 0, disabled)")
	rootCmd.PersistentFlags().DurationVar(&lockReleaseDelay, "lock-release-delay", 0,
		"delay lock release for this duration (default: 0, disabled)")
	rootCmd.PersistentFlags().StringVar(&prometheusURL, "prometheus-url", "",
		"Prometheus instance to probe for active alerts")
	rootCmd.PersistentFlags().Var(&regexpValue{&alertFilter}, "alert-filter-regexp",
		"alert names to ignore when checking for active alerts")
	rootCmd.PersistentFlags().BoolVar(&alertFiringOnly, "alert-firing-only", false,
		"only consider firing alerts when checking for active alerts (default: false)")
	rootCmd.PersistentFlags().StringVar(&rebootSentinelFile, "reboot-sentinel", "/var/run/reboot-required",
		"path to file whose existence triggers the reboot command")
	rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaintName, "prefer-no-schedule-taint", "",
		"Taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes). Disabled by default. Set e.g. to \"weave.works/kured-node-reboot\" to enable tainting.")
	rootCmd.PersistentFlags().StringVar(&rebootSentinelCommand, "reboot-sentinel-command", "",
		"command for which a zero return code will trigger a reboot command")
	rootCmd.PersistentFlags().StringVar(&rebootCommand, "reboot-command", "/bin/systemctl reboot",
		"command to run when a reboot is required")
	rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
		"slack hook URL for notifications")
	rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
		"slack username for notifications")
	rootCmd.PersistentFlags().StringVar(&slackChannel, "slack-channel", "",
		"slack channel for reboot notifications")
	rootCmd.PersistentFlags().StringVar(&notifyURL, "notify-url", "",
		"notify URL for reboot notifications")
	rootCmd.PersistentFlags().StringVar(&messageTemplateDrain, "message-template-drain", "Draining node %s",
		"message template used to notify about a node being drained")
	rootCmd.PersistentFlags().StringVar(&messageTemplateReboot, "message-template-reboot", "Rebooting node %s",
		"message template used to notify about a node being rebooted")
	rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
		"label selector identifying pods whose presence should prevent reboots")
	rootCmd.PersistentFlags().StringSliceVar(&rebootDays, "reboot-days", timewindow.EveryDay,
		"schedule reboot on these days")
	rootCmd.PersistentFlags().StringVar(&rebootStart, "start-time", "0:00",
		"schedule reboot only after this time of day")
	rootCmd.PersistentFlags().StringVar(&rebootEnd, "end-time", "23:59:59",
		"schedule reboot only before this time of day")
	rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC",
		"use this timezone for schedule inputs")
	rootCmd.PersistentFlags().BoolVar(&annotateNodes, "annotate-nodes", false,
		"if set, the annotations 'weave.works/kured-reboot-in-progress' and 'weave.works/kured-most-recent-reboot-needed' will be given to nodes undergoing kured reboots")
	rootCmd.PersistentFlags().StringVar(&logFormat, "log-format", "text",
		"use text or json log format")

	return rootCmd
}
// flagCheck is a temporary cobra pre-run hook that migrates the deprecated
// slack-notification flags onto --notify-url.
//
// When --slack-hook-url is set, its path is converted to a shoutrrr-style
// "slack://tokenA/tokenB/tokenC" URL and stored in notifyURL, unless the hook
// URL cannot be parsed into exactly three path tokens, in which case a
// warning is logged and notifyURL is left unchanged.
func flagCheck(cmd *cobra.Command, args []string) {
	if slackHookURL != "" && notifyURL != "" {
		log.Warnf("Cannot use both --notify-url and --slack-hook-url flags. Kured will use --notify-url flag only...")
	}
	if slackHookURL != "" {
		log.Warnf("Deprecated flag(s). Please use --notify-url flag instead.")
		trataURL, err := url.Parse(slackHookURL)
		if err != nil {
			// Bail out early: dereferencing trataURL below would panic on a nil URL.
			log.Warnf("slack-hook-url is not properly formatted...no notification will be sent: %v\n", err)
			return
		}
		// Use TrimPrefix rather than Trim: Trim treats "/services/" as a
		// character *set* and would also strip any of the characters
		// {s, e, r, v, i, c, /} from the end of the path, corrupting valid tokens.
		tokens := strings.TrimPrefix(trataURL.Path, "/services/")
		if len(strings.Split(tokens, "/")) != 3 {
			// No parse error occurred on this path; report the malformed URL itself
			// (the original code logged a nil err here).
			log.Warnf("slack-hook-url is not properly formatted...no notification will be sent: %s\n", slackHookURL)
		} else {
			notifyURL = fmt.Sprintf("slack://%s", tokens)
		}
	}
}
// bindViper is a cobra pre-run hook that wires command flags to
// environment variables (with the kured env prefix) through viper.
func bindViper(cmd *cobra.Command, args []string) error {
	vpr := viper.New()
	vpr.SetEnvPrefix(EnvPrefix)
	vpr.AutomaticEnv()
	bindFlags(cmd, vpr)
	return nil
}
// bindFlags maps every cobra flag onto its matching environment variable, so
// that a value from the environment is applied whenever the flag itself was
// not given on the command line.
func bindFlags(cmd *cobra.Command, v *viper.Viper) {
	cmd.Flags().VisitAll(func(flag *pflag.Flag) {
		// Dashes are illegal in env var names, so dashed flags are bound to an
		// underscore variant (e.g. reboot-days -> <PREFIX>_REBOOT_DAYS).
		if strings.Contains(flag.Name, "-") {
			v.BindEnv(flag.Name, flagToEnvVar(flag.Name))
		}
		// Only fall back to the environment when the user did not set the flag.
		if v.IsSet(flag.Name) && !flag.Changed {
			envValue := fmt.Sprintf("%v", v.Get(flag.Name))
			log.Infof("Binding %s command flag to environment variable: %s=%s", flag.Name, flagToEnvVar(flag.Name), envValue)
			cmd.Flags().Set(flag.Name, envValue)
		}
	})
}
// flagToEnvVar converts command flag name to equivalent environment variable name
func flagToEnvVar(flag string) string {
envVarSuffix := strings.ToUpper(strings.ReplaceAll(flag, "-", "_"))
return fmt.Sprintf("%s_%s", EnvPrefix, envVarSuffix)
}
// newCommand builds an *exec.Cmd whose stdout and stderr are streamed through
// the standard logger, each line tagged with the command name and the stream
// it came from (stdout at info level, stderr at warn level).
func newCommand(name string, arg ...string) *exec.Cmd {
	command := exec.Command(name, arg...)
	entry := log.NewEntry(log.StandardLogger()).WithField("cmd", command.Args[0])
	command.Stdout = entry.WithField("std", "out").WriterLevel(log.InfoLevel)
	command.Stderr = entry.WithField("std", "err").WriterLevel(log.WarnLevel)
	return command
}
// buildHostCommand writes a new command to run in the host namespace
// Rancher based need different pid
func buildHostCommand(pid int, command []string) []string {
// From the container, we nsenter into the proper PID to run the hostCommand.
// For this, kured daemonset need to be configured with hostPID:true and privileged:true
cmd := []string{"/usr/bin/nsenter", fmt.Sprintf("-m/proc/%d/ns/mnt", pid), "--"}
cmd = append(cmd, command...)
return cmd
}
// rebootRequired runs the sentinel command and reports whether a reboot is
// needed: exit code 0 means "reboot required", a non-zero exit code means it
// is not. Any failure to execute the command at all is fatal.
func rebootRequired(sentinelCommand []string) bool {
	if err := newCommand(sentinelCommand[0], sentinelCommand[1:]...).Run(); err != nil {
		if _, isExitErr := err.(*exec.ExitError); isExitErr {
			// We assume a non-zero exit code means 'reboot not required', but of course
			// the user could have misconfigured the sentinel command or something else
			// went wrong during its execution. In that case, not entering a reboot loop
			// is the right thing to do, and we are logging stdout/stderr of the command
			// so it should be obvious what is wrong.
			return false
		}
		// Something was grossly misconfigured, such as the command path being wrong.
		log.Fatalf("Error invoking sentinel command: %v", err)
	}
	return true
}
// RebootBlocker interface should be implemented by types
// to know if their instantiations should block a reboot
type RebootBlocker interface {
	// isBlocked reports whether a reboot should currently be held back.
	isBlocked() bool
}
// PrometheusBlockingChecker contains info for connecting
// to prometheus, and can give info about whether a reboot should be blocked
type PrometheusBlockingChecker struct {
	// prometheusClient to make prometheus-go-client and api config available
	// into the PrometheusBlockingChecker struct
	promClient *alerts.PromClient
	// regexp used to get alerts (passed to ActiveAlerts as a name filter)
	filter *regexp.Regexp
	// bool to indicate if only firing alerts should be considered
	firingOnly bool
}
// KubernetesBlockingChecker contains info for connecting
// to k8s, and can give info about whether a reboot should be blocked
type KubernetesBlockingChecker struct {
	// client used to contact kubernetes API
	client *kubernetes.Clientset
	// name of the node whose pods are inspected (matched via spec.nodeName)
	nodename string
	// list of label selectors used to filter pods (podSelector); any matching
	// pod on the node blocks the reboot
	filter []string
}
// isBlocked blocks the reboot while any matching Prometheus alert is active,
// and also when Prometheus itself cannot be queried (fail closed).
func (pb PrometheusBlockingChecker) isBlocked() bool {
	alertNames, err := pb.promClient.ActiveAlerts(pb.filter, pb.firingOnly)
	if err != nil {
		log.Warnf("Reboot blocked: prometheus query error: %v", err)
		return true
	}
	count := len(alertNames)
	if count == 0 {
		return false
	}
	// Keep the log line short: show at most ten alert names.
	if count > 10 {
		alertNames = append(alertNames[:10], "...")
	}
	log.Warnf("Reboot blocked: %d active alerts: %v", count, alertNames)
	return true
}
// isBlocked blocks the reboot while any still-running pod on this node matches
// one of the configured label selectors, or when the pod query itself fails
// (fail closed). Succeeded/Failed/Unknown pods are excluded.
func (kb KubernetesBlockingChecker) isBlocked() bool {
	fieldSelector := fmt.Sprintf("spec.nodeName=%s,status.phase!=Succeeded,status.phase!=Failed,status.phase!=Unknown", kb.nodename)
	for _, labelSelector := range kb.filter {
		podList, err := kb.client.CoreV1().Pods("").List(context.TODO(), metav1.ListOptions{
			LabelSelector: labelSelector,
			FieldSelector: fieldSelector,
			Limit:         10})
		if err != nil {
			log.Warnf("Reboot blocked: pod query error: %v", err)
			return true
		}
		if len(podList.Items) == 0 {
			continue
		}
		podNames := make([]string, 0, len(podList.Items))
		for _, pod := range podList.Items {
			podNames = append(podNames, pod.Name)
		}
		// A non-empty continue token means the (limited) listing was truncated.
		if len(podList.Continue) > 0 {
			podNames = append(podNames, "...")
		}
		log.Warnf("Reboot blocked: matching pods: %v", podNames)
		return true
	}
	return false
}
// rebootBlocked reports whether any of the given blockers currently vetoes
// a reboot; an empty blocker list never blocks.
func rebootBlocked(blockers ...RebootBlocker) bool {
	for _, b := range blockers {
		if b.isBlocked() {
			return true
		}
	}
	return false
}
// holding reports whether this node currently holds the daemonset reboot
// lock; a failure to test the lock is fatal.
func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
	held, err := lock.Test(metadata)
	if err != nil {
		log.Fatalf("Error testing lock: %v", err)
	}
	if held {
		log.Infof("Holding lock")
	}
	return held
}
// acquire attempts to take the reboot lock with the given TTL, returning true
// on success and false when another node already holds it. Lock API errors
// are fatal.
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration) bool {
	holding, holder, err := lock.Acquire(metadata, TTL)
	if err != nil {
		log.Fatalf("Error acquiring lock: %v", err)
		return false
	}
	if !holding {
		log.Warnf("Lock already held: %v", holder)
		return false
	}
	log.Infof("Acquired reboot lock")
	return true
}
// throttle sleeps for releaseDelay (when positive) before the lock release,
// logging the delay.
func throttle(releaseDelay time.Duration) {
	if releaseDelay <= 0 {
		return
	}
	log.Infof("Delaying lock release by %v", releaseDelay)
	time.Sleep(releaseDelay)
}
// release relinquishes the daemonset reboot lock; a failed release is fatal
// (a stale lock could keep other nodes from acquiring it).
func release(lock *daemonsetlock.DaemonSetLock) {
	log.Infof("Releasing lock")
	if err := lock.Release(); err != nil {
		log.Fatalf("Error releasing lock: %v", err)
	}
}
// drain cordons the node and evicts its pods ahead of a reboot, sending an
// optional notification first. With the forceReboot flag set, cordon/drain
// failures are logged (not fatal) and the reboot proceeds anyway.
func drain(client *kubernetes.Clientset, node *v1.Node) {
	nodename := node.GetName()
	log.Infof("Draining node %s", nodename)
	if notifyURL != "" {
		if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateDrain, nodename)); err != nil {
			log.Warnf("Error notifying: %v", err)
		}
	}
	helper := &kubectldrain.Helper{
		Client:                          client,
		Ctx:                             context.Background(),
		Force:                           true,
		DeleteEmptyDirData:              true,
		IgnoreAllDaemonSets:             true,
		GracePeriodSeconds:              drainGracePeriod,
		SkipWaitForDeleteTimeoutSeconds: skipWaitForDeleteTimeoutSeconds,
		Timeout:                         drainTimeout,
		Out:                             os.Stdout,
		ErrOut:                          os.Stderr,
	}
	if err := kubectldrain.RunCordonOrUncordon(helper, node, true); err != nil {
		if !forceReboot {
			log.Fatalf("Error cordonning %s: %v", nodename, err)
		}
		log.Errorf("Error cordonning %s: %v, continuing with reboot anyway", nodename, err)
		return
	}
	if err := kubectldrain.RunNodeDrain(helper, nodename); err != nil {
		if !forceReboot {
			log.Fatalf("Error draining %s: %v", nodename, err)
		}
		log.Errorf("Error draining %s: %v, continuing with reboot anyway", nodename, err)
		return
	}
}
// uncordon marks the node schedulable again (typically after a completed
// reboot); failure to do so is fatal.
func uncordon(client *kubernetes.Clientset, node *v1.Node) {
	nodename := node.GetName()
	log.Infof("Uncordoning node %s", nodename)
	helper := &kubectldrain.Helper{
		Ctx:    context.Background(),
		Client: client,
		Out:    os.Stdout,
		ErrOut: os.Stderr,
	}
	if err := kubectldrain.RunCordonOrUncordon(helper, node, false); err != nil {
		log.Fatalf("Error uncordonning %s: %v", nodename, err)
	}
}
// invokeReboot sends the optional reboot notification and then executes the
// reboot command; a failed invocation is fatal.
func invokeReboot(nodeID string, rebootCommand []string) {
	log.Infof("Running command: %s for node: %s", rebootCommand, nodeID)
	if notifyURL != "" {
		if err := shoutrrr.Send(notifyURL, fmt.Sprintf(messageTemplateReboot, nodeID)); err != nil {
			log.Warnf("Error notifying: %v", err)
		}
	}
	reboot := newCommand(rebootCommand[0], rebootCommand[1:]...)
	if err := reboot.Run(); err != nil {
		log.Fatalf("Error invoking reboot command: %v", err)
	}
}
// maintainRebootRequiredMetric keeps the reboot-required gauge for this node
// in sync with the sentinel command, re-checking once per minute. Never returns.
func maintainRebootRequiredMetric(nodeID string, sentinelCommand []string) {
	for {
		var value float64
		if rebootRequired(sentinelCommand) {
			value = 1
		}
		rebootRequiredGauge.WithLabelValues(nodeID).Set(value)
		time.Sleep(time.Minute)
	}
}
// nodeMeta is used to remember information across reboots
type nodeMeta struct {
	// Unschedulable records whether the node was already cordoned before kured
	// drained it, so the post-reboot run skips uncordoning in that case
	// (see rebootAsRequired).
	Unschedulable bool `json:"unschedulable"`
}
// addNodeAnnotations merges the given annotations into the node object and
// patches it via the Kubernetes API (strategic merge patch). Any API or
// marshalling failure is fatal.
func addNodeAnnotations(client *kubernetes.Clientset, nodeID string, annotations map[string]string) {
	node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil {
		log.Fatalf("Error retrieving node object via k8s API: %s", err)
	}
	// A node that has never been annotated has a nil Annotations map;
	// assigning into it below would panic.
	if node.Annotations == nil {
		node.Annotations = make(map[string]string)
	}
	for k, v := range annotations {
		node.Annotations[k] = v
		log.Infof("Adding node %s annotation: %s=%s", node.GetName(), k, v)
	}
	bytes, err := json.Marshal(node)
	if err != nil {
		log.Fatalf("Error marshalling node object into JSON: %v", err)
	}
	_, err = client.CoreV1().Nodes().Patch(context.TODO(), node.GetName(), types.StrategicMergePatchType, bytes, metav1.PatchOptions{})
	if err != nil {
		var annotationsErr string
		for k, v := range annotations {
			annotationsErr += fmt.Sprintf("%s=%s ", k, v)
		}
		log.Fatalf("Error adding node annotations %s via k8s API: %v", annotationsErr, err)
	}
}
// deleteNodeAnnotation removes a single annotation from the node via a JSON
// patch; failure is fatal.
func deleteNodeAnnotation(client *kubernetes.Clientset, nodeID, key string) {
	log.Infof("Deleting node %s annotation %s", nodeID, key)
	// JSON Patch paths are JSON Pointers (RFC 6901), so any "/" inside the
	// annotation key must be escaped as "~1":
	// https://tools.ietf.org/html/rfc6901#section-3
	escapedKey := strings.ReplaceAll(key, "/", "~1")
	patch := []byte(fmt.Sprintf(`[{"op":"remove","path":"/metadata/annotations/%s"}]`, escapedKey))
	if _, err := client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patch, metav1.PatchOptions{}); err != nil {
		log.Fatalf("Error deleting node annotation %s via k8s API: %v", key, err)
	}
}
// rebootAsRequired is the long-running reconciliation loop for this node.
// On startup it recovers from a reboot this process previously initiated
// (uncordon if appropriate, clear the in-progress annotation, release the
// lock), then periodically runs the sentinel command and — when a reboot is
// required, inside the time window, and not vetoed by any blocker — acquires
// the cluster-wide lock, drains the node and invokes the reboot command.
// It never returns.
func rebootAsRequired(nodeID string, rebootCommand []string, sentinelCommand []string, window *timewindow.TimeWindow, TTL time.Duration, releaseDelay time.Duration) {
	config, err := rest.InClusterConfig()
	if err != nil {
		log.Fatal(err)
	}
	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		log.Fatal(err)
	}
	lock := daemonsetlock.New(client, nodeID, dsNamespace, dsName, lockAnnotation)
	nodeMeta := nodeMeta{}
	// Still holding the lock means this process was restarted by a reboot we
	// triggered in a prior run: finish the post-reboot bookkeeping.
	if holding(lock, &nodeMeta) {
		node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
		if err != nil {
			log.Fatalf("Error retrieving node object via k8s API: %v", err)
		}
		// Only uncordon if the node was schedulable before we drained it.
		if !nodeMeta.Unschedulable {
			uncordon(client, node)
		}
		// If we're holding the lock we know we've tried, in a prior run, to reboot
		// So (1) we want to confirm that the reboot succeeded practically ( !rebootRequired() )
		// And (2) check if we previously annotated the node that it was in the process of being rebooted,
		// And finally (3) if it has that annotation, to delete it.
		// This indicates to other node tools running on the cluster that this node may be a candidate for maintenance
		if annotateNodes && !rebootRequired(sentinelCommand) {
			if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; ok {
				deleteNodeAnnotation(client, nodeID, KuredRebootInProgressAnnotation)
			}
		}
		throttle(releaseDelay)
		release(lock)
	}
	preferNoScheduleTaint := taints.New(client, nodeID, preferNoScheduleTaintName, v1.TaintEffectPreferNoSchedule)
	// Remove taint immediately during startup to quickly allow scheduling again.
	if !rebootRequired(sentinelCommand) {
		preferNoScheduleTaint.Disable()
	}
	// instantiate prometheus client (used by the alert-based reboot blocker)
	promClient, err := alerts.NewPromClient(papi.Config{Address: prometheusURL})
	if err != nil {
		log.Fatal("Unable to create prometheus client: ", err)
	}
	// Ticker seeded with the current time; presumably delaytick randomizes the
	// phase so nodes don't all check in lock-step — TODO confirm in pkg/delaytick.
	source := rand.NewSource(time.Now().UnixNano())
	tick := delaytick.New(source, period)
	for range tick {
		if !window.Contains(time.Now()) {
			// Remove taint outside the reboot time window to allow for normal operation.
			preferNoScheduleTaint.Disable()
			continue
		}
		if !rebootRequired(sentinelCommand) {
			log.Infof("Reboot not required")
			preferNoScheduleTaint.Disable()
			continue
		}
		log.Infof("Reboot required")
		// Assemble the configured reboot blockers (Prometheus alerts, blocking pods).
		var blockCheckers []RebootBlocker
		if prometheusURL != "" {
			blockCheckers = append(blockCheckers, PrometheusBlockingChecker{promClient: promClient, filter: alertFilter, firingOnly: alertFiringOnly})
		}
		if podSelectors != nil {
			blockCheckers = append(blockCheckers, KubernetesBlockingChecker{client: client, nodename: nodeID, filter: podSelectors})
		}
		if rebootBlocked(blockCheckers...) {
			continue
		}
		node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
		if err != nil {
			log.Fatalf("Error retrieving node object via k8s API: %v", err)
		}
		// Remember prior schedulability (stored in the lock metadata) so the
		// post-reboot run knows whether to uncordon.
		nodeMeta.Unschedulable = node.Spec.Unschedulable
		var timeNowString string
		if annotateNodes {
			if _, ok := node.Annotations[KuredRebootInProgressAnnotation]; !ok {
				timeNowString = time.Now().Format(time.RFC3339)
				// Annotate this node to indicate that "I am going to be rebooted!"
				// so that other node maintenance tools running on the cluster are aware that this node is in the process of a "state transition"
				annotations := map[string]string{KuredRebootInProgressAnnotation: timeNowString}
				// & annotate this node with a timestamp so that other node maintenance tools know how long it's been since this node has been marked for reboot
				annotations[KuredMostRecentRebootNeededAnnotation] = timeNowString
				addNodeAnnotations(client, nodeID, annotations)
			}
		}
		if !acquire(lock, &nodeMeta, TTL) {
			// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
			preferNoScheduleTaint.Enable()
			continue
		}
		drain(client, node)
		if rebootDelay > 0 {
			log.Infof("Delaying reboot for %v", rebootDelay)
			time.Sleep(rebootDelay)
		}
		invokeReboot(nodeID, rebootCommand)
		// Spin until the host reboot terminates this process.
		for {
			log.Infof("Waiting for reboot")
			time.Sleep(time.Minute)
		}
	}
}
// buildSentinelCommand creates the shell command line which will need wrapping
// to escape the container boundaries. An explicitly configured sentinel
// command takes precedence; otherwise a "test -f <sentinel-file>" probe is used.
func buildSentinelCommand(rebootSentinelFile string, rebootSentinelCommand string) []string {
	if rebootSentinelCommand == "" {
		return []string{"test", "-f", rebootSentinelFile}
	}
	parsed, err := shlex.Split(rebootSentinelCommand)
	if err != nil {
		log.Fatalf("Error parsing provided sentinel command: %v", err)
	}
	return parsed
}
// parseRebootCommand creates the shell command line which will need wrapping
// to escape the container boundaries, splitting it shell-style; an unparseable
// command is fatal.
func parseRebootCommand(rebootCommand string) []string {
	parsed, err := shlex.Split(rebootCommand)
	if err != nil {
		log.Fatalf("Error parsing provided reboot command: %v", err)
	}
	return parsed
}
// root is the cobra entry point: validates configuration, logs the effective
// settings, starts the reboot loop and the metric maintainer as goroutines,
// and then serves Prometheus metrics on :8080 (blocking until server error).
func root(cmd *cobra.Command, args []string) {
	if logFormat == "json" {
		log.SetFormatter(&log.JSONFormatter{})
	}
	log.Infof("Kubernetes Reboot Daemon: %s", version)
	// nodeID must arrive via the KURED_NODE_ID environment variable
	// (presumably injected by the daemonset manifest — confirm there).
	if nodeID == "" {
		log.Fatal("KURED_NODE_ID environment variable required")
	}
	window, err := timewindow.New(rebootDays, rebootStart, rebootEnd, timezone)
	if err != nil {
		log.Fatalf("Failed to build time window: %v", err)
	}
	sentinelCommand := buildSentinelCommand(rebootSentinelFile, rebootSentinelCommand)
	restartCommand := parseRebootCommand(rebootCommand)
	// Log the effective configuration so operators can verify it from the pod logs.
	log.Infof("Node ID: %s", nodeID)
	log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
	if lockTTL > 0 {
		log.Infof("Lock TTL set, lock will expire after: %v", lockTTL)
	} else {
		log.Info("Lock TTL not set, lock will remain until being released")
	}
	if lockReleaseDelay > 0 {
		log.Infof("Lock release delay set, lock release will be delayed by: %v", lockReleaseDelay)
	} else {
		log.Info("Lock release delay not set, lock will be released immediately after rebooting")
	}
	log.Infof("PreferNoSchedule taint: %s", preferNoScheduleTaintName)
	log.Infof("Blocking Pod Selectors: %v", podSelectors)
	log.Infof("Reboot schedule: %v", window)
	log.Infof("Reboot check command: %s every %v", sentinelCommand, period)
	log.Infof("Reboot command: %s", restartCommand)
	if annotateNodes {
		log.Infof("Will annotate nodes during kured reboot operations")
	}
	// To run those commands as it was the host, we'll use nsenter
	// Relies on hostPID:true and privileged:true to enter host mount space
	// PID set to 1, until we have a better discovery mechanism.
	hostSentinelCommand := buildHostCommand(1, sentinelCommand)
	hostRestartCommand := buildHostCommand(1, restartCommand)
	go rebootAsRequired(nodeID, hostRestartCommand, hostSentinelCommand, window, lockTTL, lockReleaseDelay)
	go maintainRebootRequiredMetric(nodeID, hostSentinelCommand)
	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":8080", nil))
}

View File

@@ -1,235 +0,0 @@
package main
import (
"reflect"
"testing"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
"github.com/weaveworks/kured/pkg/alerts"
assert "gotest.tools/v3/assert"
papi "github.com/prometheus/client_golang/api"
)
// BlockingChecker is a trivial RebootBlocker stub whose verdict is fixed at
// construction time; used to drive the rebootBlocked table tests.
type BlockingChecker struct {
	// blocking is the canned verdict returned by isBlocked.
	blocking bool
}

// isBlocked returns the canned verdict.
func (fbc BlockingChecker) isBlocked() bool {
	return fbc.blocking
}

var _ RebootBlocker = BlockingChecker{}       // Verify that Type implements Interface.
var _ RebootBlocker = (*BlockingChecker)(nil) // Verify that *Type implements Interface.
// Test_flagCheck verifies that a deprecated --slack-hook-url value is
// rewritten into the shoutrrr "slack://" form of --notify-url.
func Test_flagCheck(t *testing.T) {
	var cmd *cobra.Command
	var args []string
	slackHookURL = "https://hooks.slack.com/services/BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	flagCheck(cmd, args)
	expected := "slack://BLABLABA12345/IAM931A0VERY/COMPLICATED711854TOKEN1SET"
	if notifyURL != expected {
		t.Errorf("Slack URL Parsing is wrong: expecting %s but got %s\n", expected, notifyURL)
	}
}
// Test_rebootBlocked covers the aggregation semantics of rebootBlocked:
// no blockers never block, any single blocking checker suffices, and a
// failing Prometheus query blocks as well (fail closed).
func Test_rebootBlocked(t *testing.T) {
	noCheckers := []RebootBlocker{}
	nonblockingChecker := BlockingChecker{blocking: false}
	blockingChecker := BlockingChecker{blocking: true}
	// Instantiate a prometheusClient with a broken_url
	promClient, err := alerts.NewPromClient(papi.Config{Address: "broken_url"})
	if err != nil {
		log.Fatal("Can't create prometheusClient: ", err)
	}
	brokenPrometheusClient := PrometheusBlockingChecker{promClient: promClient, filter: nil, firingOnly: false}
	testCases := []struct {
		name     string
		blockers []RebootBlocker
		want     bool
	}{
		{name: "Do not block on no blocker defined", blockers: noCheckers, want: false},
		{name: "Ensure a blocker blocks", blockers: []RebootBlocker{blockingChecker}, want: true},
		{name: "Ensure a non-blocker doesn't block", blockers: []RebootBlocker{nonblockingChecker}, want: false},
		{name: "Ensure one blocker is enough to block", blockers: []RebootBlocker{nonblockingChecker, blockingChecker}, want: true},
		{name: "Do block on error contacting prometheus API", blockers: []RebootBlocker{brokenPrometheusClient}, want: true},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if got := rebootBlocked(tc.blockers...); got != tc.want {
				t.Errorf("rebootBlocked() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildHostCommand checks the nsenter wrapping of a host command.
func Test_buildHostCommand(t *testing.T) {
	testCases := []struct {
		name    string
		pid     int
		command []string
		want    []string
	}{
		{
			name:    "Ensure command will run with nsenter",
			pid:     1,
			command: []string{"ls", "-Fal"},
			want:    []string{"/usr/bin/nsenter", "-m/proc/1/ns/mnt", "--", "ls", "-Fal"},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if got := buildHostCommand(tc.pid, tc.command); !reflect.DeepEqual(got, tc.want) {
				t.Errorf("buildHostCommand() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_buildSentinelCommand checks the precedence between the sentinel file
// probe and an explicitly configured sentinel command.
func Test_buildSentinelCommand(t *testing.T) {
	testCases := []struct {
		name            string
		sentinelFile    string
		sentinelCommand string
		want            []string
	}{
		{
			name:            "Ensure a sentinelFile generates a shell 'test' command with the right file",
			sentinelFile:    "/test1",
			sentinelCommand: "",
			want:            []string{"test", "-f", "/test1"},
		},
		{
			name:            "Ensure a sentinelCommand has priority over a sentinelFile if both are provided (because sentinelFile is always provided)",
			sentinelFile:    "/test1",
			sentinelCommand: "/sbin/reboot-required -r",
			want:            []string{"/sbin/reboot-required", "-r"},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if got := buildSentinelCommand(tc.sentinelFile, tc.sentinelCommand); !reflect.DeepEqual(got, tc.want) {
				t.Errorf("buildSentinelCommand() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_parseRebootCommand checks shell-style splitting of the reboot command.
func Test_parseRebootCommand(t *testing.T) {
	testCases := []struct {
		name          string
		rebootCommand string
		want          []string
	}{
		{
			name:          "Ensure a reboot command is properly parsed",
			rebootCommand: "/sbin/systemctl reboot",
			want:          []string{"/sbin/systemctl", "reboot"},
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if got := parseRebootCommand(tc.rebootCommand); !reflect.DeepEqual(got, tc.want) {
				t.Errorf("parseRebootCommand() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_rebootRequired checks the exit-code convention of the sentinel
// command: rc 0 means "reboot required", non-zero means it is not.
func Test_rebootRequired(t *testing.T) {
	testCases := []struct {
		name            string
		sentinelCommand []string
		want            bool
	}{
		{
			name:            "Ensure rc = 0 means reboot required",
			sentinelCommand: []string{"true"},
			want:            true,
		},
		{
			name:            "Ensure rc != 0 means reboot NOT required",
			sentinelCommand: []string{"false"},
			want:            false,
		},
	}
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			if got := rebootRequired(tc.sentinelCommand); got != tc.want {
				t.Errorf("rebootRequired() = %v, want %v", got, tc.want)
			}
		})
	}
}
// Test_rebootRequired_fatals ensures that an unrunnable sentinel command
// triggers log.Fatalf (observed by overriding the logger's ExitFunc), while
// a runnable one does not.
func Test_rebootRequired_fatals(t *testing.T) {
	testCases := []struct {
		param       []string
		expectFatal bool
	}{
		{param: []string{"true"}, expectFatal: false},
		{param: []string{"./babar"}, expectFatal: true},
	}
	// Intercept the logger's exit so Fatalf doesn't kill the test binary;
	// restore the default on the way out.
	defer func() { log.StandardLogger().ExitFunc = nil }()
	var fatal bool
	log.StandardLogger().ExitFunc = func(int) { fatal = true }
	for _, tc := range testCases {
		fatal = false
		rebootRequired(tc.param)
		assert.Equal(t, tc.expectFatal, fatal)
	}
}

View File

@@ -1,31 +0,0 @@
package main
import (
"regexp"
)
type regexpValue struct {
value **regexp.Regexp
}
func (rev *regexpValue) String() string {
if *rev.value == nil {
return ""
}
return (*rev.value).String()
}
func (rev *regexpValue) Set(s string) error {
value, err := regexp.Compile(s)
if err != nil {
return err
}
*rev.value = value
return nil
}
func (rev *regexpValue) Type() string {
return "regexp.Regexp"
}

20
go.mod
View File

@@ -1,20 +0,0 @@
module github.com/weaveworks/kured
go 1.16
require (
github.com/containrrr/shoutrrr v0.5.2
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510
github.com/prometheus/client_golang v1.11.0
github.com/prometheus/common v0.32.1
github.com/sirupsen/logrus v1.8.1
github.com/spf13/cobra v1.3.0
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.10.1
github.com/stretchr/testify v1.7.0
gotest.tools/v3 v3.0.3
k8s.io/api v0.22.4
k8s.io/apimachinery v0.22.4
k8s.io/client-go v0.22.4
k8s.io/kubectl v0.22.4
)

1189
go.sum

File diff suppressed because it is too large Load Diff

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.2 KiB

688
index.yaml Normal file
View File

@@ -0,0 +1,688 @@
apiVersion: v1
entries:
kured:
- apiVersion: v1
appVersion: 1.10.2
created: "2022-08-20T09:11:27.828539459Z"
description: A Helm chart for kured
digest: 0d69719fdec1e5c264cb0b04a849aca7452a016e85ea1caca64d3b57b402c75c
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-3.0.1.tgz
version: 3.0.1
- apiVersion: v1
appVersion: 1.10.1
created: "2022-07-31T13:51:38.928629992Z"
description: A Helm chart for kured
digest: 1b66b4183ca1d3ac66779cc5ff2e1276c2a2325c17875a85a19532e8a5022a10
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-3.0.0.tgz
version: 3.0.0
- apiVersion: v1
appVersion: 1.10.1
created: "2022-07-01T15:44:53.561402098Z"
description: A Helm chart for kured
digest: e2727a5db21ab73d8c57db5a2a3cd09793296408c0c494279f8e2afb5d52cf28
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.17.0.tgz
version: 2.17.0
- apiVersion: v1
appVersion: 1.10.0
created: "2022-06-29T12:50:23.453793995Z"
description: A Helm chart for kured
digest: 1e047a20c633e226d7f77fc4e85b33a5547ce3e2b44525b680cb3d0b89350cbd
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.16.0.tgz
version: 2.16.0
- apiVersion: v1
appVersion: 1.10.0
created: "2022-06-08T17:32:33.101479721Z"
description: A Helm chart for kured
digest: e168f38de6d44da877509c099fcad738e5fcc3b99240ded34221c7bfa7ed5d0a
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.15.0.tgz
version: 2.15.0
- apiVersion: v1
appVersion: 1.9.2
created: "2022-05-25T04:51:50.346850231Z"
description: A Helm chart for kured
digest: 48b267700a0d48ab73e4b6ace31c1c84c393959ed09c31a3ec03e170b6b4aacf
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.14.2.tgz
version: 2.14.2
- apiVersion: v1
appVersion: 1.9.2
created: "2022-05-12T06:57:59.679228473Z"
description: A Helm chart for kured
digest: 345949c01aecbc73312a8dbdd2b7b553ca1a80fc24744a17c92b3c3c990f36a2
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.14.1.tgz
version: 2.14.1
- apiVersion: v1
appVersion: 1.9.2
created: "2022-05-06T19:42:06.720738587Z"
description: A Helm chart for kured
digest: cddb002491f4d32fb418dadc3cb846b12885fa6cb8c32d0968021c11bb3b2733
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.14.0.tgz
version: 2.14.0
- apiVersion: v1
appVersion: 1.9.2
created: "2022-04-02T15:26:54.467410377Z"
description: A Helm chart for kured
digest: 76000a5c32552deab99bae1745fcb195f73f99bfcdb847a96cbcc4f833d4b641
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.13.0.tgz
version: 2.13.0
- apiVersion: v1
appVersion: 1.9.2
created: "2022-03-29T10:07:10.572530457Z"
description: A Helm chart for kured
digest: 7635175d009834464b53f92184066a2e17dffe5a9c9f7965c32ffaada570326e
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.12.1.tgz
version: 2.12.1
- apiVersion: v1
appVersion: 1.9.1
created: "2022-03-16T10:49:00.591818431Z"
description: A Helm chart for kured
digest: 5ef50be15401f068d6558e23f327333c960cd48b3d09431e56362f5da5aed84c
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.12.0.tgz
version: 2.12.0
- apiVersion: v1
appVersion: 1.9.1
created: "2022-01-12T06:25:36.587168836Z"
description: A Helm chart for kured
digest: 9f2991549faa094ffb8324abeec649d39f9d2dd915e0287e11642411a47a4c26
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.11.2.tgz
version: 2.11.2
- apiVersion: v1
appVersion: 1.9.1
created: "2022-01-06T18:13:28.526458698Z"
description: A Helm chart for kured
digest: cb9884e9968426177a39d78b437d02046bd61b019cb8f3165624560ba24a9907
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.11.1.tgz
version: 2.11.1
- apiVersion: v1
appVersion: 1.9.0
created: "2021-12-17T13:15:05.508704637Z"
description: A Helm chart for kured
digest: 125117291df9b58f7961de17d4d2d8d0b55267e2acc90ad76a2aab1fc9efea96
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.11.0.tgz
version: 2.11.0
- apiVersion: v1
appVersion: 1.8.2
created: "2021-12-06T14:04:27.615912334Z"
description: A Helm chart for kured
digest: 0527e881055b974e869e86d6bda1a5ac1a86f305dbf7f9d7ba8cc082a24f1e32
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.10.2.tgz
version: 2.10.2
- apiVersion: v1
appVersion: 1.8.1
created: "2021-11-27T10:19:18.570439253Z"
description: A Helm chart for kured
digest: 905576b23f8263dcf26da50da6c004cb266a143cca0567f0e5d5586569b8e367
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.10.1.tgz
version: 2.10.1
- apiVersion: v1
appVersion: 1.8.0
created: "2021-10-08T14:02:19.678658295Z"
description: A Helm chart for kured
digest: fff452ed6b03903cb4d5c2b7c865b7e199fc03f7ce6a5e9449115a1746c37f50
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.10.0.tgz
version: 2.10.0
- apiVersion: v1
appVersion: 1.7.0
created: "2021-09-15T16:46:01.039895438Z"
description: A Helm chart for kured
digest: 02fd3ce98b427b411bf425cbdd60567072596f3c1ca44ff3ecb17f4852cd0099
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.9.1.tgz
version: 2.9.1
- apiVersion: v1
appVersion: 1.7.0
created: "2021-08-06T07:39:04.864672062Z"
description: A Helm chart for kured
digest: ee06afc5ba1af0591ac29f1be1425517a855959112d2fa7bc185df905f793d90
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.9.0.tgz
version: 2.9.0
- apiVersion: v1
appVersion: 1.7.0
created: "2021-07-26T11:19:41.659147727Z"
description: A Helm chart for kured
digest: 68154ea2c074c0d331548b9e17f3c3246b283251eb1c5331eabb60dba168c1ed
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.8.0.tgz
version: 2.8.0
- apiVersion: v1
appVersion: 1.7.0
created: "2021-07-16T07:55:57.986831107Z"
description: A Helm chart for kured
digest: 2607eabd4c1fd308e9825f30148ee67bc066660f800c92eeaffb7a9678c5451f
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.7.1.tgz
version: 2.7.1
- apiVersion: v1
appVersion: 1.7.0
created: "2021-06-17T16:14:33.768706163Z"
description: A Helm chart for kured
digest: 85ab0f0d25a26a863bce43100dc3ad9584b6f11319ca6d320093ed33acf3bc6f
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.7.0.tgz
version: 2.7.0
- apiVersion: v1
appVersion: 1.7.0
created: "2021-05-20T11:56:16.670153606Z"
description: A Helm chart for kured
digest: b783d7acd1c19d3b12474a9e74d0bf396b5cb2c2b4984246cb1d1f8bc2c12d68
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.6.0.tgz
version: 2.6.0
- apiVersion: v1
appVersion: 1.7.0
created: "2021-05-19T17:10:18.386329817Z"
description: A Helm chart for kured
digest: d4815d495cc9476dcb6e8204e9a2791fac1f89f17a9136d3167d202be88f7000
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.5.0.tgz
version: 2.5.0
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-14T08:11:51.869402029Z"
description: A Helm chart for kured
digest: 1961e0937676e0bcb8ceb7a4973c61450d059e2d4beea78481a9323cf0b964a6
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.4.3.tgz
version: 2.4.3
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.715078451Z"
description: A Helm chart for kured
digest: 4f26e153bec10f32d120c9abb521262aba97d96fbb80b0e8829b41157b556c4b
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/main/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.4.2.tgz
version: 2.4.2
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.714161094Z"
description: A Helm chart for kured
digest: 4788a1d33a938b6c17a760d6602eb03d68c86eb6be46c50272d9ebeeee3941ae
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.4.1.tgz
version: 2.4.1
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.713214735Z"
description: A Helm chart for kured
digest: 5cb1837122133aa6022b56140fb04583f232b4199ed44fe3746a6240e9d116a2
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.4.0.tgz
version: 2.4.0
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.712224972Z"
description: A Helm chart for kured
digest: d6eed3eac12ea285716e46f8de0fc101692fc1827d6a56780976ef8f0c4d1cce
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.3.2.tgz
version: 2.3.2
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.711557431Z"
description: A Helm chart for kured
digest: 84a75e3967d13440e3a856ecfc5a2a845ce19089a8b8b8da30d3e6344d1f3c3b
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.3.1.tgz
version: 2.3.1
- apiVersion: v1
appVersion: 1.6.1
created: "2021-04-06T13:01:16.710894489Z"
description: A Helm chart for kured
digest: db5f718db2a38cc4c46b5afb41fbc4cb82ac5298388008589bb1fc321d233ca3
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.3.0.tgz
version: 2.3.0
- apiVersion: v1
appVersion: 1.5.1
created: "2021-04-06T13:01:16.709668812Z"
description: A Helm chart for kured
digest: b3a8b13a79efa56a0a94fa91976faa4916fbdab826d9f50ddf63f4d9179a36e4
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.2.4.tgz
version: 2.2.4
- apiVersion: v1
appVersion: 1.5.1
created: "2021-04-06T13:01:16.70899537Z"
description: A Helm chart for kured
digest: 47d881f78ce887567dd3513c5bf0a1c4532c34e05cd9697cc602ce9e461fd10a
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.2.1.tgz
version: 2.2.1
- apiVersion: v1
appVersion: 1.5.0
created: "2021-04-06T13:01:16.708325128Z"
description: A Helm chart for kured
digest: f1d8d83d9992346275d8ed5b4cdb84164cbeaada73b1ff11d802f0d7a38c1621
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
- email: david@davidkarlsen.com
name: davidkarlsen
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.2.0.tgz
version: 2.2.0
- apiVersion: v1
appVersion: 1.4.5
created: "2021-04-06T13:01:16.707676487Z"
description: A Helm chart for kured
digest: 5c63a1bf4aff4394afb703f44d6f20bcb0d9f79af4a89b7a1476148e5f8b0fd5
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: daniel@weave.works
name: dholbach
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.1.1.tgz
version: 2.1.1
- apiVersion: v1
appVersion: 1.4.4
created: "2021-04-06T13:01:16.707031347Z"
description: A Helm chart for kured
digest: 8ae0a2884d185ac6311d9333ba7b29c8815a2b433892bc073922c9ad5c0771bc
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: daniel@weave.works
name: dholbach
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.0.3.tgz
version: 2.0.3
- apiVersion: v1
appVersion: 1.4.3
created: "2021-04-06T13:01:16.706360205Z"
description: A Helm chart for kured
digest: 6b8057d3f8f5774ae75a57e38e63fe73ac7230871082177bd219543e03bc3981
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: daniel@weave.works
name: dholbach
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.0.1.tgz
version: 2.0.1
- apiVersion: v1
appVersion: 1.4.2
created: "2021-04-06T13:01:16.705726665Z"
description: A Helm chart for kured
digest: 3a97561f4b5ad420a9e73ca88bcfdc29f25d722195614fc797b770ff053df672
home: https://github.com/weaveworks/kured
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
maintainers:
- email: daniel@weave.works
name: dholbach
- email: christian.kotzbauer@gmail.com
name: ckotzbauer
name: kured
sources:
- https://github.com/weaveworks/kured
urls:
- https://weaveworks.github.io/kured/kured-2.0.0.tgz
version: 2.0.0
generated: "2022-08-20T09:11:27.825605805Z"

BIN
kured-2.0.0.tgz Normal file

Binary file not shown.

BIN
kured-2.0.1.tgz Normal file

Binary file not shown.

BIN
kured-2.0.3.tgz Normal file

Binary file not shown.

BIN
kured-2.1.1.tgz Normal file

Binary file not shown.

BIN
kured-2.10.0.tgz Normal file

Binary file not shown.

BIN
kured-2.10.1.tgz Normal file

Binary file not shown.

BIN
kured-2.10.2.tgz Normal file

Binary file not shown.

BIN
kured-2.11.0.tgz Normal file

Binary file not shown.

BIN
kured-2.11.1.tgz Normal file

Binary file not shown.

BIN
kured-2.11.2.tgz Normal file

Binary file not shown.

BIN
kured-2.12.0.tgz Normal file

Binary file not shown.

BIN
kured-2.12.1.tgz Normal file

Binary file not shown.

BIN
kured-2.13.0.tgz Normal file

Binary file not shown.

BIN
kured-2.14.0.tgz Normal file

Binary file not shown.

BIN
kured-2.14.1.tgz Normal file

Binary file not shown.

BIN
kured-2.14.2.tgz Normal file

Binary file not shown.

BIN
kured-2.15.0.tgz Normal file

Binary file not shown.

BIN
kured-2.16.0.tgz Normal file

Binary file not shown.

BIN
kured-2.17.0.tgz Normal file

Binary file not shown.

BIN
kured-2.2.0.tgz Normal file

Binary file not shown.

BIN
kured-2.2.1.tgz Normal file

Binary file not shown.

BIN
kured-2.2.4.tgz Normal file

Binary file not shown.

BIN
kured-2.3.0.tgz Normal file

Binary file not shown.

BIN
kured-2.3.1.tgz Normal file

Binary file not shown.

BIN
kured-2.3.2.tgz Normal file

Binary file not shown.

BIN
kured-2.4.0.tgz Normal file

Binary file not shown.

BIN
kured-2.4.1.tgz Normal file

Binary file not shown.

BIN
kured-2.4.2.tgz Normal file

Binary file not shown.

BIN
kured-2.4.3.tgz Normal file

Binary file not shown.

BIN
kured-2.5.0.tgz Normal file

Binary file not shown.

BIN
kured-2.6.0.tgz Normal file

Binary file not shown.

BIN
kured-2.7.0.tgz Normal file

Binary file not shown.

BIN
kured-2.7.1.tgz Normal file

Binary file not shown.

BIN
kured-2.8.0.tgz Normal file

Binary file not shown.

BIN
kured-2.9.0.tgz Normal file

Binary file not shown.

BIN
kured-2.9.1.tgz Normal file

Binary file not shown.

BIN
kured-3.0.0.tgz Normal file

Binary file not shown.

BIN
kured-3.0.1.tgz Normal file

Binary file not shown.

View File

@@ -1,78 +0,0 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: kured
namespace: kube-system
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: kured # Must match `--ds-name`
namespace: kube-system # Must match `--ds-namespace`
spec:
selector:
matchLabels:
name: kured
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: kured
spec:
serviceAccountName: kured
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
hostPID: true # Facilitate entering the host mount namespace via init
restartPolicy: Always
containers:
- name: kured
image: docker.io/weaveworks/kured:1.8.2
# If you find yourself here wondering why there is no
# :latest tag on Docker Hub, see the FAQ in the README
imagePullPolicy: IfNotPresent
securityContext:
privileged: true # Give permission to nsenter /proc/1/ns/mnt
env:
# Pass in the name of the node on which this pod is scheduled
# for use with drain/uncordon operations and lock acquisition
- name: KURED_NODE_ID
valueFrom:
fieldRef:
fieldPath: spec.nodeName
command:
- /usr/bin/kured
# - --force-reboot=false
# - --drain-grace-period=-1
# - --skip-wait-for-delete-timeout=0
# - --drain-timeout=0
# - --period=1h
# - --ds-namespace=kube-system
# - --ds-name=kured
# - --lock-annotation=weave.works/kured-node-lock
# - --lock-ttl=0
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local
# - --alert-filter-regexp=^RebootRequired$
# - --alert-firing-only=false
# - --reboot-sentinel=/var/run/reboot-required
# - --prefer-no-schedule-taint=""
# - --reboot-sentinel-command=""
# - --slack-hook-url=https://hooks.slack.com/...
# - --slack-username=prod
# - --slack-channel=alerting
# - --notify-url="" # See also shoutrrr url format
# - --message-template-drain=Draining node %s
# - --message-template-reboot=Rebooting node %s
# - --blocking-pod-selector=runtime=long,cost=expensive
# - --blocking-pod-selector=name=temperamental
# - --blocking-pod-selector=...
# - --reboot-days=sun,mon,tue,wed,thu,fri,sat
# - --reboot-delay=90s
# - --start-time=0:00
# - --end-time=23:59:59
# - --time-zone=UTC
# - --annotate-nodes=false
# - --lock-release-delay=30m
# - --log-format=text

View File

@@ -1,63 +0,0 @@
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: kured
rules:
# Allow kured to read spec.unschedulable
# Allow kubectl to drain/uncordon
#
# NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
# match https://github.com/kubernetes/kubernetes/blob/v1.19.4/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
#
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "patch"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["list","delete","get"]
- apiGroups: ["apps"]
resources: ["daemonsets"]
verbs: ["get"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: kured
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: kured
subjects:
- kind: ServiceAccount
name: kured
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
namespace: kube-system
name: kured
rules:
# Allow kured to lock/unlock itself
- apiGroups: ["apps"]
resources: ["daemonsets"]
resourceNames: ["kured"]
verbs: ["update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
namespace: kube-system
name: kured
subjects:
- kind: ServiceAccount
namespace: kube-system
name: kured
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: kured

View File

@@ -1,69 +0,0 @@
package alerts
import (
"context"
"fmt"
"regexp"
"sort"
"time"
papi "github.com/prometheus/client_golang/api"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
)
// PromClient is a wrapper around the Prometheus Client interface and implements the api.
// This way, the PromClient can be instantiated with the configuration the Client needs,
// and the ability to use the methods the api has, like Query and so on.
type PromClient struct {
    papi papi.Client // low-level Prometheus HTTP client
    api  v1.API      // v1 API bound to that client; used for Query
}
// NewPromClient creates a new client to the Prometheus API.
// It returns an error on any problem.
func NewPromClient(conf papi.Config) (*PromClient, error) {
    base, err := papi.NewClient(conf)
    if err != nil {
        return nil, err
    }
    return &PromClient{papi: base, api: v1.NewAPI(base)}, nil
}
// ActiveAlerts returns the sorted, de-duplicated names of active alerts
// (pending or firing) fetched from Prometheus.
// Filter semantics: an alert whose name MATCHES the supplied regexp is
// excluded from the returned block-list (it will NOT block rebooting);
// a nil filter excludes nothing. When firingOnly is true, alerts that are
// merely pending are dropped as well.
func (p *PromClient) ActiveAlerts(filter *regexp.Regexp, firingOnly bool) ([]string, error) {
    // get all alerts from prometheus at the current instant
    value, _, err := p.api.Query(context.Background(), "ALERTS", time.Now())
    if err != nil {
        return nil, err
    }
    if value.Type() == model.ValVector {
        if vector, ok := value.(model.Vector); ok {
            // Use a set: the same alert name can appear once per label combination.
            activeAlertSet := make(map[string]bool)
            for _, sample := range vector {
                // Only samples carrying an alertname label with a non-zero value count.
                if alertName, isAlert := sample.Metric[model.AlertNameLabel]; isAlert && sample.Value != 0 {
                    // Keep the alert unless the filter matches its name; optionally
                    // require alertstate == "firing".
                    if (filter == nil || !filter.MatchString(string(alertName))) && (!firingOnly || sample.Metric["alertstate"] == "firing") {
                        activeAlertSet[string(alertName)] = true
                    }
                }
            }
            var activeAlerts []string
            for activeAlert := range activeAlertSet {
                activeAlerts = append(activeAlerts, activeAlert)
            }
            // Sort for deterministic output (map iteration order is randomized).
            sort.Strings(activeAlerts)
            return activeAlerts, nil
        }
    }
    // Reached when the query result is not a vector (unexpected for ALERTS).
    return nil, fmt.Errorf("Unexpected value type: %v", value)
}

View File

@@ -1,141 +0,0 @@
package alerts
import (
"log"
"net/http"
"net/http/httptest"
"regexp"
"testing"
"github.com/prometheus/client_golang/api"
"github.com/stretchr/testify/assert"
)
// MockResponse is the canned HTTP response a mock server returns.
type MockResponse struct {
    StatusCode int    // intended HTTP status; NOTE(review): not consumed by NewMockServer as written
    Body       []byte // raw response body bytes
}

// MockServerProperties ties a mock response to a url and a method.
// NOTE(review): URI and HTTPMethod are presumably meant for request matching,
// but NewMockServer as written does not inspect them — confirm intent.
type MockServerProperties struct {
    URI        string
    HTTPMethod string
    Response   MockResponse
}
// NewMockServer sets up a new mock HTTP server with the given properties and
// starts the server. Every incoming request is answered with each property's
// response body in order.
//
// Fix: the StatusCode field of MockResponse was previously ignored; a
// non-zero StatusCode on a response now sets the HTTP status (the zero value
// keeps net/http's default of 200), which is backward compatible with all
// existing callers that leave StatusCode unset.
func NewMockServer(props ...MockServerProperties) *httptest.Server {
    handler := http.HandlerFunc(
        func(w http.ResponseWriter, r *http.Request) {
            for _, prop := range props {
                // Honor an explicit status code before the first body write;
                // zero means "use the default 200".
                if prop.Response.StatusCode != 0 {
                    w.WriteHeader(prop.Response.StatusCode)
                }
                if _, err := w.Write(prop.Response.Body); err != nil {
                    log.Fatal(err)
                }
            }
        })
    return httptest.NewServer(handler)
}
// TestActiveAlerts drives ActiveAlerts against a mock Prometheus endpoint
// using a table of regexp filters and firingOnly settings.
// NOTE(review): the "should return all active alerts by regex" and
// "... by regex filter" cases are byte-identical — confirm one was meant to
// differ. Also, 'addr' is passed as URI but NewMockServer never matches on it.
func TestActiveAlerts(t *testing.T) {
    // Canned ALERTS vector: 4 firing alerts + 1 pending (ScheduledRebootFailing).
    responsebody := `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"GatekeeperViolations","alertstate":"firing","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PodCrashing-dev","alertstate":"firing","container":"deployment","instance":"1.2.3.4:8080","job":"kube-state-metrics","namespace":"dev","pod":"dev-deployment-78dcbmf25v","severity":"critical","team":"dev"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PodRestart-dev","alertstate":"firing","container":"deployment","instance":"1.2.3.4:1234","job":"kube-state-metrics","namespace":"qa","pod":"qa-job-deployment-78dcbmf25v","severity":"warning","team":"qa"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"PrometheusTargetDown","alertstate":"firing","job":"kubernetes-pods","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]},{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`
    addr := "http://localhost:10001"
    for _, tc := range []struct {
        it         string // test case description
        rFilter    string // regexp source; "*" fails to compile -> nil filter
        respBody   string // mock Prometheus response
        aName      string // expected first alert name, if any
        wantN      int    // expected number of returned alerts
        firingOnly bool
    }{
        {
            // "" compiles to a regexp matching everything -> all alerts excluded.
            it:         "should return no active alerts",
            respBody:   responsebody,
            rFilter:    "",
            wantN:      0,
            firingOnly: false,
        },
        {
            // "Pod" excludes the two Pod* alerts -> 3 of 5 remain.
            it:         "should return a subset of all alerts",
            respBody:   responsebody,
            rFilter:    "Pod",
            wantN:      3,
            firingOnly: false,
        },
        {
            // "*" is an invalid regexp; Compile returns nil -> nothing excluded.
            it:         "should return all active alerts by regex",
            respBody:   responsebody,
            rFilter:    "*",
            wantN:      5,
            firingOnly: false,
        },
        {
            // NOTE(review): identical to the previous case — confirm intent.
            it:         "should return all active alerts by regex filter",
            respBody:   responsebody,
            rFilter:    "*",
            wantN:      5,
            firingOnly: false,
        },
        {
            // firingOnly drops the single pending alert -> 4 remain.
            it:         "should return only firing alerts if firingOnly is true",
            respBody:   responsebody,
            rFilter:    "*",
            wantN:      5,
            firingOnly: true,
        },
        {
            it:         "should return ScheduledRebootFailing active alerts",
            respBody:   `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"ScheduledRebootFailing","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
            aName:      "ScheduledRebootFailing",
            rFilter:    "*",
            wantN:      1,
            firingOnly: false,
        },
        {
            // A matching filter excludes the alert from the block-list entirely.
            it:         "should not return an active alert if RebootRequired is firing (regex filter)",
            respBody:   `{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"ALERTS","alertname":"RebootRequired","alertstate":"pending","severity":"warning","team":"platform-infra"},"value":[1622472933.973,"1"]}]}}`,
            rFilter:    "RebootRequired",
            wantN:      0,
            firingOnly: false,
        },
    } {
        // Start mockServer
        mockServer := NewMockServer(MockServerProperties{
            URI:        addr,
            HTTPMethod: http.MethodPost,
            Response: MockResponse{
                Body: []byte(tc.respBody),
            },
        })
        // Close mockServer after all connections are gone
        // NOTE(review): defer inside the loop keeps every server alive until
        // the whole test function returns; consider t.Cleanup instead.
        defer mockServer.Close()
        t.Run(tc.it, func(t *testing.T) {
            // regex filter (error deliberately ignored; "*" yields a nil regexp)
            regex, _ := regexp.Compile(tc.rFilter)
            // instantiate the prometheus client with the mockserver-address
            p, err := NewPromClient(api.Config{Address: mockServer.URL})
            if err != nil {
                log.Fatal(err)
            }
            result, err := p.ActiveAlerts(regex, tc.firingOnly)
            if err != nil {
                log.Fatal(err)
            }
            // assert
            assert.Equal(t, tc.wantN, len(result), "expected amount of alerts %v, got %v", tc.wantN, len(result))
            if tc.aName != "" {
                assert.Equal(t, tc.aName, result[0], "expected active alert %v, got %v", tc.aName, result[0])
            }
        })
    }
}

View File

@@ -1,170 +0,0 @@
package daemonsetlock
import (
"context"
"encoding/json"
"fmt"
"time"
v1 "k8s.io/api/apps/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
)
const (
    k8sAPICallRetrySleep   = 5 * time.Second // How much time to wait in between retrying a k8s API call
    k8sAPICallRetryTimeout = 5 * time.Minute // How long to wait until we determine that the k8s API is definitively unavailable
)

// DaemonSetLock holds all necessary information to do actions
// on the kured ds which holds lock info through annotations.
type DaemonSetLock struct {
    client     *kubernetes.Clientset // API client used for all lock operations
    nodeID     string                // identity of this node; recorded as the lock holder
    namespace  string                // namespace of the kured DaemonSet
    name       string                // name of the kured DaemonSet
    annotation string                // annotation key the lock value is stored under
}

// lockAnnotationValue is the JSON payload stored in the lock annotation.
type lockAnnotationValue struct {
    NodeID   string        `json:"nodeID"`             // node currently holding the lock
    Metadata interface{}   `json:"metadata,omitempty"` // arbitrary caller-supplied data
    Created  time.Time     `json:"created"`            // when the lock was acquired (UTC)
    TTL      time.Duration `json:"TTL"`                // lifetime; <= 0 means the lock never expires (see ttlExpired)
}
// New creates a daemonsetLock object containing the necessary data for
// follow up k8s requests.
func New(client *kubernetes.Clientset, nodeID, namespace, name, annotation string) *DaemonSetLock {
    return &DaemonSetLock{
        client:     client,
        nodeID:     nodeID,
        namespace:  namespace,
        name:       name,
        annotation: annotation,
    }
}
// Acquire attempts to annotate the kured daemonset with lock info from instantiated DaemonSetLock using client-go.
// It returns (holding, holderID, error): if another node holds an unexpired
// lock, holding is false and holderID names that node. On an update conflict
// with a concurrent writer it retries until it succeeds or hits an
// unrecoverable error.
func (dsl *DaemonSetLock) Acquire(metadata interface{}, TTL time.Duration) (bool, string, error) {
    for {
        ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
        if err != nil {
            return false, "", fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
        }
        valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
        if exists {
            value := lockAnnotationValue{}
            if err := json.Unmarshal([]byte(valueString), &value); err != nil {
                return false, "", err
            }
            // A live (unexpired) lock wins: just report whether we are the holder.
            if !ttlExpired(value.Created, value.TTL) {
                return value.NodeID == dsl.nodeID, value.NodeID, nil
            }
        }
        if ds.ObjectMeta.Annotations == nil {
            ds.ObjectMeta.Annotations = make(map[string]string)
        }
        // No lock, or an expired one: write ourselves in as the new holder.
        value := lockAnnotationValue{NodeID: dsl.nodeID, Metadata: metadata, Created: time.Now().UTC(), TTL: TTL}
        valueBytes, err := json.Marshal(&value)
        if err != nil {
            return false, "", err
        }
        ds.ObjectMeta.Annotations[dsl.annotation] = string(valueBytes)
        _, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
        if err != nil {
            if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
                // Something else updated the resource between us reading and writing - try again soon
                time.Sleep(time.Second)
                continue
            } else {
                return false, "", err
            }
        }
        return true, dsl.nodeID, nil
    }
}
// Test attempts to check the kured daemonset lock status (existence, expiry) from instantiated DaemonSetLock using client-go.
// It reports whether this node currently holds an unexpired lock. The
// metadata argument seeds the unmarshal target, letting json.Unmarshal fill
// the caller-provided structure with the metadata stored at Acquire time.
func (dsl *DaemonSetLock) Test(metadata interface{}) (bool, error) {
    ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
    if err != nil {
        return false, fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
    }
    valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
    if exists {
        value := lockAnnotationValue{Metadata: metadata}
        if err := json.Unmarshal([]byte(valueString), &value); err != nil {
            return false, err
        }
        if !ttlExpired(value.Created, value.TTL) {
            return value.NodeID == dsl.nodeID, nil
        }
    }
    // No lock annotation, or the lock has expired: we do not hold it.
    return false, nil
}
// Release attempts to remove the lock data from the kured ds annotations using client-go.
// It errors if the lock is missing or held by a different node, and retries
// on update conflicts until the annotation is successfully removed.
func (dsl *DaemonSetLock) Release() error {
    for {
        ds, err := dsl.GetDaemonSet(k8sAPICallRetrySleep, k8sAPICallRetryTimeout)
        if err != nil {
            return fmt.Errorf("timed out trying to get daemonset %s in namespace %s: %w", dsl.name, dsl.namespace, err)
        }
        valueString, exists := ds.ObjectMeta.Annotations[dsl.annotation]
        if exists {
            value := lockAnnotationValue{}
            if err := json.Unmarshal([]byte(valueString), &value); err != nil {
                return err
            }
            // Refuse to release a lock that some other node currently holds.
            if value.NodeID != dsl.nodeID {
                return fmt.Errorf("Not lock holder: %v", value.NodeID)
            }
        } else {
            return fmt.Errorf("Lock not held")
        }
        delete(ds.ObjectMeta.Annotations, dsl.annotation)
        _, err = dsl.client.AppsV1().DaemonSets(dsl.namespace).Update(context.TODO(), ds, metav1.UpdateOptions{})
        if err != nil {
            if se, ok := err.(*errors.StatusError); ok && se.ErrStatus.Reason == metav1.StatusReasonConflict {
                // Something else updated the resource between us reading and writing - try again soon
                time.Sleep(time.Second)
                continue
            } else {
                return err
            }
        }
        return nil
    }
}
// GetDaemonSet returns the named DaemonSet resource from the DaemonSetLock's configured client.
// It polls every 'sleep' until 'timeout' elapses, treating each Get error as
// transient; only the last error is surfaced once the deadline is exceeded.
// NOTE(review): wait.PollImmediate is deprecated in newer client-go releases
// in favor of PollUntilContextTimeout — confirm the pinned client-go version.
func (dsl *DaemonSetLock) GetDaemonSet(sleep, timeout time.Duration) (*v1.DaemonSet, error) {
    var ds *v1.DaemonSet
    var lastError error
    err := wait.PollImmediate(sleep, timeout, func() (bool, error) {
        // Bound each individual Get by the overall timeout as well.
        ctx, cancel := context.WithTimeout(context.Background(), timeout)
        defer cancel()
        if ds, lastError = dsl.client.AppsV1().DaemonSets(dsl.namespace).Get(ctx, dsl.name, metav1.GetOptions{}); lastError != nil {
            return false, nil
        }
        return true, nil
    })
    if err != nil {
        return nil, fmt.Errorf("Timed out trying to get daemonset %s in namespace %s: %v", dsl.name, dsl.namespace, lastError)
    }
    return ds, nil
}
func ttlExpired(created time.Time, ttl time.Duration) bool {
if ttl > 0 && time.Since(created) >= ttl {
return true
}
return false
}

View File

@@ -1,28 +0,0 @@
package daemonsetlock
import (
"testing"
"time"
)
// TestTtlExpired exercises ttlExpired with three representative cases:
// a long-past timestamp with a positive TTL (expired), a fresh timestamp
// (not yet expired), and a zero TTL (lock never expires).
func TestTtlExpired(t *testing.T) {
    d := time.Date(2020, 05, 05, 14, 15, 0, 0, time.UTC)
    second, _ := time.ParseDuration("1s")
    zero, _ := time.ParseDuration("0m")
    tests := []struct {
        created time.Time
        ttl     time.Duration
        result  bool
    }{
        {d, second, true},           // created long ago, 1s TTL: expired
        {time.Now(), second, false}, // just created: not yet expired
        {d, zero, false},            // zero TTL: never expires
    }
    for i, tst := range tests {
        if ttlExpired(tst.created, tst.ttl) != tst.result {
            // On a mismatch the observed value is necessarily !tst.result.
            t.Errorf("Test %d failed, expected %v but got %v", i, tst.result, !tst.result)
        }
    }
}

View File

@@ -1,22 +0,0 @@
package delaytick
import (
"math/rand"
"time"
)
// New ticks regularly after an initial delay randomly distributed between d/2 and d + d/2.
// The jitter spreads otherwise-synchronized starts apart so downstream work
// is staggered across callers sharing the same period.
func New(s rand.Source, d time.Duration) <-chan time.Time {
    c := make(chan time.Time)
    go func() {
        random := rand.New(s)
        // Initial sleep is d/2 + U[0,d), i.e. uniform over [d/2, 3d/2).
        time.Sleep(time.Duration(float64(d)/2 + float64(d)*random.Float64()))
        c <- time.Now()
        // NOTE(review): time.Tick's underlying ticker is never stopped;
        // acceptable for a process-lifetime ticker, leaks if called repeatedly.
        for t := range time.Tick(d) {
            c <- t
        }
    }()
    return c
}

View File

@@ -1,166 +0,0 @@
package taints
import (
"context"
"encoding/json"
"fmt"
log "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
)
// Taint allows to set soft and hard limitations for scheduling and executing pods on nodes.
type Taint struct {
    client    *kubernetes.Clientset // API client used to read and patch the node
    nodeID    string                // node the taint is managed on
    taintName string                // taint key; empty string makes Enable/Disable no-ops
    effect    v1.TaintEffect        // effect applied with the taint
    exists    bool                  // cached presence of the taint on the node
}
// New provides a new taint.
func New(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect) *Taint {
    // Prime the cached 'exists' flag from the node's current taints.
    // NOTE(review): the discarded return values are the taint index and node
    // object; taintExists log.Fatals on API errors, so no error escapes here.
    exists, _, _ := taintExists(client, nodeID, taintName)
    return &Taint{
        client:    client,
        nodeID:    nodeID,
        taintName: taintName,
        effect:    effect,
        exists:    exists,
    }
}
// Enable creates the taint for a node. Creating an existing taint is a noop.
func (t *Taint) Enable() {
	// Nothing to do for an unnamed taint or one already applied.
	if t.taintName == "" || t.exists {
		return
	}
	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, true)
	t.exists = true
}
// Disable removes the taint for a node. Removing a missing taint is a noop.
func (t *Taint) Disable() {
	// Nothing to do for an unnamed taint or one that is not applied.
	if t.taintName == "" || !t.exists {
		return
	}
	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, false)
	t.exists = false
}
// taintExists reports whether the named taint is present on the node and, if
// so, at which index in the node's taint list. The freshly fetched node is
// returned in all cases; failing to read the node is fatal.
func taintExists(client *kubernetes.Clientset, nodeID, taintName string) (bool, int, *v1.Node) {
	node, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil || node == nil {
		log.Fatalf("Error reading node %s: %v", nodeID, err)
	}
	for idx := range node.Spec.Taints {
		if node.Spec.Taints[idx].Key == taintName {
			return true, idx, node
		}
	}
	return false, 0, node
}
// preferNoSchedule reconciles the presence of the given taint on a node via
// an RFC 6902 JSON patch: the taint is added when shouldExists is true and
// removed otherwise. Any marshalling or API error is fatal.
func preferNoSchedule(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect, shouldExists bool) {
	// NOTE(review): this local deliberately shadows the taintExists function
	// from here on — only the boolean result is meant below this line.
	taintExists, offset, updatedNode := taintExists(client, nodeID, taintName)
	// Already in the desired state: nothing to patch.
	if taintExists && shouldExists {
		log.Debugf("Taint %v exists already for node %v.", taintName, nodeID)
		return
	}
	if !taintExists && !shouldExists {
		log.Debugf("Taint %v already missing for node %v.", taintName, nodeID)
		return
	}
	// Minimal JSON-patch operation shape (op/path/value).
	type patchTaints struct {
		Op    string      `json:"op"`
		Path  string      `json:"path"`
		Value interface{} `json:"value,omitempty"`
	}
	taint := v1.Taint{
		Key:    taintName,
		Effect: effect,
	}
	var patches []patchTaints
	if len(updatedNode.Spec.Taints) == 0 {
		// add first taint and ensure to keep current taints:
		// the "test" op makes the patch fail if /spec changed concurrently,
		// then the taints array is created before appending.
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  "/spec",
				Value: updatedNode.Spec,
			},
			{
				Op:    "add",
				Path:  "/spec/taints",
				Value: []v1.Taint{},
			},
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	} else if taintExists {
		// remove taint and ensure to test against race conditions:
		// the "test" op fails the patch if the entry at this index changed.
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  fmt.Sprintf("/spec/taints/%d", offset),
				Value: taint,
			},
			{
				Op:   "remove",
				Path: fmt.Sprintf("/spec/taints/%d", offset),
			},
		}
	} else {
		// add missing taint to existing list (append via the "-" index)
		patches = []patchTaints{
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	}
	patchBytes, err := json.Marshal(patches)
	if err != nil {
		log.Fatalf("Error encoding taint patch for node %s: %v", nodeID, err)
	}
	_, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
	if err != nil {
		log.Fatalf("Error patching taint for node %s: %v", nodeID, err)
	}
	if shouldExists {
		log.Info("Node taint added")
	} else {
		log.Info("Node taint removed")
	}
}

View File

@@ -1,91 +0,0 @@
package timewindow
import (
"fmt"
"strconv"
"strings"
"time"
)
// EveryDay contains all days of the week, and exports it
// for convenience use in the cmd line arguments.
var EveryDay = []string{"su", "mo", "tu", "we", "th", "fr", "sa"}

// dayStrings maps day strings to time.Weekdays.
// Each day accepts three lowercase spellings (two-letter, three-letter,
// full name); lookups are lowercased by parseWeekday before use.
var dayStrings = map[string]time.Weekday{
	"su":        time.Sunday,
	"sun":       time.Sunday,
	"sunday":    time.Sunday,
	"mo":        time.Monday,
	"mon":       time.Monday,
	"monday":    time.Monday,
	"tu":        time.Tuesday,
	"tue":       time.Tuesday,
	"tuesday":   time.Tuesday,
	"we":        time.Wednesday,
	"wed":       time.Wednesday,
	"wednesday": time.Wednesday,
	"th":        time.Thursday,
	"thu":       time.Thursday,
	"thursday":  time.Thursday,
	"fr":        time.Friday,
	"fri":       time.Friday,
	"friday":    time.Friday,
	"sa":        time.Saturday,
	"sat":       time.Saturday,
	"saturday":  time.Saturday,
}

// weekdays is a bit set of time.Weekday values: bit N set means
// time.Weekday(N) is a member (Sunday == bit 0).
type weekdays uint32
// parseWeekdays creates a set of weekdays from a string slice
func parseWeekdays(days []string) (weekdays, error) {
	var mask uint32
	for _, name := range days {
		// Skip empty entries (e.g. from splitting "a,,b").
		if name == "" {
			continue
		}
		day, err := parseWeekday(name)
		if err != nil {
			return weekdays(0), err
		}
		mask |= 1 << uint32(day)
	}
	return weekdays(mask), nil
}
// Contains returns true if the specified weekday is a member of this set.
func (w weekdays) Contains(day time.Weekday) bool {
	bit := uint32(1) << uint32(day)
	return uint32(w)&bit != 0
}
// String returns a string representation of the set of weekdays: seven
// three-character cells, one per day starting at Sunday, each either the
// day's abbreviation or "---".
func (w weekdays) String() string {
	var sb strings.Builder
	for day := time.Sunday; day <= time.Saturday; day++ {
		if w.Contains(day) {
			sb.WriteString(day.String()[:3])
		} else {
			sb.WriteString("---")
		}
	}
	return sb.String()
}
// parseWeekday converts a single day token to a time.Weekday. It accepts a
// numeric form (0 = Sunday through 6 = Saturday) or any case-insensitive
// spelling present in dayStrings.
func parseWeekday(day string) (time.Weekday, error) {
	if n, err := strconv.Atoi(day); err == nil {
		if n < 0 || n > 6 {
			return time.Sunday, fmt.Errorf("Invalid weekday, number out of range: %s", day)
		}
		return time.Weekday(n), nil
	}
	if wd, found := dayStrings[strings.ToLower(day)]; found {
		return wd, nil
	}
	return time.Sunday, fmt.Errorf("Invalid weekday: %s", day)
}

View File

@@ -1,46 +0,0 @@
package timewindow
import (
"strings"
"testing"
)
// TestParseWeekdays verifies the rendered string of parsed weekday sets,
// covering numeric input, full names, mixed case, duplicates and empties.
func TestParseWeekdays(t *testing.T) {
	cases := []struct {
		input string
		want  string
	}{
		{"0,4", "Sun---------Thu------"},
		{"su,mo,tu", "SunMonTue------------"},
		{"sunday,tu,thu", "Sun---Tue---Thu------"},
		{"THURSDAY", "------------Thu------"},
		{"we,WED,WeDnEsDaY", "---------Wed---------"},
		{"", "---------------------"},
		{",,,", "---------------------"},
	}
	for _, c := range cases {
		got, err := parseWeekdays(strings.Split(c.input, ","))
		switch {
		case err != nil:
			t.Errorf("Received error for input %s: %v", c.input, err)
		case got.String() != c.want:
			t.Errorf("Test %s: Expected %s got %s", c.input, c.want, got.String())
		}
	}
}
// TestParseWeekdaysErrors verifies that out-of-range numbers and unknown
// day names are rejected.
func TestParseWeekdaysErrors(t *testing.T) {
	for _, input := range []string{"15", "-8", "8", "mon,tue,wed,fridayyyy"} {
		if _, err := parseWeekdays(strings.Split(input, ",")); err == nil {
			t.Errorf("Expected to receive error for input %s", input)
		}
	}
}

View File

@@ -1,81 +0,0 @@
package timewindow
import (
"fmt"
"time"
)
// TimeWindow specifies a schedule of days and times.
type TimeWindow struct {
	days      weekdays       // set of weekdays on which the window is active
	location  *time.Location // timezone in which start/end are interpreted
	startTime time.Time      // only the clock portion (hour/min/sec) is used
	endTime   time.Time      // only the clock portion (hour/min/sec) is used
}
// New creates a TimeWindow instance based on string inputs specifying a schedule.
// days accepts the forms understood by parseWeekdays, location is an IANA
// timezone name, and the time strings accept the layouts of parseTime.
func New(days []string, startTime, endTime, location string) (*TimeWindow, error) {
	parsedDays, err := parseWeekdays(days)
	if err != nil {
		return nil, err
	}
	loc, err := time.LoadLocation(location)
	if err != nil {
		return nil, err
	}
	start, err := parseTime(startTime, loc)
	if err != nil {
		return nil, err
	}
	end, err := parseTime(endTime, loc)
	if err != nil {
		return nil, err
	}
	return &TimeWindow{
		days:      parsedDays,
		location:  loc,
		startTime: start,
		endTime:   end,
	}, nil
}
// Contains determines whether the specified time is within this time window.
func (tw *TimeWindow) Contains(t time.Time) bool {
	// Compare in the window's own timezone: both the weekday and the clock
	// boundaries are only meaningful there.
	loctime := t.In(tw.location)
	if !tw.days.Contains(loctime.Weekday()) {
		return false
	}
	// Project the window's clock times onto loctime's calendar day. The end
	// bound carries 1e9-1 nanoseconds so the entire final second is included
	// (e.g. an end of "23:59:59" covers up to 23:59:59.999999999).
	start := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.startTime.Hour(), tw.startTime.Minute(), tw.startTime.Second(), 0, tw.location)
	end := time.Date(loctime.Year(), loctime.Month(), loctime.Day(), tw.endTime.Hour(), tw.endTime.Minute(), tw.endTime.Second(), 1e9-1, tw.location)
	// Time Wrap validation
	// First we check for start and end time, if start is after end time
	// Next we need to validate if we want to wrap to the day before or to the day after
	// For that we check the loctime value to see if it is before end time, we wrap with the day before
	// Otherwise we wrap to the next day.
	// Note: the weekday check above runs BEFORE this wrap adjustment, so for
	// a wrapping window the post-midnight portion belongs to the NEXT
	// calendar day's weekday (behavior pinned by TestTimeWindows).
	if tw.startTime.After(tw.endTime) {
		if loctime.Before(end) {
			start = start.Add(-24 * time.Hour)
		} else {
			end = end.Add(24 * time.Hour)
		}
	}
	// Inclusive on both boundaries.
	return (loctime.After(start) || loctime.Equal(start)) && (loctime.Before(end) || loctime.Equal(end))
}
// String returns a string representation of this time window.
func (tw *TimeWindow) String() string {
	return fmt.Sprintf("%s between %02d:%02d and %02d:%02d %s",
		tw.days.String(),
		tw.startTime.Hour(), tw.startTime.Minute(),
		tw.endTime.Hour(), tw.endTime.Minute(),
		tw.location.String())
}
// parseTime tries to parse a time with several formats.
func parseTime(s string, loc *time.Location) (time.Time, error) {
fmts := []string{"15:04", "15:04:05", "03:04pm", "15", "03pm", "3pm"}
for _, f := range fmts {
if t, err := time.ParseInLocation(f, s, loc); err == nil {
return t, nil
}
}
return time.Now(), fmt.Errorf("Invalid time format: %s", s)
}

View File

@@ -1,97 +0,0 @@
package timewindow
import (
"strings"
"testing"
"time"
)
// TestTimeWindows table-tests TimeWindow.Contains across plain daytime
// windows, whole-day windows, and windows that wrap past midnight, in both
// a DST-observing zone and UTC.
func TestTimeWindows(t *testing.T) {
	type testcase struct {
		time   string // parsed with layout "2006/01/02 15:04 MST"
		result bool   // expected Contains() outcome
	}
	tests := []struct {
		days  string
		start string
		end   string
		loc   string
		cases []testcase
	}{
		// Plain daytime window, weekdays only.
		{"mon,tue,wed,thu,fri", "9am", "5pm", "America/Los_Angeles", []testcase{
			{"2019/03/31 10:00 PDT", false},
			{"2019/04/04 00:49 PDT", false},
			{"2019/04/04 12:00 PDT", true},
			{"2019/04/04 11:59 UTC", false},
			{"2019/04/05 08:59 PDT", false},
			{"2019/04/05 9:01 PDT", true},
		}},
		// Mixed day-name spellings; only Mon/Wed/Fri selected.
		{"mon,we,fri", "10:01", "11:30am", "America/Los_Angeles", []testcase{
			{"2019/04/05 10:30 PDT", true},
			{"2019/04/06 10:30 PDT", false},
			{"2019/04/07 10:30 PDT", false},
			{"2019/04/08 10:30 PDT", true},
			{"2019/04/09 10:30 PDT", false},
			{"2019/04/10 10:30 PDT", true},
			{"2019/04/11 10:30 PDT", false},
		}},
		// Whole-day window; the 23:59:59 end is inclusive of the last second.
		{"mo,tu,we,th,fr", "00:00", "23:59:59", "UTC", []testcase{
			{"2019/04/18 00:00 UTC", true},
			{"2019/04/18 23:59 UTC", true},
		}},
		// Window wrapping past midnight (start after end).
		{"mon,tue,wed,thu,fri", "9pm", "5am", "America/Los_Angeles", []testcase{
			{"2019/03/30 04:00 PDT", false},
			{"2019/03/31 10:00 PDT", false},
			{"2019/03/31 22:00 PDT", false},
			{"2019/04/04 00:49 PDT", true},
			{"2019/04/04 12:00 PDT", false},
			{"2019/04/04 22:49 PDT", true},
			{"2019/04/05 00:49 PDT", true},
			{"2019/04/05 08:59 PDT", false},
			{"2019/04/05 9:01 PDT", false},
		}},
		// Two-minute wrap window straddling midnight.
		{"mon,tue,wed,thu,fri", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", true},
			{"2019/04/05 00:00 PDT", true},
			{"2019/04/05 00:01 PDT", true},
			{"2019/04/05 00:02 PDT", false},
		}},
		// Wrap window where the pre-midnight weekday is excluded.
		{"mon,tue,wed,fri", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", false}, // Even that this falls in the between the hours Thursday is not included so should not run
			{"2019/04/05 00:00 PDT", true},
			{"2019/04/05 00:02 PDT", false},
		}},
		// Wrap window where the post-midnight weekday is excluded.
		{"mon,tue,wed,thu", "11:59pm", "00:01am", "America/Los_Angeles", []testcase{
			{"2019/04/04 23:58 PDT", false},
			{"2019/04/04 23:59 PDT", true},
			{"2019/04/05 00:00 PDT", false}, // Even that this falls in the between the hours Friday is not included so should not run
			{"2019/04/05 00:02 PDT", false},
		}},
		// Same wrap window in UTC.
		{"mon,tue,wed,thu,fri", "11:59pm", "00:01am", "UTC", []testcase{
			{"2019/04/04 23:58 UTC", false},
			{"2019/04/04 23:59 UTC", true},
			{"2019/04/05 00:00 UTC", true},
			{"2019/04/05 00:01 UTC", true},
			{"2019/04/05 00:02 UTC", false},
		}},
	}
	for i, tst := range tests {
		tw, err := New(strings.Split(tst.days, ","), tst.start, tst.end, tst.loc)
		if err != nil {
			t.Errorf("Test [%d] failed to create TimeWindow: %v", i, err)
		}
		for _, cas := range tst.cases {
			// Parse each case in the window's own location so the zone
			// abbreviation resolves consistently with the window.
			tm, err := time.ParseInLocation("2006/01/02 15:04 MST", cas.time, tw.location)
			if err != nil {
				t.Errorf("Failed to parse time \"%s\": %v", cas.time, err)
			} else if cas.result != tw.Contains(tm) {
				t.Errorf("(%s) contains (%s) didn't match expected result of %v", tw.String(), cas.time, cas.result)
			}
		}
	}
}

View File

@@ -1,12 +0,0 @@
#!/usr/bin/env bash
# Create the reboot sentinel file on every node of a kind/docker-based
# cluster, so kured detects a pending reboot on each of them.
#
# USE KUBECTL_CMD to pass context and/or namespaces.
#   KUBECTL_CMD   kubectl invocation, may include extra flags
#                 (default: kubectl)
#   SENTINEL_FILE sentinel path to create (default: /var/run/reboot-required)
set -euo pipefail

# Split KUBECTL_CMD into words so extra flags (e.g. "kubectl --context kind")
# survive; quoting the whole string as one word would treat it as a single
# command name and break the documented contract above.
read -r -a kubectl_cmd <<< "${KUBECTL_CMD:-kubectl}"
SENTINEL_FILE="${SENTINEL_FILE:-/var/run/reboot-required}"

echo "Creating reboot sentinel on all nodes"
# 'kubectl get nodes -o name' prints "node/<name>"; strip the prefix to get
# the docker container name of each kind node.
"${kubectl_cmd[@]}" get nodes -o name | while read -r nodename; do
  docker exec "${nodename#node/}" hostname
  docker exec "${nodename#node/}" touch "${SENTINEL_FILE}"
done

View File

@@ -1,85 +0,0 @@
#!/usr/bin/env bash
# Wait until every node of the cluster has been cordoned (drained by kured)
# once AND uncordoned again afterwards — i.e. a full coordinated reboot
# cycle. Polls the API server up to max_attempts times, a minute apart.
#
# Environment:
#   NODECOUNT             expected number of nodes (default: 5)
#   KUBECTL_CMD           kubectl command to use (default: kubectl)
#   DEBUG                 "true" dumps docker/node state for diagnosis
#   CONTAINER_NAME_FORMAT docker name filter for the kind node containers
NODECOUNT=${NODECOUNT:-5}
KUBECTL_CMD="${KUBECTL_CMD:-kubectl}"
DEBUG="${DEBUG:-false}"
CONTAINER_NAME_FORMAT=${CONTAINER_NAME_FORMAT:-"chart-testing-*"}
tmp_dir=$(mktemp -d -t kured-XXXX)
# EXIT handler: removes the temp workdir and, in DEBUG mode, dumps docker
# state plus per-container logs (useful to see whether nodes rebooted).
function gather_logs_and_cleanup {
  if [[ -f "$tmp_dir"/node_output ]]; then
    rm "$tmp_dir"/node_output
  fi
  rmdir "$tmp_dir"
  # The next commands are useful regardless of success or failures.
  if [[ "$DEBUG" == "true" ]]; then
    echo "############################################################"
    # This is useful to see if containers have crashed.
    echo "docker ps -a:"
    docker ps -a
    echo "docker journal logs"
    journalctl -u docker --no-pager
    # This is useful to see if the nodes have _properly_ rebooted.
    # It should show the reboot/two container starts per node.
    for name in $(docker ps -a -f "name=${CONTAINER_NAME_FORMAT}" -q); do
      echo "############################################################"
      echo "docker logs for container $name:"
      docker logs "$name"
    done
  fi
}
trap gather_logs_and_cleanup EXIT
# Per-node-name state: seen unschedulable at least once / back again after.
declare -A was_unschedulable
declare -A has_recovered
max_attempts="60"
sleep_time=60
attempt_num=1
# Polling tolerates transient kubectl failures; do not abort on errors.
set +o errexit
echo "There are $NODECOUNT nodes in the cluster"
until [ ${#was_unschedulable[@]} == "$NODECOUNT" ] && [ ${#has_recovered[@]} == "$NODECOUNT" ]
do
  echo "${#was_unschedulable[@]} nodes were removed from pool once:" "${!was_unschedulable[@]}"
  echo "${#has_recovered[@]} nodes removed from the pool are now back:" "${!has_recovered[@]}"
  # One line per node: "<name> <true|<none>>" from spec.unschedulable.
  "$KUBECTL_CMD" get nodes -o custom-columns=NAME:.metadata.name,SCHEDULABLE:.spec.unschedulable --no-headers > "$tmp_dir"/node_output
  if [[ "$DEBUG" == "true" ]]; then
    # This is useful to see if a node gets stuck after drain, and doesn't
    # come back up.
    echo "Result of command $KUBECTL_CMD get nodes ... showing unschedulable nodes:"
    cat "$tmp_dir"/node_output
  fi
  while read -r node; do
    # Node reporting "true" (cordoned) for the first time -> record it.
    unschedulable=$(echo "$node" | grep true | cut -f 1 -d ' ')
    if [ -n "$unschedulable" ] && [ -z ${was_unschedulable["$unschedulable"]+x} ] ; then
      echo "$unschedulable is now unschedulable!"
      was_unschedulable["$unschedulable"]=1
    fi
    # Node reporting "<none>" (schedulable) after having been cordoned.
    schedulable=$(echo "$node" | grep '<none>' | cut -f 1 -d ' ')
    if [ -n "$schedulable" ] && [ ${was_unschedulable["$schedulable"]+x} ] && [ -z ${has_recovered["$schedulable"]+x} ]; then
      echo "$schedulable has recovered!"
      has_recovered["$schedulable"]=1
    fi
  done < "$tmp_dir"/node_output
  if [[ "${#has_recovered[@]}" == "$NODECOUNT" ]]; then
    echo "All nodes recovered."
    break
  else
    if (( attempt_num == max_attempts ))
    then
      echo "Attempt $attempt_num failed and there are no more attempts left!"
      exit 1
    else
      echo "Attempt $attempt_num failed! Trying again in $sleep_time seconds..."
      sleep "$sleep_time"
    fi
  fi
  (( attempt_num++ ))
done
echo "Test successful"

View File

@@ -1,19 +0,0 @@
#!/usr/bin/env bash
# Check that the kured_reboot_required Prometheus gauge equals the expected
# value (first argument: 0 or 1) by scraping the metrics endpoint from
# inside the control-plane container.
#
# Environment:
#   HOST     metrics host as seen from the container (default: localhost)
#   PORT     metrics port (default: 30000)
#   NODENAME docker container to exec into (default: chart-testing-control-plane)

expected="$1"
case "$expected" in
  0|1) ;;
  *)
    echo "You should give an argument to this script, the gauge value (0 or 1)"
    exit 1
    ;;
esac

HOST="${HOST:-localhost}"
PORT="${PORT:-30000}"
NODENAME="${NODENAME-chart-testing-control-plane}"

# Scrape the endpoint and keep only the gauge's value column.
metrics_url="http://$HOST:$PORT/metrics"
reboot_required=$(docker exec "$NODENAME" curl "$metrics_url" | awk '/^kured_reboot_required/{print $2}')

if [[ "$reboot_required" == "$expected" ]]; then
  echo "Test success"
else
  echo "Test failed"
  exit 1
fi