mirror of
https://github.com/kubereboot/kured.git
synced 2026-02-15 01:39:50 +00:00
Compare commits
44 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2762837dac | ||
|
|
d507361a45 | ||
|
|
1d1f22c93b | ||
|
|
644aca3fa0 | ||
|
|
59b078f38d | ||
|
|
36cef41c20 | ||
|
|
eb617adc2b | ||
|
|
2afd04ddd3 | ||
|
|
3eb7f17b3a | ||
|
|
a87e7b28d2 | ||
|
|
c6f341ec16 | ||
|
|
14bda85a03 | ||
|
|
d6f7609081 | ||
|
|
2b4830a0f6 | ||
|
|
688ba8ef72 | ||
|
|
aebe9463fd | ||
|
|
900f58ae2d | ||
|
|
b177a9f6eb | ||
|
|
8fafad18bb | ||
|
|
cc806b886c | ||
|
|
b78ba8e73b | ||
|
|
1ddd45d90b | ||
|
|
a815867584 | ||
|
|
d271165496 | ||
|
|
2bb7b7937e | ||
|
|
2afa0a9da7 | ||
|
|
ab0b5d137c | ||
|
|
d3ea5639f4 | ||
|
|
02bb6d650e | ||
|
|
c473caafc8 | ||
|
|
a574a67c61 | ||
|
|
4420dc82d6 | ||
|
|
cdbcf8d4a0 | ||
|
|
3508110f52 | ||
|
|
ec75533394 | ||
|
|
edeefcd2b9 | ||
|
|
cf03bc587c | ||
|
|
59a6700add | ||
|
|
64ebf53264 | ||
|
|
615e3d4840 | ||
|
|
1257d97ead | ||
|
|
1fc2522c0f | ||
|
|
7fb16fed9b | ||
|
|
72a31030db |
@@ -24,3 +24,5 @@ workflows:
|
||||
filters:
|
||||
tags:
|
||||
only: /.*/
|
||||
branches:
|
||||
ignore: gh-pages
|
||||
|
||||
6
.github/ct.yaml
vendored
Normal file
6
.github/ct.yaml
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# See https://github.com/helm/chart-testing#configuration
|
||||
remote: origin
|
||||
chart-dirs:
|
||||
- charts
|
||||
chart-repos: []
|
||||
helm-extra-args: --timeout 600s
|
||||
32
.github/workflows/chart-lint.yml
vendored
Normal file
32
.github/workflows/chart-lint.yml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
name: lint-chart
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- "charts/**"
|
||||
|
||||
jobs:
|
||||
lint-test:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: "0"
|
||||
|
||||
- name: Run chart-testing (lint)
|
||||
id: lint
|
||||
uses: helm/chart-testing-action@v1.0.0-rc.2
|
||||
with:
|
||||
command: lint
|
||||
config: .github/ct.yaml
|
||||
|
||||
- name: Create kind cluster
|
||||
uses: helm/kind-action@v1.0.0-rc.1
|
||||
if: steps.lint.outputs.changed == 'true'
|
||||
|
||||
- name: Run chart-testing (install)
|
||||
uses: helm/chart-testing-action@v1.0.0-rc.2
|
||||
with:
|
||||
command: install
|
||||
config: .github/ct.yaml
|
||||
16
.github/workflows/chart-release.yml
vendored
Normal file
16
.github/workflows/chart-release.yml
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
name: release-chart
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Publish Helm chart
|
||||
uses: stefanprodan/helm-gh-pages@master
|
||||
with:
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
charts_dir: charts
|
||||
@@ -91,9 +91,6 @@ cat kured-ds.yaml >> "$MANIFEST"
|
||||
sed -i "s#docker.io/weaveworks/kured#docker.io/weaveworks/kured:$VERSION#g" "$MANIFEST"
|
||||
```
|
||||
|
||||
The last thing you need to do is update the `image:` to point to the release
|
||||
tag, e.g. `docker.io/weaveworks/kured:1.3.0`.
|
||||
|
||||
Now you can head to the Github UI, use the version number as tag and upload the
|
||||
`kured-<release>-dockerhub.yaml` file.
|
||||
|
||||
|
||||
84
README.md
84
README.md
@@ -1,21 +1,24 @@
|
||||
|
||||
# kured - Kubernetes Reboot Daemon
|
||||
|
||||
<img src="https://github.com/weaveworks/kured/raw/master/img/logo.png" align="right"/>
|
||||
|
||||
* [Introduction](#introduction)
|
||||
* [Kubernetes & OS Compatibility](#kubernetes-&-os-compatibility)
|
||||
* [Installation](#installation)
|
||||
* [Configuration](#configuration)
|
||||
* [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period)
|
||||
* [Setting a schedule](#setting-a-schedule)
|
||||
* [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
|
||||
* [Blocking Reboots via Pods](#blocking-reboots-via-pods)
|
||||
* [Prometheus Metrics](#prometheus-metrics)
|
||||
* [Slack Notifications](#slack-notifications)
|
||||
* [Overriding Lock Configuration](#overriding-lock-configuration)
|
||||
* [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period)
|
||||
* [Setting a schedule](#setting-a-schedule)
|
||||
* [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
|
||||
* [Blocking Reboots via Pods](#blocking-reboots-via-pods)
|
||||
* [Prometheus Metrics](#prometheus-metrics)
|
||||
* [Slack Notifications](#slack-notifications)
|
||||
* [Overriding Lock Configuration](#overriding-lock-configuration)
|
||||
* [Operation](#operation)
|
||||
* [Testing](#testing)
|
||||
* [Disabling Reboots](#disabling-reboots)
|
||||
* [Manual Unlock](#manual-unlock)
|
||||
* [Testing](#testing)
|
||||
* [Disabling Reboots](#disabling-reboots)
|
||||
* [Manual Unlock](#manual-unlock)
|
||||
* [Automatic Unlock](#automatic-unlock)
|
||||
* [Building](#building)
|
||||
* [Frequently Asked/Anticipated Questions](#frequently-askedanticipated-questions)
|
||||
* [Getting Help](#getting-help)
|
||||
@@ -41,12 +44,12 @@ compatibility of one minor version between client and server:
|
||||
|
||||
| kured | kubectl | k8s.io/client-go | k8s.io/apimachinery | expected kubernetes compatibility |
|
||||
|--------|---------|------------------|---------------------|-----------------------------------|
|
||||
| master | 1.17.5 | v0.17.0 | v0.17.0 | 1.16.x, 1.17.x, 1.18.x |
|
||||
| 1.4.0 | 1.17.5 | v0.17.0 | v0.17.0 | 1.16.x, 1.17.x, 1.18.x |
|
||||
| master | 1.17.7 | v0.17.0 | v0.17.0 | 1.16.x, 1.17.x, 1.18.x |
|
||||
| 1.4.4 | 1.17.7 | v0.17.0 | v0.17.0 | 1.16.x, 1.17.x, 1.18.x |
|
||||
| 1.3.0 | 1.15.10 | v12.0.0 | release-1.15 | 1.15.x, 1.16.x, 1.17.x |
|
||||
| 1.2.0 | 1.13.6 | v10.0.0 | release-1.13 | 1.12.x, 1.13.x, 1.14.x |
|
||||
| 1.1.0 | 1.12.1 | v9.0.0 | release-1.12 | 1.11.x, 1.12.x, 1.13.x |
|
||||
| 1.0.0 | 1.7.6 | v4.0.0 | release-1.7 | 1.6.x, 1.7.x, 1.8.x |
|
||||
| 1.0.0 | 1.7.6 | v4.0.0 | release-1.7 | 1.6.x, 1.7.x, 1.8.x |
|
||||
|
||||
See the [release notes](https://github.com/weaveworks/kured/releases)
|
||||
for specific version compatibility information, including which
|
||||
@@ -60,7 +63,7 @@ Versions >=1.1.0 enter the host mount namespace to invoke
|
||||
To obtain a default installation without Prometheus alerting interlock
|
||||
or Slack notifications:
|
||||
|
||||
```
|
||||
```console
|
||||
kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.3.0/kured-1.3.0-dockerhub.yaml
|
||||
```
|
||||
|
||||
@@ -71,8 +74,9 @@ edit it in accordance with the following section before application.
|
||||
|
||||
The following arguments can be passed to kured via the daemonset pod template:
|
||||
|
||||
```
|
||||
```console
|
||||
Flags:
|
||||
--annotation-ttl time force clean annotation after this amount of time (default 0, disabled)
|
||||
--alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts
|
||||
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
|
||||
--ds-name string name of daemonset on which to place lock (default "kured")
|
||||
@@ -108,11 +112,11 @@ reboots to predictable schedules. Use `--reboot-days`, `--start-time`,
|
||||
`--end-time`, and `--time-zone` to set a schedule. For example, business
|
||||
hours on the west coast USA can be specified with:
|
||||
|
||||
```
|
||||
--reboot-days mon,tue,wed,thu,fri
|
||||
--start-time 9am
|
||||
--end-time 5pm
|
||||
--time-zone America/Los_Angeles
|
||||
```console
|
||||
--reboot-days mon,tue,wed,thu,fri
|
||||
--start-time 9am
|
||||
--end-time 5pm
|
||||
--time-zone America/Los_Angeles
|
||||
```
|
||||
|
||||
Times can be formatted in numerous ways, including `5pm`, `5:00pm`, `17:00`,
|
||||
@@ -128,14 +132,14 @@ You may find it desirable to block automatic node reboots when there
|
||||
are active alerts - you can do so by providing the URL of your
|
||||
Prometheus server:
|
||||
|
||||
```
|
||||
```console
|
||||
--prometheus-url=http://prometheus.monitoring.svc.cluster.local
|
||||
```
|
||||
|
||||
By default the presence of *any* active (pending or firing) alerts
|
||||
will block reboots, however you can ignore specific alerts:
|
||||
|
||||
```
|
||||
```console
|
||||
--alert-filter-regexp=^(RebootRequired|AnotherBenignAlert|...)$
|
||||
```
|
||||
|
||||
@@ -147,14 +151,14 @@ filter.
|
||||
You can also block reboots of an _individual node_ when specific pods
|
||||
are scheduled on it:
|
||||
|
||||
```
|
||||
```console
|
||||
--blocking-pod-selector=runtime=long,cost=expensive
|
||||
```
|
||||
|
||||
Since label selector strings use commas to express logical 'and', you can
|
||||
specify this parameter multiple times for 'or':
|
||||
|
||||
```
|
||||
```console
|
||||
--blocking-pod-selector=runtime=long,cost=expensive
|
||||
--blocking-pod-selector=name=temperamental
|
||||
```
|
||||
@@ -172,7 +176,7 @@ running job or a known temperamental pod on a node will stop it rebooting.
|
||||
Each kured pod exposes a single gauge metric (`:8080/metrics`) that
|
||||
indicates the presence of the sentinel file:
|
||||
|
||||
```
|
||||
```console
|
||||
# HELP kured_reboot_required OS requires reboot due to software updates.
|
||||
# TYPE kured_reboot_required gauge
|
||||
kured_reboot_required{node="ip-xxx-xxx-xxx-xxx.ec2.internal"} 0
|
||||
@@ -182,7 +186,7 @@ The purpose of this metric is to power an alert which will summon an
|
||||
operator if the cluster cannot reboot itself automatically for a
|
||||
prolonged period:
|
||||
|
||||
```
|
||||
```console
|
||||
# Alert if a reboot is required for any machines. Acts as a failsafe for the
|
||||
# reboot daemon, which will not reboot nodes if there are pending alerts save
|
||||
# this one.
|
||||
@@ -206,7 +210,7 @@ probe for active alerts before rebooting, be sure to specify
|
||||
If you specify a Slack hook via `--slack-hook-url`, kured will notify
|
||||
you immediately prior to rebooting a node:
|
||||
|
||||
<img src="https://github.com/weaveworks/kured/raw/master/img/slack-notification.png"/>
|
||||

|
||||
|
||||
We recommend setting `--slack-username` to be the name of the
|
||||
environment, e.g. `dev` or `prod`.
|
||||
@@ -232,7 +236,7 @@ if you have, you will have to adjust the commands accordingly.
|
||||
|
||||
You can test your configuration by provoking a reboot on a node:
|
||||
|
||||
```
|
||||
```console
|
||||
sudo touch /var/run/reboot-required
|
||||
```
|
||||
|
||||
@@ -241,7 +245,7 @@ sudo touch /var/run/reboot-required
|
||||
If you need to temporarily stop kured from rebooting any nodes, you
|
||||
can take the lock manually:
|
||||
|
||||
```
|
||||
```console
|
||||
kubectl -n kube-system annotate ds kured weave.works/kured-node-lock='{"nodeID":"manual"}'
|
||||
```
|
||||
|
||||
@@ -253,12 +257,20 @@ In exceptional circumstances, such as a node experiencing a permanent
|
||||
failure whilst rebooting, manual intervention may be required to
|
||||
remove the cluster lock:
|
||||
|
||||
```
|
||||
```console
|
||||
kubectl -n kube-system annotate ds kured weave.works/kured-node-lock-
|
||||
```
|
||||
|
||||
> NB the `-` at the end of the command is important - it instructs
|
||||
> `kubectl` to remove that annotation entirely.
|
||||
|
||||
### Automatic Unlock
|
||||
|
||||
In exceptional circumstances (especially when used with cluster-autoscaler) a node
|
||||
which holds the lock might be killed, in which case the annotation will stay there forever.
|
||||
|
||||
Using `--annotation-ttl=30m` will allow other nodes to take over once the TTL has expired (in this case 30 minutes) and continue the reboot process.
|
||||
|
||||
## Building
|
||||
|
||||
See the [CircleCI config](.circleci/config.yml) for the preferred
|
||||
@@ -269,13 +281,13 @@ repository:
|
||||
|
||||
**Building outside $GOPATH:**
|
||||
|
||||
```
|
||||
```console
|
||||
make
|
||||
```
|
||||
|
||||
**Building inside $GOPATH:**
|
||||
|
||||
```
|
||||
```console
|
||||
GO111MODULE=on make
|
||||
```
|
||||
|
||||
@@ -298,10 +310,10 @@ versioned manifest from the [release page](https://github.com/weaveworks/kured/r
|
||||
|
||||
If you have any questions about, feedback for or problems with `kured`:
|
||||
|
||||
- Invite yourself to the <a href="https://slack.weave.works/" target="_blank">Weave Users Slack</a>.
|
||||
- Ask a question on the [#kured](https://weave-community.slack.com/messages/kured/) slack channel.
|
||||
- [File an issue](https://github.com/weaveworks/kured/issues/new).
|
||||
- Join us in [our monthly meeting](https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#),
|
||||
* Invite yourself to the <a href="https://slack.weave.works/" target="_blank">Weave Users Slack</a>.
|
||||
* Ask a question on the [#kured](https://weave-community.slack.com/messages/kured/) slack channel.
|
||||
* [File an issue](https://github.com/weaveworks/kured/issues/new).
|
||||
* Join us in [our monthly meeting](https://docs.google.com/document/d/1bsHTjHhqaaZ7yJnXF6W8c89UB_yn-OoSZEmDnIP34n8/edit#),
|
||||
every fourth Wednesday of the month at 16:00 UTC.
|
||||
|
||||
Your feedback is always welcome!
|
||||
|
||||
21
charts/kured/.helmignore
Normal file
21
charts/kured/.helmignore
Normal file
@@ -0,0 +1,21 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
14
charts/kured/Chart.yaml
Normal file
14
charts/kured/Chart.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
appVersion: "1.4.4"
|
||||
description: A Helm chart for kured
|
||||
name: kured
|
||||
version: 2.0.3
|
||||
home: https://github.com/weaveworks/kured
|
||||
maintainers:
|
||||
- name: dholbach
|
||||
email: daniel@weave.works
|
||||
- name: ckotzbauer
|
||||
email: christian.kotzbauer@gmail.com
|
||||
sources:
|
||||
- https://github.com/weaveworks/kured
|
||||
icon: https://raw.githubusercontent.com/weaveworks/kured/master/img/logo.png
|
||||
105
charts/kured/README.md
Normal file
105
charts/kured/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# Kured (KUbernetes REboot Daemon)
|
||||
|
||||
## Introduction
|
||||
This chart installs the "Kubernetes Reboot Daemon" using the Helm Package Manager.
|
||||
|
||||
## Prerequisites
|
||||
- Kubernetes 1.9+
|
||||
|
||||
## Installing the Chart
|
||||
To install the chart with the release name `my-release`:
|
||||
```bash
|
||||
$ helm repo add kured https://weaveworks.github.io/kured
|
||||
$ helm install my-release kured/kured
|
||||
```
|
||||
|
||||
## Uninstalling the Chart
|
||||
To uninstall/delete the `my-release` deployment:
|
||||
```bash
|
||||
$ helm delete my-release
|
||||
```
|
||||
|
||||
The command removes all the Kubernetes components associated with the chart and deletes the release.
|
||||
|
||||
|
||||
## Migrate from stable Helm-Chart
|
||||
The following changes have been made compared to the stable chart:
|
||||
- **[BREAKING CHANGE]** The `autolock` feature was removed. Use `configuration.startTime` and `configuration.endTime` instead.
|
||||
- Role inconsistencies have been fixed (allowed verbs for modifying the `DaemonSet`, apiGroup of `PodSecurityPolicy`)
|
||||
- Added support for affinities.
|
||||
- Configuration of cli-flags can be made through a `configuration` object.
|
||||
- Added optional `Service` and `ServiceMonitor` support for metrics endpoint.
|
||||
|
||||
|
||||
## Configuration
|
||||
|
||||
| Config | Description | Default |
|
||||
| ------ | ----------- | ------- |
|
||||
| `image.repository` | Image repository | `weaveworks/kured` |
|
||||
| `image.tag` | Image tag | `1.4.4` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
| `image.pullSecrets` | Image pull secrets | `[]` |
|
||||
| `updateStrategy` | Daemonset update strategy | `OnDelete` |
|
||||
| `podAnnotations` | Annotations to apply to pods (eg to add Prometheus annotations) | `{}` |
|
||||
| `extraArgs` | Extra arguments to pass to `/usr/bin/kured`. See below. | `{}` |
|
||||
| `configuration.annotationTtl` | cli-parameter `--annotation-ttl` | `0` |
|
||||
| `configuration.alertFilterRegexp` | cli-parameter `--alert-filter-regexp` | `""` |
|
||||
| `configuration.blockingPodSelector` | Array of selectors for multiple cli-parameters `--blocking-pod-selector` | `[]` |
|
||||
| `configuration.endTime` | cli-parameter `--end-time` | `""` |
|
||||
| `configuration.lockAnnotation` | cli-parameter `--lock-annotation` | `""` |
|
||||
| `configuration.period` | cli-parameter `--period` | `""` |
|
||||
| `configuration.prometheusUrl` | cli-parameter `--prometheus-url` | `""` |
|
||||
| `configuration.rebootDays` | Array of days for multiple cli-parameters `--reboot-days` | `[]` |
|
||||
| `configuration.rebootSentinel` | cli-parameter `--reboot-sentinel` | `""` |
|
||||
| `configuration.slackChannel` | cli-parameter `--slack-channel` | `""` |
|
||||
| `configuration.slackHookUrl` | cli-parameter `--slack-hook-url` | `""` |
|
||||
| `configuration.slackUsername` | cli-parameter `--slack-username` | `""` |
|
||||
| `configuration.startTime` | cli-parameter `--start-time` | `""` |
|
||||
| `configuration.timeZone` | cli-parameter `--time-zone` | `""` |
|
||||
| `rbac.create` | Create RBAC roles | `true` |
|
||||
| `serviceAccount.create` | Create a service account | `true` |
|
||||
| `serviceAccount.name` | Service account name to create (or use if `serviceAccount.create` is false) | (chart fullname) |
|
||||
| `podSecurityPolicy.create` | Create podSecurityPolicy | `false` |
|
||||
| `resources` | Resources requests and limits. | `{}` |
|
||||
| `metrics.create` | Create a ServiceMonitor for prometheus-operator | `false` |
|
||||
| `metrics.namespace` | The namespace to create the ServiceMonitor in | `""` |
|
||||
| `metrics.labels` | Additional labels for the ServiceMonitor | `{}` |
|
||||
| `metrics.interval` | Interval prometheus should scrape the endpoint | `60s` |
|
||||
| `metrics.scrapeTimeout` | A custom scrapeTimeout for prometheus | `""` |
|
||||
| `service.create` | Create a Service for the metrics endpoint | `false` |
|
||||
| `service.port` | Port of the service to expose | `8080` |
|
||||
| `service.annotations` | Annotations to apply to the service (eg to add Prometheus annotations) | `{}` |
|
||||
| `priorityClassName` | Priority Class to be used by the pods | `""` |
|
||||
| `tolerations` | Tolerations to apply to the daemonset (eg to allow running on master) | `[{"key": "node-role.kubernetes.io/master", "effect": "NoSchedule"}]`|
|
||||
| `affinity` | Affinity for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
| `nodeSelector` | Node Selector for the daemonset (ie, restrict which nodes kured runs on) | `{}` |
|
||||
|
||||
See https://github.com/weaveworks/kured#configuration for values (not contained in the `configuration` object) for `extraArgs`. Note that
|
||||
```yaml
|
||||
extraArgs:
|
||||
foo: 1
|
||||
bar-baz: 2
|
||||
```
|
||||
becomes `/usr/bin/kured ... --foo=1 --bar-baz=2`.
|
||||
|
||||
|
||||
## Prometheus Metrics
|
||||
|
||||
Kured exposes a single prometheus metric indicating whether a reboot is required or not (see [kured docs](https://github.com/weaveworks/kured#prometheus-metrics) for details).
|
||||
|
||||
#### Prometheus-Operator
|
||||
|
||||
```yaml
|
||||
metrics:
|
||||
create: true
|
||||
```
|
||||
|
||||
#### Prometheus Annotations
|
||||
|
||||
```yaml
|
||||
service:
|
||||
annotations:
|
||||
prometheus.io/scrape: "true"
|
||||
prometheus.io/path: "/metrics"
|
||||
prometheus.io/port: "8080"
|
||||
```
|
||||
3
charts/kured/templates/NOTES.txt
Normal file
3
charts/kured/templates/NOTES.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
Kured will check for /var/run/reboot-required, and reboot nodes when needed.
|
||||
|
||||
See https://github.com/weaveworks/kured/ for details.
|
||||
72
charts/kured/templates/_helpers.tpl
Normal file
72
charts/kured/templates/_helpers.tpl
Normal file
@@ -0,0 +1,72 @@
|
||||
{{/* vim: set filetype=mustache: */}}
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "kured.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "kured.fullname" -}}
|
||||
{{- if .Values.fullnameOverride -}}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride -}}
|
||||
{{- if contains $name .Release.Name -}}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- else -}}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "kured.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "kured.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
{{ default (include "kured.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else -}}
|
||||
{{ default "default" .Values.serviceAccount.name }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Return the appropriate apiVersion for podsecuritypolicy.
|
||||
*/}}
|
||||
{{- define "kured.psp.apiVersion" -}}
|
||||
{{- if semverCompare "<1.10-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
{{- print "extensions/v1beta1" -}}
|
||||
{{- else -}}
|
||||
{{- print "policy/v1beta1" -}}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Returns a set of labels applied to each resource.
|
||||
*/}}
|
||||
{{- define "kured.labels" -}}
|
||||
app: {{ template "kured.name" . }}
|
||||
chart: {{ template "kured.chart" . }}
|
||||
release: {{ .Release.Name }}
|
||||
heritage: {{ .Release.Service }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Returns a set of matchLabels applied.
|
||||
*/}}
|
||||
{{- define "kured.matchLabels" -}}
|
||||
app: {{ template "kured.name" . }}
|
||||
release: {{ .Release.Name }}
|
||||
{{- end -}}
|
||||
30
charts/kured/templates/clusterrole.yaml
Normal file
30
charts/kured/templates/clusterrole.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
{{- if .Values.rbac.create -}}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
rules:
|
||||
# Allow kured to read spec.unschedulable
|
||||
# Allow kubectl to drain/uncordon
|
||||
#
|
||||
# NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
|
||||
# match https://github.com/kubernetes/kubernetes/blob/v1.12.1/pkg/kubectl/cmd/drain.go
|
||||
#
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "patch"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["list","delete","get"]
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["daemonsets"]
|
||||
verbs: ["get"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["daemonsets"]
|
||||
verbs: ["get"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods/eviction"]
|
||||
verbs: ["create"]
|
||||
{{- end -}}
|
||||
16
charts/kured/templates/clusterrolebinding.yaml
Normal file
16
charts/kured/templates/clusterrolebinding.yaml
Normal file
@@ -0,0 +1,16 @@
|
||||
{{- if .Values.rbac.create -}}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: {{ template "kured.fullname" . }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: {{ template "kured.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
{{- end -}}
|
||||
118
charts/kured/templates/daemonset.yaml
Normal file
118
charts/kured/templates/daemonset.yaml
Normal file
@@ -0,0 +1,118 @@
|
||||
apiVersion: apps/v1
|
||||
kind: DaemonSet
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
spec:
|
||||
updateStrategy:
|
||||
type: {{ .Values.updateStrategy }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "kured.matchLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 8 }}
|
||||
{{- if .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- range $key, $value := .Values.podAnnotations }}
|
||||
{{ $key }}: {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
spec:
|
||||
serviceAccountName: {{ template "kured.serviceAccountName" . }}
|
||||
hostPID: true
|
||||
restartPolicy: Always
|
||||
{{- with .Values.image.pullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{ toYaml . | indent 8 }}
|
||||
{{- end }}
|
||||
{{- if .Values.priorityClassName }}
|
||||
priorityClassName: {{ .Values.priorityClassName }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
securityContext:
|
||||
privileged: true # Give permission to nsenter /proc/1/ns/mnt
|
||||
resources:
|
||||
{{ toYaml .Values.resources | indent 12 }}
|
||||
command:
|
||||
- /usr/bin/kured
|
||||
args:
|
||||
- --ds-name={{ template "kured.fullname" . }}
|
||||
- --ds-namespace={{ .Release.Namespace }}
|
||||
{{- if .Values.configuration.annotationTtl }}
|
||||
- --annotation-ttl={{ .Values.configuration.annotationTtl }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.alertFilterRegexp }}
|
||||
- --alert-filter-regexp={{ .Values.configuration.alertFilterRegexp | quote }}
|
||||
{{- end }}
|
||||
{{- range .Values.configuration.blockingPodSelector }}
|
||||
- --blocking-pod-selector={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.endTime }}
|
||||
- --end-time={{ .Values.configuration.endTime }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.lockAnnotation }}
|
||||
- --lock-annotation={{ .Values.configuration.lockAnnotation }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.period }}
|
||||
- --period={{ .Values.configuration.period }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.prometheusUrl }}
|
||||
- --prometheus-url={{ .Values.configuration.prometheusUrl }}
|
||||
{{- end }}
|
||||
{{- range .Values.configuration.rebootDays }}
|
||||
- --reboot-days={{ . }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.rebootSentinel }}
|
||||
- --reboot-sentinel={{ .Values.configuration.rebootSentinel }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackChannel }}
|
||||
- --slack-channel={{ .Values.configuration.slackChannel }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackHookUrl }}
|
||||
- --slack-hook-url={{ .Values.configuration.slackHookUrl }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.slackUsername }}
|
||||
- --slack-username={{ .Values.configuration.slackUsername }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.startTime }}
|
||||
- --start-time={{ .Values.configuration.startTime }}
|
||||
{{- end }}
|
||||
{{- if .Values.configuration.timeZone }}
|
||||
- --time-zone={{ .Values.configuration.timeZone }}
|
||||
{{- end }}
|
||||
{{- range $key, $value := .Values.extraArgs }}
|
||||
{{- if $value }}
|
||||
- --{{ $key }}={{ $value }}
|
||||
{{- else }}
|
||||
- --{{ $key }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- containerPort: 8080
|
||||
name: metrics
|
||||
env:
|
||||
# Pass in the name of the node on which this pod is scheduled
|
||||
# for use with drain/uncordon operations and lock acquisition
|
||||
- name: KURED_NODE_ID
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
fieldPath: spec.nodeName
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{ toYaml . | indent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{ toYaml . | indent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{ toYaml . | indent 8 }}
|
||||
{{- end }}
|
||||
21
charts/kured/templates/podsecuritypolicy.yaml
Normal file
21
charts/kured/templates/podsecuritypolicy.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
{{- if .Values.podSecurityPolicy.create}}
|
||||
apiVersion: {{ template "kured.psp.apiVersion" . }}
|
||||
kind: PodSecurityPolicy
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
spec:
|
||||
privileged: true
|
||||
hostPID: true
|
||||
allowedCapabilities: ['*']
|
||||
fsGroup:
|
||||
rule: RunAsAny
|
||||
runAsUser:
|
||||
rule: RunAsAny
|
||||
seLinux:
|
||||
rule: RunAsAny
|
||||
supplementalGroups:
|
||||
rule: RunAsAny
|
||||
volumes: ['*']
|
||||
{{- end }}
|
||||
30
charts/kured/templates/role.yaml
Normal file
30
charts/kured/templates/role.yaml
Normal file
@@ -0,0 +1,30 @@
|
||||
{{- if .Values.rbac.create -}}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
rules:
|
||||
# Allow kured to lock/unlock itself
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["daemonsets"]
|
||||
resourceNames: ["{{ template "kured.fullname" . }}"]
|
||||
verbs: ["update", "patch"]
|
||||
- apiGroups: ["apps"]
|
||||
resources: ["daemonsets"]
|
||||
resourceNames: ["{{ template "kured.fullname" . }}"]
|
||||
verbs: ["update", "patch"]
|
||||
{{- if .Values.podSecurityPolicy.create }}
|
||||
- apiGroups: ["extensions"]
|
||||
resources: ["podsecuritypolicies"]
|
||||
resourceNames: ["{{ template "kured.fullname" . }}"]
|
||||
verbs: ["use"]
|
||||
- apiGroups: ["policy"]
|
||||
resources: ["podsecuritypolicies"]
|
||||
resourceNames: ["{{ template "kured.fullname" . }}"]
|
||||
verbs: ["use"]
|
||||
{{- end }}
|
||||
|
||||
{{- end -}}
|
||||
17
charts/kured/templates/rolebinding.yaml
Normal file
17
charts/kured/templates/rolebinding.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
{{- if .Values.rbac.create -}}
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
namespace: {{ .Release.Namespace }}
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
namespace: {{ .Release.Namespace }}
|
||||
name: {{ template "kured.serviceAccountName" . }}
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: {{ template "kured.fullname" . }}
|
||||
{{- end -}}
|
||||
22
charts/kured/templates/service.yaml
Normal file
22
charts/kured/templates/service.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
{{- if or .Values.service.create .Values.metrics.create }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
{{- if .Values.service.annotations }}
|
||||
annotations:
|
||||
{{- range $key, $value := .Values.service.annotations }}
|
||||
{{ $key }}: {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: ClusterIP
|
||||
ports:
|
||||
- name: metrics
|
||||
port: {{ .Values.service.port }}
|
||||
targetPort: 8080
|
||||
selector:
|
||||
{{- include "kured.matchLabels" . | nindent 4 }}
|
||||
{{- end }}
|
||||
9
charts/kured/templates/serviceaccount.yaml
Normal file
9
charts/kured/templates/serviceaccount.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ template "kured.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
{{- end -}}
|
||||
31
charts/kured/templates/servicemonitor.yaml
Normal file
31
charts/kured/templates/servicemonitor.yaml
Normal file
@@ -0,0 +1,31 @@
|
||||
{{- if .Values.metrics.create }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: {{ template "kured.fullname" . }}
|
||||
{{- if .Values.metrics.namespace }}
|
||||
namespace: {{ .Values.metrics.namespace }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "kured.labels" . | nindent 4 }}
|
||||
{{- if .Values.metrics.labels }}
|
||||
{{- toYaml .Values.metrics.labels | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
endpoints:
|
||||
- interval: {{ .Values.metrics.interval }}
|
||||
{{- if .Values.metrics.scrapeTimeout }}
|
||||
scrapeTimeout: {{ .Values.metrics.scrapeTimeout }}
|
||||
{{- end }}
|
||||
honorLabels: true
|
||||
targetPort: 8080
|
||||
path: /metrics
|
||||
scheme: http
|
||||
jobLabel: "{{ .Release.Name }}"
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "kured.matchLabels" . | nindent 6 }}
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Release.Namespace }}
|
||||
{{- end }}
|
||||
61
charts/kured/values.yaml
Normal file
61
charts/kured/values.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
image:
|
||||
repository: weaveworks/kured
|
||||
tag: 1.4.4
|
||||
pullPolicy: IfNotPresent
|
||||
pullSecrets: []
|
||||
|
||||
updateStrategy: OnDelete
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
extraArgs: {}
|
||||
|
||||
configuration:
|
||||
annotationTtl: 0 # force clean annotation after this amount of time (default 0, disabled)
|
||||
alertFilterRegexp: "" # alert names to ignore when checking for active alerts
|
||||
blockingPodSelector: [] # label selector identifying pods whose presence should prevent reboots
|
||||
endTime: "" # only reboot before this time of day (default "23:59")
|
||||
lockAnnotation: "" # annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
period: "" # reboot check period (default 1h0m0s)
|
||||
prometheusUrl: "" # Prometheus instance to probe for active alerts
|
||||
rebootDays: [] # only reboot on these days (default [su,mo,tu,we,th,fr,sa])
|
||||
rebootSentinel: "" # path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
slackChannel: "" # slack channel for reboot notifications
|
||||
slackHookUrl: "" # slack hook URL for reboot notifications
|
||||
slackUsername: "" # slack username for reboot notifications (default "kured")
|
||||
startTime: "" # only reboot after this time of day (default "0:00")
|
||||
timeZone: "" # time-zone to use (valid zones from "time" golang package)
|
||||
|
||||
rbac:
|
||||
create: true
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
name:
|
||||
|
||||
podSecurityPolicy:
|
||||
create: false
|
||||
|
||||
resources: {}
|
||||
|
||||
metrics:
|
||||
create: false
|
||||
namespace: ""
|
||||
labels: {}
|
||||
interval: 60s
|
||||
scrapeTimeout: ""
|
||||
|
||||
service:
|
||||
create: false
|
||||
port: 8080
|
||||
annotations: {}
|
||||
|
||||
priorityClassName: ""
|
||||
|
||||
tolerations:
|
||||
- key: node-role.kubernetes.io/master
|
||||
effect: NoSchedule
|
||||
|
||||
affinity: {}
|
||||
|
||||
nodeSelector: {}
|
||||
@@ -1,7 +1,7 @@
|
||||
FROM alpine:3.11
|
||||
RUN apk update && apk add ca-certificates tzdata && rm -rf /var/cache/apk/*
|
||||
# NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details
|
||||
ADD https://storage.googleapis.com/kubernetes-release/release/v1.17.5/bin/linux/amd64/kubectl /usr/bin/kubectl
|
||||
ADD https://storage.googleapis.com/kubernetes-release/release/v1.17.7/bin/linux/amd64/kubectl /usr/bin/kubectl
|
||||
RUN chmod 0755 /usr/bin/kubectl
|
||||
COPY ./kured /usr/bin/kured
|
||||
ENTRYPOINT ["/usr/bin/kured"]
|
||||
|
||||
@@ -45,6 +45,8 @@ var (
|
||||
rebootEnd string
|
||||
timezone string
|
||||
|
||||
annotationTTL time.Duration
|
||||
|
||||
// Metrics
|
||||
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Subsystem: "kured",
|
||||
@@ -97,6 +99,9 @@ func main() {
|
||||
rootCmd.PersistentFlags().StringVar(&timezone, "time-zone", "UTC",
|
||||
"use this timezone for schedule inputs")
|
||||
|
||||
rootCmd.PersistentFlags().DurationVar(&annotationTTL, "annotation-ttl", 0,
|
||||
"force clean annotation after this ammount of time (default 0, disabled)")
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@@ -204,8 +209,8 @@ func holding(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
|
||||
return holding
|
||||
}
|
||||
|
||||
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}) bool {
|
||||
holding, holder, err := lock.Acquire(metadata)
|
||||
func acquire(lock *daemonsetlock.DaemonSetLock, metadata interface{}, TTL time.Duration) bool {
|
||||
holding, holder, err := lock.Acquire(metadata, TTL)
|
||||
switch {
|
||||
case err != nil:
|
||||
log.Fatalf("Error acquiring lock: %v", err)
|
||||
@@ -283,7 +288,7 @@ type nodeMeta struct {
|
||||
Unschedulable bool `json:"unschedulable"`
|
||||
}
|
||||
|
||||
func rebootAsRequired(nodeID string, window *timewindow.TimeWindow) {
|
||||
func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Duration) {
|
||||
config, err := rest.InClusterConfig()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
@@ -314,7 +319,7 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow) {
|
||||
}
|
||||
nodeMeta.Unschedulable = node.Spec.Unschedulable
|
||||
|
||||
if acquire(lock, &nodeMeta) {
|
||||
if acquire(lock, &nodeMeta, TTL) {
|
||||
if !nodeMeta.Unschedulable {
|
||||
drain(nodeID)
|
||||
}
|
||||
@@ -346,8 +351,13 @@ func root(cmd *cobra.Command, args []string) {
|
||||
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
|
||||
log.Infof("Blocking Pod Selectors: %v", podSelectors)
|
||||
log.Infof("Reboot on: %v", window)
|
||||
if annotationTTL > 0 {
|
||||
log.Infof("Force annotation cleanup after: %v", annotationTTL)
|
||||
} else {
|
||||
log.Info("Force annotation cleanup disabled.")
|
||||
}
|
||||
|
||||
go rebootAsRequired(nodeID, window)
|
||||
go rebootAsRequired(nodeID, window, annotationTTL)
|
||||
go maintainRebootRequiredMetric(nodeID)
|
||||
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
@@ -8,7 +8,7 @@ rules:
|
||||
# Allow kubectl to drain/uncordon
|
||||
#
|
||||
# NB: These permissions are tightly coupled to the bundled version of kubectl; the ones below
|
||||
# match https://github.com/kubernetes/kubernetes/blob/v1.17.5/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
|
||||
# match https://github.com/kubernetes/kubernetes/blob/v1.17.7/staging/src/k8s.io/kubectl/pkg/cmd/drain/drain.go
|
||||
#
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
|
||||
@@ -19,15 +19,17 @@ type DaemonSetLock struct {
|
||||
}
|
||||
|
||||
type lockAnnotationValue struct {
|
||||
NodeID string `json:"nodeID"`
|
||||
Metadata interface{} `json:"metadata,omitempty"`
|
||||
NodeID string `json:"nodeID"`
|
||||
Metadata interface{} `json:"metadata,omitempty"`
|
||||
Created time.Time `json:"created"`
|
||||
TTL time.Duration `json:"TTL"`
|
||||
}
|
||||
|
||||
// New returns a pointer to a DaemonSetLock populated, in order, with the
// given client, node ID, namespace, name and annotation.
func New(client *kubernetes.Clientset, nodeID, namespace, name, annotation string) *DaemonSetLock {
|
||||
return &DaemonSetLock{client, nodeID, namespace, name, annotation}
|
||||
}
|
||||
|
||||
func (dsl *DaemonSetLock) Acquire(metadata interface{}) (acquired bool, owner string, err error) {
|
||||
func (dsl *DaemonSetLock) Acquire(metadata interface{}, TTL time.Duration) (acquired bool, owner string, err error) {
|
||||
for {
|
||||
ds, err := dsl.client.AppsV1().DaemonSets(dsl.namespace).Get(dsl.name, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
@@ -40,13 +42,18 @@ func (dsl *DaemonSetLock) Acquire(metadata interface{}) (acquired bool, owner st
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return false, "", err
|
||||
}
|
||||
|
||||
if ttlExpired(value.Created, value.TTL) {
|
||||
return true, value.NodeID, nil
|
||||
}
|
||||
|
||||
return value.NodeID == dsl.nodeID, value.NodeID, nil
|
||||
}
|
||||
|
||||
if ds.ObjectMeta.Annotations == nil {
|
||||
ds.ObjectMeta.Annotations = make(map[string]string)
|
||||
}
|
||||
value := lockAnnotationValue{NodeID: dsl.nodeID, Metadata: metadata}
|
||||
value := lockAnnotationValue{NodeID: dsl.nodeID, Metadata: metadata, Created: time.Now().UTC(), TTL: TTL}
|
||||
valueBytes, err := json.Marshal(&value)
|
||||
if err != nil {
|
||||
return false, "", err
|
||||
@@ -79,6 +86,11 @@ func (dsl *DaemonSetLock) Test(metadata interface{}) (holding bool, err error) {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
if ttlExpired(value.Created, value.TTL) {
|
||||
return true, nil
|
||||
}
|
||||
|
||||
return value.NodeID == dsl.nodeID, nil
|
||||
}
|
||||
|
||||
@@ -98,7 +110,7 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
if err := json.Unmarshal([]byte(valueString), &value); err != nil {
|
||||
return err
|
||||
}
|
||||
if value.NodeID != dsl.nodeID {
|
||||
if value.NodeID != dsl.nodeID && !ttlExpired(value.Created, value.TTL) {
|
||||
return fmt.Errorf("Not lock holder: %v", value.NodeID)
|
||||
}
|
||||
} else {
|
||||
@@ -120,3 +132,10 @@ func (dsl *DaemonSetLock) Release() error {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// ttlExpired reports whether at least ttl has elapsed since created.
// A zero or negative ttl never expires, so it always yields false.
func ttlExpired(created time.Time, ttl time.Duration) bool {
	return ttl > 0 && time.Since(created) >= ttl
}
|
||||
|
||||
28
pkg/daemonsetlock/daemonsetlock_test.go
Normal file
28
pkg/daemonsetlock/daemonsetlock_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package daemonsetlock
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestTtlExpired(t *testing.T) {
|
||||
d := time.Date(2020, 05, 05, 14, 15, 0, 0, time.UTC)
|
||||
second, _ := time.ParseDuration("1s")
|
||||
zero, _ := time.ParseDuration("0m")
|
||||
|
||||
tests := []struct {
|
||||
created time.Time
|
||||
ttl time.Duration
|
||||
result bool
|
||||
}{
|
||||
{d, second, true},
|
||||
{time.Now(), second, false},
|
||||
{d, zero, false},
|
||||
}
|
||||
|
||||
for i, tst := range tests {
|
||||
if ttlExpired(tst.created, tst.ttl) != tst.result {
|
||||
t.Errorf("Test %d failed, expected %v but got %v", i, tst.result, !tst.result)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user