mirror of
https://github.com/kubereboot/kured.git
synced 2026-02-18 11:19:50 +00:00
Compare commits
16 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fc94716ffe | ||
|
|
c20c9e987b | ||
|
|
10443c5178 | ||
|
|
1b3d84d360 | ||
|
|
50136cd865 | ||
|
|
69b509f246 | ||
|
|
556789e6c7 | ||
|
|
0127675514 | ||
|
|
b1370be8f3 | ||
|
|
de3593799f | ||
|
|
521e15cc73 | ||
|
|
84be7929d1 | ||
|
|
06b22bc3ad | ||
|
|
f6f9e7492c | ||
|
|
114c34950b | ||
|
|
048bba446f |
@@ -12,7 +12,7 @@ jobs:
|
||||
- deploy:
|
||||
name: Build and push image
|
||||
command: |
|
||||
docker login -u "$DOCKER_USER" -p "$DOCKER_PASS" quay.io
|
||||
echo "$DOCKER_PASS" | docker login --username "$DOCKER_USER" --password-stdin
|
||||
if [ -z "${CIRCLE_TAG}" ]; then
|
||||
make publish-image
|
||||
else
|
||||
|
||||
74
Gopkg.lock
generated
74
Gopkg.lock
generated
@@ -7,26 +7,14 @@
|
||||
packages = ["quantile"]
|
||||
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/ghodss/yaml"
|
||||
packages = ["."]
|
||||
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
|
||||
version = "v1.0.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/gogo/protobuf"
|
||||
packages = [
|
||||
"proto",
|
||||
"sortkeys"
|
||||
]
|
||||
revision = "636bf0302bc95575d69441b25a2603156ffdddf1"
|
||||
version = "v1.1.1"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/golang/glog"
|
||||
packages = ["."]
|
||||
revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"
|
||||
revision = "4cbf7e384e768b4e01799441fdf2a706a5635ae7"
|
||||
version = "v1.2.0"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/golang/protobuf"
|
||||
@@ -69,7 +57,7 @@
|
||||
".",
|
||||
"diskcache"
|
||||
]
|
||||
revision = "9cad4c3443a7200dd6400aef47183728de563a38"
|
||||
revision = "c63ab54fda8f77302f8d414e19933f2b6026a089"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/inconshreveable/mousetrap"
|
||||
@@ -129,7 +117,7 @@
|
||||
"prometheus/internal",
|
||||
"prometheus/promhttp"
|
||||
]
|
||||
revision = "16f375c74db6ccf880e1cd9c6c6087a6d58e5d12"
|
||||
revision = "fb3d5cb2ad5789367093b409855a3937d651b572"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -145,7 +133,7 @@
|
||||
"internal/bitbucket.org/ww/goautoneg",
|
||||
"model"
|
||||
]
|
||||
revision = "7e9e6cabbd393fc208072eedef99188d0ce788b6"
|
||||
revision = "67670fe90761d7ff18ec1d640135e53b9198328f"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -156,19 +144,19 @@
|
||||
"nfs",
|
||||
"xfs"
|
||||
]
|
||||
revision = "185b4288413d2a0dd0806f78c90dde719829e5ae"
|
||||
revision = "14fa7590c24d4615893b68e22fce3b3489689f65"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/sirupsen/logrus"
|
||||
packages = ["."]
|
||||
revision = "ad15b42461921f1fb3529b058c6786c6a45d5162"
|
||||
version = "v1.1.1"
|
||||
revision = "bcd833dfe83d3cebad139e4a29ed79cb2318bf95"
|
||||
version = "v1.2.0"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
name = "github.com/spf13/cobra"
|
||||
packages = ["."]
|
||||
revision = "fe5e611709b0c57fa4a89136deaa8e1d4004d053"
|
||||
revision = "d2d81d9a96e23f0255397222bb0b4e3165e492dc"
|
||||
|
||||
[[projects]]
|
||||
name = "github.com/spf13/pflag"
|
||||
@@ -180,7 +168,7 @@
|
||||
branch = "master"
|
||||
name = "golang.org/x/crypto"
|
||||
packages = ["ssh/terminal"]
|
||||
revision = "85e1b3f9139abd58575d728a509643924e3b2ebf"
|
||||
revision = "8d7daa0c54b357f3071e11eaef7efc4e19a417e2"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -193,7 +181,7 @@
|
||||
"http2/hpack",
|
||||
"idna"
|
||||
]
|
||||
revision = "9b4f9f5ad5197c79fd623a3638e70d8b26cef344"
|
||||
revision = "927f97764cc334a6575f4b7a1584a147864d5723"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -202,7 +190,7 @@
|
||||
".",
|
||||
"internal"
|
||||
]
|
||||
revision = "9dcd33a902f40452422c2367fefcb95b54f9f8f8"
|
||||
revision = "d668ce993890a79bda886613ee587a69dd5da7a6"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -211,7 +199,7 @@
|
||||
"unix",
|
||||
"windows"
|
||||
]
|
||||
revision = "d989b31c87461dc8ab2f1cac6792814e27fadea9"
|
||||
revision = "82a175fd1598e8a172e58ebdf5ed262bb29129e5"
|
||||
|
||||
[[projects]]
|
||||
name = "golang.org/x/text"
|
||||
@@ -238,7 +226,7 @@
|
||||
branch = "master"
|
||||
name = "golang.org/x/time"
|
||||
packages = ["rate"]
|
||||
revision = "fbb02b2291d28baffd63558aa44b4b56f178d650"
|
||||
revision = "85acf8d2951cb2a3bde7632f9ff273ef0379bcbd"
|
||||
|
||||
[[projects]]
|
||||
name = "google.golang.org/appengine"
|
||||
@@ -251,8 +239,8 @@
|
||||
"internal/urlfetch",
|
||||
"urlfetch"
|
||||
]
|
||||
revision = "ae0ab99deb4dc413a2b4bd6c8bdd0eb67f1e4d06"
|
||||
version = "v1.2.0"
|
||||
revision = "e9657d882bb81064595ca3b56cbe2546bbabf7b1"
|
||||
version = "v1.4.0"
|
||||
|
||||
[[projects]]
|
||||
name = "gopkg.in/inf.v0"
|
||||
@@ -263,8 +251,8 @@
|
||||
[[projects]]
|
||||
name = "gopkg.in/yaml.v2"
|
||||
packages = ["."]
|
||||
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
|
||||
version = "v2.2.1"
|
||||
revision = "51d6538a90f86fe93ac480b35f37b2be17fef232"
|
||||
version = "v2.2.2"
|
||||
|
||||
[[projects]]
|
||||
branch = "master"
|
||||
@@ -275,6 +263,7 @@
|
||||
"apps/v1",
|
||||
"apps/v1beta1",
|
||||
"apps/v1beta2",
|
||||
"auditregistration/v1alpha1",
|
||||
"authentication/v1",
|
||||
"authentication/v1beta1",
|
||||
"authorization/v1",
|
||||
@@ -302,10 +291,10 @@
|
||||
"storage/v1alpha1",
|
||||
"storage/v1beta1"
|
||||
]
|
||||
revision = "843ad2d9b9ae703c74f2f43959e6ce0b24cc3185"
|
||||
revision = "173ce66c1e39d1d0f56e0b3347ff2988068aecd0"
|
||||
|
||||
[[projects]]
|
||||
branch = "release-1.12"
|
||||
branch = "release-1.13"
|
||||
name = "k8s.io/apimachinery"
|
||||
packages = [
|
||||
"pkg/api/errors",
|
||||
@@ -344,7 +333,7 @@
|
||||
"pkg/watch",
|
||||
"third_party/forked/golang/reflect"
|
||||
]
|
||||
revision = "6dd46049f39503a1fc8d65de4bd566829e95faff"
|
||||
revision = "2b1284ed4c93a43499e781493253e2ac5959c4fd"
|
||||
|
||||
[[projects]]
|
||||
name = "k8s.io/client-go"
|
||||
@@ -357,6 +346,7 @@
|
||||
"kubernetes/typed/apps/v1",
|
||||
"kubernetes/typed/apps/v1beta1",
|
||||
"kubernetes/typed/apps/v1beta2",
|
||||
"kubernetes/typed/auditregistration/v1alpha1",
|
||||
"kubernetes/typed/authentication/v1",
|
||||
"kubernetes/typed/authentication/v1beta1",
|
||||
"kubernetes/typed/authorization/v1",
|
||||
@@ -399,12 +389,24 @@
|
||||
"util/flowcontrol",
|
||||
"util/integer"
|
||||
]
|
||||
revision = "1638f8970cefaa404ff3a62950f88b08292b2696"
|
||||
version = "v9.0.0"
|
||||
revision = "e64494209f554a6723674bd494d69445fb76a1d4"
|
||||
version = "v10.0.0"
|
||||
|
||||
[[projects]]
|
||||
name = "k8s.io/klog"
|
||||
packages = ["."]
|
||||
revision = "a5bc97fbc634d635061f3146511332c7e313a55a"
|
||||
version = "v0.1.0"
|
||||
|
||||
[[projects]]
|
||||
name = "sigs.k8s.io/yaml"
|
||||
packages = ["."]
|
||||
revision = "fd68e9863619f6ec2fdd8625fe1f02e7c877e480"
|
||||
version = "v1.1.0"
|
||||
|
||||
[solve-meta]
|
||||
analyzer-name = "dep"
|
||||
analyzer-version = 1
|
||||
inputs-digest = "76687ace1736ad6f237f5bdc9ad6c82e82906a7098a5c61211676c61e5b088a0"
|
||||
inputs-digest = "96704623ac96e94ce47b0820b4ff9e359b76c68a72eb83621a3de9d99d3d9d4f"
|
||||
solver-name = "gps-cdcl"
|
||||
solver-version = 1
|
||||
|
||||
@@ -16,11 +16,11 @@
|
||||
|
||||
[[constraint]]
|
||||
name = "k8s.io/client-go"
|
||||
version = "v9.0.0"
|
||||
version = "v10.0.0"
|
||||
|
||||
[[constraint]]
|
||||
name = "k8s.io/apimachinery"
|
||||
branch = "release-1.12"
|
||||
branch = "release-1.13"
|
||||
|
||||
[prune]
|
||||
go-tests = true
|
||||
|
||||
10
Makefile
10
Makefile
@@ -12,7 +12,7 @@ clean:
|
||||
rm -f cmd/kured/kured
|
||||
rm -rf ./build
|
||||
|
||||
godeps=$(shell go get $1 && go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
|
||||
godeps=$(shell go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
|
||||
|
||||
DEPS=$(call godeps,./cmd/kured)
|
||||
|
||||
@@ -23,14 +23,14 @@ cmd/kured/kured: cmd/kured/*.go
|
||||
build/.image.done: cmd/kured/Dockerfile cmd/kured/kured
|
||||
mkdir -p build
|
||||
cp $^ build
|
||||
$(SUDO) docker build -t quay.io/$(DH_ORG)/kured -f build/Dockerfile ./build
|
||||
$(SUDO) docker tag quay.io/$(DH_ORG)/kured quay.io/$(DH_ORG)/kured:$(VERSION)
|
||||
$(SUDO) docker build -t docker.io/$(DH_ORG)/kured -f build/Dockerfile ./build
|
||||
$(SUDO) docker tag docker.io/$(DH_ORG)/kured docker.io/$(DH_ORG)/kured:$(VERSION)
|
||||
touch $@
|
||||
|
||||
image: build/.image.done
|
||||
|
||||
publish-image: image
|
||||
$(SUDO) docker push quay.io/$(DH_ORG)/kured:$(VERSION)
|
||||
$(SUDO) docker push docker.io/$(DH_ORG)/kured:$(VERSION)
|
||||
|
||||
minikube-publish: image
|
||||
$(SUDO) docker save quay.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
|
||||
$(SUDO) docker save docker.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
|
||||
|
||||
78
README.md
78
README.md
@@ -7,6 +7,7 @@
|
||||
* [Configuration](#configuration)
|
||||
* [Reboot Sentinel File & Period](#reboot-sentinel-file-&-period)
|
||||
* [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
|
||||
* [Blocking Reboots via Pods](#blocking-reboots-via-pods)
|
||||
* [Prometheus Metrics](#prometheus-metrics)
|
||||
* [Slack Notifications](#slack-notifications)
|
||||
* [Overriding Lock Configuration](#overriding-lock-configuration)
|
||||
@@ -27,16 +28,25 @@ indicated by the package management system of the underlying OS.
|
||||
* Watches for the presence of a reboot sentinel e.g. `/var/run/reboot-required`
|
||||
* Utilises a lock in the API server to ensure only one node reboots at
|
||||
a time
|
||||
* Optionally defers reboots in the presence of active Prometheus alerts
|
||||
* Optionally defers reboots in the presence of active Prometheus alerts or selected pods
|
||||
* Cordons & drains worker nodes before reboot, uncordoning them after
|
||||
|
||||
## Kubernetes & OS Compatibility
|
||||
|
||||
The daemon image contains versions of `k8s.io/client-go` and the
|
||||
`kubectl` binary for the purposes of maintaining the lock and draining
|
||||
worker nodes. See the [release
|
||||
notes](https://github.com/weaveworks/kured/releases) for specific
|
||||
version compatibility information.
|
||||
worker nodes. Kubernetes aims to provide forwards & backwards
|
||||
compatibility of one minor version between client and server:
|
||||
|
||||
| kured | kubectl | k8s.io/client-go | k8s.io/apimachinery | expected kubernetes compatibility |
|
||||
|--------|---------|------------------|---------------------|-----------------------------------|
|
||||
| master | 1.13.6 | v10.0.0 | release-1.13 | 1.12.x, 1.13.x, 1.14.x |
|
||||
| 1.1.0 | 1.12.1 | v9.0.0 | release-1.12 | 1.11.x, 1.12.x, 1.13.x |
|
||||
| 1.0.0 | 1.7.6 | v4.0.0 | release-1.7 | 1.6.x, 1.7.x, 1.8.x |
|
||||
|
||||
See the [release notes](https://github.com/weaveworks/kured/releases)
|
||||
for specific version compatibility information, including which
|
||||
combination have been formally tested.
|
||||
|
||||
Versions >=1.1.0 enter the host mount namespace to invoke
|
||||
`systemctl reboot`, so should work on any systemd distribution.
|
||||
@@ -47,7 +57,7 @@ To obtain a default installation without Prometheus alerting interlock
|
||||
or Slack notifications:
|
||||
|
||||
```
|
||||
kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.1.0/kured-1.1.0.yaml
|
||||
kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.1.0/kured-1.1.0-dockerhub.yaml
|
||||
```
|
||||
|
||||
If you want to customise the installation, download the manifest and
|
||||
@@ -59,15 +69,17 @@ The following arguments can be passed to kured via the daemonset pod template:
|
||||
|
||||
```
|
||||
Flags:
|
||||
--alert-filter-regexp value alert names to ignore when checking for active alerts
|
||||
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
|
||||
--ds-namespace string name of daemonset on which to place lock (default "kured")
|
||||
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
--period duration reboot check period (default 1h0m0s)
|
||||
--prometheus-url string Prometheus instance to probe for active alerts
|
||||
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
--slack-hook-url string slack hook URL for reboot notfications
|
||||
--slack-username string slack username for reboot notfications (default "kured")
|
||||
--alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts
|
||||
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
|
||||
--ds-name string name of daemonset on which to place lock (default "kured")
|
||||
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
|
||||
-h, --help help for kured
|
||||
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
--period duration reboot check period (default 1h0m0s)
|
||||
--prometheus-url string Prometheus instance to probe for active alerts
|
||||
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
--slack-hook-url string slack hook URL for reboot notfications
|
||||
--slack-username string slack username for reboot notfications (default "kured")
|
||||
```
|
||||
|
||||
### Reboot Sentinel File & Period
|
||||
@@ -95,8 +107,33 @@ will block reboots, however you can ignore specific alerts:
|
||||
--alert-filter-regexp=^(RebootRequired|AnotherBenignAlert|...$
|
||||
```
|
||||
|
||||
An important application of this filter will become apparent in the
|
||||
next section.
|
||||
See the section on Prometheus metrics for an important application of this
|
||||
filter.
|
||||
|
||||
### Blocking Reboots via Pods
|
||||
|
||||
You can also block reboots of an _individual node_ when specific pods
|
||||
are scheduled on it:
|
||||
|
||||
```
|
||||
--blocking-pod-selector=runtime=long,cost=expensive
|
||||
```
|
||||
|
||||
Since label selector strings use commas to express logical 'and', you can
|
||||
specify this parameter multiple times for 'or':
|
||||
|
||||
```
|
||||
--blocking-pod-selector=runtime=long,cost=expensive
|
||||
--blocking-pod-selector=name=temperamental
|
||||
```
|
||||
|
||||
In this case, the presence of either an (appropriately labelled) expensive long
|
||||
running job or a known temperamental pod on a node will stop it rebooting.
|
||||
|
||||
> Try not to abuse this mechanism - it's better to strive for
|
||||
> restartability where possible. If you do use it, make sure you set
|
||||
> up a RebootRequired alert as described in the next section so that
|
||||
> you can intervene manually if reboots are blocked for too long.
|
||||
|
||||
### Prometheus Metrics
|
||||
|
||||
@@ -198,7 +235,7 @@ dep ensure && make
|
||||
|
||||
## Frequently Asked/Anticipated Questions
|
||||
|
||||
### Why is there no `latest` tag on quay.io?
|
||||
### Why is there no `latest` tag on Docker Hub?
|
||||
|
||||
Use of `latest` for production deployments is bad practice - see
|
||||
[here](https://kubernetes.io/docs/concepts/configuration/overview) for
|
||||
@@ -210,9 +247,8 @@ versioned manifest from the [release page](https://github.com/weaveworks/kured/r
|
||||
|
||||
If you have any questions about, feedback for or problems with `kured`:
|
||||
|
||||
- Invite yourself to the <a href="https://weaveworks.github.io/community-slack/" target="_blank"> #weave-community </a> slack channel.
|
||||
- Ask a question on the <a href="https://weave-community.slack.com/messages/general/"> #weave-community</a> slack channel.
|
||||
- Send an email to <a href="mailto:weave-users@weave.works">weave-users@weave.works</a>
|
||||
- <a href="https://github.com/weaveworks/kured/issues/new">File an issue.</a>
|
||||
- Invite yourself to the <a href="https://slack.weave.works/" target="_blank">Weave Users Slack</a>.
|
||||
- Ask a question on the [#general](https://weave-community.slack.com/messages/general/) slack channel.
|
||||
- [File an issue](https://github.com/weaveworks/kured/issues/new).
|
||||
|
||||
Your feedback is always welcome!
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
FROM alpine:3.8
|
||||
RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/*
|
||||
# NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details
|
||||
ADD https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl /usr/bin/kubectl
|
||||
ADD https://storage.googleapis.com/kubernetes-release/release/v1.13.6/bin/linux/amd64/kubectl /usr/bin/kubectl
|
||||
RUN chmod 0755 /usr/bin/kubectl
|
||||
COPY ./kured /usr/bin/kured
|
||||
ENTRYPOINT ["/usr/bin/kured"]
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
@@ -35,6 +36,7 @@ var (
|
||||
rebootSentinel string
|
||||
slackHookURL string
|
||||
slackUsername string
|
||||
podSelectors []string
|
||||
|
||||
// Metrics
|
||||
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
@@ -74,6 +76,9 @@ func main() {
|
||||
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
|
||||
"slack username for reboot notfications")
|
||||
|
||||
rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
|
||||
"label selector identifying pods whose presence should prevent reboots")
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
@@ -126,7 +131,7 @@ func rebootRequired() bool {
|
||||
}
|
||||
}
|
||||
|
||||
func rebootBlocked() bool {
|
||||
func rebootBlocked(client *kubernetes.Clientset, nodeID string) bool {
|
||||
if prometheusURL != "" {
|
||||
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
|
||||
if err != nil {
|
||||
@@ -142,6 +147,31 @@ func rebootBlocked() bool {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
fieldSelector := fmt.Sprintf("spec.nodeName=%s", nodeID)
|
||||
for _, labelSelector := range podSelectors {
|
||||
podList, err := client.CoreV1().Pods("").List(metav1.ListOptions{
|
||||
LabelSelector: labelSelector,
|
||||
FieldSelector: fieldSelector,
|
||||
Limit: 10})
|
||||
if err != nil {
|
||||
log.Warnf("Reboot blocked: pod query error: %v", err)
|
||||
return true
|
||||
}
|
||||
|
||||
if len(podList.Items) > 0 {
|
||||
podNames := make([]string, 0, len(podList.Items))
|
||||
for _, pod := range podList.Items {
|
||||
podNames = append(podNames, pod.Name)
|
||||
}
|
||||
if len(podList.Continue) > 0 {
|
||||
podNames = append(podNames, "...")
|
||||
}
|
||||
log.Warnf("Reboot blocked: matching pods: %v", podNames)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -259,7 +289,7 @@ func rebootAsRequired(nodeID string) {
|
||||
source := rand.NewSource(time.Now().UnixNano())
|
||||
tick := delaytick.New(source, period)
|
||||
for _ = range tick {
|
||||
if rebootRequired() && !rebootBlocked() {
|
||||
if rebootRequired() && !rebootBlocked(client, nodeID) {
|
||||
node, err := client.CoreV1().Nodes().Get(nodeID, metav1.GetOptions{})
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
@@ -291,6 +321,7 @@ func root(cmd *cobra.Command, args []string) {
|
||||
log.Infof("Node ID: %s", nodeID)
|
||||
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
|
||||
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
|
||||
log.Infof("Blocking Pod Selectors: %v", podSelectors)
|
||||
|
||||
go rebootAsRequired(nodeID)
|
||||
go maintainRebootRequiredMetric(nodeID)
|
||||
|
||||
@@ -29,10 +29,10 @@ spec:
|
||||
restartPolicy: Always
|
||||
containers:
|
||||
- name: kured
|
||||
image: quay.io/weaveworks/kured # If you find yourself here
|
||||
# wondering why there is no
|
||||
# :latest tag on quay.io, see
|
||||
# the FAQ in the README
|
||||
image: docker.io/weaveworks/kured # If you find yourself here
|
||||
# wondering why there is no
|
||||
# :latest tag on Docker Hub,
|
||||
# see the FAQ in the README
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
privileged: true # Give permission to nsenter /proc/1/ns/mnt
|
||||
@@ -46,6 +46,9 @@ spec:
|
||||
command:
|
||||
- /usr/bin/kured
|
||||
# - --alert-filter-regexp=^RebootRequired$
|
||||
# - --blocking-pod-selector=runtime=long,cost=expensive
|
||||
# - --blocking-pod-selector=name=temperamental
|
||||
# - --blocking-pod-selector=...
|
||||
# - --ds-name=kured
|
||||
# - --ds-namespace=kube-system
|
||||
# - --lock-annotation=weave.works/kured-node-lock
|
||||
|
||||
Reference in New Issue
Block a user