Compare commits

..

16 Commits
1.1.0 ... 1.2.0

Author SHA1 Message Date
Adam Harrison
fc94716ffe Update install instructions to use alternative dockerhub yaml 2019-05-16 11:29:26 +01:00
Adam Harrison
c20c9e987b Merge pull request #73 from weaveworks/remove-spurious-go-get
Remove spurious `go get` from Makefile
2019-05-16 11:04:13 +01:00
Adam Harrison
10443c5178 Remove spurious go get from Makefile 2019-05-16 11:00:21 +01:00
Adam Harrison
1b3d84d360 Correct master kubectl version in README 2019-05-16 10:56:41 +01:00
Adam Harrison
50136cd865 Merge pull request #57 from weaveworks/support-k8s-1.13
Support k8s 1.13
2019-05-16 10:55:59 +01:00
Adam Harrison
69b509f246 Add master to compatibility matrix 2019-05-16 10:51:51 +01:00
Adam Harrison
556789e6c7 Update embedded kubectl to v1.13.6 2019-05-16 10:51:51 +01:00
Adam Harrison
0127675514 Update client-go and transitive dependencies 2019-05-16 10:51:46 +01:00
Hidde Beydals
b1370be8f3 Merge pull request #69 from weaveworks/switch-docker-hub
Move image to Docker Hub
2019-04-18 09:51:34 +02:00
Hidde Beydals
de3593799f Move image to Docker Hub 2019-04-17 13:43:47 +02:00
Matthias Radestock
521e15cc73 Merge pull request #58 from dholbach/drop-email
weave-users mailing list is closed
2019-01-08 11:58:35 +00:00
Daniel Holbach
84be7929d1 weave-users mailing list is closed: https://groups.google.com/a/weave.works/forum/#!topic/weave-users/0QXWGOPdBfY
Signed-off-by: Daniel Holbach <daniel@weave.works>
2019-01-08 08:18:21 +05:30
Adam Harrison
06b22bc3ad Merge pull request #46 from weaveworks/labelled-pods-can-block-reboots
Allow selected pods to prevent reboots
2019-01-03 11:54:43 +00:00
Adam Harrison
f6f9e7492c Allow selected pods to prevent reboots 2018-11-21 15:03:29 +00:00
Adam Harrison
114c34950b Merge pull request #41 from weaveworks/doc-compat-matrix
Document embedded kubectl/client-go versions
2018-10-30 17:06:54 +00:00
Adam Harrison
048bba446f Document embedded kubectl/client-go versions 2018-10-30 17:04:10 +00:00
8 changed files with 144 additions and 72 deletions

View File

@@ -12,7 +12,7 @@ jobs:
- deploy:
name: Build and push image
command: |
docker login -u "$DOCKER_USER" -p "$DOCKER_PASS" quay.io
echo "$DOCKER_PASS" | docker login --username "$DOCKER_USER" --password-stdin
if [ -z "${CIRCLE_TAG}" ]; then
make publish-image
else

74
Gopkg.lock generated
View File

@@ -7,26 +7,14 @@
packages = ["quantile"]
revision = "3a771d992973f24aa725d07868b467d1ddfceafb"
[[projects]]
name = "github.com/ghodss/yaml"
packages = ["."]
revision = "0ca9ea5df5451ffdf184b4428c902747c2c11cd7"
version = "v1.0.0"
[[projects]]
name = "github.com/gogo/protobuf"
packages = [
"proto",
"sortkeys"
]
revision = "636bf0302bc95575d69441b25a2603156ffdddf1"
version = "v1.1.1"
[[projects]]
branch = "master"
name = "github.com/golang/glog"
packages = ["."]
revision = "23def4e6c14b4da8ac2ed8007337bc5eb5007998"
revision = "4cbf7e384e768b4e01799441fdf2a706a5635ae7"
version = "v1.2.0"
[[projects]]
name = "github.com/golang/protobuf"
@@ -69,7 +57,7 @@
".",
"diskcache"
]
revision = "9cad4c3443a7200dd6400aef47183728de563a38"
revision = "c63ab54fda8f77302f8d414e19933f2b6026a089"
[[projects]]
name = "github.com/inconshreveable/mousetrap"
@@ -129,7 +117,7 @@
"prometheus/internal",
"prometheus/promhttp"
]
revision = "16f375c74db6ccf880e1cd9c6c6087a6d58e5d12"
revision = "fb3d5cb2ad5789367093b409855a3937d651b572"
[[projects]]
branch = "master"
@@ -145,7 +133,7 @@
"internal/bitbucket.org/ww/goautoneg",
"model"
]
revision = "7e9e6cabbd393fc208072eedef99188d0ce788b6"
revision = "67670fe90761d7ff18ec1d640135e53b9198328f"
[[projects]]
branch = "master"
@@ -156,19 +144,19 @@
"nfs",
"xfs"
]
revision = "185b4288413d2a0dd0806f78c90dde719829e5ae"
revision = "14fa7590c24d4615893b68e22fce3b3489689f65"
[[projects]]
name = "github.com/sirupsen/logrus"
packages = ["."]
revision = "ad15b42461921f1fb3529b058c6786c6a45d5162"
version = "v1.1.1"
revision = "bcd833dfe83d3cebad139e4a29ed79cb2318bf95"
version = "v1.2.0"
[[projects]]
branch = "master"
name = "github.com/spf13/cobra"
packages = ["."]
revision = "fe5e611709b0c57fa4a89136deaa8e1d4004d053"
revision = "d2d81d9a96e23f0255397222bb0b4e3165e492dc"
[[projects]]
name = "github.com/spf13/pflag"
@@ -180,7 +168,7 @@
branch = "master"
name = "golang.org/x/crypto"
packages = ["ssh/terminal"]
revision = "85e1b3f9139abd58575d728a509643924e3b2ebf"
revision = "8d7daa0c54b357f3071e11eaef7efc4e19a417e2"
[[projects]]
branch = "master"
@@ -193,7 +181,7 @@
"http2/hpack",
"idna"
]
revision = "9b4f9f5ad5197c79fd623a3638e70d8b26cef344"
revision = "927f97764cc334a6575f4b7a1584a147864d5723"
[[projects]]
branch = "master"
@@ -202,7 +190,7 @@
".",
"internal"
]
revision = "9dcd33a902f40452422c2367fefcb95b54f9f8f8"
revision = "d668ce993890a79bda886613ee587a69dd5da7a6"
[[projects]]
branch = "master"
@@ -211,7 +199,7 @@
"unix",
"windows"
]
revision = "d989b31c87461dc8ab2f1cac6792814e27fadea9"
revision = "82a175fd1598e8a172e58ebdf5ed262bb29129e5"
[[projects]]
name = "golang.org/x/text"
@@ -238,7 +226,7 @@
branch = "master"
name = "golang.org/x/time"
packages = ["rate"]
revision = "fbb02b2291d28baffd63558aa44b4b56f178d650"
revision = "85acf8d2951cb2a3bde7632f9ff273ef0379bcbd"
[[projects]]
name = "google.golang.org/appengine"
@@ -251,8 +239,8 @@
"internal/urlfetch",
"urlfetch"
]
revision = "ae0ab99deb4dc413a2b4bd6c8bdd0eb67f1e4d06"
version = "v1.2.0"
revision = "e9657d882bb81064595ca3b56cbe2546bbabf7b1"
version = "v1.4.0"
[[projects]]
name = "gopkg.in/inf.v0"
@@ -263,8 +251,8 @@
[[projects]]
name = "gopkg.in/yaml.v2"
packages = ["."]
revision = "5420a8b6744d3b0345ab293f6fcba19c978f1183"
version = "v2.2.1"
revision = "51d6538a90f86fe93ac480b35f37b2be17fef232"
version = "v2.2.2"
[[projects]]
branch = "master"
@@ -275,6 +263,7 @@
"apps/v1",
"apps/v1beta1",
"apps/v1beta2",
"auditregistration/v1alpha1",
"authentication/v1",
"authentication/v1beta1",
"authorization/v1",
@@ -302,10 +291,10 @@
"storage/v1alpha1",
"storage/v1beta1"
]
revision = "843ad2d9b9ae703c74f2f43959e6ce0b24cc3185"
revision = "173ce66c1e39d1d0f56e0b3347ff2988068aecd0"
[[projects]]
branch = "release-1.12"
branch = "release-1.13"
name = "k8s.io/apimachinery"
packages = [
"pkg/api/errors",
@@ -344,7 +333,7 @@
"pkg/watch",
"third_party/forked/golang/reflect"
]
revision = "6dd46049f39503a1fc8d65de4bd566829e95faff"
revision = "2b1284ed4c93a43499e781493253e2ac5959c4fd"
[[projects]]
name = "k8s.io/client-go"
@@ -357,6 +346,7 @@
"kubernetes/typed/apps/v1",
"kubernetes/typed/apps/v1beta1",
"kubernetes/typed/apps/v1beta2",
"kubernetes/typed/auditregistration/v1alpha1",
"kubernetes/typed/authentication/v1",
"kubernetes/typed/authentication/v1beta1",
"kubernetes/typed/authorization/v1",
@@ -399,12 +389,24 @@
"util/flowcontrol",
"util/integer"
]
revision = "1638f8970cefaa404ff3a62950f88b08292b2696"
version = "v9.0.0"
revision = "e64494209f554a6723674bd494d69445fb76a1d4"
version = "v10.0.0"
[[projects]]
name = "k8s.io/klog"
packages = ["."]
revision = "a5bc97fbc634d635061f3146511332c7e313a55a"
version = "v0.1.0"
[[projects]]
name = "sigs.k8s.io/yaml"
packages = ["."]
revision = "fd68e9863619f6ec2fdd8625fe1f02e7c877e480"
version = "v1.1.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "76687ace1736ad6f237f5bdc9ad6c82e82906a7098a5c61211676c61e5b088a0"
inputs-digest = "96704623ac96e94ce47b0820b4ff9e359b76c68a72eb83621a3de9d99d3d9d4f"
solver-name = "gps-cdcl"
solver-version = 1

View File

@@ -16,11 +16,11 @@
[[constraint]]
name = "k8s.io/client-go"
version = "v9.0.0"
version = "v10.0.0"
[[constraint]]
name = "k8s.io/apimachinery"
branch = "release-1.12"
branch = "release-1.13"
[prune]
go-tests = true

View File

@@ -12,7 +12,7 @@ clean:
rm -f cmd/kured/kured
rm -rf ./build
godeps=$(shell go get $1 && go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
godeps=$(shell go list -f '{{join .Deps "\n"}}' $1 | grep -v /vendor/ | xargs go list -f '{{if not .Standard}}{{ $$dep := . }}{{range .GoFiles}}{{$$dep.Dir}}/{{.}} {{end}}{{end}}')
DEPS=$(call godeps,./cmd/kured)
@@ -23,14 +23,14 @@ cmd/kured/kured: cmd/kured/*.go
build/.image.done: cmd/kured/Dockerfile cmd/kured/kured
mkdir -p build
cp $^ build
$(SUDO) docker build -t quay.io/$(DH_ORG)/kured -f build/Dockerfile ./build
$(SUDO) docker tag quay.io/$(DH_ORG)/kured quay.io/$(DH_ORG)/kured:$(VERSION)
$(SUDO) docker build -t docker.io/$(DH_ORG)/kured -f build/Dockerfile ./build
$(SUDO) docker tag docker.io/$(DH_ORG)/kured docker.io/$(DH_ORG)/kured:$(VERSION)
touch $@
image: build/.image.done
publish-image: image
$(SUDO) docker push quay.io/$(DH_ORG)/kured:$(VERSION)
$(SUDO) docker push docker.io/$(DH_ORG)/kured:$(VERSION)
minikube-publish: image
$(SUDO) docker save quay.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)
$(SUDO) docker save docker.io/$(DH_ORG)/kured | (eval $$(minikube docker-env) && docker load)

View File

@@ -7,6 +7,7 @@
* [Configuration](#configuration)
* [Reboot Sentinel File & Period](#reboot-sentinel-file--period)
* [Blocking Reboots via Alerts](#blocking-reboots-via-alerts)
* [Blocking Reboots via Pods](#blocking-reboots-via-pods)
* [Prometheus Metrics](#prometheus-metrics)
* [Slack Notifications](#slack-notifications)
* [Overriding Lock Configuration](#overriding-lock-configuration)
@@ -27,16 +28,25 @@ indicated by the package management system of the underlying OS.
* Watches for the presence of a reboot sentinel e.g. `/var/run/reboot-required`
* Utilises a lock in the API server to ensure only one node reboots at
a time
* Optionally defers reboots in the presence of active Prometheus alerts
* Optionally defers reboots in the presence of active Prometheus alerts or selected pods
* Cordons & drains worker nodes before reboot, uncordoning them after
## Kubernetes & OS Compatibility
The daemon image contains versions of `k8s.io/client-go` and the
`kubectl` binary for the purposes of maintaining the lock and draining
worker nodes. See the [release
notes](https://github.com/weaveworks/kured/releases) for specific
version compatibility information.
worker nodes. Kubernetes aims to provide forwards & backwards
compatibility of one minor version between client and server:
| kured | kubectl | k8s.io/client-go | k8s.io/apimachinery | expected kubernetes compatibility |
|--------|---------|------------------|---------------------|-----------------------------------|
| master | 1.13.6 | v10.0.0 | release-1.13 | 1.12.x, 1.13.x, 1.14.x |
| 1.1.0 | 1.12.1 | v9.0.0 | release-1.12 | 1.11.x, 1.12.x, 1.13.x |
| 1.0.0 | 1.7.6 | v4.0.0 | release-1.7 | 1.6.x, 1.7.x, 1.8.x |
See the [release notes](https://github.com/weaveworks/kured/releases)
for specific version compatibility information, including which
combinations have been formally tested.
Versions >=1.1.0 enter the host mount namespace to invoke
`systemctl reboot`, so should work on any systemd distribution.
@@ -47,7 +57,7 @@ To obtain a default installation without Prometheus alerting interlock
or Slack notifications:
```
kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.1.0/kured-1.1.0.yaml
kubectl apply -f https://github.com/weaveworks/kured/releases/download/1.1.0/kured-1.1.0-dockerhub.yaml
```
If you want to customise the installation, download the manifest and
@@ -59,15 +69,17 @@ The following arguments can be passed to kured via the daemonset pod template:
```
Flags:
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
--alert-filter-regexp regexp.Regexp alert names to ignore when checking for active alerts
--blocking-pod-selector stringArray label selector identifying pods whose presence should prevent reboots
--ds-name string name of daemonset on which to place lock (default "kured")
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
-h, --help help for kured
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
```
### Reboot Sentinel File & Period
@@ -95,8 +107,33 @@ will block reboots, however you can ignore specific alerts:
--alert-filter-regexp=^(RebootRequired|AnotherBenignAlert|...)$
```
An important application of this filter will become apparent in the
next section.
See the section on Prometheus metrics for an important application of this
filter.
### Blocking Reboots via Pods
You can also block reboots of an _individual node_ when specific pods
are scheduled on it:
```
--blocking-pod-selector=runtime=long,cost=expensive
```
Since label selector strings use commas to express logical 'and', you can
specify this parameter multiple times for 'or':
```
--blocking-pod-selector=runtime=long,cost=expensive
--blocking-pod-selector=name=temperamental
```
In this case, the presence of either an (appropriately labelled) expensive long
running job or a known temperamental pod on a node will stop it rebooting.
> Try not to abuse this mechanism - it's better to strive for
> restartability where possible. If you do use it, make sure you set
> up a RebootRequired alert as described in the next section so that
> you can intervene manually if reboots are blocked for too long.
### Prometheus Metrics
@@ -198,7 +235,7 @@ dep ensure && make
## Frequently Asked/Anticipated Questions
### Why is there no `latest` tag on quay.io?
### Why is there no `latest` tag on Docker Hub?
Use of `latest` for production deployments is bad practice - see
[here](https://kubernetes.io/docs/concepts/configuration/overview) for
@@ -210,9 +247,8 @@ versioned manifest from the [release page](https://github.com/weaveworks/kured/r
If you have any questions about, feedback for or problems with `kured`:
- Invite yourself to the <a href="https://weaveworks.github.io/community-slack/" target="_blank"> #weave-community </a> slack channel.
- Ask a question on the <a href="https://weave-community.slack.com/messages/general/"> #weave-community</a> slack channel.
- Send an email to <a href="mailto:weave-users@weave.works">weave-users@weave.works</a>
- <a href="https://github.com/weaveworks/kured/issues/new">File an issue.</a>
- Invite yourself to the <a href="https://slack.weave.works/" target="_blank">Weave Users Slack</a>.
- Ask a question on the [#general](https://weave-community.slack.com/messages/general/) slack channel.
- [File an issue](https://github.com/weaveworks/kured/issues/new).
Your feedback is always welcome!

View File

@@ -1,7 +1,7 @@
FROM alpine:3.8
RUN apk update && apk add ca-certificates && rm -rf /var/cache/apk/*
# NB: you may need to update RBAC permissions when upgrading kubectl - see kured-rbac.yaml for details
ADD https://storage.googleapis.com/kubernetes-release/release/v1.12.1/bin/linux/amd64/kubectl /usr/bin/kubectl
ADD https://storage.googleapis.com/kubernetes-release/release/v1.13.6/bin/linux/amd64/kubectl /usr/bin/kubectl
RUN chmod 0755 /usr/bin/kubectl
COPY ./kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]

View File

@@ -1,6 +1,7 @@
package main
import (
"fmt"
"math/rand"
"net/http"
"os"
@@ -35,6 +36,7 @@ var (
rebootSentinel string
slackHookURL string
slackUsername string
podSelectors []string
// Metrics
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -74,6 +76,9 @@ func main() {
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
"slack username for reboot notfications")
rootCmd.PersistentFlags().StringArrayVar(&podSelectors, "blocking-pod-selector", nil,
"label selector identifying pods whose presence should prevent reboots")
if err := rootCmd.Execute(); err != nil {
log.Fatal(err)
}
@@ -126,7 +131,7 @@ func rebootRequired() bool {
}
}
func rebootBlocked() bool {
func rebootBlocked(client *kubernetes.Clientset, nodeID string) bool {
if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
if err != nil {
@@ -142,6 +147,31 @@ func rebootBlocked() bool {
return true
}
}
fieldSelector := fmt.Sprintf("spec.nodeName=%s", nodeID)
for _, labelSelector := range podSelectors {
podList, err := client.CoreV1().Pods("").List(metav1.ListOptions{
LabelSelector: labelSelector,
FieldSelector: fieldSelector,
Limit: 10})
if err != nil {
log.Warnf("Reboot blocked: pod query error: %v", err)
return true
}
if len(podList.Items) > 0 {
podNames := make([]string, 0, len(podList.Items))
for _, pod := range podList.Items {
podNames = append(podNames, pod.Name)
}
if len(podList.Continue) > 0 {
podNames = append(podNames, "...")
}
log.Warnf("Reboot blocked: matching pods: %v", podNames)
return true
}
}
return false
}
@@ -259,7 +289,7 @@ func rebootAsRequired(nodeID string) {
source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, period)
for _ = range tick {
if rebootRequired() && !rebootBlocked() {
if rebootRequired() && !rebootBlocked(client, nodeID) {
node, err := client.CoreV1().Nodes().Get(nodeID, metav1.GetOptions{})
if err != nil {
log.Fatal(err)
@@ -291,6 +321,7 @@ func root(cmd *cobra.Command, args []string) {
log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
log.Infof("Blocking Pod Selectors: %v", podSelectors)
go rebootAsRequired(nodeID)
go maintainRebootRequiredMetric(nodeID)

View File

@@ -29,10 +29,10 @@ spec:
restartPolicy: Always
containers:
- name: kured
image: quay.io/weaveworks/kured # If you find yourself here
# wondering why there is no
# :latest tag on quay.io, see
# the FAQ in the README
image: docker.io/weaveworks/kured # If you find yourself here
# wondering why there is no
# :latest tag on Docker Hub,
# see the FAQ in the README
imagePullPolicy: IfNotPresent
securityContext:
privileged: true # Give permission to nsenter /proc/1/ns/mnt
@@ -46,6 +46,9 @@ spec:
command:
- /usr/bin/kured
# - --alert-filter-regexp=^RebootRequired$
# - --blocking-pod-selector=runtime=long,cost=expensive
# - --blocking-pod-selector=name=temperamental
# - --blocking-pod-selector=...
# - --ds-name=kured
# - --ds-namespace=kube-system
# - --lock-annotation=weave.works/kured-node-lock