Compare commits

...

12 Commits

Author SHA1 Message Date
Frank Vissing
6a4dcc8fb3 reboot if forceRebootSentinel exists 2018-06-01 13:29:49 +02:00
Frank Vissing
989594acbf Update main.go
fix spelling
2018-05-15 14:19:08 +02:00
Frank Vissing
1b5565cbae adding option to force reboot, ignoring active allerts 2018-05-15 12:40:54 +02:00
Adam Harrison
b27aaa1b0c Add help section to TOC 2018-04-13 09:44:50 +01:00
Adam Harrison
1a5da68369 Merge pull request #13 from dholbach/get-in-touch-information
add information on how to get in touch at the bottom of the page
2018-04-13 09:43:11 +01:00
Daniel Holbach
f388fcadc3 add information on how to get in touch at the bottom of the page 2018-04-13 10:39:48 +02:00
Adam Harrison
d7b9c9fbec Embed kubectl v1.9.6 for draining 2018-04-10 14:09:47 +01:00
Adam Harrison
a788809162 Log stdout/stderr from kubectl/systemctl invocations 2018-04-10 14:04:25 +01:00
Adam Harrison
f0f3314f68 Merge pull request #5 from SaaldjorMike/name-vs-namespace
Set variables based on correct arguments w.r.t. name of daemonset and…
2017-11-20 10:14:29 +00:00
Mike Rostermund
8f816562a4 Set variables based on correct arguments w.r.t. name of daemonset and namespace. 2017-11-17 13:35:25 +01:00
Adam Harrison
ae12f55aaf Merge pull request #3 from Bregor/patch-2
Looks like typo in daemonset
2017-11-06 11:28:13 +00:00
Maxim Filatov
8c5639e791 Looks like typo in daemonset
Due to 

    metadata:
      name: kured            # Must match `--ds-name`
      namespace: kube-system # Must match `--ds-namespace`

There should be 

    - --ds-name=kured
    - --ds-namespace=kube-system

As args.
2017-11-03 23:02:03 +03:00
6 changed files with 266 additions and 52 deletions

206
Gopkg.lock generated
View File

@@ -13,12 +13,6 @@
packages = ["."]
revision = "bbf7a2afc14f93e1e0a5c06df524fbd75e5031e5"
[[projects]]
branch = "master"
name = "github.com/Sirupsen/logrus"
packages = ["."]
revision = "abc6f20dabf4b10195f233ad21ea6c5ba33acae0"
[[projects]]
name = "github.com/asaskevich/govalidator"
packages = ["."]
@@ -46,13 +40,19 @@
[[projects]]
branch = "master"
name = "github.com/docker/distribution"
packages = ["digest","reference"]
packages = [
"digest",
"reference"
]
revision = "7365003236ca58bd7fa17ef1459328d13301d7d5"
[[projects]]
branch = "master"
name = "github.com/emicklei/go-restful"
packages = [".","log"]
packages = [
".",
"log"
]
revision = "b14c3a95fc27c52959d2eddc85066da3c14bf269"
[[projects]]
@@ -118,7 +118,10 @@
[[projects]]
branch = "master"
name = "github.com/gogo/protobuf"
packages = ["proto","sortkeys"]
packages = [
"proto",
"sortkeys"
]
revision = "e33835a643a970c11ac74f6333f5f6866387a101"
[[projects]]
@@ -154,7 +157,11 @@
[[projects]]
branch = "master"
name = "github.com/mailru/easyjson"
packages = ["buffer","jlexer","jwriter"]
packages = [
"buffer",
"jlexer",
"jwriter"
]
revision = "2af9a745a611440bab0528e5ac19b2805a1c50eb"
[[projects]]
@@ -172,7 +179,11 @@
[[projects]]
branch = "master"
name = "github.com/prometheus/client_golang"
packages = ["api/prometheus","prometheus","prometheus/promhttp"]
packages = [
"api/prometheus",
"prometheus",
"prometheus/promhttp"
]
revision = "5636dc67ae776adf5590da7349e70fbb9559972d"
[[projects]]
@@ -184,15 +195,28 @@
[[projects]]
branch = "master"
name = "github.com/prometheus/common"
packages = ["expfmt","internal/bitbucket.org/ww/goautoneg","model"]
packages = [
"expfmt",
"internal/bitbucket.org/ww/goautoneg",
"model"
]
revision = "ebdfc6da46522d58825777cf1f90490a5b1ef1d8"
[[projects]]
branch = "master"
name = "github.com/prometheus/procfs"
packages = [".","xfs"]
packages = [
".",
"xfs"
]
revision = "e645f4e5aaa8506fc71d6edbc5c4ff02c04c46f2"
[[projects]]
name = "github.com/sirupsen/logrus"
packages = ["."]
revision = "c155da19408a8799da419ed3eeb0cb5db0ad5dbc"
version = "v1.0.5"
[[projects]]
branch = "master"
name = "github.com/spf13/cobra"
@@ -211,22 +235,45 @@
packages = ["codec"]
revision = "3487a5545b3d480987dfb0492035299077fab33a"
[[projects]]
branch = "master"
name = "golang.org/x/crypto"
packages = ["ssh/terminal"]
revision = "beb2a9779c3b677077c41673505f150149fce895"
[[projects]]
branch = "master"
name = "golang.org/x/net"
packages = ["context","context/ctxhttp","http2","http2/hpack","idna"]
packages = [
"context",
"context/ctxhttp",
"http2",
"http2/hpack",
"idna"
]
revision = "2a35e686583654a1b89ca79c4ac78cb3d6529ca3"
[[projects]]
branch = "master"
name = "golang.org/x/sys"
packages = ["unix"]
revision = "a646d33e2ee3172a661fc09bca23bb4889a41bc8"
packages = [
"unix",
"windows"
]
revision = "3b87a42e500a6dc65dae1a55d0b641295971163e"
[[projects]]
branch = "master"
name = "golang.org/x/text"
packages = ["internal/gen","internal/triegen","internal/ucd","transform","unicode/cldr","unicode/norm","width"]
packages = [
"internal/gen",
"internal/triegen",
"internal/ucd",
"transform",
"unicode/cldr",
"unicode/norm",
"width"
]
revision = "a9a820217f98f7c8a207ec1e45a874e1fe12c478"
[[projects]]
@@ -238,7 +285,10 @@
[[projects]]
branch = "v2"
name = "gopkg.in/mgo.v2"
packages = ["bson","internal/json"]
packages = [
"bson",
"internal/json"
]
revision = "3f83fa5005286a7fe593b055f0d7771a7dce4655"
[[projects]]
@@ -250,18 +300,132 @@
[[projects]]
branch = "release-1.7"
name = "k8s.io/apimachinery"
packages = ["pkg/api/equality","pkg/api/errors","pkg/api/meta","pkg/api/resource","pkg/apimachinery","pkg/apimachinery/announced","pkg/apimachinery/registered","pkg/apis/meta/v1","pkg/apis/meta/v1/unstructured","pkg/apis/meta/v1alpha1","pkg/conversion","pkg/conversion/queryparams","pkg/conversion/unstructured","pkg/fields","pkg/labels","pkg/openapi","pkg/runtime","pkg/runtime/schema","pkg/runtime/serializer","pkg/runtime/serializer/json","pkg/runtime/serializer/protobuf","pkg/runtime/serializer/recognizer","pkg/runtime/serializer/streaming","pkg/runtime/serializer/versioning","pkg/selection","pkg/types","pkg/util/clock","pkg/util/diff","pkg/util/errors","pkg/util/framer","pkg/util/intstr","pkg/util/json","pkg/util/net","pkg/util/rand","pkg/util/runtime","pkg/util/sets","pkg/util/validation","pkg/util/validation/field","pkg/util/wait","pkg/util/yaml","pkg/version","pkg/watch","third_party/forked/golang/reflect"]
packages = [
"pkg/api/equality",
"pkg/api/errors",
"pkg/api/meta",
"pkg/api/resource",
"pkg/apimachinery",
"pkg/apimachinery/announced",
"pkg/apimachinery/registered",
"pkg/apis/meta/v1",
"pkg/apis/meta/v1/unstructured",
"pkg/apis/meta/v1alpha1",
"pkg/conversion",
"pkg/conversion/queryparams",
"pkg/conversion/unstructured",
"pkg/fields",
"pkg/labels",
"pkg/openapi",
"pkg/runtime",
"pkg/runtime/schema",
"pkg/runtime/serializer",
"pkg/runtime/serializer/json",
"pkg/runtime/serializer/protobuf",
"pkg/runtime/serializer/recognizer",
"pkg/runtime/serializer/streaming",
"pkg/runtime/serializer/versioning",
"pkg/selection",
"pkg/types",
"pkg/util/clock",
"pkg/util/diff",
"pkg/util/errors",
"pkg/util/framer",
"pkg/util/intstr",
"pkg/util/json",
"pkg/util/net",
"pkg/util/rand",
"pkg/util/runtime",
"pkg/util/sets",
"pkg/util/validation",
"pkg/util/validation/field",
"pkg/util/wait",
"pkg/util/yaml",
"pkg/version",
"pkg/watch",
"third_party/forked/golang/reflect"
]
revision = "8ab5f3d8a330c2e9baaf84e39042db8d49034ae2"
[[projects]]
name = "k8s.io/client-go"
packages = ["discovery","kubernetes","kubernetes/scheme","kubernetes/typed/admissionregistration/v1alpha1","kubernetes/typed/apps/v1beta1","kubernetes/typed/authentication/v1","kubernetes/typed/authentication/v1beta1","kubernetes/typed/authorization/v1","kubernetes/typed/authorization/v1beta1","kubernetes/typed/autoscaling/v1","kubernetes/typed/autoscaling/v2alpha1","kubernetes/typed/batch/v1","kubernetes/typed/batch/v2alpha1","kubernetes/typed/certificates/v1beta1","kubernetes/typed/core/v1","kubernetes/typed/extensions/v1beta1","kubernetes/typed/networking/v1","kubernetes/typed/policy/v1beta1","kubernetes/typed/rbac/v1alpha1","kubernetes/typed/rbac/v1beta1","kubernetes/typed/settings/v1alpha1","kubernetes/typed/storage/v1","kubernetes/typed/storage/v1beta1","pkg/api","pkg/api/v1","pkg/api/v1/ref","pkg/apis/admissionregistration","pkg/apis/admissionregistration/v1alpha1","pkg/apis/apps","pkg/apis/apps/v1beta1","pkg/apis/authentication","pkg/apis/authentication/v1","pkg/apis/authentication/v1beta1","pkg/apis/authorization","pkg/apis/authorization/v1","pkg/apis/authorization/v1beta1","pkg/apis/autoscaling","pkg/apis/autoscaling/v1","pkg/apis/autoscaling/v2alpha1","pkg/apis/batch","pkg/apis/batch/v1","pkg/apis/batch/v2alpha1","pkg/apis/certificates","pkg/apis/certificates/v1beta1","pkg/apis/extensions","pkg/apis/extensions/v1beta1","pkg/apis/networking","pkg/apis/networking/v1","pkg/apis/policy","pkg/apis/policy/v1beta1","pkg/apis/rbac","pkg/apis/rbac/v1alpha1","pkg/apis/rbac/v1beta1","pkg/apis/settings","pkg/apis/settings/v1alpha1","pkg/apis/storage","pkg/apis/storage/v1","pkg/apis/storage/v1beta1","pkg/util","pkg/util/parsers","pkg/version","rest","rest/watch","tools/clientcmd/api","tools/metrics","transport","util/cert","util/flowcontrol","util/integer"]
packages = [
"discovery",
"kubernetes",
"kubernetes/scheme",
"kubernetes/typed/admissionregistration/v1alpha1",
"kubernetes/typed/apps/v1beta1",
"kubernetes/typed/authentication/v1",
"kubernetes/typed/authentication/v1beta1",
"kubernetes/typed/authorization/v1",
"kubernetes/typed/authorization/v1beta1",
"kubernetes/typed/autoscaling/v1",
"kubernetes/typed/autoscaling/v2alpha1",
"kubernetes/typed/batch/v1",
"kubernetes/typed/batch/v2alpha1",
"kubernetes/typed/certificates/v1beta1",
"kubernetes/typed/core/v1",
"kubernetes/typed/extensions/v1beta1",
"kubernetes/typed/networking/v1",
"kubernetes/typed/policy/v1beta1",
"kubernetes/typed/rbac/v1alpha1",
"kubernetes/typed/rbac/v1beta1",
"kubernetes/typed/settings/v1alpha1",
"kubernetes/typed/storage/v1",
"kubernetes/typed/storage/v1beta1",
"pkg/api",
"pkg/api/v1",
"pkg/api/v1/ref",
"pkg/apis/admissionregistration",
"pkg/apis/admissionregistration/v1alpha1",
"pkg/apis/apps",
"pkg/apis/apps/v1beta1",
"pkg/apis/authentication",
"pkg/apis/authentication/v1",
"pkg/apis/authentication/v1beta1",
"pkg/apis/authorization",
"pkg/apis/authorization/v1",
"pkg/apis/authorization/v1beta1",
"pkg/apis/autoscaling",
"pkg/apis/autoscaling/v1",
"pkg/apis/autoscaling/v2alpha1",
"pkg/apis/batch",
"pkg/apis/batch/v1",
"pkg/apis/batch/v2alpha1",
"pkg/apis/certificates",
"pkg/apis/certificates/v1beta1",
"pkg/apis/extensions",
"pkg/apis/extensions/v1beta1",
"pkg/apis/networking",
"pkg/apis/networking/v1",
"pkg/apis/policy",
"pkg/apis/policy/v1beta1",
"pkg/apis/rbac",
"pkg/apis/rbac/v1alpha1",
"pkg/apis/rbac/v1beta1",
"pkg/apis/settings",
"pkg/apis/settings/v1alpha1",
"pkg/apis/storage",
"pkg/apis/storage/v1",
"pkg/apis/storage/v1beta1",
"pkg/util",
"pkg/util/parsers",
"pkg/version",
"rest",
"rest/watch",
"tools/clientcmd/api",
"tools/metrics",
"transport",
"util/cert",
"util/flowcontrol",
"util/integer"
]
revision = "d92e8497f71b7b4e0494e5bd204b48d34bd6f254"
version = "v4.0.0"
[solve-meta]
analyzer-name = "dep"
analyzer-version = 1
inputs-digest = "6892de026c4d8eb2c9973e20ee4734016093888c0376a54247a79147ab9d2fb6"
inputs-digest = "029e6d2251ccbf5acdfc3bc0c36f340dc3a98511b0d7338c3e9bb167e412a155"
solver-name = "gps-cdcl"
solver-version = 1

View File

@@ -1,6 +1,7 @@
[[constraint]]
name = "github.com/Sirupsen/logrus"
name = "github.com/sirupsen/logrus"
version = "v1.0.5"
[[constraint]]
branch = "master"

View File

@@ -15,6 +15,7 @@
* [Disabling Reboots](#disabling-reboots)
* [Manual Unlock](#manual-unlock)
* [Building](#building)
* [Getting Help](#getting-help)
## Introduction
@@ -22,7 +23,7 @@ Kured (KUbernetes REboot Daemon) is a Kubernetes daemonset that
performs safe automatic node reboots when the need to do so is
indicated by the package management system of the underlying OS.
* Watches for the presence of a reboot sentinel e.g. `/var/run/reboot-required`
* Watches for the presence of a reboot sentinel e.g. `/var/run/reboot-required`
* Utilises a lock in the API server to ensure only one node reboots at
a time
* Optionally defers reboots in the presence of active Prometheus alerts
@@ -60,15 +61,16 @@ The following arguments can be passed to kured via the daemonset pod template:
```
Flags:
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--force-reboot-sentinel string path to file whose existence signals need to force reboot aka. ignore active prometheus alerts (default "/var/run/force-reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
```
### Reboot Sentinel File & Period
@@ -196,3 +198,14 @@ kubectl -n kube-system annotate ds kured weave.works/kured-node-lock-
```
dep ensure && make
```
## Getting Help
If you have any questions about, feedback for or problems with `kured`:
- Invite yourself to the <a href="https://weaveworks.github.io/community-slack/" target="_blank"> #weave-community </a> slack channel.
- Ask a question on the <a href="https://weave-community.slack.com/messages/general/"> #weave-community</a> slack channel.
- Send an email to <a href="mailto:weave-users@weave.works">weave-users@weave.works</a>
- <a href="https://github.com/weaveworks/kured/issues/new">File an issue.</a>
Your feedback is always welcome!

View File

@@ -1,6 +1,6 @@
FROM ubuntu:16.04
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/cache/apt
ADD https://storage.googleapis.com/kubernetes-release/release/v1.7.6/bin/linux/amd64/kubectl /usr/bin/kubectl
ADD https://storage.googleapis.com/kubernetes-release/release/v1.9.6/bin/linux/amd64/kubectl /usr/bin/kubectl
RUN chmod 0755 /usr/bin/kubectl
COPY ./kured /usr/bin/kured
ENTRYPOINT ["/usr/bin/kured"]

View File

@@ -8,7 +8,7 @@ import (
"regexp"
"time"
log "github.com/Sirupsen/logrus"
log "github.com/sirupsen/logrus"
"github.com/spf13/cobra"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
@@ -26,15 +26,16 @@ var (
version = "unreleased"
// Command line flags
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
slackHookURL string
slackUsername string
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
forceRebootSentinel string
slackHookURL string
slackUsername string
// Metrics
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -56,9 +57,9 @@ func main() {
rootCmd.PersistentFlags().DurationVar(&period, "period", time.Minute*60,
"reboot check period")
rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-name", "kube-system",
rootCmd.PersistentFlags().StringVar(&dsNamespace, "ds-namespace", "kube-system",
"namespace containing daemonset on which to place lock")
rootCmd.PersistentFlags().StringVar(&dsName, "ds-namespace", "kured",
rootCmd.PersistentFlags().StringVar(&dsName, "ds-name", "kured",
"name of daemonset on which to place lock")
rootCmd.PersistentFlags().StringVar(&lockAnnotation, "lock-annotation", "weave.works/kured-node-lock",
"annotation in which to record locking node")
@@ -68,7 +69,8 @@ func main() {
"alert names to ignore when checking for active alerts")
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
"path to file whose existence signals need to reboot")
rootCmd.PersistentFlags().StringVar(&forceRebootSentinel, "force-reboot-sentinel", "/var/run/force-reboot-required",
"path to file whose existence signals need to force reboot")
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notfications")
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
@@ -79,6 +81,23 @@ func main() {
}
}
// newCommand creates a new Command with stdout/stderr wired to our standard logger
func newCommand(name string, arg ...string) *exec.Cmd {
cmd := exec.Command(name, arg...)
cmd.Stdout = log.NewEntry(log.StandardLogger()).
WithField("cmd", cmd.Args[0]).
WithField("std", "out").
WriterLevel(log.InfoLevel)
cmd.Stderr = log.NewEntry(log.StandardLogger()).
WithField("cmd", cmd.Args[0]).
WithField("std", "err").
WriterLevel(log.WarnLevel)
return cmd
}
func sentinelExists() bool {
_, err := os.Stat(rebootSentinel)
switch {
@@ -91,9 +110,21 @@ func sentinelExists() bool {
return false // unreachable; prevents compilation error
}
}
func forceRebootsentinelExists() bool {
_, err := os.Stat(forceRebootSentinel)
switch {
case err == nil:
return true
case os.IsNotExist(err):
return false
default:
log.Fatalf("Unable to determine existence of force reboot sentinel: %v", err)
return false // unreachable; prevents compilation error
}
}
func rebootRequired() bool {
if sentinelExists() {
if sentinelExists() || forceRebootsentinelExists() {
log.Infof("Reboot required")
return true
} else {
@@ -103,6 +134,10 @@ func rebootRequired() bool {
}
func rebootBlocked() bool {
if forceRebootsentinelExists() {
log.Infof("Force reboot sentinel %v exists, force rebooting activated", forceRebootSentinel)
return false
}
if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
if err != nil {
@@ -156,8 +191,9 @@ func release(lock *daemonsetlock.DaemonSetLock) {
func drain(nodeID string) {
log.Infof("Draining node %s", nodeID)
drainCmd := exec.Command("/usr/bin/kubectl", "drain",
drainCmd := newCommand("/usr/bin/kubectl", "drain",
"--ignore-daemonsets", "--delete-local-data", "--force", nodeID)
if err := drainCmd.Run(); err != nil {
log.Fatalf("Error invoking drain command: %v", err)
}
@@ -165,7 +201,7 @@ func drain(nodeID string) {
func uncordon(nodeID string) {
log.Infof("Uncordoning node %s", nodeID)
uncordonCmd := exec.Command("/usr/bin/kubectl", "uncordon", nodeID)
uncordonCmd := newCommand("/usr/bin/kubectl", "uncordon", nodeID)
if err := uncordonCmd.Run(); err != nil {
log.Fatalf("Error invoking uncordon command: %v", err)
}
@@ -181,7 +217,7 @@ func commandReboot(nodeID string) {
}
// Relies on /var/run/dbus/system_bus_socket bind mount to talk to systemd
rebootCmd := exec.Command("/bin/systemctl", "reboot")
rebootCmd := newCommand("/bin/systemctl", "reboot")
if err := rebootCmd.Run(); err != nil {
log.Fatalf("Error invoking reboot command: %v", err)
}

View File

@@ -17,8 +17,8 @@ spec:
- /usr/bin/kured
# args:
# - --alert-filter-regexp=^RebootRequired$
# - --ds-name=kube-system
# - --ds-namespace=kured
# - --ds-name=kured
# - --ds-namespace=kube-system
# - --lock-annotation=weave.works/kured-node-lock
# - --period=1h
# - --prometheus-url=http://prometheus.monitoring.svc.cluster.local