Compare commits

...

3 Commits

Author SHA1 Message Date
Frank Vissing
6a4dcc8fb3 reboot if forceRebootSentinel exists 2018-06-01 13:29:49 +02:00
Frank Vissing
989594acbf Update main.go
fix spelling
2018-05-15 14:19:08 +02:00
Frank Vissing
1b5565cbae adding option to force reboot, ignoring active alerts 2018-05-15 12:40:54 +02:00
2 changed files with 39 additions and 20 deletions

View File

@@ -61,15 +61,16 @@ The following arguments can be passed to kured via the daemonset pod template:
```
Flags:
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string name of daemonset on which to place lock (default "kured")
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--force-reboot-sentinel string path to file whose existence signals need to force reboot, i.e. reboot even if Prometheus alerts are active (default "/var/run/force-reboot-required")
--slack-hook-url string slack hook URL for reboot notifications
--slack-username string slack username for reboot notifications (default "kured")
```
### Reboot Sentinel File & Period

View File

@@ -26,15 +26,16 @@ var (
version = "unreleased"
// Command line flags
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
slackHookURL string
slackUsername string
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
forceRebootSentinel string
slackHookURL string
slackUsername string
// Metrics
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
@@ -68,7 +69,8 @@ func main() {
"alert names to ignore when checking for active alerts")
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
"path to file whose existence signals need to reboot")
rootCmd.PersistentFlags().StringVar(&forceRebootSentinel, "force-reboot-sentinel", "/var/run/force-reboot-required",
"path to file whose existence signals need to force reboot")
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notfications")
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
@@ -108,9 +110,21 @@ func sentinelExists() bool {
return false // unreachable; prevents compilation error
}
}
// forceRebootsentinelExists reports whether the file named by the
// forceRebootSentinel flag can be stat'ed.
//
// It returns true when os.Stat succeeds and false when the error indicates
// the file does not exist. Any other stat error is passed to log.Fatalf.
func forceRebootsentinelExists() bool {
	_, statErr := os.Stat(forceRebootSentinel)
	if statErr == nil {
		return true
	}
	if !os.IsNotExist(statErr) {
		// Neither "present" nor "absent": the state cannot be determined.
		log.Fatalf("Unable to determine existence of force reboot sentinel: %v", statErr)
	}
	// Reached for a missing file; after log.Fatalf the original code treats
	// this return as unreachable and keeps it only to satisfy the compiler.
	return false
}
func rebootRequired() bool {
if sentinelExists() {
if sentinelExists() || forceRebootsentinelExists() {
log.Infof("Reboot required")
return true
} else {
@@ -120,6 +134,10 @@ func rebootRequired() bool {
}
func rebootBlocked() bool {
if forceRebootsentinelExists() {
log.Infof("Force reboot sentinel %v exists, force rebooting activated", forceRebootSentinel)
return false
}
if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
if err != nil {