mirror of
https://github.com/kubereboot/kured.git
synced 2026-02-14 17:39:49 +00:00
Compare commits
3 Commits
1.21.0
...
force-rebo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6a4dcc8fb3 | ||
|
|
989594acbf | ||
|
|
1b5565cbae |
19
README.md
19
README.md
@@ -61,15 +61,16 @@ The following arguments can be passed to kured via the daemonset pod template:
|
||||
|
||||
```
|
||||
Flags:
|
||||
--alert-filter-regexp value alert names to ignore when checking for active alerts
|
||||
--ds-name string name of daemonset on which to place lock (default "kured")
|
||||
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
|
||||
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
--period duration reboot check period (default 1h0m0s)
|
||||
--prometheus-url string Prometheus instance to probe for active alerts
|
||||
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
--slack-hook-url string slack hook URL for reboot notifications
|
||||
--slack-username string slack username for reboot notifications (default "kured")
|
||||
--alert-filter-regexp value alert names to ignore when checking for active alerts
|
||||
--ds-name string name of daemonset on which to place lock (default "kured")
|
||||
--ds-namespace string namespace containing daemonset on which to place lock (default "kube-system")
|
||||
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
|
||||
--period duration reboot check period (default 1h0m0s)
|
||||
--prometheus-url string Prometheus instance to probe for active alerts
|
||||
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
|
||||
--force-reboot-sentinel string path to file whose existence signals need to force a reboot, i.e. ignoring active Prometheus alerts (default "/var/run/force-reboot-required")
|
||||
--slack-hook-url string slack hook URL for reboot notifications
|
||||
--slack-username string slack username for reboot notifications (default "kured")
|
||||
```
|
||||
|
||||
### Reboot Sentinel File & Period
|
||||
|
||||
@@ -26,15 +26,16 @@ var (
|
||||
version = "unreleased"
|
||||
|
||||
// Command line flags
|
||||
period time.Duration
|
||||
dsNamespace string
|
||||
dsName string
|
||||
lockAnnotation string
|
||||
prometheusURL string
|
||||
alertFilter *regexp.Regexp
|
||||
rebootSentinel string
|
||||
slackHookURL string
|
||||
slackUsername string
|
||||
period time.Duration
|
||||
dsNamespace string
|
||||
dsName string
|
||||
lockAnnotation string
|
||||
prometheusURL string
|
||||
alertFilter *regexp.Regexp
|
||||
rebootSentinel string
|
||||
forceRebootSentinel string
|
||||
slackHookURL string
|
||||
slackUsername string
|
||||
|
||||
// Metrics
|
||||
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
@@ -68,7 +69,8 @@ func main() {
|
||||
"alert names to ignore when checking for active alerts")
|
||||
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
|
||||
"path to file whose existence signals need to reboot")
|
||||
|
||||
rootCmd.PersistentFlags().StringVar(&forceRebootSentinel, "force-reboot-sentinel", "/var/run/force-reboot-required",
|
||||
"path to file whose existence signals need to force reboot")
|
||||
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
|
||||
"slack hook URL for reboot notfications")
|
||||
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
|
||||
@@ -108,9 +110,21 @@ func sentinelExists() bool {
|
||||
return false // unreachable; prevents compilation error
|
||||
}
|
||||
}
|
||||
func forceRebootsentinelExists() bool {
|
||||
_, err := os.Stat(forceRebootSentinel)
|
||||
switch {
|
||||
case err == nil:
|
||||
return true
|
||||
case os.IsNotExist(err):
|
||||
return false
|
||||
default:
|
||||
log.Fatalf("Unable to determine existence of force reboot sentinel: %v", err)
|
||||
return false // unreachable; prevents compilation error
|
||||
}
|
||||
}
|
||||
|
||||
func rebootRequired() bool {
|
||||
if sentinelExists() {
|
||||
if sentinelExists() || forceRebootsentinelExists() {
|
||||
log.Infof("Reboot required")
|
||||
return true
|
||||
} else {
|
||||
@@ -120,6 +134,10 @@ func rebootRequired() bool {
|
||||
}
|
||||
|
||||
func rebootBlocked() bool {
|
||||
if forceRebootsentinelExists() {
|
||||
log.Infof("Force reboot sentinel %v exists, force rebooting activated", forceRebootSentinel)
|
||||
return false
|
||||
}
|
||||
if prometheusURL != "" {
|
||||
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user