Commit 1b5565cb authored by Frank Vissing's avatar Frank Vissing
Browse files

adding option to force reboot, ignoring active allerts

parent b27aaa1b
......@@ -61,15 +61,16 @@ The following arguments can be passed to kured via the daemonset pod template:
```
Flags:
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
--alert-filter-regexp value alert names to ignore when checking for active alerts
--ds-name string namespace containing daemonset on which to place lock (default "kube-system")
--ds-namespace string name of daemonset on which to place lock (default "kured")
--lock-annotation string annotation in which to record locking node (default "weave.works/kured-node-lock")
--period duration reboot check period (default 1h0m0s)
--prometheus-url string Prometheus instance to probe for active alerts
--reboot-sentinel string path to file whose existence signals need to reboot (default "/var/run/reboot-required")
--force-reboot-sentinel string path to file whose existence signals need to force reboot aka. ignore active prometheus alerts (default "/var/run/force-reboot-required")
--slack-hook-url string slack hook URL for reboot notfications
--slack-username string slack username for reboot notfications (default "kured")
```
### Reboot Sentinel File & Period
......
......@@ -26,15 +26,16 @@ var (
version = "unreleased"
// Command line flags
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
slackHookURL string
slackUsername string
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
forceRebootSentinel string
slackHookURL string
slackUsername string
// Metrics
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
......@@ -68,7 +69,8 @@ func main() {
"alert names to ignore when checking for active alerts")
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
"path to file whose existence signals need to reboot")
rootCmd.PersistentFlags().StringVar(&forceRebootSentinel, "force-reboot-sentinel", "/var/run/force-reboot-required",
"path to file whose existence signals need to force reboot")
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notfications")
rootCmd.PersistentFlags().StringVar(&slackUsername, "slack-username", "kured",
......@@ -108,6 +110,18 @@ func sentinelExists() bool {
return false // unreachable; prevents compilation error
}
}
func forceRebootsentinelExists() bool {
_, err := os.Stat(forceRebootSentinel)
switch {
case err == nil:
return true
case os.IsNotExist(err):
return false
default:
log.Fatalf("Unable to determine existence of force reboot sentinel: %v", err)
return false // unreachable; prevents compilation error
}
}
func rebootRequired() bool {
if sentinelExists() {
......@@ -120,6 +134,10 @@ func rebootRequired() bool {
}
func rebootBlocked() bool {
if forceRebootsentinelExists() {
log.Infof("Force reebot sentinel %v exists, force reeboting activated",forceRebootSentinel)
return false
}
if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
if err != nil {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment