Commit 6e44cb13 authored by Adam Harrison
Browse files

Implement kured_reboot_required metric

parent 91bf84a9
...@@ -2,6 +2,7 @@ package main ...@@ -2,6 +2,7 @@ package main
import ( import (
"math/rand" "math/rand"
"net/http"
"os" "os"
"os/exec" "os/exec"
"regexp" "regexp"
...@@ -13,6 +14,8 @@ import ( ...@@ -13,6 +14,8 @@ import (
"k8s.io/client-go/pkg/api/v1" "k8s.io/client-go/pkg/api/v1"
"k8s.io/client-go/rest" "k8s.io/client-go/rest"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/weaveworks/kured/pkg/alerts" "github.com/weaveworks/kured/pkg/alerts"
"github.com/weaveworks/kured/pkg/daemonsetlock" "github.com/weaveworks/kured/pkg/daemonsetlock"
"github.com/weaveworks/kured/pkg/delaytick" "github.com/weaveworks/kured/pkg/delaytick"
...@@ -20,7 +23,9 @@ import ( ...@@ -20,7 +23,9 @@ import (
) )
var ( var (
version = "unreleased" version = "unreleased"
// Command line flags
period time.Duration period time.Duration
dsNamespace string dsNamespace string
dsName string dsName string
...@@ -30,8 +35,19 @@ var ( ...@@ -30,8 +35,19 @@ var (
rebootSentinel string rebootSentinel string
slackHookURL string slackHookURL string
slackUsername string slackUsername string
// Metrics
// rebootRequiredGauge is a gauge vector for the kured_reboot_required metric
// (Subsystem "kured" + Name "reboot_required"), carrying a single "node"
// label. maintainRebootRequiredMetric sets it to 1 while the reboot sentinel
// exists and to 0 otherwise.
rebootRequiredGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Subsystem: "kured",
Name: "reboot_required",
Help: "OS requires reboot due to software updates.",
}, []string{"node"})
) )
// init registers the kured metrics with the default Prometheus registerer at
// package load time, so they are in place before main starts serving.
// Registration panics if the collector cannot be registered.
func init() {
	prometheus.DefaultRegisterer.MustRegister(rebootRequiredGauge)
}
func main() { func main() {
rootCmd := &cobra.Command{ rootCmd := &cobra.Command{
Use: "kured", Use: "kured",
...@@ -63,21 +79,29 @@ func main() { ...@@ -63,21 +79,29 @@ func main() {
} }
} }
func rebootRequired() bool { func sentinelExists() bool {
_, err := os.Stat(rebootSentinel) _, err := os.Stat(rebootSentinel)
switch { switch {
case err == nil: case err == nil:
log.Infof("Reboot required")
return true return true
case os.IsNotExist(err): case os.IsNotExist(err):
log.Infof("Reboot not required")
return false return false
default: default:
log.Fatalf("Unable to determine if reboot required: %v", err) log.Fatalf("Unable to determine existence of sentinel: %v", err)
return false // unreachable; prevents compilation error return false // unreachable; prevents compilation error
} }
} }
// rebootRequired reports whether a reboot is currently required, as
// indicated by sentinelExists, and logs the outcome at info level on every
// call. Callers that only need the raw check without the log line should use
// sentinelExists directly.
func rebootRequired() bool {
	if sentinelExists() {
		log.Infof("Reboot required")
		return true
	}
	// No else branch: the positive case has already returned above.
	log.Infof("Reboot not required")
	return false
}
func rebootBlocked() bool { func rebootBlocked() bool {
if prometheusURL != "" { if prometheusURL != "" {
alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter) alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
...@@ -181,7 +205,7 @@ func waitForDrain(client *kubernetes.Clientset, nodeID string) { ...@@ -181,7 +205,7 @@ func waitForDrain(client *kubernetes.Clientset, nodeID string) {
} }
} }
func reboot(nodeID string) { func commandReboot(nodeID string) {
log.Infof("Commanding reboot") log.Infof("Commanding reboot")
if slackHookURL != "" { if slackHookURL != "" {
...@@ -197,9 +221,13 @@ func reboot(nodeID string) { ...@@ -197,9 +221,13 @@ func reboot(nodeID string) {
} }
} }
func waitForReboot() { func maintainRebootRequiredMetric(nodeID string) {
for { for {
log.Infof("Waiting for reboot") if sentinelExists() {
rebootRequiredGauge.WithLabelValues(nodeID).Set(1)
} else {
rebootRequiredGauge.WithLabelValues(nodeID).Set(0)
}
time.Sleep(time.Minute) time.Sleep(time.Minute)
} }
} }
...@@ -209,18 +237,7 @@ type nodeMeta struct { ...@@ -209,18 +237,7 @@ type nodeMeta struct {
Unschedulable bool `json:"unschedulable"` Unschedulable bool `json:"unschedulable"`
} }
func root(cmd *cobra.Command, args []string) { func rebootAsRequired(nodeID string) {
log.Infof("Kubernetes Reboot Daemon: %s", version)
nodeID := os.Getenv("KURED_NODE_ID")
if nodeID == "" {
log.Fatal("KURED_NODE_ID environment variable required")
}
log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
config, err := rest.InClusterConfig() config, err := rest.InClusterConfig()
if err != nil { if err != nil {
log.Fatal(err) log.Fatal(err)
...@@ -256,11 +273,31 @@ func root(cmd *cobra.Command, args []string) { ...@@ -256,11 +273,31 @@ func root(cmd *cobra.Command, args []string) {
drain(nodeID) drain(nodeID)
waitForDrain(client, nodeID) waitForDrain(client, nodeID)
} }
reboot(nodeID) commandReboot(nodeID)
break for {
log.Infof("Waiting for reboot")
time.Sleep(time.Minute)
}
} }
} }
} }
}
func root(cmd *cobra.Command, args []string) {
log.Infof("Kubernetes Reboot Daemon: %s", version)
nodeID := os.Getenv("KURED_NODE_ID")
if nodeID == "" {
log.Fatal("KURED_NODE_ID environment variable required")
}
log.Infof("Node ID: %s", nodeID)
log.Infof("Lock Annotation: %s/%s:%s", dsNamespace, dsName, lockAnnotation)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
go rebootAsRequired(nodeID)
go maintainRebootRequiredMetric(nodeID)
waitForReboot() http.Handle("/metrics", promhttp.Handler())
log.Fatal(http.ListenAndServe(":8080", nil))
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment