Commit 80a254ec authored by Adam Harrison
Browse files

Improved active alert detection and logging

Zero-valued alerts are now ignored, and the names of the first ten
blocking alerts are logged for diagnostic purposes.
parent 1a6e814a
...@@ -80,13 +80,17 @@ func rebootRequired() bool { ...@@ -80,13 +80,17 @@ func rebootRequired() bool {
func rebootBlocked() bool { func rebootBlocked() bool {
if prometheusURL != "" { if prometheusURL != "" {
count, err := alerts.PrometheusCountActive(prometheusURL, alertFilter) alertNames, err := alerts.PrometheusActiveAlerts(prometheusURL, alertFilter)
if err != nil { if err != nil {
log.Warnf("Reboot blocked: prometheus query error: %v", err) log.Warnf("Reboot blocked: prometheus query error: %v", err)
return true return true
} }
count := len(alertNames)
if count > 10 {
alertNames = append(alertNames[:10], "...")
}
if count > 0 { if count > 0 {
log.Warnf("Reboot blocked: %d active alerts", count) log.Warnf("Reboot blocked: %d active alerts: %v", count, alertNames)
return true return true
} }
} }
......
...@@ -10,33 +10,34 @@ import ( ...@@ -10,33 +10,34 @@ import (
"github.com/prometheus/common/model" "github.com/prometheus/common/model"
) )
// Return true if there are any active (e.g. pending or firing) alerts // Returns a list of names of active (e.g. pending or firing) alerts, filtered
func PrometheusCountActive(prometheusURL string, filter *regexp.Regexp) (int, error) { // by the supplied regexp.
func PrometheusActiveAlerts(prometheusURL string, filter *regexp.Regexp) ([]string, error) {
client, err := prometheus.New(prometheus.Config{Address: prometheusURL}) client, err := prometheus.New(prometheus.Config{Address: prometheusURL})
if err != nil { if err != nil {
return 0, err return nil, err
} }
queryAPI := prometheus.NewQueryAPI(client) queryAPI := prometheus.NewQueryAPI(client)
value, err := queryAPI.Query(context.Background(), "ALERTS", time.Now()) value, err := queryAPI.Query(context.Background(), "ALERTS", time.Now())
if err != nil { if err != nil {
return 0, err return nil, err
} }
if value.Type() == model.ValVector { if value.Type() == model.ValVector {
if vector, ok := value.(model.Vector); ok { if vector, ok := value.(model.Vector); ok {
var count int var activeAlerts []string
for _, sample := range vector { for _, sample := range vector {
if alertName, isAlert := sample.Metric[model.AlertNameLabel]; isAlert { if alertName, isAlert := sample.Metric[model.AlertNameLabel]; isAlert && sample.Value != 0 {
if filter == nil || !filter.MatchString(string(alertName)) { if filter == nil || !filter.MatchString(string(alertName)) {
count++ activeAlerts = append(activeAlerts, string(alertName))
} }
} }
} }
return count, nil return activeAlerts, nil
} }
} }
return 0, fmt.Errorf("Unexpected value type: %v", value) return nil, fmt.Errorf("Unexpected value type: %v", value)
} }
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment