Commit e4c684c3 authored by David Sauer's avatar David Sauer
Browse files

taint node with PreferNoSchedule to prevent receiving (and double draining)...

taint node with PreferNoSchedule to prevent receiving (and double draining) additional pods from other rebooting nodes
parent 204a06ca
......@@ -24,6 +24,7 @@ import (
"github.com/weaveworks/kured/pkg/daemonsetlock"
"github.com/weaveworks/kured/pkg/delaytick"
"github.com/weaveworks/kured/pkg/notifications/slack"
"github.com/weaveworks/kured/pkg/taints"
"github.com/weaveworks/kured/pkg/timewindow"
)
......@@ -31,20 +32,21 @@ var (
version = "unreleased"
// Command line flags
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
lockTTL time.Duration
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
slackHookURL string
slackUsername string
slackChannel string
messageTemplateDrain string
messageTemplateReboot string
podSelectors []string
period time.Duration
dsNamespace string
dsName string
lockAnnotation string
lockTTL time.Duration
prometheusURL string
alertFilter *regexp.Regexp
rebootSentinel string
preferNoScheduleTaintName string
slackHookURL string
slackUsername string
slackChannel string
messageTemplateDrain string
messageTemplateReboot string
podSelectors []string
rebootDays []string
rebootStart string
......@@ -85,6 +87,8 @@ func main() {
"alert names to ignore when checking for active alerts")
rootCmd.PersistentFlags().StringVar(&rebootSentinel, "reboot-sentinel", "/var/run/reboot-required",
"path to file whose existence signals need to reboot")
rootCmd.PersistentFlags().StringVar(&preferNoScheduleTaintName, "prefer-no-schedule-taint", "weave.works/kured-node-reboot",
"taint name applied during pending node reboot (to prevent receiving additional pods from other rebooting nodes)")
rootCmd.PersistentFlags().StringVar(&slackHookURL, "slack-hook-url", "",
"slack hook URL for reboot notfications")
......@@ -336,10 +340,19 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur
release(lock)
}
preferNoScheduleTaint := taints.New(client, nodeID, preferNoScheduleTaintName, v1.TaintEffectPreferNoSchedule)
// Remove taint immediately during startup to quickly allow scheduling again.
if !rebootRequired() {
preferNoScheduleTaint.Disable()
}
source := rand.NewSource(time.Now().UnixNano())
tick := delaytick.New(source, period)
for range tick {
if !window.Contains(time.Now()) {
// Remove taint outside the reboot time window to allow for normal operation.
preferNoScheduleTaint.Disable()
continue
}
......@@ -358,6 +371,8 @@ func rebootAsRequired(nodeID string, window *timewindow.TimeWindow, TTL time.Dur
nodeMeta.Unschedulable = node.Spec.Unschedulable
if !acquire(lock, &nodeMeta, TTL) {
// Prefer to not schedule pods onto this node to avoid draining the same pod multiple times.
preferNoScheduleTaint.Enable()
continue
}
......@@ -392,6 +407,7 @@ func root(cmd *cobra.Command, args []string) {
} else {
log.Info("Lock TTL not set, lock will remain until being released")
}
log.Infof("PreferNoSchedule taint: %s", preferNoScheduleTaintName)
log.Infof("Reboot Sentinel: %s every %v", rebootSentinel, period)
log.Infof("Blocking Pod Selectors: %v", podSelectors)
log.Infof("Reboot on: %v", window)
......
package taints
import (
"context"
"encoding/json"
"fmt"
log "github.com/sirupsen/logrus"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
)
// Taint is a handle for a single taint on a single node. It bundles the
// Kubernetes client with the node and taint identity so that the taint can be
// switched on and off through Enable and Disable.
type Taint struct {
	// client is used to read and patch the node object.
	client *kubernetes.Clientset
	// nodeID is the name of the node to look up; taintName is the taint key.
	nodeID, taintName string
	// effect is the taint effect written alongside the key.
	effect v1.TaintEffect
}
// New returns a Taint handle for the taint taintName with the given effect on
// node nodeID. It only stores its arguments; nothing is sent to the API server
// until Enable or Disable is called.
func New(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect) *Taint {
	t := new(Taint)
	t.client = client
	t.nodeID = nodeID
	t.taintName = taintName
	t.effect = effect
	return t
}
// Enable makes sure the taint is present on the node. If the node already
// carries the taint, nothing is patched.
func (t *Taint) Enable() {
	const shouldExist = true
	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, shouldExist)
}
// Disable makes sure the taint is absent from the node. If the node does not
// carry the taint, nothing is patched.
func (t *Taint) Disable() {
	const shouldExist = false
	preferNoSchedule(t.client, t.nodeID, t.taintName, t.effect, shouldExist)
}
// preferNoSchedule ensures that the taint identified by taintName and effect
// is present on (taintShouldExists == true) or absent from
// (taintShouldExists == false) the node nodeID.
//
// The node is read first. If its taints already match the desired state the
// function logs at debug level and returns without patching. Otherwise a JSON
// patch is sent that adds or removes just this taint.
//
// Failing to read the node, to encode the patch or to apply the patch is
// reported through log.Fatalf.
func preferNoSchedule(client *kubernetes.Clientset, nodeID, taintName string, effect v1.TaintEffect, taintShouldExists bool) {
	updatedNode, err := client.CoreV1().Nodes().Get(context.TODO(), nodeID, metav1.GetOptions{})
	if err != nil || updatedNode == nil {
		log.Fatalf("Error reading node %s: %v", nodeID, err)
	}

	// Locate our taint by key AND effect. Matching on the key alone would pick
	// up a taint with the same key but a different effect: Enable would then
	// wrongly report the taint as present, and Disable would build a "test"
	// operation (key + effect) that cannot match the entry at that offset.
	taintExists := false
	offset := 0
	for i, taint := range updatedNode.Spec.Taints {
		if taint.Key == taintName && taint.Effect == effect {
			taintExists = true
			offset = i
			break
		}
	}

	if taintExists && taintShouldExists {
		log.Debugf("Taint %v exists already for node %v.", taintName, nodeID)
		return
	}

	if !taintExists && !taintShouldExists {
		log.Debugf("Taint %v already missing for node %v.", taintName, nodeID)
		return
	}

	// patchTaints is a single JSON patch operation.
	type patchTaints struct {
		Op    string      `json:"op"`
		Path  string      `json:"path"`
		Value interface{} `json:"value,omitempty"`
	}

	taint := v1.Taint{
		Key:    taintName,
		Effect: effect,
	}

	var patches []patchTaints

	switch {
	case len(updatedNode.Spec.Taints) == 0:
		// Only reachable when the taint has to be added (the "already missing"
		// case returned above). The "test" operation makes the patch fail if
		// the node spec changed since it was read, so that taints added in the
		// meantime are not overwritten when the list is created.
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  "/spec",
				Value: updatedNode.Spec,
			},
			{
				Op:    "add",
				Path:  "/spec/taints",
				Value: []v1.Taint{},
			},
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	case taintExists:
		// Remove the taint. The "test" operation guards against the list having
		// been reordered or modified between the read and the patch.
		taintPath := fmt.Sprintf("/spec/taints/%d", offset)
		patches = []patchTaints{
			{
				Op:    "test",
				Path:  taintPath,
				Value: taint,
			},
			{
				Op:   "remove",
				Path: taintPath,
			},
		}
	default:
		// Append the missing taint to the existing list.
		patches = []patchTaints{
			{
				Op:    "add",
				Path:  "/spec/taints/-",
				Value: taint,
			},
		}
	}

	patchBytes, err := json.Marshal(patches)
	if err != nil {
		log.Fatalf("Error encoding taint patcht for node %s: %v", nodeID, err)
	}

	_, err = client.CoreV1().Nodes().Patch(context.TODO(), nodeID, types.JSONPatchType, patchBytes, metav1.PatchOptions{})
	if err != nil {
		log.Fatalf("Error patching taint for node %s: %v", nodeID, err)
	}

	if taintShouldExists {
		log.Info("Node taint added")
	} else {
		log.Info("Node taint removed")
	}
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment