Commit c58c1e8f authored by Robert Rose's avatar Robert Rose
Browse files

nixos/k3s: add options for graceful node shutdown and kubelet config

Allow setting kubelet configuration parameters
via an option. Additionally, expose the
respective options for graceful node
shutdown directly, as they are anticipated to
be used frequently.
parent 2dbea5a3
Loading
Loading
Loading
Loading
+98 −35
Original line number Diff line number Diff line
@@ -359,6 +359,53 @@ in
        by the k3s agent. This option only makes sense on nodes with an enabled agent.
      '';
    };

    # Convenience options for the kubelet's graceful node shutdown feature.
    # The two durations are plain strings; they are only required to be
    # non-empty here and are not validated any further by this module.
    gracefulNodeShutdown = {
      # mkEnableOption renders its argument as "Whether to enable <text>.", so
      # the text deliberately ends without its own period or trailing newline;
      # otherwise the generated documentation shows a stray second period.
      enable = lib.mkEnableOption ''
        graceful node shutdowns where the kubelet attempts to detect
        node system shutdown and terminates pods running on the node. See the
        [documentation](https://kubernetes.io/docs/concepts/cluster-administration/node-shutdown/#graceful-node-shutdown)
        for further information'';

      # Total shutdown delay, covering regular and critical pods.
      shutdownGracePeriod = lib.mkOption {
        type = lib.types.nonEmptyStr;
        default = "30s";
        example = "1m30s";
        description = ''
          Specifies the total duration that the node should delay the shutdown by. This is the total
          grace period for pod termination for both regular and critical pods.
        '';
      };

      # Portion of the total delay reserved for critical pods.
      # NOTE(review): nothing here enforces that this is smaller than
      # `shutdownGracePeriod`; the description only recommends it.
      shutdownGracePeriodCriticalPods = lib.mkOption {
        type = lib.types.nonEmptyStr;
        default = "10s";
        example = "15s";
        description = ''
          Specifies the duration used to terminate critical pods during a node shutdown. This should be
          less than `shutdownGracePeriod`.
        '';
      };
    };

    # Free-form kubelet settings. Any attribute set is accepted; the values are
    # not checked against the KubeletConfiguration schema by this option.
    extraKubeletConfig = lib.mkOption {
      description = ''
        Extra configuration to add to the kubelet's configuration file. The subset of the kubelet's
        configuration that can be configured via a file is defined by the
        [KubeletConfiguration](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/)
        struct. See the
        [documentation](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/)
        for further information.
      '';
      type = lib.types.attrsOf lib.types.anything;
      default = { };
      example = {
        containerLogMaxSize = "5Mi";
        memoryThrottlingFactor = 0.69;
        podsPerCore = 3;
      };
    };
  };

  # implementation
@@ -397,7 +444,22 @@ in

    environment.systemPackages = [ config.services.k3s.package ];

    systemd.services.k3s = {
    systemd.services.k3s =
      let
        # Kubelet settings contributed by this module. The graceful node
        # shutdown durations are included only when that feature is enabled.
        # Entries from `extraKubeletConfig` take precedence on conflicting keys
        # because they are the right-hand operand of `//`.
        kubeletParams =
          let
            shutdown = cfg.gracefulNodeShutdown;
            shutdownParams = lib.optionalAttrs shutdown.enable {
              shutdownGracePeriod = shutdown.shutdownGracePeriod;
              shutdownGracePeriodCriticalPods = shutdown.shutdownGracePeriodCriticalPods;
            };
          in
          shutdownParams // cfg.extraKubeletConfig;
        # YAML file built from the fixed apiVersion/kind header plus
        # `kubeletParams`. The params are merged last, so they also override the
        # header fields if they happen to define them.
        kubeletConfig =
          let
            yaml = pkgs.formats.yaml { };
            header = {
              kind = "KubeletConfiguration";
              apiVersion = "kubelet.config.k8s.io/v1beta1";
            };
          in
          yaml.generate "k3s-kubelet-config" (header // kubeletParams);
      in
      {
        description = "k3s service";
        after = [
          "firewall.service"
@@ -430,6 +492,7 @@ in
            ++ (optional (cfg.token != "") "--token ${cfg.token}")
            ++ (optional (cfg.tokenFile != null) "--token-file ${cfg.tokenFile}")
            ++ (optional (cfg.configPath != null) "--config ${cfg.configPath}")
            ++ (optional (kubeletParams != { }) "--kubelet-arg=config=${kubeletConfig}")
            ++ (lib.flatten cfg.extraFlags)
          );
        };
+4 −1
Original line number Diff line number Diff line
@@ -15,6 +15,9 @@ in
      inherit (pkgs) etcd;
    }
  ) allK3s;
  single-node = lib.mapAttrs (_: k3s: import ./single-node.nix { inherit system pkgs k3s; }) allK3s;
  # Instantiates ./kubelet-config.nix once per entry of `allK3s`, passing the
  # respective k3s package to the test.
  kubelet-config = lib.mapAttrs (
    _: k3s: import ./kubelet-config.nix { inherit system pkgs k3s; }
  ) allK3s;
  multi-node = lib.mapAttrs (_: k3s: import ./multi-node.nix { inherit system pkgs k3s; }) allK3s;
  single-node = lib.mapAttrs (_: k3s: import ./single-node.nix { inherit system pkgs k3s; }) allK3s;
}
+80 −0
Original line number Diff line number Diff line
# A test that sets extra kubelet configuration and enables graceful node shutdown.
# It boots a single k3s node, then reads the kubelet's effective configuration
# back through the API server's node proxy and compares it with the values
# configured below.
import ../make-test-python.nix (
  {
    pkgs,
    lib,
    k3s,
    ...
  }:
  let
    nodeName = "test";
    # Values handed to the module and later expected in the kubelet's reported config.
    shutdownGracePeriod = "1m13s";
    shutdownGracePeriodCriticalPods = "13s";
    podsPerCore = 3;
    memoryThrottlingFactor = 0.69;
    containerLogMaxSize = "5Mi";
  in
  {
    name = "${k3s.name}-kubelet-config";
    nodes.machine =
      { pkgs, ... }:
      {
        # jq is used by the test script to extract the kubelet config
        environment.systemPackages = [ pkgs.jq ];

        # k3s uses enough resources the default vm fails.
        virtualisation.memorySize = 1536;
        virtualisation.diskSize = 4096;

        services.k3s = {
          enable = true;
          package = k3s;
          # Slightly reduce resource usage
          extraFlags = [
            "--disable coredns"
            "--disable local-storage"
            "--disable metrics-server"
            "--disable servicelb"
            "--disable traefik"
            "--node-name ${nodeName}"
          ];
          gracefulNodeShutdown = {
            enable = true;
            inherit shutdownGracePeriod shutdownGracePeriodCriticalPods;
          };
          extraKubeletConfig = {
            inherit podsPerCore memoryThrottlingFactor containerLogMaxSize;
          };
        };
      };

    testScript = ''
      import json

      start_all()
      machine.wait_for_unit("k3s")
      # wait until the node is ready
      machine.wait_until_succeeds(r"""kubectl wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' nodes/${nodeName}""")
      # test whether the kubelet registered an inhibitor lock
      machine.succeed("systemd-inhibit --list --no-legend | grep \"kubelet.*k3s-server.*shutdown\"")
      # run kubectl proxy in the background, close stdout through redirection to not wait for the command to finish
      machine.execute("kubectl proxy --address 127.0.0.1 --port=8001 >&2 &")
      machine.wait_until_succeeds("nc -z 127.0.0.1 8001")
      # get the kubeletconfig; --fail makes curl exit non-zero on an HTTP error
      # instead of piping the error body into jq
      kubelet_config = json.loads(machine.succeed("curl --fail --silent --show-error http://127.0.0.1:8001/api/v1/nodes/${nodeName}/proxy/configz | jq '.kubeletconfig'"))

      # expected kubelet settings, keyed by their KubeletConfiguration field name
      expected = {
          "shutdownGracePeriod": "${shutdownGracePeriod}",
          "shutdownGracePeriodCriticalPods": "${shutdownGracePeriodCriticalPods}",
          "podsPerCore": ${toString podsPerCore},
          "memoryThrottlingFactor": ${toString memoryThrottlingFactor},
          "containerLogMaxSize": "${containerLogMaxSize}",
      }

      with subtest("Kubelet config values are set correctly"):
          for key, want in expected.items():
              # .get() so that a missing key is reported through the assertion
              # message rather than as a bare KeyError
              got = kubelet_config.get(key)
              assert got == want, f"unexpected value for {key}: {got!r} (expected {want!r})"
    '';

    meta.maintainers = lib.teams.k3s.members;
  }
)