Commit e362fe9c authored by Oliver Schmidt's avatar Oliver Schmidt Committed by Jörg Thalheim
Browse files

security/acme: limit concurrent certificate generations

fixes #232505

Implements the new option `security.acme.maxConcurrentRenewals` to limit
the number of certificate generation (or renewal) jobs that can run in
parallel. This avoids overloading the system resources with many
certificates or running into acme registry rate limits and network
timeouts.

Architecture considerations:
- simplicity, lightweight: Concerns have been voiced about making this
  already rather complex module even more convoluted. Additionally,
  locking solutions shall not significantly degrade the performance or
  increase the footprint of individual job runs.
  To accommodate these concerns, this solution is implemented purely in
  Nix, bash, and using the light-weight `flock` util. To reduce
  complexity, jobs are already assigned their lockfile slot at system
  build time instead of dynamic locking and retrying. This comes at the
  cost of not always maxing out the permitted concurrency at runtime.
- no stale locks: Limiting concurrency via a locking mechanism is usually
  approached with semaphores. Unfortunately, both SysV as well as
  POSIX-Semaphores are *not* released when the process currently locking
  them is SIGKILLed. This poses the danger of stale locks staying around
  and certificate renewal being blocked from running altogether.
  `flock` locks though are released when the process holding the file
  descriptor of the lock file is KILLed or terminated.
- lockfile generation: Lock files could either be created at build time
  in the Nix store or at script runtime in an idempotent manner.
  While the latter would be simpler to achieve, we might exceed the number
  of permitted concurrent runs during a system switch: Already running
  jobs are still locked on the existing lock files, while jobs started
  after the system switch will acquire locks on freshly created files,
  not being blocked by the still running services.
  For this reason, locks are generated and managed at runtime in the
  shared state directory `/var/lib/locks/`.

nixos/security/acme: move locks to /run

also, move over permission and directory management to systemd-tmpfiles

nixos/security/acme: fix some linter remarks in my code

there are some remarks left for existing code, not touching that

nixos/security/acme: redesign script locking flow

- get rid of subshell
- provide function for wrapping scripts in a locked environment

nixos/acme: improve visibility of blocking on locks

nixos/acme: add smoke test for concurrency limitation

heavily inspired by m1cr0man

nixos/acme: release notes entry on new concurrency limits

nixos/acme: cleanup, clarifications
parent 084dfe80
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -261,6 +261,8 @@ The module update takes care of the new config syntax and the data itself (user

- The `cawbird` package is dropped from nixpkgs, as it got broken by the Twitter API closing down and has been abandoned upstream.

- Certificate generation via the `security.acme` module now limits the number of concurrently running certificate renewal and generation jobs, to avoid spiking resource usage when processing many certificates at once. The limit defaults to *5* and can be adjusted via `security.acme.maxConcurrentRenewals`. Setting it to *0* disables the limit altogether.

## Nixpkgs internals {#sec-release-23.11-nixpkgs-internals}

- The use of `sourceRoot = "source";`, `sourceRoot = "source/subdir";`, and similar lines in package derivations using the default `unpackPhase` is deprecated as it requires `unpackPhase` to always produce a directory named "source". Use `sourceRoot = src.name`, `sourceRoot = "${src.name}/subdir";`, or `setSourceRoot = "sourceRoot=$(echo */subdir)";` or similar instead.
+93 −13
Original line number Diff line number Diff line
{ config, lib, pkgs, options, ... }:
with lib;
let


  cfg = config.security.acme;
  opt = options.security.acme;
  user = if cfg.useRoot then "root" else "acme";
@@ -14,6 +16,36 @@ let
  mkAccountHash = acmeServer: data: mkHash "${toString acmeServer} ${data.keyType} ${data.email}";
  accountDirRoot = "/var/lib/acme/.lego/accounts/";

  # Directory in which the concurrency lock files are kept.
  lockdir = "/run/acme/";
  # One lock file name per permitted concurrent job: "1.lock", "2.lock", ...
  # up to `cfg.maxConcurrentRenewals`.
  concurrencyLockfiles =
    map (slot: "${toString slot}.lock") (lib.range 1 cfg.maxConcurrentRenewals);
  # Pair every element of `needAssignmentList` with an element of `baseList`,
  # walking `baseList` cyclically (wrapping around to its start once exhausted).
  # Yields no pairs when either list is empty.
  # returns: [{ fst = <element of baseList>; snd = <element of needAssignmentList>; }]
  roundRobinAssign = baseList: needAssignmentList:
    let
      slotCount = builtins.length baseList;
      # Integer `/` truncates, so this is `index mod slotCount` for the
      # non-negative indices handed out by genList.
      slotFor = index:
        builtins.elemAt baseList (index - (index / slotCount) * slotCount);
    in
      if slotCount == 0 then []
      else builtins.genList
        (index: { fst = slotFor index; snd = builtins.elemAt needAssignmentList index; })
        (builtins.length needAssignmentList);
  # Turn an attribute set into a list of { name, value } pairs.
  attrsToList = mapAttrsToList (name: value: { inherit name value; });
  # `funcsAttrs` is an attribute set whose values are one-argument functions.
  # Each function is called with one element of `argsList`; the elements are
  # handed out in round-robin order via `roundRobinAssign`.
  # Returns an attribute set with the same names and the call results as values.
  roundRobinApplyAttrs = funcsAttrs: argsList:
    let
      assignments = roundRobinAssign argsList (attrsToList funcsAttrs);
      # `pair.fst` is the argument, `pair.snd` the { name, value = function } entry.
      applyAssigned = pair: {
        name = pair.snd.name;
        value = pair.snd.value pair.fst;
      };
    in
      lib.listToAttrs (map applyAssigned assignments);
  # Wrap the shell snippet `script` so that it only runs once an exclusive
  # flock on `lockfilePath` has been obtained. The lock is bound to a file
  # descriptor of the running shell and is never unlocked explicitly.
  wrapInFlock = lockfilePath: script:
    # explainer: https://stackoverflow.com/a/60896531
    let
      # Open the lock file on a fresh descriptor and block until the lock is ours.
      acquireLock = ''
        exec {LOCKFD}> ${lockfilePath}
        echo "Waiting to acquire lock ${lockfilePath}"
        ${pkgs.flock}/bin/flock ''${LOCKFD} || exit 1
        echo "Acquired lock ${lockfilePath}"
      '';
      # Purely informational message emitted after the wrapped script.
      announceRelease = ''echo "Releasing lock ${lockfilePath}"  # only released after process exit'';
    in
      "${acquireLock}${script}\n${announceRelease}";


  # There are many services required to make cert renewals work.
  # They all follow a common structure:
  #   - They inherit this commonServiceConfig
@@ -31,6 +63,7 @@ let
    ProtectSystem = "strict";
    ReadWritePaths = [
      "/var/lib/acme"
      lockdir
    ];
    PrivateTmp = true;

@@ -118,7 +151,8 @@ let
      # We don't want this to run every time a renewal happens
      RemainAfterExit = true;

      # These StateDirectory entries negate the need for tmpfiles
      # StateDirectory entries are a cleaner, service-level mechanism
      # for dealing with persistent service data
      StateDirectory = [ "acme" "acme/.lego" "acme/.lego/accounts" ];
      StateDirectoryMode = 755;
      WorkingDirectory = "/var/lib/acme";
@@ -127,6 +161,25 @@ let
      ExecStart = "+" + (pkgs.writeShellScript "acme-fixperms" script);
    };
  };
  # Unit definition that makes `lockdir` (its working directory) contain the
  # lock files named in `concurrencyLockfiles`, and removes everything else.
  lockfilePrepareService = {
    description = "Manage lock files for acme services";

    # ensure all required lock files exist, but none more
    #
    # GLOBIGNORE hides the wanted lock files from `*`, so only surplus entries
    # are removed. `--` ends option parsing, so a stray file whose name starts
    # with `-` is deleted instead of being interpreted as an rm option.
    script = ''
      GLOBIGNORE="${concatStringsSep ":" concurrencyLockfiles}"
      rm -f -- *
      unset GLOBIGNORE

      xargs touch <<< "${toString concurrencyLockfiles}"
    '';

    serviceConfig = commonServiceConfig // {
      # We don't want this to run every time a renewal happens
      RemainAfterExit = true;
      # The relative glob and file names in `script` resolve against this directory.
      WorkingDirectory = lockdir;
    };
  };


  certToConfig = cert: data: let
    acmeServer = data.server;
@@ -229,10 +282,10 @@ let
      };
    };

    selfsignService = {
    selfsignService = lockfileName: {
      description = "Generate self-signed certificate for ${cert}";
      after = [ "acme-selfsigned-ca.service" "acme-fixperms.service" ];
      requires = [ "acme-selfsigned-ca.service" "acme-fixperms.service" ];
      after = [ "acme-selfsigned-ca.service" "acme-fixperms.service" ] ++ optional (cfg.maxConcurrentRenewals > 0) "acme-lockfiles.service";
      requires = [ "acme-selfsigned-ca.service" "acme-fixperms.service" ] ++ optional (cfg.maxConcurrentRenewals > 0) "acme-lockfiles.service";

      path = with pkgs; [ minica ];

@@ -256,7 +309,7 @@ let
      # Working directory will be /tmp
      # minica will output to a folder sharing the name of the first domain
      # in the list, which will be ${data.domain}
      script = ''
      script = (if (lockfileName == null) then lib.id else wrapInFlock "${lockdir}${lockfileName}") ''
        minica \
          --ca-key ca/key.pem \
          --ca-cert ca/cert.pem \
@@ -277,10 +330,10 @@ let
      '';
    };

    renewService = {
    renewService = lockfileName: {
      description = "Renew ACME certificate for ${cert}";
      after = [ "network.target" "network-online.target" "acme-fixperms.service" "nss-lookup.target" ] ++ selfsignedDeps;
      wants = [ "network-online.target" "acme-fixperms.service" ] ++ selfsignedDeps;
      after = [ "network.target" "network-online.target" "acme-fixperms.service" "nss-lookup.target" ] ++ selfsignedDeps ++ optional (cfg.maxConcurrentRenewals > 0) "acme-lockfiles.service";
      wants = [ "network-online.target" "acme-fixperms.service" ] ++ selfsignedDeps ++ optional (cfg.maxConcurrentRenewals > 0) "acme-lockfiles.service";

      # https://github.com/NixOS/nixpkgs/pull/81371#issuecomment-605526099
      wantedBy = optionals (!config.boot.isContainer) [ "multi-user.target" ];
@@ -329,7 +382,7 @@ let
      };

      # Working directory will be /tmp
      script = ''
      script = (if (lockfileName == null) then lib.id else wrapInFlock "${lockdir}${lockfileName}") ''
        ${optionalString data.enableDebugLogs "set -x"}
        set -euo pipefail

@@ -755,6 +808,17 @@ in {
          }
        '';
      };
      # Upper bound on simultaneously running certificate jobs; values of 0
      # or below disable the lock-based limiting.
      maxConcurrentRenewals = mkOption {
        default = 5;
        type = types.int;
        description = lib.mdDoc ''
          Maximum number of concurrent certificate generation or renewal jobs. All other
          jobs will queue and wait for running jobs to finish. Reduces the system load of
          certificate generation.

          Set to `0` to allow an unlimited number of concurrent job runs.
          '';
      };
    };
  };

@@ -875,12 +939,28 @@ in {

      users.groups.acme = {};

      systemd.services = {
        "acme-fixperms" = userMigrationService;
      } // (mapAttrs' (cert: conf: nameValuePair "acme-${cert}" conf.renewService) certConfigs)
      # Lock files are runtime-only data, so their directory lives under /run
      # and is managed through tmpfiles rather than StateDirectory.
      systemd.tmpfiles.rules =
        let
          # Path, mode and owner shared by both rules; the remaining fields are unset.
          lockdirSpec = "${lockdir} 0700 ${user} - - -";
        in [
          # create the directory if it does not exist yet
          "d ${lockdirSpec}"
          # recursively re-apply mode and ownership
          "Z ${lockdirSpec}"
        ];

      # Assemble all acme units. `renewService` and `selfsignService` are
      # functions of a lock file name, so they are instantiated here: with a
      # positive limit each one gets a lock file in round-robin order,
      # otherwise it is called with `null` (no locking).
      systemd.services = let
        # name -> (lockfileName -> unit definition) for the renewal units
        renewServiceFunctions = mapAttrs' (cert: conf: nameValuePair "acme-${cert}" conf.renewService) certConfigs;
        renewServices =  if cfg.maxConcurrentRenewals > 0
          then roundRobinApplyAttrs renewServiceFunctions concurrencyLockfiles
          else mapAttrs (_: f: f null) renewServiceFunctions;
        # Same scheme for the self-signed units; they draw from the same list
        # of lock files as the renewal units.
        selfsignServiceFunctions = mapAttrs' (cert: conf: nameValuePair "acme-selfsigned-${cert}" conf.selfsignService) certConfigs;
        selfsignServices = if cfg.maxConcurrentRenewals > 0
          then roundRobinApplyAttrs selfsignServiceFunctions concurrencyLockfiles
          else mapAttrs (_: f: f null) selfsignServiceFunctions;
        in
        { "acme-fixperms" = userMigrationService; }
        # the lock file management unit only exists when limiting is enabled
        // (optionalAttrs (cfg.maxConcurrentRenewals > 0) {"acme-lockfiles" = lockfilePrepareService; })
        // renewServices
        // (optionalAttrs (cfg.preliminarySelfsigned) ({
        "acme-selfsigned-ca" = selfsignCAService;
      } // (mapAttrs' (cert: conf: nameValuePair "acme-selfsigned-${cert}" conf.selfsignService) certConfigs)));
      } // selfsignServices));

      systemd.timers = mapAttrs' (cert: conf: nameValuePair "acme-${cert}" conf.renewTimer) certConfigs;

+41 −1
Original line number Diff line number Diff line
@@ -266,6 +266,37 @@ in {
          }
        ];

        # Specialisation for the concurrency smoke test: three ACME-enabled
        # vhosts with the limit set to a single concurrent renewal job.
        concurrency-limit.configuration = {pkgs, ...}: lib.mkMerge [
          webserverBasicConfig {
            security.acme.maxConcurrentRenewals = 1;

            services.nginx.virtualHosts = {
              "f.example.test" = vhostBase // {
                enableACME = true;
              };
              "g.example.test" = vhostBase // {
                enableACME = true;
              };
              "h.example.test" = vhostBase // {
                enableACME = true;
              };
            };

            systemd.services = {
              # check for mutual exclusion of starting renew services
              # Each script counts how many of the two sibling units report
              # "activating" and succeeds only if that count is zero.
              # NOTE(review): systemd spells this directive `ExecStartPre=`;
              # `ExecPreStart` looks like an unknown key that systemd would
              # ignore, in which case these checks never run — confirm.
              # A plain rename may not be enough: the lock is taken inside the
              # main service script, so queued sibling units would presumably
              # already report "activating" while waiting — verify before changing.
              "acme-f.example.test".serviceConfig.ExecPreStart = "+" + (pkgs.writeShellScript "test-f" ''
                test "$(systemctl is-active acme-{g,h}.example.test.service | grep activating | wc -l)" -le 0
                '');
              "acme-g.example.test".serviceConfig.ExecPreStart = "+" + (pkgs.writeShellScript "test-g" ''
                test "$(systemctl is-active acme-{f,h}.example.test.service | grep activating | wc -l)" -le 0
                '');
              "acme-h.example.test".serviceConfig.ExecPreStart = "+" + (pkgs.writeShellScript "test-h" ''
                test "$(systemctl is-active acme-{g,f}.example.test.service | grep activating | wc -l)" -le 0
                '');
              };
          }
        ];

        # Test lego internal server (listenHTTP option)
        # Also tests useRoot option
        lego-server.configuration = { ... }: {
@@ -297,7 +328,7 @@ in {

          services.caddy = {
            enable = true;
            virtualHosts."a.exmaple.test" = {
            virtualHosts."a.example.test" = {
              useACMEHost = "example.test";
              extraConfig = ''
                root * ${documentRoot}
@@ -591,6 +622,15 @@ in {
          webserver.wait_for_unit("nginx.service")
          check_connection(client, "slow.example.test")

      with subtest("Can limit concurrency of running renewals"):
          switch_to(webserver, "concurrency-limit")
          limited_hosts = ("f", "g", "h")
          # First wait until every certificate job has finished ...
          for host in limited_hosts:
              webserver.wait_for_unit(f"acme-finished-{host}.example.test.target")
          # ... then check each vhost from the client.
          for host in limited_hosts:
              check_connection(client, f"{host}.example.test")

      with subtest("Works with caddy"):
          switch_to(webserver, "caddy")
          webserver.wait_for_unit("acme-finished-example.test.target")