Unverified Commit 29c30249 authored by Philip Kannegaard Hayes's avatar Philip Kannegaard Hayes
Browse files

aesmd: fix failure on first run and clean up old DCAP compat

On our prod machines, aesmd would always fail the first time after boot,
with logs like:
`XXX-copy-aesmd-data-files.sh[770]: chown: invalid user: 'aesmd:aesmd'`.
Everything would be fine after the next restart, but blocking boot for
15 seconds is definitely annoying.

It appears the issue was that we were trying to name the DynamicUser
User and Group (aesmd/aesmd) during ExecStartPre, but systemd only
creates these right before ExecStart. After playing around with the
ExecStartPre script, I finally got it to work correctly. As a bonus, we
can also re-add the RootDirectory/chroot hardening.

I've also cleaned out the config for the old out-of-tree isgx driver and
the old DCAP driver. The kernel has had in-kernel SGX support since 5.11
(~2021/02).
parent c7beca36
Loading
Loading
Loading
Loading
+11 −45
Original line number Diff line number Diff line
@@ -11,7 +11,6 @@ let
    literalExpression
    makeLibraryPath
    mkEnableOption
    mkForce
    mkIf
    mkOption
    mkPackageOption
@@ -129,11 +128,6 @@ in

    hardware.cpu.intel.sgx.provision.enable = true;

    # Make sure the AESM service can find the SGX devices until
    # https://github.com/intel/linux-sgx/issues/772 is resolved
    # and updated in nixpkgs.
    hardware.cpu.intel.sgx.enableDcapCompat = mkForce true;

    systemd.services.aesmd =
      let
        storeAesmFolder = "${sgx-psw}/aesm";
@@ -156,36 +150,17 @@ in
        }
        // cfg.environment;

        # Make sure any of the SGX application enclave devices is available
        unitConfig.AssertPathExists = [
          # legacy out-of-tree driver
          "|/dev/isgx"
          # DCAP driver
          "|/dev/sgx/enclave"
          # in-tree driver
          "|/dev/sgx_enclave"
        ];
        # Ensure the SGX application enclave device is available
        unitConfig.AssertPathExists = [ "/dev/sgx_enclave" ];

        serviceConfig = {
          ExecStartPre =
            let
              script = pkgs.writeShellScript "copy-aesmd-data-files.sh" ''
                set -euo pipefail

                # For some reason systemd 257+ won't properly bind mount the
                # StateDirectory with the aesmd DynamicUser owning it
                chown -R aesmd:aesmd /var/opt/aesmd

                whiteListFile="${aesmDataFolder}/white_list_cert_to_be_verify.bin"
                if [[ ! -f "$whiteListFile" ]]; then
                  install -m 644 -o aesmd -g aesmd -D \
          # Run with elevated privileges to create /var/opt/aesmd/... before
          # dropping to DynamicUser.
          ExecStartPre = ''
            +${lib.getExe' pkgs.coreutils "install"} -m 644 -D \
                "${storeAesmFolder}/data/white_list_cert_to_be_verify.bin" \
                    "$whiteListFile"
                fi
                "${aesmDataFolder}/white_list_cert_to_be_verify.bin"
          '';
              # Run setup with elevated privileges
            in
            "+${script}";
          ExecStart = "${sgx-psw}/bin/aesm_service --no-daemon";
          ExecReload = ''${pkgs.coreutils}/bin/kill -SIGHUP "$MAINPID"'';

@@ -208,12 +183,7 @@ in

          # --- Hardening ---

          # NOTE(phlip9): Starting with systemd-v257, enabling both
          #   `RootDirectory` (chroot) and `DynamicUser` prevents the service
          #   from locating the `aesmd` user:
          #   `XXX-copy-aesmd-data-files.sh: chown: invalid user: ‘aesmd:aesmd’`
          # RootDirectory = "%t/aesmd";

          RootDirectory = "%t/aesmd";
          BindReadOnlyPaths = [
            builtins.storeDir
            # Hardcoded path AESM_CONFIG_FILE in psw/ae/aesm_service/source/utils/aesm_config.cpp
@@ -229,10 +199,6 @@ in
          PrivateDevices = false;
          DevicePolicy = "closed";
          DeviceAllow = [
            # legacy out-of-tree driver
            "/dev/isgx rw"
            # DCAP driver
            "/dev/sgx rw"
            # in-tree driver
            "/dev/sgx_enclave rw"
            "/dev/sgx_provision rw"
@@ -244,7 +210,7 @@ in
          RestrictAddressFamilies = [
            # Allocates the socket /var/run/aesmd/aesm.socket
            "AF_UNIX"
            # Uses the HTTP protocol to initialize some services
            # Makes HTTPS requests to the Intel PCCS service (or a cache).
            "AF_INET"
            "AF_INET6"
          ];