Unverified Commit 4b4e4c3e authored by Ryan Lahfa's avatar Ryan Lahfa Committed by GitHub
Browse files

Merge pull request #207039 from NixOS/qemu-boot-disk-using-make-disk-image

nixos/qemu-vm: refactor bootDisk using make-disk-image
parents 93fad082 76f1b633
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -90,6 +90,9 @@
# NixOS integration test driver
/nixos/lib/test-driver  @tfc

# NixOS QEMU virtualisation
/nixos/virtualisation/qemu-vm.nix           @raitobezarius

# Systemd
/nixos/modules/system/boot/systemd.nix      @NixOS/systemd
/nixos/modules/system/boot/systemd          @NixOS/systemd
+139 −162
Original line number Diff line number Diff line
@@ -55,6 +55,11 @@ let

  };

  # Choose the partition table layout name for the given VM settings:
  # "none" when the default filesystems are not used, otherwise "efi" or
  # "legacy" depending on `useEFIBoot`.
  selectPartitionTableLayout = { useEFIBoot, useDefaultFilesystems }:
    if !useDefaultFilesystems then "none"
    else if useEFIBoot then "efi"
    else "legacy";

  driveCmdline = idx: { file, driveExtraOpts, deviceExtraOpts, ... }:
    let
      drvId = "drive${toString idx}";
@@ -98,7 +103,6 @@ let
  addDeviceNames =
    imap1 (idx: drive: drive // { device = driveDeviceName idx; });


  # Shell script to start the VM.
  startVM =
    ''
@@ -111,8 +115,23 @@ let
      NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE"

      if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then
          ${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" \
            ${toString config.virtualisation.diskSize}M
          echo "Disk image do not exist, creating the virtualisation disk image..."
          # If we are using a bootloader and the default filesystem layout,
          # we have to reuse the system image as a backing image (CoW)
          # so that we can write on top of it.

          # If we are not using the default FS layout, we are potentially interested in
          # performing operations in postDeviceCommands or at early boot on the raw device.
          # We can still boot through QEMU's direct kernel boot feature.

          # Using a CoW backing image prevents a size from being assigned to the image.
          # FIXME: raise this issue upstream.
          ${qemu}/bin/qemu-img create \
          ${concatStringsSep " \\\n" ([ "-f qcow2" ]
          ++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2"
          ++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M"
          ++ [ "$NIX_DISK_IMAGE" ])}
          echo "Virtualisation disk image created."
      fi

      # Create a directory for storing temporary data of the running VM.
@@ -152,19 +171,13 @@ let

      ${lib.optionalString cfg.useBootLoader
      ''
        if ${if !cfg.persistBootDevice then "true" else "! test -e $TMPDIR/disk.img"}; then
          # Create a writable copy/snapshot of the boot disk.
          # A writable boot disk can be booted from automatically.
          ${qemu}/bin/qemu-img create -f qcow2 -F qcow2 -b ${bootDisk}/disk.img "$TMPDIR/disk.img"
        fi

        NIX_EFI_VARS=$(readlink -f "''${NIX_EFI_VARS:-${cfg.efiVars}}")
        NIX_EFI_VARS=$(readlink -f "''${NIX_EFI_VARS:-${config.system.name}-efi-vars.fd}")

        ${lib.optionalString cfg.useEFIBoot
        ''
          # VM needs writable EFI vars
          if ! test -e "$NIX_EFI_VARS"; then
            cp ${bootDisk}/efi-vars.fd "$NIX_EFI_VARS"
            cp ${systemImage}/efi-vars.fd "$NIX_EFI_VARS"
            chmod 0644 "$NIX_EFI_VARS"
          fi
        ''}
@@ -200,95 +213,29 @@ let

  regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; };


  # Generate a hard disk image containing a /boot partition and GRUB
  # in the MBR.  Used when the `useBootLoader' option is set.
  # Uses `runInLinuxVM` to create the image in a throwaway VM.
  # See note [Disk layout with `useBootLoader`].
  # FIXME: use nixos/lib/make-disk-image.nix.
  bootDisk =
    pkgs.vmTools.runInLinuxVM (
      pkgs.runCommand "nixos-boot-disk"
        { preVM =
            ''
              mkdir $out
              diskImage=$out/disk.img
              ${qemu}/bin/qemu-img create -f qcow2 $diskImage "120M"
              ${lib.optionalString cfg.useEFIBoot ''
                efiVars=$out/efi-vars.fd
                cp ${cfg.efi.variables} $efiVars
                chmod 0644 $efiVars
              ''}
            '';
          buildInputs = [ pkgs.util-linux ];
          QEMU_OPTS = "-nographic -serial stdio -monitor none"
                      + lib.optionalString cfg.useEFIBoot (
                        " -drive if=pflash,format=raw,unit=0,readonly=on,file=${cfg.efi.firmware}"
                      + " -drive if=pflash,format=raw,unit=1,file=$efiVars");
        }
        ''
          # Create a /boot EFI partition with 120M and arbitrary but fixed GUIDs for reproducibility
          ${pkgs.gptfdisk}/bin/sgdisk \
            --set-alignment=1 --new=1:34:2047 --change-name=1:BIOSBootPartition --typecode=1:ef02 \
            --set-alignment=512 --largest-new=2 --change-name=2:EFISystem --typecode=2:ef00 \
            --attributes=1:set:1 \
            --attributes=2:set:2 \
            --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C1 \
            --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
            --partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
            --hybrid 2 \
            --recompute-chs /dev/vda

          ${optionalString (config.boot.loader.grub.device != "/dev/vda")
            # In this throwaway VM, we only have the /dev/vda disk, but the
            # actual VM described by `config` (used by `switch-to-configuration`
            # below) may set `boot.loader.grub.device` to a different device
            # that's nonexistent in the throwaway VM.
            # Create a symlink for that device, so that the `grub-install`
            # by `switch-to-configuration` will hit /dev/vda anyway.
            ''
              ln -s /dev/vda ${config.boot.loader.grub.device}
            ''
          }

          ${pkgs.dosfstools}/bin/mkfs.fat -F16 /dev/vda2
          export MTOOLS_SKIP_CHECK=1
          ${pkgs.mtools}/bin/mlabel -i /dev/vda2 ::boot

          # Mount /boot; load necessary modules first.
          ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_cp437.ko.xz || true
          ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_iso8859-1.ko.xz || true
          ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/fat.ko.xz || true
          ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/vfat.ko.xz || true
          ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/efivarfs/efivarfs.ko.xz || true
          mkdir /boot
          mount /dev/vda2 /boot

          ${optionalString config.boot.loader.efi.canTouchEfiVariables ''
            mount -t efivarfs efivarfs /sys/firmware/efi/efivars
          ''}

          # This is needed for GRUB 0.97, which doesn't know about virtio devices.
          mkdir /boot/grub
          echo '(hd0) /dev/vda' > /boot/grub/device.map

          # This is needed for systemd-boot to find ESP, and udev is not available here to create this
          mkdir -p /dev/block
          ln -s /dev/vda2 /dev/block/254:2

          # Set up system profile (normally done by nixos-rebuild / nix-env --set)
          mkdir -p /nix/var/nix/profiles
          ln -s ${config.system.build.toplevel} /nix/var/nix/profiles/system-1-link
          ln -s /nix/var/nix/profiles/system-1-link /nix/var/nix/profiles/system

          # Install bootloader
          touch /etc/NIXOS
          export NIXOS_INSTALL_BOOTLOADER=1
          ${config.system.build.toplevel}/bin/switch-to-configuration boot

          umount /boot
        '' # */
    );
  # The system image is akin to a complete NixOS install, built as a qcow2
  # image by nixos/lib/make-disk-image.nix.
  # startVM uses `${systemImage}/nixos.qcow2` as the backing file of the VM
  # disk image when both a bootloader and the default filesystems are in use,
  # and copies `${systemImage}/efi-vars.fd` as the initial EFI variable store.
  systemImage = import ../../lib/make-disk-image.nix {
    inherit pkgs config lib;
    additionalPaths = [ regInfo ];
    format = "qcow2";
    onlyNixStore = false;
    # "efi", "legacy" or "none", depending on the VM settings.
    partitionTableType = selectPartitionTableLayout { inherit (cfg) useDefaultFilesystems useEFIBoot; };
    # The bootloader should be installed on the system image only if we are booting through a bootloader.
    # However, if a user is not using our default filesystems, there may be no ESP at all,
    # or a partition table that is incompatible with the GRUB configuration.
    # Either case may lead to disk image creation failures.
    # To avoid this, we leave it to the user to install the bootloader on their own ESP/disk,
    # which usually means building their own disk image.
    # TODO: If a user is interested in a more fine-grained heuristic for `installBootLoader`
    # that examines the actual contents of `cfg.fileSystems`, please send a PR.
    installBootLoader = cfg.useBootLoader && cfg.useDefaultFilesystems;
    touchEFIVars = cfg.useEFIBoot;
    diskSize = "auto";
    additionalSpace = "0M";
    copyChannel = false;
    OVMF = cfg.efi.OVMF;
  };

  storeImage = import ../../lib/make-disk-image.nix {
    inherit pkgs config lib;
@@ -297,17 +244,42 @@ let
    onlyNixStore = true;
    partitionTableType = "none";
    installBootLoader = false;
    touchEFIVars = false;
    diskSize = "auto";
    additionalSpace = "0M";
    copyChannel = false;
  };

  # Classify the VM boot setup into one of four named scenarios.
  # The resulting string is the lookup key used by `suggestedRootDevice`.
  bootConfiguration =
    if !cfg.useDefaultFilesystems then "custom"
    else if !cfg.useBootLoader then "direct_boot_with_default_fs"
    else if cfg.useEFIBoot then "efi_bootloading_with_default_fs"
    else "legacy_bootloading_with_default_fs";
  # Root device suggested for the boot scenario named by `bootConfiguration`.
  # It is applied as `virtualisation.rootDevice = mkDefault suggestedRootDevice`,
  # so an explicit user setting takes precedence.
  suggestedRootDevice = {
    # With a bootloader, the root filesystem is a partition of the boot loader
    # device: the 2nd one under EFI, the 1st one under legacy boot.
    "efi_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}2";
    "legacy_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}1";
    # Direct boot: the whole device is the root filesystem.
    "direct_boot_with_default_fs" = cfg.bootLoaderDevice;
    # No sensible default exists for a custom layout: the value stays null
    # (i.e. `rootDevice = lib.mkDefault null;` after all "computations") and
    # the user is expected to set `virtualisation.rootDevice` explicitly.
    # NOTE(review): the original comment said this enforces a module type
    # checking error, but the option is declared `types.nullOr types.path`,
    # so null appears to be accepted by the type; a failure would presumably
    # only surface where the value is used — confirm.
    "custom" = null;
  }.${bootConfiguration};
in

{
  imports = [
    ../profiles/qemu-guest.nix
    (mkRenamedOptionModule [ "virtualisation" "pathsInNixDB" ] [ "virtualisation" "additionalPaths" ])
    # The options below were removed; each message tells users what to use instead.
    (mkRemovedOptionModule [ "virtualisation" "bootDevice" ] "This option was renamed to `virtualisation.rootDevice`, as it was incorrectly named and misleading. Take the time to review what you want to do and look at the new options like `virtualisation.{bootLoaderDevice, bootPartition}`, open an issue in case of issues.")
    (mkRemovedOptionModule [ "virtualisation" "efiVars" ] "This option was removed, it is possible to provide a template UEFI variable with `virtualisation.efi.variables` ; if this option is important to you, open an issue")
    # This message must not point users at `virtualisation.bootDevice`, which is
    # itself declared removed above; the replacement is `bootLoaderDevice`.
    (mkRemovedOptionModule [ "virtualisation" "persistBootDevice" ] "Boot device is always persisted if you use a bootloader through the root disk image ; if this does not work for your usecase, please examine carefully what `virtualisation.{bootLoaderDevice, rootDevice, bootPartition}` options offer you and open an issue explaining your need.")
  ];

  options = {
@@ -362,24 +334,47 @@ in
          '';
      };

    virtualisation.bootDevice =
    virtualisation.bootLoaderDevice =
      mkOption {
        type = types.path;
        default = lookupDriveDeviceName "root" cfg.qemu.drives;
        defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives'';
        example = "/dev/vda";
        description =
          lib.mdDoc ''
            The disk to be used for the root filesystem.
            The disk to be used for the boot filesystem.
            By default, it is the same disk as the root filesystem.
          '';
        };

    virtualisation.persistBootDevice =
    virtualisation.bootPartition =
      mkOption {
        type = types.bool;
        default = false;
        type = types.nullOr types.path;
        default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null;
        defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null'';
        example = "/dev/vda1";
        description =
          lib.mdDoc ''
            If useBootLoader is specified, whether to recreate the boot device
            on each instantiaton or allow it to persist.
            The boot partition to be used to mount /boot filesystem.
            In legacy boots, this should be null.
            By default, in EFI boot, it is the first partition of the boot device.
          '';
      };

    # Device (disk or partition) holding the root filesystem.
    # No `default` is declared on the option itself: the module's config sets
    # it with `mkDefault suggestedRootDevice`, which is null when the default
    # filesystems are not used, so users must then set it themselves.
    virtualisation.rootDevice =
      mkOption {
        type = types.nullOr types.path;
        example = "/dev/vda2";
        description =
          lib.mdDoc ''
            The disk or partition to be used for the root filesystem.
            By default (read the source code for more details):

            - under EFI with a bootloader: 2nd partition of the boot disk
            - in legacy boot with a bootloader: 1st partition of the boot disk
            - in direct boot (i.e. without a bootloader): whole disk

            In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device.
          '';
      };

@@ -749,10 +744,22 @@ in
        };

    virtualisation.efi = {
      # OVMF firmware package. It is also passed to make-disk-image when the
      # system image is built (`OVMF = cfg.efi.OVMF`).
      OVMF = mkOption {
        type = types.package;
        default = (pkgs.OVMF.override {
          secureBoot = cfg.useSecureBoot;
        }).fd;
        # Wrapped in `literalExpression`, like the other options in this module,
        # so the default is marked as a Nix expression rather than being a bare string.
        defaultText = literalExpression ''
          (pkgs.OVMF.override {
            secureBoot = cfg.useSecureBoot;
          }).fd
        '';
        description =
        lib.mdDoc "OVMF firmware package, defaults to OVMF configured with secure boot if needed.";
      };

      firmware = mkOption {
        type = types.path;
        default = pkgs.OVMF.firmware;
        defaultText = literalExpression "pkgs.OVMF.firmware";
        default = cfg.efi.OVMF.firmware;
        defaultText = literalExpression "cfg.efi.OVMF.firmware";
        description =
          lib.mdDoc ''
            Firmware binary for EFI implementation, defaults to OVMF.
@@ -761,8 +768,8 @@ in

      variables = mkOption {
        type = types.path;
        default = pkgs.OVMF.variables;
        defaultText = literalExpression "pkgs.OVMF.variables";
        default = cfg.efi.OVMF.variables;
        defaultText = literalExpression "cfg.efi.OVMF.variables";
        description =
          lib.mdDoc ''
            Platform-specific flash binary for EFI variables, implementation-dependent to the EFI firmware.
@@ -786,18 +793,17 @@ in
          '';
      };

    virtualisation.efiVars =
    virtualisation.useSecureBoot =
      mkOption {
        type = types.str;
        default = "./${config.system.name}-efi-vars.fd";
        defaultText = literalExpression ''"./''${config.system.name}-efi-vars.fd"'';
        type = types.bool;
        default = false;
        description =
          lib.mdDoc ''
            Path to nvram image containing UEFI variables.  The will be created
            on startup if it does not exist.
            Enable Secure Boot support in the EFI firmware.
          '';
      };


    virtualisation.bios =
      mkOption {
        type = types.nullOr types.package;
@@ -853,30 +859,13 @@ in
            ${opt.writableStore} = false;
        '';

    # Note [Disk layout with `useBootLoader`]
    #
    # If `useBootLoader = true`, we configure 2 drives:
    # `/dev/?da` for the root disk, and `/dev/?db` for the boot disk
    # which has the `/boot` partition and the boot loader.
    # Concretely:
    #
    # * The second drive's image `disk.img` is created in `bootDisk = ...`
    #   using a throwaway VM. Note that there the disk is always `/dev/vda`,
    #   even though in the final VM it will be at `/dev/*b`.
    # * The disks are attached in `virtualisation.qemu.drives`.
    #   Their order makes them appear as devices `a`, `b`, etc.
    # * `fileSystems."/boot"` is adjusted to be on device `b`.
    # * The disk.img is recreated each time the VM is booted unless
    #   virtualisation.persistBootDevice is set.

    # If `useBootLoader`, GRUB goes to the second disk, see
    # note [Disk layout with `useBootLoader`].
    boot.loader.grub.device = mkVMOverride (
      if cfg.useBootLoader
        then driveDeviceName 2 # second disk
        else cfg.bootDevice
    );
    # In UEFI boot, we use an EFI-only partition table layout, so GRUB would fail if it tried to install
    # for both legacy BIOS and UEFI. To avoid this, we set the device to "nodev", which forces a UEFI-only install.
    # Otherwise, we point GRUB at the proper bootloader device.
    # FIXME: make sense of this mess with respect to multiple ESPs being present in the system; probably use boot.efiSysMountpoint?
    boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice);
    boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}";
    virtualisation.rootDevice = mkDefault suggestedRootDevice;

    boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ];

@@ -890,10 +879,10 @@ in
      ''
        # If the disk image appears to be empty, run mke2fs to
        # initialise.
        FSTYPE=$(blkid -o value -s TYPE ${cfg.bootDevice} || true)
        PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.bootDevice} || true)
        FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true)
        PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true)
        if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
            mke2fs -t ext4 ${cfg.bootDevice}
            mke2fs -t ext4 ${cfg.rootDevice}
        fi
      '';

@@ -939,8 +928,6 @@ in
      optional cfg.writableStore "overlay"
      ++ optional (cfg.qemu.diskInterface == "scsi") "sym53c8xx";

    virtualisation.bootDevice = mkDefault (driveDeviceName 1);

    virtualisation.additionalPaths = [ config.system.build.toplevel ];

    virtualisation.sharedDirectories = {
@@ -997,7 +984,7 @@ in
      ])
      (mkIf cfg.useEFIBoot [
        "-drive if=pflash,format=raw,unit=0,readonly=on,file=${cfg.efi.firmware}"
        "-drive if=pflash,format=raw,unit=1,file=$NIX_EFI_VARS"
        "-drive if=pflash,format=raw,unit=1,readonly=off,file=$NIX_EFI_VARS"
      ])
      (mkIf (cfg.bios != null) [
        "-bios ${cfg.bios}/bios.bin"
@@ -1013,23 +1000,14 @@ in
        file = ''"$NIX_DISK_IMAGE"'';
        driveExtraOpts.cache = "writeback";
        driveExtraOpts.werror = "report";
        deviceExtraOpts.bootindex = "1";
      }])
      (mkIf cfg.useNixStoreImage [{
        name = "nix-store";
        file = ''"$TMPDIR"/store.img'';
        deviceExtraOpts.bootindex = if cfg.useBootLoader then "3" else "2";
        deviceExtraOpts.bootindex = "2";
        driveExtraOpts.format = if cfg.writableStore then "qcow2" else "raw";
      }])
      (mkIf cfg.useBootLoader [
        # The order of this list determines the device names, see
        # note [Disk layout with `useBootLoader`].
        {
          name = "boot";
          file = ''"$TMPDIR"/disk.img'';
          driveExtraOpts.media = "disk";
          deviceExtraOpts.bootindex = "1";
        }
      ])
      (imap0 (idx: _: {
        file = "$(pwd)/empty${toString idx}.qcow2";
        driveExtraOpts.werror = "report";
@@ -1065,7 +1043,7 @@ in
          device = "tmpfs";
          fsType = "tmpfs";
        } else {
          device = cfg.bootDevice;
          device = cfg.rootDevice;
          fsType = "ext4";
          autoFormat = true;
        });
@@ -1086,9 +1064,8 @@ in
          options = [ "mode=0755" ];
          neededForBoot = true;
        };
        # see note [Disk layout with `useBootLoader`]
        "/boot" = lib.mkIf cfg.useBootLoader {
          device = "${lookupDriveDeviceName "boot" cfg.qemu.drives}2"; # 2 for e.g. `vdb2`, as created in `bootDisk`
        "/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) {
          device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage`
          fsType = "vfat";
          noCheck = true; # fsck fails on a r/o filesystem
        };
+2 −2
Original line number Diff line number Diff line
@@ -108,9 +108,9 @@ in
      machine.start()
      machine.wait_for_unit("multi-user.target")

      machine.succeed("test -e /run/current-system/bootspec/boot.json")
      machine.succeed("test -e /run/current-system/boot.json")

      bootspec = json.loads(machine.succeed("jq -r '.v1' /run/current-system/bootspec/boot.json"))
      bootspec = json.loads(machine.succeed("jq -r '.v1' /run/current-system/boot.json"))

      assert all(key in bootspec for key in ('initrd', 'initrdSecrets')), "Bootspec should contain initrd or initrdSecrets field when initrd is enabled"
    '';
+1 −1
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ in makeTest {
        # Small root disk for installer
        512
      ];
      virtualisation.bootDevice = "/dev/vdb";
      virtualisation.rootDevice = "/dev/vdb";
    };
  };

+5 −5
Original line number Diff line number Diff line
@@ -30,26 +30,26 @@ in {
    specialisation.boot-luks-wrong-keyfile.configuration = {
      boot.initrd.luks.devices = lib.mkVMOverride {
        cryptroot = {
          device = "/dev/vdc";
          device = "/dev/vdb";
          keyFile = "/etc/cryptroot.key";
          tryEmptyPassphrase = true;
          fallbackToPassword = !systemdStage1;
        };
      };
      virtualisation.bootDevice = "/dev/mapper/cryptroot";
      virtualisation.rootDevice = "/dev/mapper/cryptroot";
      boot.initrd.secrets."/etc/cryptroot.key" = keyfile;
    };

    specialisation.boot-luks-missing-keyfile.configuration = {
      boot.initrd.luks.devices = lib.mkVMOverride {
        cryptroot = {
          device = "/dev/vdc";
          device = "/dev/vdb";
          keyFile = "/etc/cryptroot.key";
          tryEmptyPassphrase = true;
          fallbackToPassword = !systemdStage1;
        };
      };
      virtualisation.bootDevice = "/dev/mapper/cryptroot";
      virtualisation.rootDevice = "/dev/mapper/cryptroot";
    };
  };

@@ -76,7 +76,7 @@ in {

    # Create encrypted volume
    machine.wait_for_unit("multi-user.target")
    machine.succeed("echo "" | cryptsetup luksFormat /dev/vdc --batch-mode")
    machine.succeed("echo "" | cryptsetup luksFormat /dev/vdb --batch-mode")
    machine.succeed("bootctl set-default nixos-generation-1-specialisation-boot-luks-wrong-keyfile.conf")
    machine.succeed("sync")
    machine.crash()
Loading