Unverified Commit 3d941b6b authored by Ryan Lahfa's avatar Ryan Lahfa Committed by GitHub
Browse files

Merge pull request #236656 from nikstur/qemu-vm-persistent-block-device-names

parents 179f75ac 0bdba6c9
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -55,3 +55,17 @@
- A new option was added to the virtualisation module that enables specifying explicitly named network interfaces in QEMU VMs. The existing `virtualisation.vlans` is still supported for cases where the name of the network interface is irrelevant.

- `services.nginx` gained a `defaultListen` option at server-level with support for PROXY protocol listeners, also `proxyProtocol` is now exposed in `services.nginx.virtualHosts.<name>.listen` option. It is now possible to run PROXY listeners and non-PROXY listeners at a server-level, see [#213510](https://github.com/NixOS/nixpkgs/pull/213510/) for more details.

## Nixpkgs internals {#sec-release-23.11-nixpkgs-internals}

- The `qemu-vm.nix` module by default now identifies block devices via
  persistent names available in `/dev/disk/by-*`. Because the rootDevice is
  identified by its filesystem label, it needs to be formatted before the VM is
  started. The functionality of automatically formatting the rootDevice in the
  initrd is removed from the QEMU module. However, for tests that depend on
  this functionality, a test utility for the scripted initrd is added
  (`nixos/tests/common/auto-format-root-device.nix`). To use this in a NixOS
  test, import the module, e.g. `imports = [
  ./common/auto-format-root-device.nix ];` When you use the systemd initrd, you
  can automatically format the root device by setting
  `virtualisation.fileSystems."/".autoFormat = true;`.
+1 −0
Original line number Diff line number Diff line
@@ -573,6 +573,7 @@ let format' = format; in let
        # In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb
        # Use this option to create a symlink from vda to any arbitrary device you want.
        ${optionalString (config.boot.loader.grub.device != "/dev/vda") ''
            mkdir -p $(dirname ${config.boot.loader.grub.device})
            ln -s /dev/vda ${config.boot.loader.grub.device}
        ''}

+65 −98
Original line number Diff line number Diff line
@@ -81,25 +81,6 @@ let

  drivesCmdLine = drives: concatStringsSep "\\\n    " (imap1 driveCmdline drives);


  # Creates a device name from a 1-based numerical index, e.g.
  # * `driveDeviceName 1` -> `/dev/vda`
  # * `driveDeviceName 2` -> `/dev/vdb`
  driveDeviceName = idx:
    let letter = elemAt lowerChars (idx - 1);
    in if cfg.qemu.diskInterface == "scsi" then
      "/dev/sd${letter}"
    else
      "/dev/vd${letter}";

  lookupDriveDeviceName = driveName: driveList:
    (findSingle (drive: drive.name == driveName)
      (throw "Drive ${driveName} not found")
      (throw "Multiple drives named ${driveName}") driveList).device;

  addDeviceNames =
    imap1 (idx: drive: drive // { device = driveDeviceName idx; });

  # Shell script to start the VM.
  startVM =
    ''
@@ -109,25 +90,41 @@ let

      set -e

      # Create an empty ext4 filesystem image. A filesystem image does not
      # contain a partition table but just a filesystem.
      createEmptyFilesystemImage() {
        local name=$1
        local size=$2
        local temp=$(mktemp)
        ${qemu}/bin/qemu-img create -f raw "$temp" "$size"
        ${pkgs.e2fsprogs}/bin/mkfs.ext4 -L ${rootFilesystemLabel} "$temp"
        ${qemu}/bin/qemu-img convert -f raw -O qcow2 "$temp" "$name"
        rm "$temp"
      }

      NIX_DISK_IMAGE=$(readlink -f "''${NIX_DISK_IMAGE:-${toString config.virtualisation.diskImage}}") || test -z "$NIX_DISK_IMAGE"

      if test -n "$NIX_DISK_IMAGE" && ! test -e "$NIX_DISK_IMAGE"; then
          echo "Disk image do not exist, creating the virtualisation disk image..."
          # If we are using a bootloader and default filesystems layout.
          # We have to reuse the system image layout as a backing image format (CoW)
          # So we can write on the top of it.

          # If we are not using the default FS layout, potentially, we are interested into
          # performing operations in postDeviceCommands or at early boot on the raw device.
          # We can still boot through QEMU direct kernel boot feature.
          ${if (cfg.useBootLoader && cfg.useDefaultFilesystems) then ''
            # Create a writable qcow2 image using the systemImage as a backing
            # image.

            # CoW prevent size to be attributed to an image.
            # FIXME: raise this issue to upstream.
            ${qemu}/bin/qemu-img create \
          ${concatStringsSep " \\\n" ([ "-f qcow2" ]
          ++ optional (cfg.useBootLoader && cfg.useDefaultFilesystems) "-F qcow2 -b ${systemImage}/nixos.qcow2"
          ++ optional (!(cfg.useBootLoader && cfg.useDefaultFilesystems)) "-o size=${toString config.virtualisation.diskSize}M"
          ++ [ ''"$NIX_DISK_IMAGE"'' ])}
              -f qcow2 \
              -b ${systemImage}/nixos.qcow2 \
              -F qcow2 \
              "$NIX_DISK_IMAGE"
          '' else if cfg.useDefaultFilesystems then ''
            createEmptyFilesystemImage "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
          '' else ''
            # Create an empty disk image without a filesystem.
            ${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" "${toString cfg.diskSize}M"
          ''
          }
          echo "Virtualisation disk image created."
      fi

@@ -148,6 +145,7 @@ let
              ${pkgs.erofs-utils}/bin/mkfs.erofs \
                --force-uid=0 \
                --force-gid=0 \
                -L ${nixStoreFilesystemLabel} \
                -U eb176051-bd15-49b7-9e6b-462e0b467019 \
                -T 0 \
                --exclude-regex="$(
@@ -218,6 +216,19 @@ let

  regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.additionalPaths; };

  # Use well-defined and persistent filesystem labels to identify block devices.
  rootFilesystemLabel = "nixos";
  espFilesystemLabel = "ESP"; # Hard-coded by make-disk-image.nix
  nixStoreFilesystemLabel = "nix-store";

  # The root drive is a raw disk which does not necessarily contain a
  # filesystem or partition table. It thus cannot be identified via the typical
  # persistent naming schemes (e.g. /dev/disk/by-{label, uuid, partlabel,
  # partuuid}). Instead, supply a well-defined and persistent serial attribute
  # via QEMU. Inside the running system, the disk can then be identified via
  # the /dev/disk/by-id scheme.
  rootDriveSerialAttr = "root";

  # System image is akin to a complete NixOS install with
  # a boot partition and root partition.
  systemImage = import ../../lib/make-disk-image.nix {
@@ -225,6 +236,7 @@ let
    additionalPaths = [ regInfo ];
    format = "qcow2";
    onlyNixStore = false;
    label = rootFilesystemLabel;
    partitionTableType = selectPartitionTableLayout { inherit (cfg) useDefaultFilesystems useEFIBoot; };
    # Bootloader should be installed on the system image only if we are booting through bootloaders.
    # Though, if a user is not using our default filesystems, it is possible to not have any ESP
@@ -247,6 +259,7 @@ let
    additionalPaths = [ regInfo ];
    format = "qcow2";
    onlyNixStore = true;
    label = nixStoreFilesystemLabel;
    partitionTableType = "none";
    installBootLoader = false;
    touchEFIVars = false;
@@ -255,28 +268,6 @@ let
    copyChannel = false;
  };

  bootConfiguration =
    if cfg.useDefaultFilesystems
    then
      if cfg.useBootLoader
      then
        if cfg.useEFIBoot then "efi_bootloading_with_default_fs"
        else "legacy_bootloading_with_default_fs"
      else
        if cfg.directBoot.enable then "direct_boot_with_default_fs"
        else "custom"
    else
      "custom";
  suggestedRootDevice = {
    "efi_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}2";
    "legacy_bootloading_with_default_fs" = "${cfg.bootLoaderDevice}1";
    "direct_boot_with_default_fs" = cfg.bootLoaderDevice;
    # This will enforce a NixOS module type checking error
    # to ask explicitly the user to set a rootDevice.
    # As it will look like `rootDevice = lib.mkDefault null;` after
    # all "computations".
    "custom" = null;
  }.${bootConfiguration};
in

{
@@ -343,44 +334,39 @@ in
    virtualisation.bootLoaderDevice =
      mkOption {
        type = types.path;
        default = lookupDriveDeviceName "root" cfg.qemu.drives;
        defaultText = literalExpression ''lookupDriveDeviceName "root" cfg.qemu.drives'';
        example = "/dev/vda";
        default = "/dev/disk/by-id/virtio-${rootDriveSerialAttr}";
        defaultText = literalExpression ''/dev/disk/by-id/virtio-${rootDriveSerialAttr}'';
        example = "/dev/disk/by-id/virtio-boot-loader-device";
        description =
          lib.mdDoc ''
            The disk to be used for the boot filesystem.
            By default, it is the same disk as the root filesystem.
            The path (inside the VM) to the device to boot from when legacy booting.
          '';
        };

    virtualisation.bootPartition =
      mkOption {
        type = types.nullOr types.path;
        default = if cfg.useEFIBoot then "${cfg.bootLoaderDevice}1" else null;
        defaultText = literalExpression ''if cfg.useEFIBoot then "''${cfg.bootLoaderDevice}1" else null'';
        example = "/dev/vda1";
        default = if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null;
        defaultText = literalExpression ''if cfg.useEFIBoot then "/dev/disk/by-label/${espFilesystemLabel}" else null'';
        example = "/dev/disk/by-label/esp";
        description =
          lib.mdDoc ''
            The boot partition to be used to mount /boot filesystem.
            In legacy boots, this should be null.
            By default, in EFI boot, it is the first partition of the boot device.
            The path (inside the VM) to the device containing the EFI System Partition (ESP).

            If you are *not* booting from a UEFI firmware, this value is, by
            default, `null`. The ESP is mounted under `/boot`.
          '';
      };

    virtualisation.rootDevice =
      mkOption {
        type = types.nullOr types.path;
        example = "/dev/vda2";
        default = "/dev/disk/by-label/${rootFilesystemLabel}";
        defaultText = literalExpression ''/dev/disk/by-label/${rootFilesystemLabel}'';
        example = "/dev/disk/by-label/nixos";
        description =
          lib.mdDoc ''
            The disk or partition to be used for the root filesystem.
            By default (read the source code for more details):

            - under EFI with a bootloader: 2nd partition of the boot disk
            - in legacy boot with a bootloader: 1st partition of the boot disk
            - in direct boot (i.e. without a bootloader): whole disk

            In case you are not using a default boot device or a default filesystem, you have to set explicitly your root device.
            The path (inside the VM) to the device containing the root filesystem.
          '';
      };

@@ -711,7 +697,6 @@ in
        mkOption {
          type = types.listOf (types.submodule driveOpts);
          description = lib.mdDoc "Drives passed to qemu.";
          apply = addDeviceNames;
        };

      diskInterface =
@@ -975,29 +960,11 @@ in
    # FIXME: make a sense of this mess wrt to multiple ESP present in the system, probably use boot.efiSysMountpoint?
    boot.loader.grub.device = mkVMOverride (if cfg.useEFIBoot then "nodev" else cfg.bootLoaderDevice);
    boot.loader.grub.gfxmodeBios = with cfg.resolution; "${toString x}x${toString y}";
    virtualisation.rootDevice = mkDefault suggestedRootDevice;

    boot.initrd.kernelModules = optionals (cfg.useNixStoreImage && !cfg.writableStore) [ "erofs" ];

    boot.loader.supportsInitrdSecrets = mkIf (!cfg.useBootLoader) (mkVMOverride false);

    boot.initrd.extraUtilsCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
      ''
        # We need mke2fs in the initrd.
        copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
      '';

    boot.initrd.postDeviceCommands = lib.mkIf (cfg.useDefaultFilesystems && !config.boot.initrd.systemd.enable)
      ''
        # If the disk image appears to be empty, run mke2fs to
        # initialise.
        FSTYPE=$(blkid -o value -s TYPE ${cfg.rootDevice} || true)
        PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.rootDevice} || true)
        if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
            mke2fs -t ext4 ${cfg.rootDevice}
        fi
      '';

    boot.initrd.postMountCommands = lib.mkIf (!config.boot.initrd.systemd.enable)
      ''
        # Mark this as a NixOS machine.
@@ -1112,6 +1079,7 @@ in
        driveExtraOpts.cache = "writeback";
        driveExtraOpts.werror = "report";
        deviceExtraOpts.bootindex = "1";
        deviceExtraOpts.serial = rootDriveSerialAttr;
      }])
      (mkIf cfg.useNixStoreImage [{
        name = "nix-store";
@@ -1154,7 +1122,6 @@ in
        } else {
          device = cfg.rootDevice;
          fsType = "ext4";
          autoFormat = true;
        });
        "/tmp" = lib.mkIf config.boot.tmp.useTmpfs {
          device = "tmpfs";
@@ -1164,7 +1131,7 @@ in
          options = [ "mode=1777" "strictatime" "nosuid" "nodev" "size=${toString config.boot.tmp.tmpfsSize}" ];
        };
        "/nix/${if cfg.writableStore then ".ro-store" else "store"}" = lib.mkIf cfg.useNixStoreImage {
          device = "${lookupDriveDeviceName "nix-store" cfg.qemu.drives}";
          device = "/dev/disk/by-label/${nixStoreFilesystemLabel}";
          neededForBoot = true;
          options = [ "ro" ];
        };
@@ -1174,7 +1141,7 @@ in
          neededForBoot = true;
        };
        "/boot" = lib.mkIf (cfg.useBootLoader && cfg.bootPartition != null) {
          device = cfg.bootPartition; # 1 for e.g. `vda1`, as created in `systemImage`
          device = cfg.bootPartition;
          fsType = "vfat";
          noCheck = true; # fsck fails on a r/o filesystem
        };
+29 −0
Original line number Diff line number Diff line
# Test helper for the scripted initrd: formats
# `config.virtualisation.rootDevice` as ext4 during stage 1 when the
# device appears to be empty.
#
# If you are using `boot.initrd.systemd.enable = true`, you can set
# `virtualisation.fileSystems."/".autoFormat = true;` instead of
# importing this module.

{ config, pkgs, ... }:

let
  inherit (config.virtualisation) rootDevice;
in
{
  boot.initrd = {
    # Copy mke2fs into the initrd so that the formatting step below can
    # invoke it.
    extraUtilsCommands = ''
      # We need mke2fs in the initrd.
      copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
    '';

    # Probe the root device with blkid; a filesystem is only created when
    # neither a filesystem type nor a partition table type is reported.
    postDeviceCommands = ''
      # If the disk image appears to be empty, run mke2fs to
      # initialise.
      FSTYPE=$(blkid -o value -s TYPE ${rootDevice} || true)
      PARTTYPE=$(blkid -o value -s PTTYPE ${rootDevice} || true)
      if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
          mke2fs -t ext4 ${rootDevice}
      fi
    '';
  };
}
+7 −3
Original line number Diff line number Diff line
@@ -21,13 +21,17 @@ import ./make-test-python.nix {
    boot.initrd.systemd.enable = systemdStage1;
  };

  testScript = ''
  testScript =  { nodes, ...}:
  let
    rootDevice = nodes.machine.virtualisation.rootDevice;
  in
  ''
    machine.wait_for_unit("default.target")

    with subtest("root fs is fsckd"):
        machine.succeed("journalctl -b | grep '${if systemdStage1
          then "fsck.*vda.*clean"
          else "fsck.ext4.*/dev/vda"}'")
          then "fsck.*${builtins.baseNameOf rootDevice}.*clean"
          else "fsck.ext4.*${rootDevice}"}'")

    with subtest("mnt fs is fsckd"):
        machine.succeed("journalctl -b | grep 'fsck.*vdb.*clean'")
Loading