Unverified commit cb60a011, authored by rorosen and committed by GitHub
Browse files

nixos/rke2: make tests work in test driver sandbox (#395775)

parent b222bbdb
Loading
Loading
Loading
Loading
+128 −113
Original line number Diff line number Diff line
@@ -6,26 +6,32 @@ import ../make-test-python.nix (
    ...
  }:
  let
    pauseImage = pkgs.dockerTools.streamLayeredImage {
      name = "test.local/pause";
    throwSystem = throw "RKE2: Unsupported system: ${pkgs.stdenv.hostPlatform.system}";
    coreImages =
      {
        aarch64-linux = rke2.images-core-linux-arm64-tar-zst;
        x86_64-linux = rke2.images-core-linux-amd64-tar-zst;
      }
      .${pkgs.stdenv.hostPlatform.system} or throwSystem;
    canalImages =
      {
        aarch64-linux = rke2.images-canal-linux-arm64-tar-zst;
        x86_64-linux = rke2.images-canal-linux-amd64-tar-zst;
      }
      .${pkgs.stdenv.hostPlatform.system} or throwSystem;
    helloImage = pkgs.dockerTools.buildImage {
      name = "test.local/hello";
      tag = "local";
      contents = pkgs.buildEnv {
        name = "rke2-pause-image-env";
      compressor = "zstd";
      copyToRoot = pkgs.buildEnv {
        name = "rke2-hello-image-env";
        paths = with pkgs; [
          tini
          bashInteractive
          coreutils
          socat
        ];
      };
      config.Entrypoint = [
        "/bin/tini"
        "--"
        "/bin/sleep"
        "inf"
      ];
    };
    # A daemonset that responds 'server' on port 8000
    # A daemonset that responds 'hello' on port 8000
    networkTestDaemonset = pkgs.writeText "test.yml" ''
      apiVersion: apps/v1
      kind: DaemonSet
@@ -44,113 +50,133 @@ import ../make-test-python.nix (
          spec:
            containers:
            - name: test
              image: test.local/pause:local
              image: test.local/hello:local
              imagePullPolicy: Never
              resources:
                limits:
                  memory: 20Mi
              command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo server"]
              command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo hello"]
    '';
    tokenFile = pkgs.writeText "token" "p@s$w0rd";
    agentTokenFile = pkgs.writeText "agent-token" "p@s$w0rd";
    agentTokenFile = pkgs.writeText "agent-token" "agentP@s$w0rd";
    # Let flannel use eth1 to enable inter-node communication in tests
    canalConfig = pkgs.writeText "rke2-canal-config.yaml" ''
      apiVersion: helm.cattle.io/v1
      kind: HelmChartConfig
      metadata:
        name: rke2-canal
        namespace: kube-system
      spec:
        valuesContent: |-
          flannel:
            iface: "eth1"
    '';
  in
  {
    name = "${rke2.name}-multi-node";
    meta.maintainers = rke2.meta.maintainers;

    nodes = {
      server1 =
        { pkgs, ... }:
      server =
        {
          networking.firewall.enable = false;
          networking.useDHCP = false;
          networking.defaultGateway = "192.168.1.1";
          networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
          config,
          nodes,
          pkgs,
          ...
        }:
        {
              address = "192.168.1.1";
              prefixLength = 24;
            }
          ];

          virtualisation.memorySize = 1536;
          virtualisation.diskSize = 4096;

          services.rke2 = {
            enable = true;
            role = "server";
            inherit tokenFile;
            inherit agentTokenFile;
            nodeName = "${rke2.name}-server1";
            package = rke2;
            nodeIP = "192.168.1.1";
            disable = [
              "rke2-coredns"
              "rke2-metrics-server"
              "rke2-ingress-nginx"
            ];
            extraFlags = [
              "--cluster-reset"
            ];
          # Setup image archives to be imported by rke2
          systemd.tmpfiles.settings."10-rke2" = {
            "/var/lib/rancher/rke2/agent/images/rke2-images-core.tar.zst" = {
              "L+".argument = "${coreImages}";
            };
            "/var/lib/rancher/rke2/agent/images/rke2-images-canal.tar.zst" = {
              "L+".argument = "${canalImages}";
            };
            "/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
              "L+".argument = "${helloImage}";
            };
            # Copy the canal config so that rke2 can write the remaining default values to it
            "/var/lib/rancher/rke2/server/manifests/rke2-canal-config.yaml" = {
              "C".argument = "${canalConfig}";
            };
          };

      server2 =
        { pkgs, ... }:
        {
          networking.firewall.enable = false;
          networking.useDHCP = false;
          networking.defaultGateway = "192.168.1.2";
          networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
            {
              address = "192.168.1.2";
              prefixLength = 24;
            }
          # Canal CNI with VXLAN
          networking.firewall.allowedUDPPorts = [ 8472 ];
          networking.firewall.allowedTCPPorts = [
            # Kubernetes API
            6443
            # Canal CNI health checks
            9099
            # RKE2 supervisor API
            9345
          ];

          virtualisation.memorySize = 1536;
          virtualisation.diskSize = 4096;
          # RKE2 needs more resources than the default
          virtualisation.cores = 4;
          virtualisation.memorySize = 4096;
          virtualisation.diskSize = 8092;

          services.rke2 = {
            enable = true;
            role = "server";
            serverAddr = "https://192.168.1.1:6443";
            package = rke2;
            inherit tokenFile;
            inherit agentTokenFile;
            nodeName = "${rke2.name}-server2";
            package = rke2;
            nodeIP = "192.168.1.2";
            # Without nodeIP the apiserver starts with the wrong service IP family
            nodeIP = config.networking.primaryIPAddress;
            disable = [
              "rke2-coredns"
              "rke2-metrics-server"
              "rke2-ingress-nginx"
              "rke2-snapshot-controller"
              "rke2-snapshot-controller-crd"
              "rke2-snapshot-validation-webhook"
            ];
          };
        };

      agent1 =
        { pkgs, ... }:
      agent =
        {
          networking.firewall.enable = false;
          networking.useDHCP = false;
          networking.defaultGateway = "192.168.1.3";
          networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
          config,
          nodes,
          pkgs,
          ...
        }:
        {
              address = "192.168.1.3";
              prefixLength = 24;
            }
          ];
          # Setup image archives to be imported by rke2
          systemd.tmpfiles.settings."10-rke2" = {
            "/var/lib/rancher/rke2/agent/images/rke2-images-core.linux-amd64.tar.zst" = {
              "L+".argument = "${coreImages}";
            };
            "/var/lib/rancher/rke2/agent/images/rke2-images-canal.linux-amd64.tar.zst" = {
              "L+".argument = "${canalImages}";
            };
            "/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
              "L+".argument = "${helloImage}";
            };
            "/var/lib/rancher/rke2/server/manifests/rke2-canal-config.yaml" = {
              "C".argument = "${canalConfig}";
            };
          };

          virtualisation.memorySize = 1536;
          virtualisation.diskSize = 4096;
          # Canal CNI health checks
          networking.firewall.allowedTCPPorts = [ 9099 ];
          # Canal CNI with VXLAN
          networking.firewall.allowedUDPPorts = [ 8472 ];

          # The agent node can work with less resources
          virtualisation.memorySize = 2048;
          virtualisation.diskSize = 8092;

          services.rke2 = {
            enable = true;
            role = "agent";
            tokenFile = agentTokenFile;
            serverAddr = "https://192.168.1.2:6443";
            nodeName = "${rke2.name}-agent1";
            package = rke2;
            nodeIP = "192.168.1.3";
            tokenFile = agentTokenFile;
            serverAddr = "https://${nodes.server.networking.primaryIPAddress}:9345";
            nodeIP = config.networking.primaryIPAddress;
          };
        };
    };
@@ -158,53 +184,42 @@ import ../make-test-python.nix (
    testScript =
      let
        kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
        ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
        jq = "${pkgs.jq}/bin/jq";
        ping = "${pkgs.iputils}/bin/ping";
      in
      # python
      ''
        machines = [server1, server2, agent1]
        start_all()

        for machine in machines:
            machine.start()
            machine.wait_for_unit("rke2")
        server.wait_for_unit("rke2-server")
        agent.wait_for_unit("rke2-agent")

        # wait for the agent to show up
        server1.succeed("${kubectl} get node ${rke2.name}-agent1")
        # Wait for the agent to be ready
        server.wait_until_succeeds(r"""${kubectl} wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' nodes/agent""")

        for machine in machines:
            machine.succeed("${pauseImage} | ${ctr} image import -")

        server1.succeed("${kubectl} cluster-info")
        server1.wait_until_succeeds("${kubectl} get serviceaccount default")
        server.succeed("${kubectl} cluster-info")
        server.wait_until_succeeds("${kubectl} get serviceaccount default")

        # Now create a pod on each node via a daemonset and verify they can talk to each other.
        server1.succeed("${kubectl} apply -f ${networkTestDaemonset}")
        server1.wait_until_succeeds(
        server.succeed("${kubectl} apply -f ${networkTestDaemonset}")
        server.wait_until_succeeds(
            f'[ "$(${kubectl} get ds test -o json | ${jq} .status.numberReady)" -eq {len(machines)} ]'
        )

        # Get pod IPs
        pods = server1.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
        pods = server.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
        pod_ips = [
            server1.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
            server.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
        ]

        # Verify each server can ping each pod ip
        # Verify each node can ping each pod ip
        for pod_ip in pod_ips:
            server1.succeed(f"${ping} -c 1 {pod_ip}")
            agent1.succeed(f"${ping} -c 1 {pod_ip}")

        # Verify the pods can talk to each other
        resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[0]} -- socat TCP:{pod_ips[1]}:8000 -")
        assert resp.strip() == "server"
        resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[1]} -- socat TCP:{pod_ips[0]}:8000 -")
        assert resp.strip() == "server"

        # Cleanup
        server1.succeed("${kubectl} delete -f ${networkTestDaemonset}")
        for machine in machines:
            machine.shutdown()
            # The CNI sometimes needs a little time
            server.wait_until_succeeds(f"ping -c 1 {pod_ip}", timeout=5)
            agent.wait_until_succeeds(f"ping -c 1 {pod_ip}", timeout=5)
            # Verify the server can exec into the pod
            # for pod in pods:
            #     resp = server.succeed(f"${kubectl} exec {pod} -- socat TCP:{pod_ip}:8000 -")
            #     assert resp.strip() == "hello", f"Unexpected response from hello daemonset: {resp.strip()}"
      '';
  }
)
+61 −51
Original line number Diff line number Diff line
@@ -6,69 +6,83 @@ import ../make-test-python.nix (
    ...
  }:
  let
    pauseImage = pkgs.dockerTools.streamLayeredImage {
      name = "test.local/pause";
    throwSystem = throw "RKE2: Unsupported system: ${pkgs.stdenv.hostPlatform.system}";
    coreImages =
      {
        aarch64-linux = rke2.images-core-linux-arm64-tar-zst;
        x86_64-linux = rke2.images-core-linux-amd64-tar-zst;
      }
      .${pkgs.stdenv.hostPlatform.system} or throwSystem;
    canalImages =
      {
        aarch64-linux = rke2.images-canal-linux-arm64-tar-zst;
        x86_64-linux = rke2.images-canal-linux-amd64-tar-zst;
      }
      .${pkgs.stdenv.hostPlatform.system} or throwSystem;
    helloImage = pkgs.dockerTools.buildImage {
      name = "test.local/hello";
      tag = "local";
      contents = pkgs.buildEnv {
        name = "rke2-pause-image-env";
        paths = with pkgs; [
          tini
          (hiPrio coreutils)
          busybox
        ];
      compressor = "zstd";
      copyToRoot = pkgs.hello;
      config.Entrypoint = [ "${pkgs.hello}/bin/hello" ];
    };
      config.Entrypoint = [
        "/bin/tini"
        "--"
        "/bin/sleep"
        "inf"
      ];
    };
    testPodYaml = pkgs.writeText "test.yaml" ''
      apiVersion: v1
      kind: Pod
    testJobYaml = pkgs.writeText "test.yaml" ''
      apiVersion: batch/v1
      kind: Job
      metadata:
        name: test
      spec:
        template:
          spec:
            containers:
            - name: test
          image: test.local/pause:local
          imagePullPolicy: Never
          command: ["sh", "-c", "sleep inf"]
              image: "test.local/hello:local"
            restartPolicy: Never
    '';
  in
  {
    name = "${rke2.name}-single-node";
    meta.maintainers = rke2.meta.maintainers;

    nodes.machine =
      { pkgs, ... }:
      {
        networking.firewall.enable = false;
        networking.useDHCP = false;
        networking.defaultGateway = "192.168.1.1";
        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
        config,
        nodes,
        pkgs,
        ...
      }:
      {
            address = "192.168.1.1";
            prefixLength = 24;
          }
        ];
        # Setup image archives to be imported by rke2
        systemd.tmpfiles.settings."10-rke2" = {
          "/var/lib/rancher/rke2/agent/images/rke2-images-core.tar.zst" = {
            "L+".argument = "${coreImages}";
          };
          "/var/lib/rancher/rke2/agent/images/rke2-images-canal.tar.zst" = {
            "L+".argument = "${canalImages}";
          };
          "/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
            "L+".argument = "${helloImage}";
          };
        };

        virtualisation.memorySize = 1536;
        virtualisation.diskSize = 4096;
        # RKE2 needs more resources than the default
        virtualisation.cores = 4;
        virtualisation.memorySize = 4096;
        virtualisation.diskSize = 8092;

        services.rke2 = {
          enable = true;
          role = "server";
          package = rke2;
          nodeIP = "192.168.1.1";
          # Without nodeIP the apiserver starts with the wrong service IP family
          nodeIP = config.networking.primaryIPAddress;
          # Slightly reduce resource consumption
          disable = [
            "rke2-coredns"
            "rke2-metrics-server"
            "rke2-ingress-nginx"
          ];
          extraFlags = [
            "--cluster-reset"
            "rke2-snapshot-controller"
            "rke2-snapshot-controller-crd"
            "rke2-snapshot-validation-webhook"
          ];
        };
      };
@@ -76,23 +90,19 @@ import ../make-test-python.nix (
    testScript =
      let
        kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
        ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
      in
      # python
      ''
        start_all()

        machine.wait_for_unit("rke2")
        machine.wait_for_unit("rke2-server")
        machine.succeed("${kubectl} cluster-info")
        machine.wait_until_succeeds(
          "${pauseImage} | ${ctr} -n k8s.io image import -"
        )

        machine.wait_until_succeeds("${kubectl} get serviceaccount default")
        machine.succeed("${kubectl} apply -f ${testPodYaml}")
        machine.succeed("${kubectl} wait --for 'condition=Ready' pod/test")
        machine.succeed("${kubectl} delete -f ${testPodYaml}")

        machine.shutdown()
        machine.succeed("${kubectl} apply -f ${testJobYaml}")
        machine.wait_until_succeeds("${kubectl} wait --for 'condition=complete' job/test")
        output = machine.succeed("${kubectl} logs -l batch.kubernetes.io/job-name=test")
        assert output.rstrip() == "Hello, world!", f"unexpected output of test job: {output}"
      '';
  }
)
+9 −3
Original line number Diff line number Diff line
@@ -134,15 +134,21 @@ let
    passthru = {
      inherit updateScript;
      tests =
        let
          moduleTests =
            let
              package_version =
                "rke2_" + lib.replaceStrings [ "." ] [ "_" ] (lib.versions.majorMinor rke2Version);
            in
            lib.mapAttrs (name: value: nixosTests.rke2.${name}.${package_version}) nixosTests.rke2;
        in
        {
          version = testers.testVersion {
            package = rke2;
            version = "v${version}";
          };
        }
        // lib.optionalAttrs stdenv.hostPlatform.isLinux {
          inherit (nixosTests) rke2;
        };
        // moduleTests;
    } // (lib.mapAttrs (_: value: fetchurl value) imagesVersions);

    meta = with lib; {