Commit 009a234b authored by Winter's avatar Winter
Browse files

prefetch-npm-deps: repack hosted git deps

Previously, we stored the tarballs from the hosted Git providers directly in the cache. However, as we've seen with `fetchFromGitHub` etc., these files may change subtly.

Given this, this commit repacks the dependencies before storing them in the cache.
parent 091d039b
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -7,10 +7,11 @@
      substitutions = {
        nodeSrc = srcOnly nodejs;

        # Specify the stdenv's `diff` and `jq` by abspath to ensure that the user's build
        # Specify `diff`, `jq`, and `prefetch-npm-deps` by abspath to ensure that the user's build
        # inputs do not cause us to find the wrong binaries.
        diff = "${buildPackages.diffutils}/bin/diff";
        jq = "${buildPackages.jq}/bin/jq";
        prefetchNpmDeps = "${buildPackages.prefetch-npm-deps}/bin/prefetch-npm-deps";

        nodeVersion = nodejs.version;
        nodeVersionMajor = lib.versions.major nodejs.version;
+2 −0
Original line number Diff line number Diff line
@@ -56,6 +56,8 @@ npmConfigHook() {
      exit 1
    fi

    @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile"

    local cachePath

    if [ -z "${makeCacheWritable-}" ]; then
+8 −3
Original line number Diff line number Diff line
{ lib, stdenvNoCC, rustPlatform, Security, testers, fetchurl, prefetch-npm-deps, fetchNpmDeps }:
{ lib, stdenvNoCC, rustPlatform, makeWrapper, Security, gnutar, gzip, testers, fetchurl, prefetch-npm-deps, fetchNpmDeps }:

{
  prefetch-npm-deps = rustPlatform.buildRustPackage {
@@ -16,8 +16,13 @@

    cargoLock.lockFile = ./Cargo.lock;

    nativeBuildInputs = [ makeWrapper ];
    buildInputs = lib.optional stdenvNoCC.isDarwin Security;

    postInstall = ''
      wrapProgram "$out/bin/prefetch-npm-deps" --prefix PATH : ${lib.makeBinPath [ gnutar gzip ]}
    '';

    passthru.tests =
      let
        makeTestSrc = { name, src }: stdenvNoCC.mkDerivation {
@@ -79,7 +84,7 @@
            hash = "sha256-X9mCwPqV5yP0S2GonNvpYnLSLJMd/SUIked+hMRxDpA=";
          };

          hash = "sha256-ri8qvYjn420ykmCC2Uy5P3jxVVrKWJG3ug/qLIGcR7o=";
          hash = "sha256-5Mg7KDJLMM5e/7BCHGinGAnBRft2ySQzvKW06p3u/0o=";
        };

        linkDependencies = makeTest {
@@ -102,7 +107,7 @@
            hash = "sha256-1fGNxYJi1I4cXK/jinNG+Y6tPEOhP3QAqWOBEQttS9E=";
          };

          hash = "sha256-73rLcSBgsZRJFELaKK++62hVbt1QT8JgLu2hyDSmIZE=";
          hash = "sha256-8xF8F74nHwL9KPN2QLsxnfvsk0rNCKOZniYJQCD5u/I=";
        };
      };

+140 −31
Original line number Diff line number Diff line
@@ -4,11 +4,12 @@ use crate::cacache::Cache;
use anyhow::{anyhow, Context};
use rayon::prelude::*;
use serde::Deserialize;
use serde_json::{Map, Value};
use std::{
    collections::{HashMap, HashSet},
    env, fmt, fs,
    env, fmt, fs, io,
    path::Path,
    process::{self, Command},
    process::{self, Command, Stdio},
};
use tempfile::tempdir;
use url::Url;
@@ -245,6 +246,55 @@ fn get_initial_url() -> anyhow::Result<Url> {
    Url::parse("git+ssh://git@a.b").context("initial url should be valid")
}

/// Strips the `integrity` field from every Git dependency recorded in a lockfile.
///
/// Hosted Git providers can serve a dependency through their automatic tarball feature. When such
/// a dependency is pinned to a commit identifier, npm generates a tarball and writes that tarball's
/// integrity hash into the lockfile.
///
/// That hash is dropped here so it can be replaced by our own deterministic copies of dependencies
/// from hosted Git providers.
///
/// Returns `Ok(None)` when `lockfileVersion` is below 2 or when no package needed fixing, and
/// `Ok(Some(lock))` with the modified lockfile otherwise.
///
/// # Errors
///
/// Fails when `lockfileVersion` is missing or not an integer, or when `packages` is missing or
/// not a map.
fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> {
    let version = lock
        .get("lockfileVersion")
        .ok_or_else(|| anyhow!("couldn't get lockfile version"))?
        .as_i64()
        .ok_or_else(|| anyhow!("lockfile version isn't an int"))?;

    // Lockfiles older than version 2 are left untouched.
    if version < 2 {
        return Ok(None);
    }

    let packages = lock
        .get_mut("packages")
        .ok_or_else(|| anyhow!("couldn't get packages"))?
        .as_object_mut()
        .ok_or_else(|| anyhow!("packages isn't a map"))?;

    let mut changed = false;

    for entry in packages.values_mut() {
        // Only entries resolved through `git+ssh://` that still carry a hash are of interest.
        let is_git = matches!(
            entry.get("resolved"),
            Some(Value::String(url)) if url.starts_with("git+ssh://")
        );

        if !is_git || entry.get("integrity").is_none() {
            continue;
        }

        entry
            .as_object_mut()
            .ok_or_else(|| anyhow!("package isn't a map"))?
            .remove("integrity");

        changed = true;
    }

    if !changed {
        return Ok(None);
    }

    // NOTE(review): presumably the top-level `dependencies` tree is dropped because it would
    // still carry the old integrity values; confirm against npm's lockfile handling.
    lock.remove("dependencies");

    Ok(Some(lock))
}

#[allow(clippy::too_many_lines)]
fn main() -> anyhow::Result<()> {
    let args = env::args().collect::<Vec<_>>();

@@ -256,6 +306,18 @@ fn main() -> anyhow::Result<()> {
        process::exit(1);
    }

    if args[1] == "--fixup-lockfile" {
        let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?;

        if let Some(fixed) = fixup_lockfile(lock)? {
            println!("Fixing lockfile");

            fs::write(&args[2], serde_json::to_string(&fixed)?)?;
        }

        return Ok(());
    }

    let lock_content = fs::read_to_string(&args[1])?;
    let lock: PackageLock = serde_json::from_str(&lock_content)?;

@@ -310,7 +372,9 @@ fn main() -> anyhow::Result<()> {

    let cache = Cache::new(out.join("_cacache"));

    packages.into_par_iter().try_for_each(|(dep, package)| {
    packages
        .into_par_iter()
        .try_for_each(|(dep, mut package)| {
            eprintln!("{dep}");

            let mut resolved = match package.resolved {
@@ -318,17 +382,62 @@ fn main() -> anyhow::Result<()> {
                _ => unreachable!(),
            };

            let mut hosted = false;

            if let Some(hosted_git_url) = get_hosted_git_url(&resolved) {
                resolved = hosted_git_url;
                package.integrity = None;
                hosted = true;
            }

            let mut data = Vec::new();

        agent
            .get(resolved.as_str())
            .call()?
            .into_reader()
            .read_to_end(&mut data)?;
            let mut body = agent.get(resolved.as_str()).call()?.into_reader();

            if hosted {
                let workdir = tempdir()?;

                let tar_path = workdir.path().join("package");

                fs::create_dir(&tar_path)?;

                let mut cmd = Command::new("tar")
                    .args(["--extract", "--gzip", "--strip-components=1", "-C"])
                    .arg(&tar_path)
                    .stdin(Stdio::piped())
                    .spawn()?;

                io::copy(&mut body, &mut cmd.stdin.take().unwrap())?;

                let exit = cmd.wait()?;

                if !exit.success() {
                    return Err(anyhow!(
                        "failed to extract tarball for {dep}: tar exited with status code {}",
                        exit.code().unwrap()
                    ));
                }

                data = Command::new("tar")
                    .args([
                        "--sort=name",
                        "--mtime=0",
                        "--owner=0",
                        "--group=0",
                        "--numeric-owner",
                        "--format=gnu",
                        "-I",
                        "gzip -n -9",
                        "--create",
                        "-C",
                    ])
                    .arg(workdir.path())
                    .arg("package")
                    .output()?
                    .stdout;
            } else {
                body.read_to_end(&mut data)?;
            }

            cache
                .put(
+53 −2
Original line number Diff line number Diff line
use super::{
    get_hosted_git_url, get_ideal_hash, get_initial_url, to_new_packages, OldPackage, Package,
    UrlOrString,
    fixup_lockfile, get_hosted_git_url, get_ideal_hash, get_initial_url, to_new_packages,
    OldPackage, Package, UrlOrString,
};
use serde_json::json;
use std::collections::HashMap;
use url::Url;

@@ -88,3 +89,53 @@ fn git_shorthand_v1() -> anyhow::Result<()> {

    Ok(())
}

/// Checks that `fixup_lockfile` drops `integrity` only from the `git+ssh://` package, and that a
/// lockfile with `lockfileVersion` 1 yields `None`.
#[test]
fn lockfile_fixup() -> anyhow::Result<()> {
    // A version 2 lockfile holding an empty root entry, an `https://` package, and a
    // `git+ssh://` package, the latter two both carrying an `integrity` value.
    let original = json!({
        "lockfileVersion": 2,
        "name": "foo",
        "packages": {
            "": {},
            "foo": {
                "resolved": "https://github.com/NixOS/nixpkgs",
                "integrity": "aaa"
            },
            "bar": {
                "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
                "integrity": "bbb"
            }
        }
    });

    // Same document, except that "bar" no longer has an `integrity` field.
    let want = json!({
        "lockfileVersion": 2,
        "name": "foo",
        "packages": {
            "": {},
            "foo": {
                "resolved": "https://github.com/NixOS/nixpkgs",
                "integrity": "aaa"
            },
            "bar": {
                "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git"
            }
        }
    });

    let fixed = fixup_lockfile(original.as_object().unwrap().clone())?;
    assert_eq!(fixed, Some(want.as_object().unwrap().clone()));

    // A version 1 lockfile is reported as needing no changes.
    let v1 = json!({"lockfileVersion": 1});
    let untouched = fixup_lockfile(v1.as_object().unwrap().clone())?;
    assert_eq!(untouched, None);

    Ok(())
}