Unverified Commit ac35d7ea authored by Winter's avatar Winter Committed by Lily Foster
Browse files

prefetch-npm-deps: look up hashes from cache when fixing up lockfiles

parent 7efebca8
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -56,6 +56,9 @@ npmConfigHook() {
      exit 1
    fi

    export CACHE_MAP_PATH="$TMP/MEOW"
    @prefetchNpmDeps@ --map-cache

    @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile"

    local cachePath
@@ -109,6 +112,9 @@ npmConfigHook() {

    patchShebangs node_modules

    rm "$CACHE_MAP_PATH"
    unset CACHE_MAP_PATH

    echo "Finished npmConfigHook"
}

+30 −0
Original line number Diff line number Diff line
@@ -305,6 +305,7 @@ dependencies = [
 "tempfile",
 "ureq",
 "url",
 "walkdir",
]

[[package]]
@@ -400,6 +401,15 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"

[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
 "winapi-util",
]

[[package]]
name = "scopeguard"
version = "1.1.0"
@@ -583,6 +593,17 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"

[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
 "same-file",
 "winapi",
 "winapi-util",
]

[[package]]
name = "wasm-bindgen"
version = "0.2.82"
@@ -682,6 +703,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"

[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
 "winapi",
]

[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
+1 −0
Original line number Diff line number Diff line
@@ -17,3 +17,4 @@ sha2 = "0.10.6"
tempfile = "3.3.0"
ureq = { version = "2.5.0" }
url = { version = "2.3.1", features = ["serde"] }
walkdir = "2.3.2"
+15 −15
Original line number Diff line number Diff line
use digest::{Digest, Update};
use serde::Serialize;
use serde::{Deserialize, Serialize};
use sha1::Sha1;
use sha2::{Sha256, Sha512};
use std::{
@@ -9,24 +9,24 @@ use std::{
};
use url::Url;

#[derive(Serialize)]
struct Key {
    key: String,
    integrity: String,
    time: u8,
    size: usize,
    metadata: Metadata,
#[derive(Serialize, Deserialize)]
pub(super) struct Key {
    pub(super) key: String,
    pub(super) integrity: String,
    pub(super) time: u8,
    pub(super) size: usize,
    pub(super) metadata: Metadata,
}

#[derive(Serialize)]
struct Metadata {
    url: Url,
    options: Options,
#[derive(Serialize, Deserialize)]
pub(super) struct Metadata {
    pub(super) url: Url,
    pub(super) options: Options,
}

#[derive(Serialize)]
struct Options {
    compress: bool,
#[derive(Serialize, Deserialize)]
pub(super) struct Options {
    pub(super) compress: bool,
}

pub struct Cache(PathBuf);
+259 −37
Original line number Diff line number Diff line
#![warn(clippy::pedantic)]

use crate::cacache::Cache;
use anyhow::anyhow;
use crate::cacache::{Cache, Key};
use anyhow::{anyhow, bail};
use rayon::prelude::*;
use serde_json::{Map, Value};
use std::{
    collections::HashMap,
    env, fs,
    path::Path,
    path::{Path, PathBuf},
    process::{self, Command},
};
use tempfile::tempdir;
use url::Url;
use walkdir::WalkDir;

mod cacache;
mod parse;

/// `fixup_lockfile` removes the `integrity` field from Git dependencies.
fn cache_map_path() -> Option<PathBuf> {
    // The location of the cache map file is handed over through the `CACHE_MAP_PATH` environment variable;
    // when the variable is not set there is no map file, so `None` is returned.
    let raw = env::var_os("CACHE_MAP_PATH")?;

    Some(PathBuf::from(raw))
}

/// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies.
///
/// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1
/// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash
/// we cache as (which is the strongest available one).
///
/// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features.
/// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that
/// tarball into the lockfile.
///
/// Thus, we remove this hash, to replace it with our own deterministic copies of dependencies from hosted Git providers.
fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> {
    if lock
///
/// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the
/// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data.
fn fixup_lockfile(
    mut lock: Map<String, Value>,
    cache: &Option<HashMap<String, String>>,
) -> anyhow::Result<Option<Map<String, Value>>> {
    let mut fixed = false;

    match lock
        .get("lockfileVersion")
        .ok_or_else(|| anyhow!("couldn't get lockfile version"))?
        .as_i64()
        .ok_or_else(|| anyhow!("lockfile version isn't an int"))?
        < 2
    {
        return Ok(None);
    }

    let mut fixed = false;

        1 => fixup_v1_deps(
            lock.get_mut("dependencies")
                .unwrap()
                .as_object_mut()
                .unwrap(),
            cache,
            &mut fixed,
        ),
        2 | 3 => {
            for package in lock
                .get_mut("packages")
                .ok_or_else(|| anyhow!("couldn't get packages"))?
@@ -42,26 +64,113 @@ fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<Str
                .values_mut()
            {
                if let Some(Value::String(resolved)) = package.get("resolved") {
            if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() {
                    if let Some(Value::String(integrity)) = package.get("integrity") {
                        if resolved.starts_with("git+ssh://") {
                            fixed = true;

                            package
                                .as_object_mut()
                                .ok_or_else(|| anyhow!("package isn't a map"))?
                                .remove("integrity");
                        } else if let Some(cache_hashes) = cache {
                            let cache_hash = cache_hashes
                                .get(resolved)
                                .expect("dependency should have a hash");

                            if integrity != cache_hash {
                                fixed = true;

                                *package
                                    .as_object_mut()
                                    .ok_or_else(|| anyhow!("package isn't a map"))?
                                    .get_mut("integrity")
                                    .unwrap() = Value::String(cache_hash.clone());
                            }
                        }
                    }
                }
            }

            if fixed {
                lock.remove("dependencies");
            }
        }
        v => bail!("unsupported lockfile version {v}"),
    }

    if fixed {
        Ok(Some(lock))
    } else {
        Ok(None)
    }
}

/// Recursively fixes up the `dependencies` tree of a v1 lockfile in place.
///
/// For every dependency that has both a string `resolved` and a string `integrity` field:
///
/// * if `resolved` starts with `git+ssh://`, the `integrity` field is removed;
/// * otherwise, if `cache` is `Some`, `integrity` is overwritten with the hash stored for `resolved` whenever
///   the two differ.
///
/// `fixed` is set to `true` as soon as any entry is modified; it is never reset to `false`. Nested
/// `dependencies` objects are processed the same way.
///
/// # Panics
///
/// Panics if a dependency is not a JSON object, or if `cache` is `Some` but has no entry for a non-Git
/// dependency's `resolved` URL.
fn fixup_v1_deps(
    dependencies: &mut serde_json::Map<String, Value>,
    cache: &Option<HashMap<String, String>>,
    fixed: &mut bool,
) {
    /// What has to happen to a dependency's `integrity` field.
    enum Fixup {
        Remove,
        Replace(String),
    }

    for dep in dependencies.values_mut() {
        let entry = dep.as_object_mut().expect("v1 dep must be object");

        // Decide what to do while only reading the entry, so that the mutation below needs no outstanding borrows.
        let fixup = match (entry.get("resolved"), entry.get("integrity")) {
            (Some(Value::String(resolved)), Some(Value::String(_)))
                if resolved.starts_with("git+ssh://") =>
            {
                Some(Fixup::Remove)
            }
            (Some(Value::String(resolved)), Some(Value::String(integrity))) => cache
                .as_ref()
                .map(|hashes| {
                    hashes
                        .get(resolved)
                        .expect("dependency should have a hash")
                })
                .filter(|wanted| *wanted != integrity)
                .map(|wanted| Fixup::Replace(wanted.clone())),
            _ => None,
        };

        match fixup {
            Some(Fixup::Remove) => {
                *fixed = true;
                entry.remove("integrity");
            }
            Some(Fixup::Replace(hash)) => {
                *fixed = true;

                // The key is known to exist: a `Replace` is only produced when `integrity` was present.
                if let Some(slot) = entry.get_mut("integrity") {
                    *slot = Value::String(hash);
                }
            }
            None => {}
        }

        if let Some(Value::Object(nested)) = entry.get_mut("dependencies") {
            fixup_v1_deps(nested, cache, fixed);
        }
    }
}

fn map_cache() -> anyhow::Result<HashMap<Url, String>> {
    let mut hashes = HashMap::new();

    let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5");

    for entry in WalkDir::new(content_path) {
        let entry = entry?;

        if entry.file_type().is_file() {
            let content = fs::read_to_string(entry.path())?;
            let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?;

            hashes.insert(key.metadata.url, key.integrity);
        }
    }

    Ok(hashes)
}

fn main() -> anyhow::Result<()> {
    let args = env::args().collect::<Vec<_>>();

@@ -76,12 +185,25 @@ fn main() -> anyhow::Result<()> {
    if args[1] == "--fixup-lockfile" {
        let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?;

        if let Some(fixed) = fixup_lockfile(lock)? {
        let cache = cache_map_path()
            .map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?))
            .transpose()?;

        if let Some(fixed) = fixup_lockfile(lock, &cache)? {
            println!("Fixing lockfile");

            fs::write(&args[2], serde_json::to_string(&fixed)?)?;
        }

        return Ok(());
    } else if args[1] == "--map-cache" {
        let map = map_cache()?;

        fs::write(
            cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"),
            serde_json::to_string(&map)?,
        )?;

        return Ok(());
    }

@@ -133,6 +255,8 @@ fn main() -> anyhow::Result<()> {

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::fixup_lockfile;
    use serde_json::json;

@@ -147,12 +271,20 @@ mod tests {
                },
                "foo": {
                    "resolved": "https://github.com/NixOS/nixpkgs",
                    "integrity": "aaa"
                    "integrity": "sha1-aaa"
                },
                "bar": {
                    "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
                    "integrity": "bbb"
                }
                    "integrity": "sha512-aaa"
                },
                "foo-bad": {
                    "resolved": "foo",
                    "integrity": "sha1-foo"
                },
                "foo-good": {
                    "resolved": "foo",
                    "integrity": "sha512-foo"
                },
            }
        });

@@ -165,22 +297,112 @@ mod tests {
                },
                "foo": {
                    "resolved": "https://github.com/NixOS/nixpkgs",
                    "integrity": "aaa"
                    "integrity": ""
                },
                "bar": {
                    "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
                }
                },
                "foo-bad": {
                    "resolved": "foo",
                    "integrity": "sha512-foo"
                },
                "foo-good": {
                    "resolved": "foo",
                    "integrity": "sha512-foo"
                },
            }
        });

        let mut hashes = HashMap::new();

        hashes.insert(
            String::from("https://github.com/NixOS/nixpkgs"),
            String::new(),
        );

        hashes.insert(
            String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
            String::new(),
        );

        hashes.insert(String::from("foo"), String::from("sha512-foo"));

        assert_eq!(
            fixup_lockfile(input.as_object().unwrap().clone())?,
            fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
            Some(expected.as_object().unwrap().clone())
        );

        Ok(())
    }

    #[test]
    fn lockfile_v1_fixup() -> anyhow::Result<()> {
        let input = json!({
            "lockfileVersion": 1,
            "name": "foo",
            "dependencies": {
                "foo": {
                    "resolved": "https://github.com/NixOS/nixpkgs",
                    "integrity": "sha512-aaa"
                },
                "foo-good": {
                    "resolved": "foo",
                    "integrity": "sha512-foo"
                },
                "bar": {
                    "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
                    "integrity": "sha512-bbb",
                    "dependencies": {
                        "foo-bad": {
                            "resolved": "foo",
                            "integrity": "sha1-foo"
                        },
                    },
                },
            }
        });

        let expected = json!({
            "lockfileVersion": 1,
            "name": "foo",
            "dependencies": {
                "foo": {
                    "resolved": "https://github.com/NixOS/nixpkgs",
                    "integrity": ""
                },
                "foo-good": {
                    "resolved": "foo",
                    "integrity": "sha512-foo"
                },
                "bar": {
                    "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git",
                    "dependencies": {
                        "foo-bad": {
                            "resolved": "foo",
                            "integrity": "sha512-foo"
                        },
                    },
                },
            }
        });

        let mut hashes = HashMap::new();

        hashes.insert(
            String::from("https://github.com/NixOS/nixpkgs"),
            String::new(),
        );

        hashes.insert(
            String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"),
            String::new(),
        );

        hashes.insert(String::from("foo"), String::from("sha512-foo"));

        assert_eq!(
            fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?,
            None
            fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?,
            Some(expected.as_object().unwrap().clone())
        );

        Ok(())
Loading