Unverified Commit cfd83c93 authored by Robert Hensing's avatar Robert Hensing Committed by GitHub
Browse files

Merge pull request #261732 from tweag/fileset.fromSource

`lib.fileset.fromSource`: init
parents e7a5bb59 e07e80e8
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -244,5 +244,4 @@ Here's a list of places in the library that need to be updated in the future:
- > The file set library is currently somewhat limited but is being expanded to include more functions over time.

  in [the manual](../../doc/functions/fileset.section.md)
- If/Once a function to convert `lib.sources` values into file sets exists, the `_coerce` and `toSource` functions should be updated to mention that function in the error when such a value is passed
- If/Once a function exists that can optionally include a path depending on whether it exists, the error message for the path not existing in `_coerce` should mention the new function
+77 −1
Original line number Diff line number Diff line
@@ -3,8 +3,10 @@ let

  inherit (import ./internal.nix { inherit lib; })
    _coerce
    _singleton
    _coerceMany
    _toSourceFilter
    _fromSourceFilter
    _unionMany
    _fileFilter
    _printFileset
@@ -152,7 +154,12 @@ in {
      sourceFilter = _toSourceFilter fileset;
    in
    if ! isPath root then
      if isStringLike root then
      if root ? _isLibCleanSourceWith then
        throw ''
          lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
              To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
              Note that this only works for sources created from paths.''
      else if isStringLike root then
        throw ''
          lib.fileset.toSource: `root` (${toString root}) is a string-like value, but it should be a path instead.
              Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@@ -188,6 +195,75 @@ in {
        filter = sourceFilter;
      };

  /*
  Create a file set with the same files as a `lib.sources`-based value.
  This does not import any of the files into the store.

  This can be used to gradually migrate from `lib.sources`-based filtering to `lib.fileset`.

  A file set can be turned back into a source using [`toSource`](#function-library-lib.fileset.toSource).

  :::{.note}
  File sets cannot represent empty directories.
  Turning the result of this function back into a source using `toSource` will therefore not preserve empty directories.
  :::

  Type:
    fromSource :: SourceLike -> FileSet

  Example:
    # There's no cleanSource-like function for file sets yet,
    # but we can just convert cleanSource to a file set and use it that way
    toSource {
      root = ./.;
      fileset = fromSource (lib.sources.cleanSource ./.);
    }

    # Keeping a previous sourceByRegex (which could be migrated to `lib.fileset.unions`),
    # but removing a subdirectory using file set functions
    difference
      (fromSource (lib.sources.sourceByRegex ./. [
        "^README\.md$"
        # This regex includes everything in ./doc
        "^doc(/.*)?$"
      ])
      ./doc/generated

    # Use cleanSource, but limit it to only include ./Makefile and files under ./src
    intersection
      (fromSource (lib.sources.cleanSource ./.))
      (unions [
        ./Makefile
        ./src
      ]);
  */
  fromSource = source:
    let
      # This function uses `._isLibCleanSourceWith`, `.origSrc` and `.filter`,
      # which are technically internal to lib.sources,
      # but we'll allow this since both libraries are in the same code base
      # and this function is a bridge between them.
      isFiltered = source ? _isLibCleanSourceWith;
      path = if isFiltered then source.origSrc else source;
    in
    # We can only support sources created from paths
    if ! isPath path then
      if isStringLike path then
        throw ''
          lib.fileset.fromSource: The source origin of the argument is a string-like value ("${toString path}"), but it should be a path instead.
              Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.''
      else
        throw ''
          lib.fileset.fromSource: The source origin of the argument is of type ${typeOf path}, but it should be a path instead.''
    else if ! pathExists path then
      throw ''
        lib.fileset.fromSource: The source origin (${toString path}) of the argument does not exist.''
    else if isFiltered then
      _fromSourceFilter path source.filter
    else
      # If there's no filter, no need to run the expensive conversion, all subpaths will be included
      _singleton path;

  /*
    The file set containing all files that are in either of two given file sets.
    This is the same as [`unions`](#function-library-lib.fileset.unions),
+59 −1
Original line number Diff line number Diff line
@@ -167,7 +167,12 @@ rec {
      else
        value
    else if ! isPath value then
      if isStringLike value then
      if value ? _isLibCleanSourceWith then
        throw ''
          ${context} is a `lib.sources`-based value, but it should be a file set or a path instead.
              To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
              Note that this only works for sources created from paths.''
      else if isStringLike value then
        throw ''
          ${context} ("${toString value}") is a string-like value, but it should be a file set or a path instead.
              Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.''
@@ -470,6 +475,59 @@ rec {
    else
      nonEmpty;

  # Turn a builtins.filterSource-based source filter on a root path into a file set
  # containing only files included by the filter.
  # The filter is lazily called as necessary to determine whether paths are included
  # Type: Path -> (String -> String -> Bool) -> fileset
  _fromSourceFilter = root: sourceFilter:
    let
      # During the recursion we need to track both:
      # - The path value such that we can safely call `readDir` on it
      # - The path string value such that we can correctly call the `filter` with it
      #
      # While we could just recurse with the path value,
      # this would then require converting it to a path string for every path,
      # which is a fairly expensive operation

      # Create a file set from a directory entry
      fromDirEntry = path: pathString: type:
        # The filter needs to run on the path as a string
        if ! sourceFilter pathString type then
          null
        else if type == "directory" then
          fromDir path pathString
        else
          type;

      # Create a file set from a directory
      fromDir = path: pathString:
        mapAttrs
          # This looks a bit funny, but we need both the path-based and the path string-based values
          (name: fromDirEntry (path + "/${name}") (pathString + "/${name}"))
          # We need to readDir on the path value, because reading on a path string
          # would be unspecified if there are multiple filesystem roots
          (readDir path);

      rootPathType = pathType root;

      # We need to convert the path to a string to imitate what builtins.path calls the filter function with.
      # We don't want to rely on `toString` for this though because it's not very well defined, see ../path/README.md
      # So instead we use `lib.path.splitRoot` to safely deconstruct the path into its filesystem root and subpath
      # We don't need the filesystem root though, builtins.path doesn't expose that in any way to the filter.
      # So we only need the components, which we then turn into a string as one would expect.
      rootString = "/" + concatStringsSep "/" (components (splitRoot root).subpath);
    in
    if rootPathType == "directory" then
      # We imitate builtins.path not calling the filter on the root path
      _create root (fromDir root rootString)
    else
      # Direct files are always included by builtins.path without calling the filter
      # But we need to lift up the base path to its parent to satisfy the base path invariant
      _create (dirOf root)
        {
          ${baseNameOf root} = rootPathType;
        };

  # Transforms the filesetTree of a file set to a shorter base path, e.g.
  # _shortenTreeBase [ "foo" ] (_create /foo/bar null)
  # => { bar = null; }
+262 −26
Original line number Diff line number Diff line
#!/usr/bin/env bash
# shellcheck disable=SC2016
# shellcheck disable=SC2317
# shellcheck disable=SC2192

# Tests lib.fileset
# Run:
@@ -224,6 +226,43 @@ withFileMonitor() {
    fi
}


# Create the tree structure declared in the tree variable, usage:
#
# tree=(
#   [a/b] =   # Declare that file       a/b should exist
#   [c/a] =   # Declare that file       c/a should exist
#   [c/d/]=   # Declare that directory c/d/ should exist
# )
# createTree
declare -A tree
createTree() {
    # Track which paths need to be created
    local -a dirsToCreate=()
    local -a filesToCreate=()
    for p in "${!tree[@]}"; do
        # If keys end with a `/` we treat them as directories, otherwise files
        if [[ "$p" =~ /$ ]]; then
            dirsToCreate+=("$p")
        else
            filesToCreate+=("$p")
        fi
    done

    # Create all the necessary paths.
    # This is done with only a fixed number of processes,
    # in order to not be too slow
    # Though this does mean we're a bit limited with how many files can be created
    if (( ${#dirsToCreate[@]} != 0 )); then
        mkdir -p "${dirsToCreate[@]}"
    fi
    if (( ${#filesToCreate[@]} != 0 )); then
        readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}")
        mkdir -p "${parentsToCreate[@]}"
        touch "${filesToCreate[@]}"
    fi
}

# Check whether a file set includes/excludes declared paths as expected, usage:
#
# tree=(
@@ -232,34 +271,26 @@ withFileMonitor() {
#   [c/d/]=   # Declare that directory c/d/ should exist and expect it to be excluded in the store path
# )
# checkFileset './a' # Pass the fileset as the argument
declare -A tree
checkFileset() {
    # New subshell so that we can have a separate trap handler, see `trap` below
    local fileset=$1

    # Create the tree
    createTree

    # Process the tree into separate arrays for included paths, excluded paths and excluded files.
    local -a included=()
    local -a excluded=()
    local -a excludedFiles=()
    # Track which paths need to be created
    local -a dirsToCreate=()
    local -a filesToCreate=()
    for p in "${!tree[@]}"; do
        # If keys end with a `/` we treat them as directories, otherwise files
        if [[ "$p" =~ /$ ]]; then
            dirsToCreate+=("$p")
            isFile=
        else
            filesToCreate+=("$p")
            isFile=1
        fi
        case "${tree[$p]}" in
            1)
                included+=("$p")
                ;;
            0)
                excluded+=("$p")
                if [[ -n "$isFile" ]]; then
                # If keys end with a `/` we treat them as directories, otherwise files
                if [[ ! "$p" =~ /$ ]]; then
                    excludedFiles+=("$p")
                fi
                ;;
@@ -268,19 +299,6 @@ checkFileset() {
        esac
    done

    # Create all the necessary paths.
    # This is done with only a fixed number of processes,
    # in order to not be too slow
    # Though this does mean we're a bit limited with how many files can be created
    if (( ${#dirsToCreate[@]} != 0 )); then
        mkdir -p "${dirsToCreate[@]}"
    fi
    if (( ${#filesToCreate[@]} != 0 )); then
        readarray -d '' -t parentsToCreate < <(dirname -z "${filesToCreate[@]}")
        mkdir -p "${parentsToCreate[@]}"
        touch "${filesToCreate[@]}"
    fi

    expression="toSource { root = ./.; fileset = $fileset; }"

    # We don't have lambda's in bash unfortunately,
@@ -321,6 +339,10 @@ checkFileset() {
expectFailure 'toSource { root = "/nix/store/foobar"; fileset = ./.; }' 'lib.fileset.toSource: `root` \(/nix/store/foobar\) is a string-like value, but it should be a path instead.
\s*Paths in strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'

expectFailure 'toSource { root = cleanSourceWith { src = ./.; }; fileset = ./.; }' 'lib.fileset.toSource: `root` is a `lib.sources`-based value, but it should be a path instead.
\s*To use a `lib.sources`-based value, convert it to a file set using `lib.fileset.fromSource` and pass it as `fileset`.
\s*Note that this only works for sources created from paths.'

# Only paths are accepted as `root`
expectFailure 'toSource { root = 10; fileset = ./.; }' 'lib.fileset.toSource: `root` is of type int, but it should be a path instead.'

@@ -365,6 +387,9 @@ rm -rf -- *
expectFailure 'toSource { root = ./.; fileset = 10; }' 'lib.fileset.toSource: `fileset` is of type int, but it should be a file set or a path instead.'
expectFailure 'toSource { root = ./.; fileset = "/some/path"; }' 'lib.fileset.toSource: `fileset` \("/some/path"\) is a string-like value, but it should be a file set or a path instead.
\s*Paths represented as strings are not supported by `lib.fileset`, use `lib.sources` or derivations instead.'
expectFailure 'toSource { root = ./.; fileset = cleanSourceWith { src = ./.; }; }' 'lib.fileset.toSource: `fileset` is a `lib.sources`-based value, but it should be a file set or a path instead.
\s*To convert a `lib.sources`-based value to a file set you can use `lib.fileset.fromSource`.
\s*Note that this only works for sources created from paths.'

# Path coercion errors for non-existent paths
expectFailure 'toSource { root = ./.; fileset = ./a; }' 'lib.fileset.toSource: `fileset` \('"$work"'/a\) is a path that does not exist.'
@@ -995,6 +1020,217 @@ touch 0 "${filesToCreate[@]}"
expectTrace 'unions (mapAttrsToList (n: _: ./. + "/${n}") (removeAttrs (builtins.readDir ./.) [ "0" ]))' "$expectedTrace"
rm -rf -- *

## lib.fileset.fromSource

# Check error messages
expectFailure 'fromSource null' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'

expectFailure 'fromSource (lib.cleanSource "")' 'lib.fileset.fromSource: The source origin of the argument is a string-like value \(""\), but it should be a path instead.
\s*Sources created from paths in strings cannot be turned into file sets, use `lib.sources` or derivations instead.'

expectFailure 'fromSource (lib.cleanSource null)' 'lib.fileset.fromSource: The source origin of the argument is of type null, but it should be a path instead.'

# fromSource on a path works and is the same as coercing that path
mkdir a
touch a/b c
expectEqual 'trace (fromSource ./.) null' 'trace ./. null'
rm -rf -- *

# Check that converting to a file set doesn't read the included files
mkdir a
touch a/b
run() {
    expectEqual "trace (fromSource (lib.cleanSourceWith { src = ./a; })) null" "builtins.trace \"$work/a (all files in directory)\" null"
    rm a/b
}
withFileMonitor run a/b
rm -rf -- *

# Check that converting to a file set doesn't read entries for directories that are filtered out
mkdir -p a/b
touch a/b/c
run() {
    expectEqual "trace (fromSource (lib.cleanSourceWith {
      src = ./a;
      filter = pathString: type: false;
    })) null" "builtins.trace \"(empty)\" null"
    rm a/b/c
    rmdir a/b
}
withFileMonitor run a/b
rm -rf -- *

# The filter is not needed on empty directories
expectEqual 'trace (fromSource (lib.cleanSourceWith {
  src = ./.;
  filter = abort "filter should not be needed";
})) null' 'trace _emptyWithoutBase null'

# Single files also work
touch a b
expectEqual 'trace (fromSource (cleanSourceWith { src = ./a; })) null' 'trace ./a null'
rm -rf -- *

# For a tree assigning each subpath true/false,
# check whether a source filter with those results includes the same files
# as a file set created using fromSource. Usage:
#
# tree=(
#   [a]=1  # ./a is a file and the filter should return true for it
#   [b/]=0 # ./b is a directory and the filter should return false for it
# )
# checkSource
checkSource() {
    createTree

    # Serialise the tree as JSON (there's only minimal savings with jq,
    # and we don't need to handle escapes)
    {
        echo "{"
        first=1
        for p in "${!tree[@]}"; do
            if [[ -z "$first" ]]; then
                echo ","
            else
                first=
            fi
            echo "\"$p\":"
            case "${tree[$p]}" in
                1)
                    echo "true"
                    ;;
                0)
                    echo "false"
                    ;;
                *)
                    die "Unsupported tree value: ${tree[$p]}"
            esac
        done
        echo "}"
    } > "$tmp/tree.json"

    # An expression to create a source value with a filter matching the tree
    sourceExpr='
      let
        tree = importJSON '"$tmp"'/tree.json;
      in
      cleanSourceWith {
        src = ./.;
        filter =
          pathString: type:
          let
            stripped = removePrefix (toString ./. + "/") pathString;
            key = stripped + optionalString (type == "directory") "/";
          in
          tree.${key} or
            (throw "tree key ${key} missing");
      }
    '

    filesetExpr='
      toSource {
        root = ./.;
        fileset = fromSource ('"$sourceExpr"');
      }
    '

    # Turn both into store paths
    sourceStorePath=$(expectStorePath "$sourceExpr")
    filesetStorePath=$(expectStorePath "$filesetExpr")

    # Loop through each path in the tree
    while IFS= read -r -d $'\0' subpath; do
        if [[ ! -e "$sourceStorePath"/"$subpath" ]]; then
            # If it's not in the source store path, it's also not in the file set store path
            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $expr doesn't contain $subpath, but the corresponding store path $filesetStorePath created via fromSource does contain $subpath"
            fi
        elif [[ -z "$(find "$sourceStorePath"/"$subpath" -type f)" ]]; then
            # If it's an empty directory in the source store path, it shouldn't be in the file set store path
            if [[ -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $expr contains the path $subpath without any files, but the corresponding store path $filesetStorePath created via fromSource didn't omit it"
            fi
        else
            # If it's non-empty directory or a file, it should be in the file set store path
            if [[ ! -e "$filesetStorePath"/"$subpath" ]]; then
                die "The store path $sourceStorePath created by $expr contains the non-empty path $subpath, but the corresponding store path $filesetStorePath created via fromSource doesn't include it"
            fi
        fi
    done < <(find . -mindepth 1 -print0)

    rm -rf -- *
}

# Check whether the filter is evaluated correctly
tree=(
    [a]=
    [b/]=
    [b/c]=
    [b/d]=
    [e/]=
    [e/e/]=
)
# We fill out the above tree values with all possible combinations of 0 and 1
# Then check whether a filter based on those return values gets turned into the corresponding file set
for i in $(seq 0 $((2 ** ${#tree[@]} - 1 ))); do
    for p in "${!tree[@]}"; do
        tree[$p]=$(( i % 2 ))
        (( i /= 2 )) || true
    done
    checkSource
done

# The filter is called with the same arguments in the same order
mkdir a e
touch a/b a/c d e
expectEqual '
  trace (fromSource (cleanSourceWith {
    src = ./.;
    filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
  })) null
' '
  builtins.seq (cleanSourceWith {
    src = ./.;
    filter = pathString: type: builtins.trace "${pathString} ${toString type}" true;
  }).outPath
  builtins.trace "'"$work"' (all files in directory)"
  null
'
rm -rf -- *

# Test that if a directory is not included, the filter isn't called on its contents
mkdir a b
touch a/c b/d
expectEqual 'trace (fromSource (cleanSourceWith {
  src = ./.;
  filter = pathString: type:
    if pathString == toString ./a then
      false
    else if pathString == toString ./b then
      true
    else if pathString == toString ./b/d then
      true
    else
      abort "This filter should not be called with path ${pathString}";
})) null' 'trace (_create ./. { b = "directory"; }) null'
rm -rf -- *

# The filter is called lazily:
# If a later say intersection removes a part of the tree, the filter won't run on it
mkdir a d
touch a/{b,c} d/e
expectEqual 'trace (intersection ./a (fromSource (lib.cleanSourceWith {
  src = ./.;
  filter = pathString: type:
    if pathString == toString ./a || pathString == toString ./a/b then
      true
    else if pathString == toString ./a/c then
      false
    else
      abort "filter should not be called on ${pathString}";
}))) null' 'trace ./a/b null'
rm -rf -- *

# TODO: Once we have combinators and a property testing library, derive property tests from https://en.wikipedia.org/wiki/Algebra_of_sets

echo >&2 tests ok