Unverified Commit fc28c5e5 authored by Silvan Mosberger's avatar Silvan Mosberger Committed by GitHub
Browse files

Merge pull request #259065 from tweag/fileset.difference

`lib.fileset.difference`: init
parents 1c7f17f3 50df7f97
Loading
Loading
Loading
Loading
+53 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ let
    _fileFilter
    _printFileset
    _intersection
    _difference
    ;

  inherit (builtins)
@@ -368,6 +369,58 @@ If a directory does not recursively contain any file, it is omitted from the sto
      (elemAt filesets 0)
      (elemAt filesets 1);

  /*
    The file set containing all files from the first file set that are not in the second file set.
    See also [Difference (set theory)](https://en.wikipedia.org/wiki/Complement_(set_theory)#Relative_complement).

    The given file sets are evaluated as lazily as possible,
    with the first argument being evaluated first if needed.

    Type:
      difference :: FileSet -> FileSet -> FileSet

    Example:
      # Create a file set containing all files from the current directory,
      # except ones under ./tests
      difference ./. ./tests

      let
        # A set of Nix-related files
        nixFiles = unions [ ./default.nix ./nix ./tests/default.nix ];
      in
      # Create a file set containing all files under ./tests, except ones in `nixFiles`,
      # meaning everything under ./tests except ./tests/default.nix
      difference ./tests nixFiles
  */
  difference =
    # The positive file set.
    # Only files that are part of this file set can end up in the result.
    #
    # A path is also accepted for this argument,
    # it gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
    positive:
    # The negative file set.
    # Files that are part of this file set never end up in the result.
    #
    # A path is also accepted for this argument,
    # it gets [implicitly coerced to a file set](#sec-fileset-path-coercion).
    negative:
    let
      # Coerce both arguments in a single `_coerceMany` call.
      # NOTE(review): `context` presumably names the offending argument in error messages,
      # confirm in `_coerceMany`.
      coerced = _coerceMany "lib.fileset.difference" [
        { context = "first argument (positive set)"; value = positive; }
        { context = "second argument (negative set)"; value = negative; }
      ];
      positiveSet = elemAt coerced 0;
      negativeSet = elemAt coerced 1;
    in
    _difference positiveSet negativeSet;

  /*
    Incrementally evaluate and trace a file set in a pretty way.
    This function is only intended for debugging purposes.
+80 −0
Original line number Diff line number Diff line
@@ -651,6 +651,86 @@ rec {
      # In all other cases it's the rhs
      rhs;

  # Subtract the file set `negative` from the file set `positive`.
  # Both arguments must already be coerced and validated to be in the same filesystem root.
  # Type: Fileset -> Fileset -> Fileset
  _difference = positive: negative:
    let
      positiveComponents = positive._internalBaseComponents;
      negativeComponents = negative._internalBaseComponents;

      # Length of the base components prefix shared by both file sets, where the prefix is e.g.
      # (/foo/bar, /foo/bar/baz) -> /foo/bar
      # (/foo/bar, /foo/baz) -> /foo
      # TODO: Have a `lib.lists.commonPrefixLength` function such that we don't need the list allocation from commonPrefix here
      sharedLength = length (commonPrefix positiveComponents negativeComponents);

      # Two filesetTree's can only be subtracted from each other if they have the same base,
      # so this is the negative filesetTree expressed relative to the base path of the positive file set.
      # Examples:
      # `difference /foo /foo/bar` gives `{ bar = "directory"; }`,
      #   since below /foo the negative file set only includes `bar`
      # `difference /foo/bar /foo` gives `"directory"`,
      #   since below /foo/bar the negative file set includes everything
      # `difference /foo /bar` gives `null`,
      #   since below /foo the negative file set includes nothing
      rebasedNegativeTree =
        if sharedLength == length positiveComponents then
          # The shared prefix is the entire positive base, so the negative base is equal or longer.
          # The base of the negative tree has to be _shortened_ to the positive base,
          # e.g. from /foo/bar to /foo for `difference /foo /foo/bar`
          _shortenTreeBase positiveComponents negative
        else if sharedLength == length negativeComponents then
          # The shared prefix is the entire negative base, so the positive base is longer.
          # The base of the negative tree has to be _lengthened_ to the positive base,
          # e.g. from /foo to /foo/bar for `difference /foo/bar /foo`
          _lengthenTreeBase positiveComponents negative
        else
          # Neither base is a prefix of the other, so the two file sets don't overlap
          # and there is nothing to remove from the positive one,
          # e.g. `difference /foo /bar` is the same as `/foo`
          null;
    in
    if positive._internalIsEmptyWithoutBase then
      # Nothing to subtract from, the result can't contain any files
      _emptyWithoutBase
    else if negative._internalIsEmptyWithoutBase then
      # Nothing to subtract, the positive file set is returned as is
      positive
    else
      # The result keeps the base of the positive file set,
      # since only files from the positive side can be included
      _create positive._internalBase (
        _differenceTree
          positive._internalBase
          positive._internalTree
          rebasedNegativeTree
      );

  # Subtract the filesetTree `rhs` from the filesetTree `lhs`, both relative to `path`
  # Type: Path -> filesetTree -> filesetTree -> filesetTree
  _differenceTree = path: lhs: rhs:
    let
      # Difference for the single entry `name` below `path`;
      # an entry that `rhs` doesn't have is subtracted as null (no files)
      subtractEntry = name: lhsSubtree:
        _differenceTree (path + "/${name}") lhsSubtree (rhs.${name} or null);
    in
    if lhs == null then
      # The left hand side has no files, so the result has none either
      null
    else if isString rhs then
      # The right hand side includes all files, so everything gets removed
      null
    else if rhs == null then
      # The right hand side has no files, so the left hand side stays untouched
      lhs
    else
      # Otherwise recurse into each entry of the left hand side
      # NOTE(review): `_directoryEntries` is defined elsewhere; presumably it expands `lhs`
      # into an attribute set with one attribute per entry of `path`, confirm there
      mapAttrs subtractEntry (_directoryEntries path lhs);

  _fileFilter = predicate: fileset:
    let
      recurse = path: tree:
+98 −0
Original line number Diff line number Diff line
@@ -684,6 +684,104 @@ tree=(
)
checkFileset 'intersection (unions [ ./a/b ./c/d ./c/e ]) (unions [ ./a ./c/d/f ./c/e ])'

## Difference

# NOTE(review): `tree`, `checkFileset`, `expectEqual` and `expectFailure` are defined elsewhere in this script.
# Judging by the cases below, `[path]=1` marks a file expected in the result and `[path]=0` one expected to be absent;
# confirm against the definition of `checkFileset`.

# Subtracting something from itself results in nothing
tree=(
    [a]=0
)
checkFileset 'difference ./. ./.'

# The tree of the second argument should not be evaluated if not needed
# `tree` is not reassigned here, so these are checked against the same expectation as above ([a]=0)
# First case: the positive side is the empty file set without a base
checkFileset 'difference _emptyWithoutBase (_create ./. (abort "This should not be used!"))'
# Second case: the positive side has a base but a null tree
checkFileset 'difference (_create ./. null) (_create ./. (abort "This should not be used!"))'

# Subtracting nothing gives the same thing back
tree=(
    [a]=1
)
# Both spellings of an empty negative side: without a base, and with a base but a null tree
checkFileset 'difference ./. _emptyWithoutBase'
checkFileset 'difference ./. (_create ./. null)'

# Subtracting doesn't influence the base path
# With `root = ./a`, subtracting the sibling ./b must give the same result as plain ./a
mkdir a b
touch {a,b}/x
expectEqual 'toSource { root = ./a; fileset = difference ./a ./b; }' 'toSource { root = ./a; fileset = ./a; }'
# Clean the working directory for the next test
rm -rf -- *

# Also not the other way around
# `difference ./. ./a` is used with `root = ./a`, and `toSource` is expected to fail with the message below
mkdir a
expectFailure 'toSource { root = ./a; fileset = difference ./. ./a; }' 'lib.fileset.toSource: `fileset` could contain files in '"$work"', which is not under the `root` \('"$work"'/a\). Potential solutions:
\s*- Set `root` to '"$work"' or any directory higher up. This changes the layout of the resulting store path.
\s*- Set `fileset` to a file set that cannot contain files outside the `root` \('"$work"'/a\). This could change the files included in the result.'
# Clean the working directory for the next test
rm -rf -- *

# Difference actually works
# We test all combinations of ./., ./a, ./a/x and ./b
# Each `tree=( ... )` assignment states the expectation for the `checkFileset` calls that follow it,
# up to the next assignment (16 calls in total, one per ordered pair of the four paths)

# The negative side covers the positive side entirely: nothing is left
tree=(
    [a/x]=0
    [a/y]=0
    [b]=0
    [c]=0
)
checkFileset 'difference ./. ./.'
checkFileset 'difference ./a ./.'
checkFileset 'difference ./a/x ./.'
checkFileset 'difference ./b ./.'
checkFileset 'difference ./a ./a'
checkFileset 'difference ./a/x ./a'
checkFileset 'difference ./a/x ./a/x'
checkFileset 'difference ./b ./b'
# Everything except the directory ./a
tree=(
    [a/x]=0
    [a/y]=0
    [b]=1
    [c]=1
)
checkFileset 'difference ./. ./a'
# ./a minus the unrelated ./b: all of ./a is kept
tree=(
    [a/x]=1
    [a/y]=1
    [b]=0
    [c]=0
)
checkFileset 'difference ./a ./b'
# ./a/x minus the unrelated ./b: only ./a/x is kept
tree=(
    [a/x]=1
    [a/y]=0
    [b]=0
    [c]=0
)
checkFileset 'difference ./a/x ./b'
# ./a minus one file inside of it: only the other file is kept
tree=(
    [a/x]=0
    [a/y]=1
    [b]=0
    [c]=0
)
checkFileset 'difference ./a ./a/x'
# ./b minus something unrelated under ./a: only ./b is kept
tree=(
    [a/x]=0
    [a/y]=0
    [b]=1
    [c]=0
)
checkFileset 'difference ./b ./a'
checkFileset 'difference ./b ./a/x'
# Everything except the nested file ./a/x
tree=(
    [a/x]=0
    [a/y]=1
    [b]=1
    [c]=1
)
checkFileset 'difference ./. ./a/x'
# Everything except ./b
tree=(
    [a/x]=1
    [a/y]=1
    [b]=0
    [c]=1
)
checkFileset 'difference ./. ./b'

## File filter