Commit 356a4b43 authored by Michael Spencer's avatar Michael Spencer Committed by Michael Spencer
Browse files

[clang][clang-scan-deps] Aggregate the full dependency information.

Differential Revision: https://reviews.llvm.org/D70268

This is a recommit of f978ea49 with a fix for the PowerPC failure.

The issue was that:
* `CompilerInstance::ExecuteAction` calls
  `getTarget().adjust(getLangOpts());`.
* `PPCTargetInfo::adjust` changes `LangOptions::HasAltivec`.
* This happens after the first few calls to `getModuleHash`.

There’s even a FIXME saying:
```
  // FIXME: We shouldn't need to do this, the target should be immutable once
  // created. This complexity should be lifted elsewhere.
```

This only showed up on PowerPC because it's one of the few targets that
almost always changes a hashed langopt.

I looked into addressing the fixme, but that would be a much larger
change, and it's not the only thing that happens in `ExecuteAction` that
can change the module context hash. Instead I changed the code to not
call `getModuleHash` until after the hashed options have been modified
in `ExecuteAction`.
parent 25315359
Loading
Loading
Loading
Loading
+72 −1
Original line number Diff line number Diff line
@@ -11,13 +11,69 @@

#include "clang/Tooling/DependencyScanning/DependencyScanningService.h"
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
#include "clang/Tooling/JSONCompilationDatabase.h"
#include "llvm/ADT/StringSet.h"
#include <string>

namespace clang{
namespace tooling{
namespace dependencies{

/// The full dependencies and module graph for a specific input.
struct FullDependencies {
  /// The name of the C++20 module this translation unit exports. This may
  /// include `:` for C++20 module partitions.
  ///
  /// If the translation unit is not a module then this will be empty.
  std::string ExportedModuleName;

  /// The context hash represents the set of compiler options that may make one
  /// version of a module incompatible with another. This includes things like
  /// language mode, predefined macros, header search paths, etc...
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// A collection of absolute paths to files that this translation unit
  /// directly depends on, not including transitive dependencies.
  std::vector<std::string> FileDeps;

  /// A list of modules this translation unit directly depends on, not including
  /// transitive dependencies.
  ///
  /// This may include modules with a different context hash when it can be
  /// determined that the differences are benign for this compilation.
  std::vector<ClangModuleDep> ClangModuleDeps;

  /// A partial additional set of command line arguments that can be used to
  /// build this translation unit.
  ///
  /// Call \c getAdditionalCommandLine() to get a command line suitable for
  /// appending to the original command line to pass to clang.
  std::vector<std::string> AdditionalNonPathCommandLine;

  /// Gets the full additional command line suitable for appending to the
  /// original command line to pass to clang.
  ///
  /// \param LookupPCMPath this function is called to fill in `-fmodule-file=`
  ///                      flags and for the `-o` flag. It needs to return a
  ///                      path for where the PCM for the given module is to
  ///                      be located.
  /// \param LookupModuleDeps this function is called to collect the full
  ///                         transitive set of dependencies for this
  ///                         compilation.
  std::vector<std::string> getAdditionalCommandLine(
      std::function<StringRef(ClangModuleDep)> LookupPCMPath,
      std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const;
};

/// The value produced by \c DependencyScanningTool::getFullDependencies: the
/// dependency information for the scanned input together with the modules
/// reported while scanning it.
struct FullDependenciesResult {
  /// Dependency information for the scanned translation unit itself.
  FullDependencies FullDeps;
  /// The modules reported during the scan, leaving out those whose key was
  /// already present in the \c AlreadySeen set given to
  /// \c getFullDependencies.
  std::vector<ModuleDeps> DiscoveredModules;
};

/// The high-level implementation of the dependency discovery tool that runs on
/// an individual worker thread.
class DependencyScanningTool {
@@ -35,8 +91,23 @@ public:
  getDependencyFile(const tooling::CompilationDatabase &Compilations,
                    StringRef CWD);

  /// Collect the full module dependency graph for the input, ignoring any
  /// modules which have already been seen.
  ///
  /// \param AlreadySeen this is used to not report modules that have previously
  ///                    been reported. Use the same `llvm::StringSet<>` for all
  ///                    calls to `getFullDependencies` for a single
  ///                    `DependencyScanningTool` for a single build. Use a
  ///                    different one for different tools, and clear it between
  ///                    builds.
  ///
  /// \returns a \c StringError with the diagnostic output if clang errors
  /// occurred, \c FullDependenciesResult otherwise.
  llvm::Expected<FullDependenciesResult>
  getFullDependencies(const tooling::CompilationDatabase &Compilations,
                      StringRef CWD, const llvm::StringSet<> &AlreadySeen);

private:
  const ScanningOutputFormat Format;
  DependencyScanningWorker Worker;
};

+79 −8
Original line number Diff line number Diff line
@@ -28,16 +28,82 @@ namespace dependencies {

class DependencyConsumer;

/// This is used to refer to a specific module.
///
/// See \c ModuleDeps for details about what these members mean.
///
/// Member order matters: instances are brace-initialized positionally as
/// `{ModuleName, ContextHash}`.
struct ClangModuleDep {
  /// The name of the module being referred to.
  std::string ModuleName;
  /// The context hash of the module being referred to.
  std::string ContextHash;
};

/// Dependency information collected for a single module.
struct ModuleDeps {
  /// The name of the module. This may include `:` for C++20 module partitions,
  /// or a header-name for C++20 header units.
  std::string ModuleName;
  // NOTE(review): \c ClangModuleMapFile is declared a second time further
  // down, and \c ModulePCMPath sits alongside the newer
  // \c ImplicitModulePCMPath. These two lines look like leftovers from the
  // previous revision of this struct -- confirm before relying on them.
  std::string ClangModuleMapFile;
  std::string ModulePCMPath;

  /// The context hash of a module represents the set of compiler options that
  /// may make one version of a module incompatible with another. This includes
  /// things like language mode, predefined macros, header search paths, etc...
  ///
  /// Modules with the same name but a different \c ContextHash should be
  /// treated as separate modules for the purpose of a build.
  std::string ContextHash;

  /// The path to the modulemap file which defines this module.
  ///
  /// This can be used to explicitly build this module. This file will
  /// additionally appear in \c FileDeps as a dependency.
  std::string ClangModuleMapFile;

  /// The path to where an implicit build would put the PCM for this module.
  std::string ImplicitModulePCMPath;

  /// A collection of absolute paths to files that this module directly depends
  /// on, not including transitive dependencies.
  llvm::StringSet<> FileDeps;
  // NOTE(review): \c ClangModuleDeps is declared again just below as a
  // vector of \c ClangModuleDep; this string-set declaration looks like a
  // leftover from the previous revision -- confirm.
  llvm::StringSet<> ClangModuleDeps;

  /// A list of modules this module directly depends on, not including
  /// transitive dependencies.
  ///
  /// This may include modules with a different context hash when it can be
  /// determined that the differences are benign for this compilation.
  std::vector<ClangModuleDep> ClangModuleDeps;

  /// A partial command line that can be used to build this module.
  ///
  /// Call \c getFullCommandLine() to get a command line suitable for passing to
  /// clang.
  std::vector<std::string> NonPathCommandLine;

  // Used to track which modules that were discovered were directly imported by
  // the primary TU.
  bool ImportedByMainFile = false;

  /// Gets the full command line suitable for passing to clang.
  ///
  /// \param LookupPCMPath this function is called to fill in `-fmodule-file=`
  ///                      flags and for the `-o` flag. It needs to return a
  ///                      path for where the PCM for the given module is to
  ///                      be located.
  /// \param LookupModuleDeps this function is called to collect the full
  ///                         transitive set of dependencies for this
  ///                         compilation.
  std::vector<std::string> getFullCommandLine(
      std::function<StringRef(ClangModuleDep)> LookupPCMPath,
      std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const;
};

namespace detail {
/// Append the `-fmodule-file=` and `-fmodule-map-file=` arguments for the
/// modules in \c Modules transitively, along with other needed arguments to
/// use explicitly built modules.
///
/// \param Modules the direct module dependencies to start the traversal from.
/// \param LookupPCMPath called for each visited module to get the path used
///                      in its `-fmodule-file=` argument.
/// \param LookupModuleDeps called for each visited module to get its
///                         \c ModuleDeps, whose own \c ClangModuleDeps are
///                         visited in turn.
/// \param Result the argument list that the generated arguments are appended
///               to.
void appendCommonModuleArguments(
    llvm::ArrayRef<ClangModuleDep> Modules,
    std::function<StringRef(ClangModuleDep)> LookupPCMPath,
    std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps,
    std::vector<std::string> &Result);
} // namespace detail

class ModuleDepCollector;

class ModuleDepCollectorPP final : public PPCallbacks {
@@ -54,6 +120,8 @@ public:
                          StringRef SearchPath, StringRef RelativePath,
                          const Module *Imported,
                          SrcMgr::CharacteristicKind FileType) override;
  void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
                    const Module *Imported) override;

  void EndOfMainFile() override;

@@ -62,16 +130,18 @@ private:
  ModuleDepCollector &MDC;
  llvm::DenseSet<const Module *> DirectDeps;

  void handleImport(const Module *Imported);
  void handleTopLevelModule(const Module *M);
  void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD);
  void addModuleDep(const Module *M, ModuleDeps &MD);

  void addDirectDependencies(const Module *Mod);
  void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
                           llvm::DenseSet<const Module *> &AddedModules);
  void addModuleDep(const Module *M, ModuleDeps &MD,
                    llvm::DenseSet<const Module *> &AddedModules);
};

class ModuleDepCollector final : public DependencyCollector {
public:
  ModuleDepCollector(CompilerInstance &I, DependencyConsumer &C);
  ModuleDepCollector(std::unique_ptr<DependencyOutputOptions> Opts,
                     CompilerInstance &I, DependencyConsumer &C);

  void attachToPreprocessor(Preprocessor &PP) override;
  void attachToASTReader(ASTReader &R) override;
@@ -85,6 +155,7 @@ private:
  std::string ContextHash;
  std::vector<std::string> MainDeps;
  std::unordered_map<std::string, ModuleDeps> Deps;
  std::unique_ptr<DependencyOutputOptions> Opts;
};

} // end namespace dependencies
+63 −64
Original line number Diff line number Diff line
@@ -8,24 +8,25 @@

#include "clang/Tooling/DependencyScanning/DependencyScanningTool.h"
#include "clang/Frontend/Utils.h"
#include "llvm/Support/JSON.h"

/// Convert \p Set into a JSON array holding its keys in sorted order.
///
/// Sorting makes the resulting array independent of the set's own iteration
/// order.
static llvm::json::Array toJSONSorted(const llvm::StringSet<> &Set) {
  std::vector<llvm::StringRef> Strings;
  // The final element count is known up front, so allocate exactly once.
  Strings.reserve(Set.size());
  for (const auto &Entry : Set)
    Strings.push_back(Entry.getKey());
  std::sort(Strings.begin(), Strings.end());
  return llvm::json::Array(Strings);
}

namespace clang{
namespace tooling{
namespace dependencies{

/// Build the additional command line for this translation unit.
///
/// The result starts as a copy of \c AdditionalNonPathCommandLine and then
/// has the common module arguments for \c ClangModuleDeps appended to it.
///
/// \param LookupPCMPath forwarded to \c detail::appendCommonModuleArguments
///                      to resolve the PCM path of each module.
/// \param LookupModuleDeps forwarded to
///                         \c detail::appendCommonModuleArguments to resolve
///                         the \c ModuleDeps of each module.
/// \returns the assembled argument list.
std::vector<std::string> FullDependencies::getAdditionalCommandLine(
    std::function<StringRef(ClangModuleDep)> LookupPCMPath,
    std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const {
  std::vector<std::string> Ret = AdditionalNonPathCommandLine;

  // Both callbacks were received by value and are not used again in this
  // function, so hand them over instead of copying them a second time.
  dependencies::detail::appendCommonModuleArguments(
      ClangModuleDeps, std::move(LookupPCMPath), std::move(LookupModuleDeps),
      Ret);

  return Ret;
}

DependencyScanningTool::DependencyScanningTool(
    DependencyScanningService &Service)
    : Format(Service.getFormat()), Worker(Service) {
}
    : Worker(Service) {}

llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
    const tooling::CompilationDatabase &Compilations, StringRef CWD) {
@@ -75,8 +76,33 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
    std::vector<std::string> Dependencies;
  };

  // We expect a single command here because if a source file occurs multiple
  // times in the original CDB, then `computeDependencies` would run the
  // `DependencyScanningAction` once for every time the input occurred in the
  // CDB. Instead we split up the CDB into single command chunks to avoid this
  // behavior.
  assert(Compilations.getAllCompileCommands().size() == 1 &&
         "Expected a compilation database with a single command!");
  std::string Input = Compilations.getAllCompileCommands().front().Filename;

  MakeDependencyPrinterConsumer Consumer;
  auto Result = Worker.computeDependencies(Input, CWD, Compilations, Consumer);
  if (Result)
    return std::move(Result);
  std::string Output;
  Consumer.printDependencies(Output);
  return Output;
}

llvm::Expected<FullDependenciesResult>
DependencyScanningTool::getFullDependencies(
    const tooling::CompilationDatabase &Compilations, StringRef CWD,
    const llvm::StringSet<> &AlreadySeen) {
  class FullDependencyPrinterConsumer : public DependencyConsumer {
  public:
    FullDependencyPrinterConsumer(const llvm::StringSet<> &AlreadySeen)
        : AlreadySeen(AlreadySeen) {}

    void handleFileDependency(const DependencyOutputOptions &Opts,
                              StringRef File) override {
      Dependencies.push_back(File);
@@ -90,55 +116,41 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
      ContextHash = std::move(Hash);
    }

    void printDependencies(std::string &S, StringRef MainFile) {
      // Sort the modules by name to get a deterministic order.
      std::vector<StringRef> Modules;
      for (auto &&Dep : ClangModuleDeps)
        Modules.push_back(Dep.first);
      std::sort(Modules.begin(), Modules.end());
    FullDependenciesResult getFullDependencies() const {
      FullDependencies FD;

      llvm::raw_string_ostream OS(S);
      FD.ContextHash = std::move(ContextHash);

      using namespace llvm::json;
      FD.FileDeps.assign(Dependencies.begin(), Dependencies.end());

      Array Imports;
      for (auto &&ModName : Modules) {
        auto &MD = ClangModuleDeps[ModName];
      for (auto &&M : ClangModuleDeps) {
        auto &MD = M.second;
        if (MD.ImportedByMainFile)
          Imports.push_back(MD.ModuleName);
          FD.ClangModuleDeps.push_back({MD.ModuleName, ContextHash});
      }

      Array Mods;
      for (auto &&ModName : Modules) {
        auto &MD = ClangModuleDeps[ModName];
        Object Mod{
            {"name", MD.ModuleName},
            {"file-deps", toJSONSorted(MD.FileDeps)},
            {"clang-module-deps", toJSONSorted(MD.ClangModuleDeps)},
            {"clang-modulemap-file", MD.ClangModuleMapFile},
        };
        Mods.push_back(std::move(Mod));
      }
      FullDependenciesResult FDR;

      Object O{
          {"input-file", MainFile},
          {"clang-context-hash", ContextHash},
          {"file-deps", Dependencies},
          {"clang-module-deps", std::move(Imports)},
          {"clang-modules", std::move(Mods)},
      };
      for (auto &&M : ClangModuleDeps) {
        // TODO: Avoid handleModuleDependency even being called for modules
        //   we've already seen.
        if (AlreadySeen.count(M.first))
          continue;
        FDR.DiscoveredModules.push_back(std::move(M.second));
      }

      S = llvm::formatv("{0:2},\n", Value(std::move(O))).str();
      return;
      FDR.FullDeps = std::move(FD);
      return FDR;
    }

  private:
    std::vector<std::string> Dependencies;
    std::unordered_map<std::string, ModuleDeps> ClangModuleDeps;
    std::string ContextHash;
    std::vector<std::string> OutputPaths;
    const llvm::StringSet<> &AlreadySeen;
  };

  
  // We expect a single command here because if a source file occurs multiple
  // times in the original CDB, then `computeDependencies` would run the
  // `DependencyScanningAction` once for every time the input occurred in the
@@ -148,25 +160,12 @@ llvm::Expected<std::string> DependencyScanningTool::getDependencyFile(
         "Expected a compilation database with a single command!");
  std::string Input = Compilations.getAllCompileCommands().front().Filename;

  if (Format == ScanningOutputFormat::Make) {
    MakeDependencyPrinterConsumer Consumer;
    auto Result =
        Worker.computeDependencies(Input, CWD, Compilations, Consumer);
    if (Result)
      return std::move(Result);
    std::string Output;
    Consumer.printDependencies(Output);
    return Output;
  } else {
    FullDependencyPrinterConsumer Consumer;
    auto Result =
  FullDependencyPrinterConsumer Consumer(AlreadySeen);
  llvm::Error Result =
      Worker.computeDependencies(Input, CWD, Compilations, Consumer);
  if (Result)
    return std::move(Result);
    std::string Output;
    Consumer.printDependencies(Output, Input);
    return Output;
  }
  return Consumer.getFullDependencies();
}

} // end namespace dependencies
+8 −3
Original line number Diff line number Diff line
@@ -142,12 +142,17 @@ public:
                                                        Consumer));
      break;
    case ScanningOutputFormat::Full:
      Compiler.addDependencyCollector(
          std::make_shared<ModuleDepCollector>(Compiler, Consumer));
      Compiler.addDependencyCollector(std::make_shared<ModuleDepCollector>(
          std::move(Opts), Compiler, Consumer));
      break;
    }

    Consumer.handleContextHash(Compiler.getInvocation().getModuleHash());
    // Consider different header search and diagnostic options to create
    // different modules. This avoids the unsound aliasing of module PCMs.
    //
    // TODO: Implement diagnostic bucketing and header search pruning to reduce
    // the impact of strict context hashing.
    Compiler.getHeaderSearchOpts().ModulesStrictContextHash = true;

    auto Action = std::make_unique<PreprocessOnlyAction>();
    const bool Result = Compiler.ExecuteAction(*Action);
+77 −14
Original line number Diff line number Diff line
@@ -17,6 +17,47 @@ using namespace clang;
using namespace tooling;
using namespace dependencies;

/// Build the command line for this module.
///
/// The result starts as a copy of \c NonPathCommandLine and then has the
/// common module arguments for \c ClangModuleDeps appended to it.
///
/// \param LookupPCMPath forwarded to \c detail::appendCommonModuleArguments
///                      to resolve the PCM path of each module.
/// \param LookupModuleDeps forwarded to
///                         \c detail::appendCommonModuleArguments to resolve
///                         the \c ModuleDeps of each module.
/// \returns the assembled argument list.
std::vector<std::string> ModuleDeps::getFullCommandLine(
    std::function<StringRef(ClangModuleDep)> LookupPCMPath,
    std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps) const {
  std::vector<std::string> Ret = NonPathCommandLine;

  // TODO: Build full command line. That also means capturing the original
  //       command line into NonPathCommandLine.

  // Both callbacks were received by value and are not used again in this
  // function, so hand them over instead of copying them a second time.
  dependencies::detail::appendCommonModuleArguments(
      ClangModuleDeps, std::move(LookupPCMPath), std::move(LookupModuleDeps),
      Ret);

  return Ret;
}

/// Append to \p Result the arguments needed to use explicitly built modules.
///
/// `-fno-implicit-modules` and `-fno-implicit-module-maps` come first. Then
/// every module reachable from \p Modules is visited once, dependencies
/// before dependents, and contributes a `-fmodule-file=` argument plus a
/// `-fmodule-map-file=` argument when it has a module map file.
void dependencies::detail::appendCommonModuleArguments(
    llvm::ArrayRef<ClangModuleDep> Modules,
    std::function<StringRef(ClangModuleDep)> LookupPCMPath,
    std::function<const ModuleDeps &(ClangModuleDep)> LookupModuleDeps,
    std::vector<std::string> &Result) {
  Result.push_back("-fno-implicit-modules");
  Result.push_back("-fno-implicit-module-maps");

  // Keys of the modules that have already been emitted.
  // NOTE(review): the key is the module name immediately followed by the
  // context hash with no separator; this assumes two distinct name/hash pairs
  // never concatenate to the same string -- confirm.
  llvm::StringSet<> Visited;

  // A std::function is used so the lambda can call itself.
  std::function<void(llvm::ArrayRef<ClangModuleDep>)> Visit =
      [&](llvm::ArrayRef<ClangModuleDep> Pending) {
        for (const ClangModuleDep &Dep : Pending) {
          const bool FirstVisit =
              Visited.insert(Dep.ModuleName + Dep.ContextHash).second;
          if (FirstVisit) {
            const ModuleDeps &Info = LookupModuleDeps(Dep);
            // Emit this module's own dependencies before the module itself.
            Visit(Info.ClangModuleDeps);
            Result.push_back(("-fmodule-file=" + LookupPCMPath(Dep)).str());
            if (!Info.ClangModuleMapFile.empty())
              Result.push_back("-fmodule-map-file=" + Info.ClangModuleMapFile);
          }
        }
      };

  Visit(Modules);
}

void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
                                       FileChangeReason Reason,
                                       SrcMgr::CharacteristicKind FileType,
@@ -24,6 +65,13 @@ void ModuleDepCollectorPP::FileChanged(SourceLocation Loc,
  if (Reason != PPCallbacks::EnterFile)
    return;
  
  // This has to be delayed as the context hash can change at the start of
  // `CompilerInstance::ExecuteAction`.
  if (MDC.ContextHash.empty()) {
    MDC.ContextHash = Instance.getInvocation().getModuleHash();
    MDC.Consumer.handleContextHash(MDC.ContextHash);
  }

  SourceManager &SM = Instance.getSourceManager();

  // Dependency generation really does want to go all the way to the
@@ -50,7 +98,16 @@ void ModuleDepCollectorPP::InclusionDirective(
    // here as `FileChanged` will never see it.
    MDC.MainDeps.push_back(FileName);
  }
  handleImport(Imported);
}

/// Preprocessor callback override for a module import.
///
/// The import location and module path are not used; only the imported module
/// is passed on to \c handleImport, the same handler that
/// \c InclusionDirective uses.
void ModuleDepCollectorPP::moduleImport(SourceLocation ImportLoc,
                                        ModuleIdPath Path,
                                        const Module *Imported) {
  handleImport(Imported);
}

void ModuleDepCollectorPP::handleImport(const Module *Imported) {
  if (!Imported)
    return;

@@ -71,9 +128,8 @@ void ModuleDepCollectorPP::EndOfMainFile() {
  for (auto &&I : MDC.Deps)
    MDC.Consumer.handleModuleDependency(I.second);

  DependencyOutputOptions Opts;
  for (auto &&I : MDC.MainDeps)
    MDC.Consumer.handleFileDependency(Opts, I);
    MDC.Consumer.handleFileDependency(*MDC.Opts, I);
}

void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
@@ -94,7 +150,7 @@ void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {

  MD.ClangModuleMapFile = ModuleMap ? ModuleMap->getName() : "";
  MD.ModuleName = M->getFullModuleName();
  MD.ModulePCMPath = M->getASTFile()->getName();
  MD.ImplicitModulePCMPath = M->getASTFile()->getName();
  MD.ContextHash = MDC.ContextHash;
  serialization::ModuleFile *MF =
      MDC.Instance.getASTReader()->getModuleManager().lookup(M->getASTFile());
@@ -103,30 +159,37 @@ void ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
        MD.FileDeps.insert(IF.getFile()->getName());
      });

  addAllSubmoduleDeps(M, MD);
  llvm::DenseSet<const Module *> AddedModules;
  addAllSubmoduleDeps(M, MD, AddedModules);
}

void ModuleDepCollectorPP::addAllSubmoduleDeps(const Module *M,
                                               ModuleDeps &MD) {
  addModuleDep(M, MD);
void ModuleDepCollectorPP::addAllSubmoduleDeps(
    const Module *M, ModuleDeps &MD,
    llvm::DenseSet<const Module *> &AddedModules) {
  addModuleDep(M, MD, AddedModules);

  for (const Module *SubM : M->submodules())
    addAllSubmoduleDeps(SubM, MD);
    addAllSubmoduleDeps(SubM, MD, AddedModules);
}

void ModuleDepCollectorPP::addModuleDep(const Module *M, ModuleDeps &MD) {
void ModuleDepCollectorPP::addModuleDep(
    const Module *M, ModuleDeps &MD,
    llvm::DenseSet<const Module *> &AddedModules) {
  for (const Module *Import : M->Imports) {
    if (Import->getTopLevelModule() != M->getTopLevelModule()) {
      MD.ClangModuleDeps.insert(Import->getTopLevelModuleName());
      if (AddedModules.insert(Import->getTopLevelModule()).second)
        MD.ClangModuleDeps.push_back(
            {Import->getTopLevelModuleName(),
             Instance.getInvocation().getModuleHash()});
      handleTopLevelModule(Import->getTopLevelModule());
    }
  }
}

ModuleDepCollector::ModuleDepCollector(CompilerInstance &I,
ModuleDepCollector::ModuleDepCollector(
    std::unique_ptr<DependencyOutputOptions> Opts, CompilerInstance &I,
    DependencyConsumer &C)
    : Instance(I), Consumer(C), ContextHash(I.getInvocation().getModuleHash()) {
}
    : Instance(I), Consumer(C), Opts(std::move(Opts)) {}

void ModuleDepCollector::attachToPreprocessor(Preprocessor &PP) {
  PP.addPPCallbacks(std::make_unique<ModuleDepCollectorPP>(Instance, *this));
Loading