Unverified Commit ec7c8bae authored by Kadir Cetinkaya's avatar Kadir Cetinkaya
Browse files

[clangd] Make use of syntax tokens in ReplayPreamble

Summary: Replace usage of RawLexer with syntax tokens inside ReplayPreamble.

Reviewers: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74842
parent e6b81818
Loading
Loading
Loading
Loading
+57 −33
Original line number Diff line number Diff line
@@ -114,16 +114,16 @@ public:
  // Attach preprocessor hooks such that preamble events will be injected at
  // the appropriate time.
  // Events will be delivered to the *currently registered* PP callbacks.
  static void attach(const IncludeStructure &Includes,
                     CompilerInstance &Clang) {
  static void attach(const IncludeStructure &Includes, CompilerInstance &Clang,
                     const PreambleBounds &PB) {
    auto &PP = Clang.getPreprocessor();
    auto *ExistingCallbacks = PP.getPPCallbacks();
    // No need to replay events if nobody is listening.
    if (!ExistingCallbacks)
      return;
    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(
        new ReplayPreamble(Includes, ExistingCallbacks,
                           Clang.getSourceManager(), PP, Clang.getLangOpts())));
    PP.addPPCallbacks(std::unique_ptr<PPCallbacks>(new ReplayPreamble(
        Includes, ExistingCallbacks, Clang.getSourceManager(), PP,
        Clang.getLangOpts(), PB)));
    // We're relying on the fact that addPPCallbacks keeps the old PPCallbacks
    // around, creating a chaining wrapper. Guard against other implementations.
    assert(PP.getPPCallbacks() != ExistingCallbacks &&
@@ -133,9 +133,13 @@ public:
private:
  ReplayPreamble(const IncludeStructure &Includes, PPCallbacks *Delegate,
                 const SourceManager &SM, Preprocessor &PP,
                 const LangOptions &LangOpts)
      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP),
        LangOpts(LangOpts) {}
                 const LangOptions &LangOpts, const PreambleBounds &PB)
      : Includes(Includes), Delegate(Delegate), SM(SM), PP(PP) {
    // Only tokenize the preamble section of the main file, as we are not
    // interested in the rest of the tokens.
    MainFileTokens = syntax::tokenize(
        syntax::FileRange(SM.getMainFileID(), 0, PB.Size), SM, LangOpts);
  }

  // In a normal compile, the preamble traverses the following structure:
  //
@@ -167,33 +171,53 @@ private:
        if (auto FE = SM.getFileManager().getFile(Inc.Resolved))
          File = *FE;

      // Re-lex the #include directive to find its interesting parts.
      auto HashLoc = SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset);
      auto HashTok = llvm::partition_point(MainFileTokens,
                                           [&HashLoc](const syntax::Token &T) {
                                             return T.location() < HashLoc;
                                           });
      assert(HashTok != MainFileTokens.end() && HashTok->kind() == tok::hash);

      auto IncludeTok = std::next(HashTok);
      assert(IncludeTok != MainFileTokens.end());

      auto FileTok = std::next(IncludeTok);
      assert(FileTok != MainFileTokens.end());

      // Create a fake import/include token; none of the callers seem to care
      // about clang::Token::Flags.
      Token SynthesizedIncludeTok;
      SynthesizedIncludeTok.startToken();
      SynthesizedIncludeTok.setLocation(IncludeTok->location());
      SynthesizedIncludeTok.setLength(IncludeTok->length());
      SynthesizedIncludeTok.setKind(tok::raw_identifier);
      SynthesizedIncludeTok.setRawIdentifierData(IncludeTok->text(SM).data());
      PP.LookUpIdentifierInfo(SynthesizedIncludeTok);

      // Same here, create a fake one for Filename, including angles or quotes.
      Token SynthesizedFilenameTok;
      SynthesizedFilenameTok.startToken();
      SynthesizedFilenameTok.setLocation(FileTok->location());
      // Note that we can't use FileTok->length()/text() here: for angled
      // includes FileTok is only the leading tok::less rather than the whole
      // filename, whereas Inc.Written contains the full header name including
      // quotes/angles.
      SynthesizedFilenameTok.setLength(Inc.Written.length());
      SynthesizedFilenameTok.setKind(tok::header_name);
      SynthesizedFilenameTok.setLiteralData(Inc.Written.data());

      llvm::StringRef WrittenFilename =
          llvm::StringRef(Inc.Written).drop_front().drop_back();
      bool Angled = llvm::StringRef(Inc.Written).startswith("<");

      // Re-lex the #include directive to find its interesting parts.
      llvm::StringRef Src = SM.getBufferData(SM.getMainFileID());
      Lexer RawLexer(SM.getLocForStartOfFile(SM.getMainFileID()), LangOpts,
                     Src.begin(), Src.begin() + Inc.HashOffset, Src.end());
      Token HashTok, IncludeTok, FilenameTok;
      RawLexer.LexFromRawLexer(HashTok);
      assert(HashTok.getKind() == tok::hash);
      RawLexer.setParsingPreprocessorDirective(true);
      RawLexer.LexFromRawLexer(IncludeTok);
      IdentifierInfo *II = PP.getIdentifierInfo(IncludeTok.getRawIdentifier());
      IncludeTok.setIdentifierInfo(II);
      IncludeTok.setKind(II->getTokenID());
      RawLexer.LexIncludeFilename(FilenameTok);

      Delegate->InclusionDirective(
          HashTok.getLocation(), IncludeTok, WrittenFilename, Angled,
          CharSourceRange::getCharRange(FilenameTok.getLocation(),
                                        FilenameTok.getEndLoc()),
          File, "SearchPath", "RelPath", /*Imported=*/nullptr, Inc.FileKind);
      Delegate->InclusionDirective(HashTok->location(), SynthesizedIncludeTok,
                                   WrittenFilename, Inc.Written.front() == '<',
                                   FileTok->range(SM).toCharRange(SM), File,
                                   "SearchPath", "RelPath",
                                   /*Imported=*/nullptr, Inc.FileKind);
      if (File)
        // FIXME: Use correctly named FileEntryRef.
        Delegate->FileSkipped(FileEntryRef(File->getName(), *File), FilenameTok,
                              Inc.FileKind);
        Delegate->FileSkipped(FileEntryRef(File->getName(), *File),
                              SynthesizedFilenameTok, Inc.FileKind);
      else {
        llvm::SmallString<1> UnusedRecovery;
        Delegate->FileNotFound(WrittenFilename, UnusedRecovery);
@@ -205,7 +229,7 @@ private:
  PPCallbacks *Delegate;
  const SourceManager &SM;
  Preprocessor &PP;
  const LangOptions &LangOpts;
  std::vector<syntax::Token> MainFileTokens;
};

} // namespace
@@ -337,7 +361,7 @@ ParsedAST::build(std::unique_ptr<clang::CompilerInvocation> CI,
  auto Includes = Preamble ? Preamble->Includes : IncludeStructure{};
  // Replay the preamble includes so that clang-tidy checks can see them.
  if (Preamble)
    ReplayPreamble::attach(Includes, *Clang);
    ReplayPreamble::attach(Includes, *Clang, Preamble->Preamble.getBounds());
  // Important: collectIncludeStructure is registered *after* ReplayPreamble!
  // Otherwise we would collect the replayed includes again...
  // (We can't *just* use the replayed includes, they don't have Resolved path).
+121 −0
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@
//
//===----------------------------------------------------------------------===//

#include "../../clang-tidy/ClangTidyModule.h"
#include "../../clang-tidy/ClangTidyModuleRegistry.h"
#include "AST.h"
#include "Annotations.h"
#include "Compiler.h"
@@ -20,8 +22,13 @@
#include "TestFS.h"
#include "TestTU.h"
#include "clang/AST/DeclTemplate.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Token.h"
#include "clang/Tooling/Syntax/Tokens.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ScopedPrinter.h"
#include "gmock/gmock-matchers.h"
#include "gmock/gmock.h"
@@ -71,6 +78,10 @@ MATCHER_P(WithTemplateArgs, ArgName, "") {
  return false;
}

// Matches a value whose beginOffset()/endOffset() are equal to R.Begin and
// R.End respectively.
MATCHER_P(RangeIs, R, "") {
  if (arg.beginOffset() != R.Begin)
    return false;
  return arg.endOffset() == R.End;
}

TEST(ParsedASTTest, TopLevelDecls) {
  TestTU TU;
  TU.HeaderCode = R"(
@@ -296,6 +307,116 @@ TEST(ParsedASTTest, CollectsMainFileMacroExpansions) {
              testing::UnorderedElementsAreArray(TestCase.points()));
}

// Checks that the include directives of the preamble are replayed to the
// PPCallbacks registered by a clang-tidy check, and that the replayed hash
// location, directive token, filename and filename token match the source.
TEST(ParsedASTTest, ReplayPreambleForTidyCheckers) {
  // Snapshot of the arguments of a single InclusionDirective callback, reduced
  // to main-file offsets and names so they can be compared with annotations.
  struct Inclusion {
    Inclusion(const SourceManager &SM, SourceLocation HashLoc,
              const Token &IncludeTok, llvm::StringRef FileName, bool IsAngled,
              CharSourceRange FilenameRange)
        : HashOffset(SM.getDecomposedLoc(HashLoc).second), IncTok(IncludeTok),
          IncDirective(IncludeTok.getIdentifierInfo()->getName()),
          FileNameOffset(SM.getDecomposedLoc(FilenameRange.getBegin()).second),
          FileName(FileName), IsAngled(IsAngled) {}
    size_t HashOffset;
    syntax::Token IncTok;
    llvm::StringRef IncDirective;
    size_t FileNameOffset;
    llvm::StringRef FileName;
    bool IsAngled;
  };
  // These are static because the function-local callback class below cannot
  // refer to automatic variables of the enclosing function.
  static std::vector<Inclusion> Includes;
  static std::vector<syntax::Token> SkippedFiles;
  // Being static, the containers survive across executions of this test body
  // (e.g. with --gtest_repeat). Drop stale entries so the size checks below
  // only see what this run recorded.
  Includes.clear();
  SkippedFiles.clear();

  // Records every replayed inclusion directive and skipped file.
  struct ReplayPreamblePPCallback : public PPCallbacks {
    const SourceManager &SM;
    explicit ReplayPreamblePPCallback(const SourceManager &SM) : SM(SM) {}

    void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                            StringRef FileName, bool IsAngled,
                            CharSourceRange FilenameRange, const FileEntry *,
                            StringRef, StringRef, const Module *,
                            SrcMgr::CharacteristicKind) override {
      Includes.emplace_back(SM, HashLoc, IncludeTok, FileName, IsAngled,
                            FilenameRange);
    }

    void FileSkipped(const FileEntryRef &, const Token &FilenameTok,
                     SrcMgr::CharacteristicKind) override {
      SkippedFiles.emplace_back(FilenameTok);
    }
  };
  // A tidy check whose only job is to install the recording callback.
  struct ReplayPreambleCheck : public tidy::ClangTidyCheck {
    ReplayPreambleCheck(StringRef Name, tidy::ClangTidyContext *Context)
        : ClangTidyCheck(Name, Context) {}
    void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
                             Preprocessor *ModuleExpanderPP) override {
      PP->addPPCallbacks(::std::make_unique<ReplayPreamblePPCallback>(SM));
    }
  };
  // Module exposing the check above under the name "replay-preamble-check".
  struct ReplayPreambleModule : public tidy::ClangTidyModule {
    void
    addCheckFactories(tidy::ClangTidyCheckFactories &CheckFactories) override {
      CheckFactories.registerCheck<ReplayPreambleCheck>(
          "replay-preamble-check");
    }
  };

  // Static so that the module is registered only once per process.
  static tidy::ClangTidyModuleRegistry::Add<ReplayPreambleModule> X(
      "replay-preamble-module", "");
  TestTU TU;
  // This check records inclusion directives replayed by clangd.
  TU.ClangTidyChecks = "replay-preamble-check";
  // Annotations: $hash marks the '#', $include wraps the directive name,
  // $filebegin marks the opening quote/angle and $filerange wraps the filename
  // without its delimiters.
  llvm::Annotations Test(R"cpp(
    $hash^#$include[[import]] $filebegin^"$filerange[[bar.h]]"
    $hash^#$include[[include_next]] $filebegin^"$filerange[[baz.h]]"
    $hash^#$include[[include]] $filebegin^<$filerange[[a.h]]>)cpp");
  llvm::StringRef Code = Test.code();
  TU.Code = Code.str();
  TU.AdditionalFiles["bar.h"] = "";
  TU.AdditionalFiles["baz.h"] = "";
  TU.AdditionalFiles["a.h"] = "";
  // NOTE(review): presumably needed so the angled include of a.h resolves.
  TU.ExtraArgs = {"-isystem."};

  const auto &AST = TU.build();
  const auto &SM = AST.getSourceManager();

  // Every annotation kind must appear exactly once per recorded inclusion.
  auto HashLocs = Test.points("hash");
  ASSERT_EQ(HashLocs.size(), Includes.size());
  auto IncludeRanges = Test.ranges("include");
  ASSERT_EQ(IncludeRanges.size(), Includes.size());
  auto FileBeginLocs = Test.points("filebegin");
  ASSERT_EQ(FileBeginLocs.size(), Includes.size());
  auto FileRanges = Test.ranges("filerange");
  ASSERT_EQ(FileRanges.size(), Includes.size());

  ASSERT_EQ(SkippedFiles.size(), Includes.size());
  for (size_t I = 0; I < Includes.size(); ++I) {
    const auto &Inc = Includes[I];

    EXPECT_EQ(Inc.HashOffset, HashLocs[I]);

    // The directive token must cover the directive name and be an identifier.
    auto IncRange = IncludeRanges[I];
    EXPECT_THAT(Inc.IncTok.range(SM), RangeIs(IncRange));
    EXPECT_EQ(Inc.IncTok.kind(), tok::identifier);
    EXPECT_EQ(Inc.IncDirective,
              Code.substr(IncRange.Begin, IncRange.End - IncRange.Begin));

    EXPECT_EQ(Inc.FileNameOffset, FileBeginLocs[I]);
    EXPECT_EQ(Inc.IsAngled, Code[FileBeginLocs[I]] == '<');

    auto FileRange = FileRanges[I];
    EXPECT_EQ(Inc.FileName,
              Code.substr(FileRange.Begin, FileRange.End - FileRange.Begin));

    EXPECT_EQ(SM.getDecomposedLoc(SkippedFiles[I].location()).second,
              Inc.FileNameOffset);
    // This also contains quotes/angles so increment the range by one from both
    // sides.
    EXPECT_EQ(
        SkippedFiles[I].text(SM),
        Code.substr(FileRange.Begin - 1, FileRange.End - FileRange.Begin + 2));
    EXPECT_EQ(SkippedFiles[I].kind(), tok::header_name);
  }
}

} // namespace
} // namespace clangd
} // namespace clang