Commit 7ea4c6fa authored by Haojian Wu's avatar Haojian Wu
Browse files

[clangd] Implement a function to lex the file to find candidate occurrences.

Summary:
This will be used for incoming cross-file rename (to detect index
staleness issue).

Reviewers: ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69615
parent f349cc37
Loading
Loading
Loading
Loading
+30 −17
Original line number Diff line number Diff line
@@ -719,41 +719,53 @@ cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
  return formatReplacements(Code, std::move(*CleanReplaces), Style);
}

void lex(llvm::StringRef Code, const format::FormatStyle &Style,
         llvm::function_ref<void(const clang::Token &, Position)> Action) {
void lex(llvm::StringRef Code, const LangOptions &LangOpts,
         llvm::function_ref<void(const clang::Token &, const SourceManager &SM)>
             Action) {
  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
  std::string NullTerminatedCode = Code.str();
  SourceManagerForFile FileSM("dummy.cpp", NullTerminatedCode);
  auto &SM = FileSM.get();
  auto FID = SM.getMainFileID();
  Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style));
  // Create a raw lexer (with no associated preprocessor object).
  Lexer Lex(FID, SM.getBuffer(FID), SM, LangOpts);
  Token Tok;

  while (!Lex.LexFromRawLexer(Tok))
    Action(Tok, sourceLocToPosition(SM, Tok.getLocation()));
    Action(Tok, SM);
  // LexFromRawLexer returns true after it lexes last token, so we still have
  // one more token to report.
  Action(Tok, sourceLocToPosition(SM, Tok.getLocation()));
  Action(Tok, SM);
}

llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style) {
  llvm::StringMap<unsigned> Identifiers;
  lex(Content, Style, [&](const clang::Token &Tok, Position) {
    switch (Tok.getKind()) {
    case tok::identifier:
      ++Identifiers[Tok.getIdentifierInfo()->getName()];
      break;
    case tok::raw_identifier:
  auto LangOpt = format::getFormattingLangOpts(Style);
  lex(Content, LangOpt, [&](const clang::Token &Tok, const SourceManager &) {
    if (Tok.getKind() == tok::raw_identifier)
      ++Identifiers[Tok.getRawIdentifier()];
      break;
    default:
      break;
    }
  });
  return Identifiers;
}

std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts) {
  std::vector<Range> Ranges;
  lex(Content, LangOpts, [&](const clang::Token &Tok, const SourceManager &SM) {
    if (Tok.getKind() != tok::raw_identifier)
      return;
    if (Tok.getRawIdentifier() != Identifier)
      return;
    auto Range = getTokenRange(SM, LangOpts, Tok.getLocation());
    if (!Range)
      return;
    Ranges.push_back(*Range);
  });
  return Ranges;
}

namespace {
struct NamespaceEvent {
  enum {
@@ -786,8 +798,9 @@ void parseNamespaceEvents(llvm::StringRef Code,
  std::string NSName;

  NamespaceEvent Event;
  lex(Code, Style, [&](const clang::Token &Tok, Position P) {
    Event.Pos = std::move(P);
  lex(Code, format::getFormattingLangOpts(Style),
      [&](const clang::Token &Tok,const SourceManager &SM) {
    Event.Pos = sourceLocToPosition(SM, Tok.getLocation());
    switch (Tok.getKind()) {
    case tok::raw_identifier:
      // In raw mode, this could be a keyword or a name.
+5 −0
Original line number Diff line number Diff line
@@ -232,6 +232,11 @@ llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style);
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style);

/// Collects all ranges of the given identifier in the source code.
std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts);

/// Collects words from the source code.
/// Unlike collectIdentifiers:
/// - also finds text in comments:
+21 −0
Original line number Diff line number Diff line
@@ -680,6 +680,27 @@ TEST(SourceCodeTests, GetEligiblePoints) {
    EXPECT_EQ(Res.EnclosingNamespace, Case.EnclosingNamespace) << Test.code();
  }
}

TEST(SourceCodeTests, IdentifierRanges) {
  Annotations Code(R"cpp(
   class [[Foo]] {};
   // Foo
   /* Foo */
   void f([[Foo]]* foo1) {
     [[Foo]] foo2;
     auto S = [[Foo]]();
// cross-line identifier is not supported.
F\
o\
o foo2;
   }
  )cpp");
  LangOptions LangOpts;
  LangOpts.CPlusPlus = true;
  EXPECT_EQ(Code.ranges(),
            collectIdentifierRanges("Foo", Code.code(), LangOpts));
}

} // namespace
} // namespace clangd
} // namespace clang