Loading clang/lib/Format/FormatTokenLexer.cpp +69 −18 Original line number Diff line number Diff line Loading @@ -57,6 +57,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { if (Style.Language == FormatStyle::LK_TextProto) tryParsePythonComment(); tryMergePreviousTokens(); if (Style.isCSharp()) // This needs to come after tokens have been merged so that C# // string literals are correctly identified. handleCSharpVerbatimAndInterpolatedStrings(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); Loading Loading @@ -181,12 +185,12 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() { // Search for verbatim or interpolated string literals @"ABC" or // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to // prevent splitting of @, $ and ". // Merging of multiline verbatim strings with embedded '"' is handled in // handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing. bool FormatTokenLexer::tryMergeCSharpStringLiteral() { if (Tokens.size() < 2) return false; auto &CSharpStringLiteral = *(Tokens.end() - 2); // Interpolated strings could contain { } with " characters inside. // $"{x ?? "null"}" // should not be split into $"{x ?? ", null, "}" but should treated as a Loading Loading @@ -236,27 +240,12 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() { } } // verbatim strings could contain "" which C# sees as an escaped ". // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs // merging into a single string literal. // Look for @"aaaaaa" or $"aaaaaa". auto &String = *(Tokens.end() - 1); if (!String->is(tok::string_literal)) return false; if (CSharpStringLiteral->Type == TT_CSharpStringLiteral && (CSharpStringLiteral->TokenText.startswith(R"(@")") || CSharpStringLiteral->TokenText.startswith(R"($@")"))) { CSharpStringLiteral->TokenText = StringRef( CSharpStringLiteral->TokenText.begin(), String->TokenText.end() - CSharpStringLiteral->TokenText.begin()); CSharpStringLiteral->ColumnWidth += String->ColumnWidth; Tokens.erase(Tokens.end() - 1); return true; } auto &At = *(Tokens.end() - 2); // Look for @"aaaaaa" or $"aaaaaa". if (!(At->is(tok::at) || At->TokenText == "$")) return false; Loading Loading @@ -498,6 +487,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { FormatToken *CSharpStringLiteral = Tokens.back(); if (CSharpStringLiteral->Type != TT_CSharpStringLiteral) return; // Deal with multiline strings. if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") || CSharpStringLiteral->TokenText.startswith(R"($@")"))) return; const char *StrBegin = Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size(); const char *Offset = StrBegin; if (CSharpStringLiteral->TokenText.startswith(R"(@")")) Offset += 2; else // CSharpStringLiteral->TokenText.startswith(R"($@")") Offset += 3; // Look for a terminating '"' in the current file buffer. // Make no effort to format code within an interpolated or verbatim string. for (; Offset != Lex->getBuffer().end(); ++Offset) { if (Offset[0] == '"') { // "" within a verbatim string is an escaped double quote: skip it. if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"') ++Offset; else break; } } // Make no attempt to format code properly if a verbatim string is // unterminated. if (Offset == Lex->getBuffer().end()) return; StringRef LiteralText(StrBegin, Offset - StrBegin + 1); CSharpStringLiteral->TokenText = LiteralText; // Adjust width for potentially multiline string literals. size_t FirstBreak = LiteralText.find('\n'); StringRef FirstLineText = FirstBreak == StringRef::npos ? LiteralText : LiteralText.substr(0, FirstBreak); CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs( FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth, Encoding); size_t LastBreak = LiteralText.rfind('\n'); if (LastBreak != StringRef::npos) { CSharpStringLiteral->IsMultiline = true; unsigned StartColumn = 0; // The template tail spans the entire line. CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs( LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn, Style.TabWidth, Encoding); } SourceLocation loc = Offset < Lex->getBuffer().end() ? Lex->getSourceLocation(Offset + 1) : SourceMgr.getLocForEndOfFile(ID); resetLexer(SourceMgr.getFileOffset(loc)); } void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); Loading clang/lib/Format/FormatTokenLexer.h +2 −0 Original line number Diff line number Diff line Loading @@ -79,6 +79,8 @@ private: // nested template parts by balancing curly braces. void handleTemplateStrings(); void handleCSharpVerbatimAndInterpolatedStrings(); void tryParsePythonComment(); bool tryMerge_TMacro(); Loading clang/unittests/Format/FormatTestCSharp.cpp +35 −3 Original line number Diff line number Diff line Loading @@ -412,9 +412,9 @@ TEST_F(FormatTestCSharp, CSharpSpaceAfterCStyleCast) { TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) { FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp); verifyFormat(R"(string str = @"""")", Style); verifyFormat(R"(string str = @"""Hello world""")", Style); verifyFormat(R"(string str = $@"""Hello {friend}""")", Style); verifyFormat(R"(string str = @"""";)", Style); verifyFormat(R"(string str = @"""Hello world""";)", Style); verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style); } TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) { Loading @@ -425,5 +425,37 @@ TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) { verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style); } TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) { // Use MS style as Google Style inserts a line break before multiline strings. // verifyFormat does not understand multiline C# string-literals // so check the format explicitly. FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp); std::string Code = R"(string s1 = $@"some code: class {className} {{ {className}() {{}} }}";)"; EXPECT_EQ(Code, format(Code, Style)); // Multiline string in the middle of a function call. Code = R"( var x = foo(className, $@"some code: class {className} {{ {className}() {{}} }}", y);)"; // y aligned with `className` arg. EXPECT_EQ(Code, format(Code, Style)); // Interpolated string with embedded multiline string. Code = R"(Console.WriteLine($"{string.Join(@", ", values)}");)"; EXPECT_EQ(Code, format(Code, Style)); } } // namespace format } // end namespace clang clang/lib/Format/ContinuationIndenter.cpp +1 −1 File changed.Contains only whitespace changes. Show changes Loading
clang/lib/Format/FormatTokenLexer.cpp +69 −18 Original line number Diff line number Diff line Loading @@ -57,6 +57,10 @@ ArrayRef<FormatToken *> FormatTokenLexer::lex() { if (Style.Language == FormatStyle::LK_TextProto) tryParsePythonComment(); tryMergePreviousTokens(); if (Style.isCSharp()) // This needs to come after tokens have been merged so that C# // string literals are correctly identified. handleCSharpVerbatimAndInterpolatedStrings(); if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->Tok.isNot(tok::eof)); Loading Loading @@ -181,12 +185,12 @@ bool FormatTokenLexer::tryMergeJSPrivateIdentifier() { // Search for verbatim or interpolated string literals @"ABC" or // $"aaaaa{abc}aaaaa" i and mark the token as TT_CSharpStringLiteral, and to // prevent splitting of @, $ and ". // Merging of multiline verbatim strings with embedded '"' is handled in // handleCSharpVerbatimAndInterpolatedStrings with lower-level lexing. bool FormatTokenLexer::tryMergeCSharpStringLiteral() { if (Tokens.size() < 2) return false; auto &CSharpStringLiteral = *(Tokens.end() - 2); // Interpolated strings could contain { } with " characters inside. // $"{x ?? "null"}" // should not be split into $"{x ?? ", null, "}" but should treated as a Loading Loading @@ -236,27 +240,12 @@ bool FormatTokenLexer::tryMergeCSharpStringLiteral() { } } // verbatim strings could contain "" which C# sees as an escaped ". // @"""Hello""" will have been tokenized as @"" "Hello" "" and needs // merging into a single string literal. // Look for @"aaaaaa" or $"aaaaaa". auto &String = *(Tokens.end() - 1); if (!String->is(tok::string_literal)) return false; if (CSharpStringLiteral->Type == TT_CSharpStringLiteral && (CSharpStringLiteral->TokenText.startswith(R"(@")") || CSharpStringLiteral->TokenText.startswith(R"($@")"))) { CSharpStringLiteral->TokenText = StringRef( CSharpStringLiteral->TokenText.begin(), String->TokenText.end() - CSharpStringLiteral->TokenText.begin()); CSharpStringLiteral->ColumnWidth += String->ColumnWidth; Tokens.erase(Tokens.end() - 1); return true; } auto &At = *(Tokens.end() - 2); // Look for @"aaaaaa" or $"aaaaaa". if (!(At->is(tok::at) || At->TokenText == "$")) return false; Loading Loading @@ -498,6 +487,68 @@ void FormatTokenLexer::tryParseJSRegexLiteral() { resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); } void FormatTokenLexer::handleCSharpVerbatimAndInterpolatedStrings() { FormatToken *CSharpStringLiteral = Tokens.back(); if (CSharpStringLiteral->Type != TT_CSharpStringLiteral) return; // Deal with multiline strings. if (!(CSharpStringLiteral->TokenText.startswith(R"(@")") || CSharpStringLiteral->TokenText.startswith(R"($@")"))) return; const char *StrBegin = Lex->getBufferLocation() - CSharpStringLiteral->TokenText.size(); const char *Offset = StrBegin; if (CSharpStringLiteral->TokenText.startswith(R"(@")")) Offset += 2; else // CSharpStringLiteral->TokenText.startswith(R"($@")") Offset += 3; // Look for a terminating '"' in the current file buffer. // Make no effort to format code within an interpolated or verbatim string. for (; Offset != Lex->getBuffer().end(); ++Offset) { if (Offset[0] == '"') { // "" within a verbatim string is an escaped double quote: skip it. if (Offset + 1 < Lex->getBuffer().end() && Offset[1] == '"') ++Offset; else break; } } // Make no attempt to format code properly if a verbatim string is // unterminated. if (Offset == Lex->getBuffer().end()) return; StringRef LiteralText(StrBegin, Offset - StrBegin + 1); CSharpStringLiteral->TokenText = LiteralText; // Adjust width for potentially multiline string literals. size_t FirstBreak = LiteralText.find('\n'); StringRef FirstLineText = FirstBreak == StringRef::npos ? LiteralText : LiteralText.substr(0, FirstBreak); CSharpStringLiteral->ColumnWidth = encoding::columnWidthWithTabs( FirstLineText, CSharpStringLiteral->OriginalColumn, Style.TabWidth, Encoding); size_t LastBreak = LiteralText.rfind('\n'); if (LastBreak != StringRef::npos) { CSharpStringLiteral->IsMultiline = true; unsigned StartColumn = 0; // The template tail spans the entire line. CSharpStringLiteral->LastLineColumnWidth = encoding::columnWidthWithTabs( LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn, Style.TabWidth, Encoding); } SourceLocation loc = Offset < Lex->getBuffer().end() ? Lex->getSourceLocation(Offset + 1) : SourceMgr.getLocForEndOfFile(ID); resetLexer(SourceMgr.getFileOffset(loc)); } void FormatTokenLexer::handleTemplateStrings() { FormatToken *BacktickToken = Tokens.back(); Loading
clang/lib/Format/FormatTokenLexer.h +2 −0 Original line number Diff line number Diff line Loading @@ -79,6 +79,8 @@ private: // nested template parts by balancing curly braces. void handleTemplateStrings(); void handleCSharpVerbatimAndInterpolatedStrings(); void tryParsePythonComment(); bool tryMerge_TMacro(); Loading
clang/unittests/Format/FormatTestCSharp.cpp +35 −3 Original line number Diff line number Diff line Loading @@ -412,9 +412,9 @@ TEST_F(FormatTestCSharp, CSharpSpaceAfterCStyleCast) { TEST_F(FormatTestCSharp, CSharpEscapedQuotesInVerbatimStrings) { FormatStyle Style = getGoogleStyle(FormatStyle::LK_CSharp); verifyFormat(R"(string str = @"""")", Style); verifyFormat(R"(string str = @"""Hello world""")", Style); verifyFormat(R"(string str = $@"""Hello {friend}""")", Style); verifyFormat(R"(string str = @"""";)", Style); verifyFormat(R"(string str = @"""Hello world""";)", Style); verifyFormat(R"(string str = $@"""Hello {friend}""";)", Style); } TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) { Loading @@ -425,5 +425,37 @@ TEST_F(FormatTestCSharp, CSharpQuotesInInterpolatedStrings) { verifyFormat(R"(string str3 = $"{braceCount}}} braces";)", Style); } TEST_F(FormatTestCSharp, CSharpNewlinesInVerbatimStrings) { // Use MS style as Google Style inserts a line break before multiline strings. // verifyFormat does not understand multiline C# string-literals // so check the format explicitly. FormatStyle Style = getMicrosoftStyle(FormatStyle::LK_CSharp); std::string Code = R"(string s1 = $@"some code: class {className} {{ {className}() {{}} }}";)"; EXPECT_EQ(Code, format(Code, Style)); // Multiline string in the middle of a function call. Code = R"( var x = foo(className, $@"some code: class {className} {{ {className}() {{}} }}", y);)"; // y aligned with `className` arg. EXPECT_EQ(Code, format(Code, Style)); // Interpolated string with embedded multiline string. Code = R"(Console.WriteLine($"{string.Join(@", ", values)}");)"; EXPECT_EQ(Code, format(Code, Style)); } } // namespace format } // end namespace clang
clang/lib/Format/ContinuationIndenter.cpp +1 −1 File changed.Contains only whitespace changes. Show changes