Unverified Commit 0156b6ed authored by Greg Clayton's avatar Greg Clayton Committed by GitHub
Browse files

Modify llvm-gsymutil to ignore invalid file indexes (#71431)

DWARF produced by LTO and BOLT can sometimes be broken where file
indexes are beyond the end of the line table's file list in the
prologue. This patch allows llvm-gsymutil to convert this DWARF without
crashing, and emits errors when:

line table contains entries with an invalid file index (line entry will
be removed) inline functions that have invalid DW_AT_call_file file
indexes when there are no line table entries for a function and we fall
back to making a single line table entry from the functions
DW_AT_decl_file/DW_AT_decl_line attributes, we make sure the
DW_AT_decl_file attribute is valid before emitting it.
parent e15fcd7b
Loading
Loading
Loading
Loading
+49 −14
Original line number Diff line number Diff line
@@ -65,10 +65,10 @@ struct llvm::gsym::CUInfo {
  /// the first client that asks for a compile unit file index will end up
  /// doing the conversion, and subsequent clients will get the cached GSYM
  /// index.
  uint32_t DWARFToGSYMFileIndex(GsymCreator &Gsym, uint32_t DwarfFileIdx) {
    if (!LineTable)
      return 0;
    assert(DwarfFileIdx < FileCache.size());
  std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
                                               uint32_t DwarfFileIdx) {
    if (!LineTable || DwarfFileIdx >= FileCache.size())
      return std::nullopt;
    uint32_t &GsymFileIdx = FileCache[DwarfFileIdx];
    if (GsymFileIdx != UINT32_MAX)
      return GsymFileIdx;
@@ -272,14 +272,24 @@ static void parseInlineInfo(GsymCreator &Gsym, raw_ostream *Log, CUInfo &CUI,

    if (auto NameIndex = getQualifiedNameIndex(Die, CUI.Language, Gsym))
      II.Name = *NameIndex;
    II.CallFile = CUI.DWARFToGSYMFileIndex(
        Gsym, dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_file), 0));
    const uint64_t DwarfFileIdx = dwarf::toUnsigned(
        Die.findRecursively(dwarf::DW_AT_call_file), UINT32_MAX);
    std::optional<uint32_t> OptGSymFileIdx =
        CUI.DWARFToGSYMFileIndex(Gsym, DwarfFileIdx);
    if (OptGSymFileIdx) {
      II.CallFile = OptGSymFileIdx.value();
      II.CallLine = dwarf::toUnsigned(Die.find(dwarf::DW_AT_call_line), 0);
      // parse all children and append to parent
      for (DWARFDie ChildDie : Die.children())
        parseInlineInfo(Gsym, Log, CUI, ChildDie, Depth + 1, FI, II,
                        AllInlineRanges, WarnIfEmpty);
      Parent.Children.emplace_back(std::move(II));
    } else if (Log) {
      *Log << "error: inlined function DIE at " << HEX32(Die.getOffset())
           << " has an invalid file index " << DwarfFileIdx
           << " in its DW_AT_call_file attribute, this inline entry and all "
           << "children will be removed.\n";
    }
    return;
  }
  if (Tag == dwarf::DW_TAG_subprogram || Tag == dwarf::DW_TAG_lexical_block) {
@@ -306,8 +316,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
    // the DW_AT_decl_file an d DW_AT_decl_line if we have both attributes.
    std::string FilePath = Die.getDeclFile(
        DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath);
    if (FilePath.empty())
    if (FilePath.empty()) {
      // If we had a DW_AT_decl_file, but got no file then we need to emit a
      // warning.
      if (Log) {
        const uint64_t DwarfFileIdx = dwarf::toUnsigned(
            Die.findRecursively(dwarf::DW_AT_decl_file), UINT32_MAX);
        *Log << "error: function DIE at " << HEX32(Die.getOffset())
             << " has an invalid file index " << DwarfFileIdx
             << " in its DW_AT_decl_file attribute, unable to create a single "
             << "line entry from the DW_AT_decl_file/DW_AT_decl_line "
             << "attributes.\n";
      }
      return;
    }
    if (auto Line =
            dwarf::toUnsigned(Die.findRecursively({dwarf::DW_AT_decl_line}))) {
      LineEntry LE(StartAddress, Gsym.insertFile(FilePath), *Line);
@@ -322,7 +344,20 @@ static void convertFunctionLineTable(raw_ostream *Log, CUInfo &CUI,
  for (uint32_t RowIndex : RowVector) {
    // Take file number and line/column from the row.
    const DWARFDebugLine::Row &Row = CUI.LineTable->Rows[RowIndex];
    const uint32_t FileIdx = CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
    std::optional<uint32_t> OptFileIdx =
        CUI.DWARFToGSYMFileIndex(Gsym, Row.File);
    if (!OptFileIdx) {
      if (Log) {
        *Log << "error: function DIE at " << HEX32(Die.getOffset()) << " has "
             << "a line entry with invalid DWARF file index, this entry will "
             << "be removed:\n";
        Row.dumpTableHeader(*Log, /*Indent=*/0);
        Row.dump(*Log);
        *Log << "\n";
      }
      continue;
    }
    const uint32_t FileIdx = OptFileIdx.value();
    uint64_t RowAddress = Row.Address.Address;
    // Watch out for a RowAddress that is in the middle of a line table entry
    // in the DWARF. If we pass an address in between two line table entries
+356 −0
Original line number Diff line number Diff line
@@ -4325,3 +4325,359 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
  SourceLocation src_loc = {"foo", "/tmp", "main.cpp", 12, 16};
  EXPECT_THAT(LR->Locations, testing::ElementsAre(src_loc));
}

TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
  // Test that llvm-gsymutil can handle invalid file indexes in the following
  // cases:
  //  - In line entries in the line table
  //  - When parsing inline entries that have a DW_AT_call_file
  //  - When parsing function dies with no line table entries and it tries to
  //    use the DW_AT_decl_file
  //
  //
  // 0x0000000b: DW_TAG_compile_unit
  //               DW_AT_name        ("/tmp/main.cpp")
  //               DW_AT_language    (DW_LANG_C)
  //               DW_AT_stmt_list   (0x00000000)
  //
  // 0x00000015:   DW_TAG_subprogram
  //                 DW_AT_name      ("foo")
  //                 DW_AT_low_pc    (0x0000000000001000)
  //                 DW_AT_high_pc   (0x0000000000001050)
  //
  // 0x0000002a:     DW_TAG_inlined_subroutine
  //                   DW_AT_name    ("inline_with_invalid_call_file")
  //                   DW_AT_low_pc  (0x0000000000001010)
  //                   DW_AT_high_pc (0x0000000000001020)
  //                   DW_AT_call_file       (0x0000000a)
  //                   DW_AT_call_line       (11)
  //
  // 0x00000047:       DW_TAG_inlined_subroutine
  //                     DW_AT_name
  //                     ("inline_inside_parent_with_invalid_call_file")
  //                     DW_AT_low_pc        (0x0000000000001010)
  //                     DW_AT_high_pc       (0x0000000000001015)
  //                     DW_AT_call_file     ("/tmp/main.cpp")
  //                     DW_AT_call_line     (12)
  //
  // 0x00000064:       NULL
  //
  // 0x00000065:     DW_TAG_inlined_subroutine
  //                   DW_AT_name    ("inline_with_valid_call_file")
  //                   DW_AT_low_pc  (0x0000000000001020)
  //                   DW_AT_high_pc (0x0000000000001030)
  //                   DW_AT_call_file       ("/tmp/main.cpp")
  //                   DW_AT_call_line       (13)
  //
  // 0x00000082:       DW_TAG_inlined_subroutine
  //                     DW_AT_name
  //                     ("inline_inside_parent_with_valid_call_file")
  //                     DW_AT_low_pc        (0x0000000000001020)
  //                     DW_AT_high_pc       (0x0000000000001025)
  //                     DW_AT_call_file     ("/tmp/main.cpp")
  //                     DW_AT_call_line     (14)
  //
  // 0x0000009f:       NULL
  //
  // 0x000000a0:     NULL
  //
  // 0x000000a1:   DW_TAG_subprogram
  //                 DW_AT_name      ("func_with_valid_decl_file")
  //                 DW_AT_decl_file ("/tmp/main.cpp")
  //                 DW_AT_decl_line (20)
  //                 DW_AT_low_pc    (0x0000000000002000)
  //                 DW_AT_high_pc   (0x0000000000002050)
  //
  // 0x000000b8:   DW_TAG_subprogram
  //                 DW_AT_name      ("func_with_invalid_decl_file")
  //                 DW_AT_decl_file (0x0a)
  //                 DW_AT_decl_line (20)
  //                 DW_AT_low_pc    (0x0000000000003000)
  //                 DW_AT_high_pc   (0x0000000000003050)
  //
  // 0x000000cf:   NULL
  //
  // The table looks has an entry at address 0x0000000000001010 that has an
  // invalid file index that needs to be removed.
  //
  // Address            Line   Column File   ISA Discriminator Flags
  // ---------- ------ ------ ------ --- ------------- -------------
  // 0x00001000     10      0      1   0             0  is_stmt
  // 0x00001010     11      0     10   0             0  is_stmt
  // 0x00001020     11      0      1   0             0  is_stmt
  // 0x00001030     12      0      1   0             0  is_stmt
  // 0x00001050     12      0      1   0             0  is_stmt end_sequence

  StringRef yamldata = R"(
  debug_str:
    - ''
    - '/tmp/main.cpp'
    - foo
    - inline_with_invalid_call_file
    - inline_inside_parent_with_invalid_call_file
    - inline_with_valid_call_file
    - inline_inside_parent_with_valid_call_file
    - func_with_valid_decl_file
    - func_with_invalid_decl_file
  debug_abbrev:
    - ID:              0
      Table:
        - Code:            0x1
          Tag:             DW_TAG_compile_unit
          Children:        DW_CHILDREN_yes
          Attributes:
            - Attribute:       DW_AT_name
              Form:            DW_FORM_strp
            - Attribute:       DW_AT_language
              Form:            DW_FORM_udata
            - Attribute:       DW_AT_stmt_list
              Form:            DW_FORM_sec_offset
        - Code:            0x2
          Tag:             DW_TAG_subprogram
          Children:        DW_CHILDREN_yes
          Attributes:
            - Attribute:       DW_AT_name
              Form:            DW_FORM_strp
            - Attribute:       DW_AT_low_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_high_pc
              Form:            DW_FORM_addr
        - Code:            0x3
          Tag:             DW_TAG_inlined_subroutine
          Children:        DW_CHILDREN_yes
          Attributes:
            - Attribute:       DW_AT_name
              Form:            DW_FORM_strp
            - Attribute:       DW_AT_low_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_high_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_call_file
              Form:            DW_FORM_data4
            - Attribute:       DW_AT_call_line
              Form:            DW_FORM_data4
        - Code:            0x4
          Tag:             DW_TAG_inlined_subroutine
          Children:        DW_CHILDREN_no
          Attributes:
            - Attribute:       DW_AT_name
              Form:            DW_FORM_strp
            - Attribute:       DW_AT_low_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_high_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_call_file
              Form:            DW_FORM_data4
            - Attribute:       DW_AT_call_line
              Form:            DW_FORM_data4
        - Code:            0x5
          Tag:             DW_TAG_subprogram
          Children:        DW_CHILDREN_no
          Attributes:
            - Attribute:       DW_AT_name
              Form:            DW_FORM_strp
            - Attribute:       DW_AT_decl_file
              Form:            DW_FORM_data1
            - Attribute:       DW_AT_decl_line
              Form:            DW_FORM_data1
            - Attribute:       DW_AT_low_pc
              Form:            DW_FORM_addr
            - Attribute:       DW_AT_high_pc
              Form:            DW_FORM_addr
  debug_info:
    - Length:          0xCC
      Version:         4
      AbbrevTableID:   0
      AbbrOffset:      0x0
      AddrSize:        8
      Entries:
        - AbbrCode:        0x1
          Values:
            - Value:           0x1
            - Value:           0x2
            - Value:           0x0
        - AbbrCode:        0x2
          Values:
            - Value:           0xF
            - Value:           0x1000
            - Value:           0x1050
        - AbbrCode:        0x3
          Values:
            - Value:           0x13
            - Value:           0x1010
            - Value:           0x1020
            - Value:           0xA
            - Value:           0xB
        - AbbrCode:        0x4
          Values:
            - Value:           0x31
            - Value:           0x1010
            - Value:           0x1015
            - Value:           0x1
            - Value:           0xC
        - AbbrCode:        0x0
        - AbbrCode:        0x3
          Values:
            - Value:           0x5D
            - Value:           0x1020
            - Value:           0x1030
            - Value:           0x1
            - Value:           0xD
        - AbbrCode:        0x4
          Values:
            - Value:           0x79
            - Value:           0x1020
            - Value:           0x1025
            - Value:           0x1
            - Value:           0xE
        - AbbrCode:        0x0
        - AbbrCode:        0x0
        - AbbrCode:        0x5
          Values:
            - Value:           0xA3
            - Value:           0x1
            - Value:           0x14
            - Value:           0x2000
            - Value:           0x2050
        - AbbrCode:        0x5
          Values:
            - Value:           0xBD
            - Value:           0xA
            - Value:           0x14
            - Value:           0x3000
            - Value:           0x3050
        - AbbrCode:        0x0
  debug_line:
    - Length:          78
      Version:         2
      PrologueLength:  36
      MinInstLength:   1
      DefaultIsStmt:   1
      LineBase:        251
      LineRange:       14
      OpcodeBase:      13
      StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
      IncludeDirs:
        - '/tmp'
      Files:
        - Name:            main.cpp
          DirIdx:          1
          ModTime:         0
          Length:          0
      Opcodes:
        - Opcode:          DW_LNS_extended_op
          ExtLen:          9
          SubOpcode:       DW_LNE_set_address
          Data:            4096
        - Opcode:          DW_LNS_advance_line
          SData:           9
          Data:            0
        - Opcode:          DW_LNS_copy
          Data:            0
        - Opcode:          DW_LNS_advance_pc
          Data:            16
        - Opcode:          DW_LNS_set_file
          Data:            10
        - Opcode:          DW_LNS_advance_line
          SData:           1
          Data:            0
        - Opcode:          DW_LNS_copy
          Data:            0
        - Opcode:          DW_LNS_advance_pc
          Data:            16
        - Opcode:          DW_LNS_set_file
          Data:            1
        - Opcode:          DW_LNS_copy
          Data:            0
        - Opcode:          DW_LNS_advance_pc
          Data:            16
        - Opcode:          DW_LNS_advance_line
          SData:           1
          Data:            0
        - Opcode:          DW_LNS_copy
          Data:            0
        - Opcode:          DW_LNS_advance_pc
          Data:            32
        - Opcode:          DW_LNS_extended_op
          ExtLen:          1
          SubOpcode:       DW_LNE_end_sequence
          Data:            0
  )";
  auto ErrOrSections = DWARFYAML::emitDebugSections(yamldata);
  ASSERT_THAT_EXPECTED(ErrOrSections, Succeeded());
  std::unique_ptr<DWARFContext> DwarfContext =
      DWARFContext::create(*ErrOrSections, 8);
  ASSERT_TRUE(DwarfContext.get() != nullptr);
  std::string errors;
  raw_string_ostream OS(errors);
  GsymCreator GC;
  DwarfTransformer DT(*DwarfContext, GC);
  const uint32_t ThreadCount = 1;
  ASSERT_THAT_ERROR(DT.convert(ThreadCount, &OS), Succeeded());
  ASSERT_THAT_ERROR(GC.finalize(OS), Succeeded());
  OS.flush();
  SmallString<512> Str;
  raw_svector_ostream OutStrm(Str);
  const auto ByteOrder = llvm::endianness::native;
  FileWriter FW(OutStrm, ByteOrder);
  ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
  Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
  ASSERT_THAT_EXPECTED(GR, Succeeded());
  // There should be one function in our GSYM.
  EXPECT_EQ(GR->getNumAddresses(), 3u);
  // Verify "foo" is present and has a line table and no inline info.
  auto ExpFI = GR->getFunctionInfo(0x1000);
  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
  ASSERT_EQ(ExpFI->Range, AddressRange(0x1000, 0x1050));
  StringRef FuncName = GR->getString(ExpFI->Name);
  EXPECT_EQ(FuncName, "foo");

  EXPECT_TRUE(ExpFI->OptLineTable.has_value());
  // Make sure we only have 3 entries to show we removed the line entry with
  // the invalid file index whose address is 0x0000000000001010.
  ASSERT_EQ(ExpFI->OptLineTable->size(), 3u);
  EXPECT_TRUE(ExpFI->Inline.has_value());

  // Make sure that we only have one inline function, not two. We remove one of
  // the inline functions because it has an invalid DW_AT_call_file attribute.
  ASSERT_EQ(ExpFI->Inline->Children.size(), 1u);
  StringRef InlineName = GR->getString(ExpFI->Inline->Children[0].Name);
  EXPECT_EQ(InlineName, "inline_with_valid_call_file");

  ExpFI = GR->getFunctionInfo(0x0000000000002000);
  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
  ASSERT_EQ(ExpFI->Range, AddressRange(0x2000, 0x2050));
  FuncName = GR->getString(ExpFI->Name);
  EXPECT_EQ(FuncName, "func_with_valid_decl_file");
  EXPECT_FALSE(ExpFI->Inline.has_value());
  // Make sure we only have 1 entry in the line table which indicates we were
  // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
  EXPECT_TRUE(ExpFI->OptLineTable.has_value());
  ASSERT_EQ(ExpFI->OptLineTable->size(), 1u);

  ExpFI = GR->getFunctionInfo(0x0000000000003000);
  ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
  ASSERT_EQ(ExpFI->Range, AddressRange(0x3000, 0x3050));
  FuncName = GR->getString(ExpFI->Name);
  EXPECT_EQ(FuncName, "func_with_invalid_decl_file");
  EXPECT_FALSE(ExpFI->Inline.has_value());
  // Make sure we only no line table because there are no line entries in the
  // line table and the DW_AT_decl_file attribute was invalid so we were not
  // able to parse the DW_AT_decl_file/DW_AT_decl_line correctly.
  EXPECT_FALSE(ExpFI->OptLineTable.has_value());

  // Make sure we don't see spurious errors in the output:
  std::vector<std::string> ExpectedLogErrors = {
      "error: function DIE at 0x00000015 has a line entry with invalid DWARF "
      "file index, this entry will be removed:",
      "error: inlined function DIE at 0x0000002a has an invalid file index 10 "
      "in its DW_AT_call_file attribute, this inline entry and all children "
      "will be removed.",
      "error: function DIE at 0x000000b8 has an invalid file index 10 in its "
      "DW_AT_decl_file attribute, unable to create a single line entry from "
      "the DW_AT_decl_file/DW_AT_decl_line attributes."};
  // Make sure all expected errors are in the error stream for the two invalid
  // inlined functions that we removed due to invalid range scoping.
  for (const auto &Error : ExpectedLogErrors)
    EXPECT_TRUE(errors.find(Error) != std::string::npos);
}