Commit cf1c774d authored by Paul Robinson's avatar Paul Robinson
Browse files

[FastISel] Flush local value map on ever instruction

Local values are constants or addresses that can't be folded into
the instruction that uses them. FastISel materializes these in a
"local value" area that always dominates the current insertion
point, to try to avoid materializing these values more than once
(per block).

https://reviews.llvm.org/D43093 added code to sink these local
value instructions to their first use, which has two beneficial
effects. One, it is likely to avoid some unnecessary spills and
reloads; two, it allows us to attach the debug location of the
user to the local value instruction. The latter effect can
improve the debugging experience for debuggers with a "set next
statement" feature, such as the Visual Studio debugger and PS4
debugger, because instructions to set up constants for a given
statement will be associated with the appropriate source line.

There are also some constants (primarily addresses) that could be
produced by no-op casts or GEP instructions; the main difference
from "local value" instructions is that these are values from
separate IR instructions, and therefore could have multiple users
across multiple basic blocks. D43093 avoided sinking these, even
though they were emitted to the same "local value" area as the
other instructions. The patch comment for D43093 states:

  Local values may also be used by no-op casts, which adds the
  register to the RegFixups table. Without reversing the RegFixups
  map direction, we don't have enough information to sink these
  instructions.

This patch undoes most of D43093, and instead flushes the local
value map after(*) every IR instruction, using that instruction's
debug location. This avoids sometimes incorrect locations used
previously, and emits instructions in a more natural order.

This does mean materialized values are not re-used across IR
instruction boundaries; however, only about 5% of those values
were reused in an experimental self-build of clang.

(*) Actually, just prior to the next instruction. It seems like
it would be cleaner the other way, but I was having trouble
getting that to work.

Differential Revision: https://reviews.llvm.org/D91734
parent c26e8697
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3,7 +3,7 @@
; RUN: llvm-dwarfdump -debug-line -debug-ranges %t.wasm | FileCheck %s

; CHECK: Address
; CHECK: 0x0000000000000005
; CHECK: 0x0000000000000003
; CHECK: 0x0000000000000000

; CHECK: .debug_ranges contents:
+2 −2
Original line number Diff line number Diff line
@@ -28,9 +28,9 @@ int main(int argc, char **argv) {
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+17>: mov    dword ptr [rsp + 0x24], ecx
// CHECK:      ** 15     foo();
// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+21>: call   {{.*}}               ; foo at disassembly.cpp:12
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor    eax, eax
// CHECK:      ** 16     return 0;
// CHECK-NEXT:    17   }
// CHECK-NEXT:    18
// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+28>: add    rsp, 0x38
// CHECK:      disassembly.cpp.tmp.exe[{{.*}}] <+26>: xor    eax, eax
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+28>: add    rsp, 0x38
// CHECK-NEXT: disassembly.cpp.tmp.exe[{{.*}}] <+32>: ret
+1 −1
Original line number Diff line number Diff line
@@ -22,7 +22,7 @@ int main(int argc, char** argv) {
// CHECK: (lldb) target symbols add {{.*}}bar.pdb
// CHECK: symbol file '{{.*}}bar.pdb' has been added to '{{.*}}foo.exe'
// CHECK: (lldb) b main
// CHECK: Breakpoint 1: where = foo.exe`main + 23 at load-pdb.cpp:19, address = 0x0000000140001017
// CHECK: Breakpoint 1: where = foo.exe`main + 21 at load-pdb.cpp:19, address = 0x0000000140001015
// CHECK: (lldb) image dump symfile
// CHECK: Types:
// CHECK: {{.*}}: Type{0x00010024} , size = 0, compiler_type = {{.*}} int (int, char **)
+2 −19
Original line number Diff line number Diff line
@@ -246,7 +246,7 @@ public:
  /// be appended.
  void startNewBlock();

  /// Flush the local value map and sink local values if possible.
  /// Flush the local value map.
  void finishBasicBlock();

  /// Return current debug location information.
@@ -313,10 +313,7 @@ public:
  void removeDeadCode(MachineBasicBlock::iterator I,
                      MachineBasicBlock::iterator E);

  struct SavePoint {
    MachineBasicBlock::iterator InsertPt;
    DebugLoc DL;
  };
  using SavePoint = MachineBasicBlock::iterator;

  /// Prepare InsertPt to begin inserting instructions into the local
  /// value area and return the old insert position.
@@ -560,20 +557,6 @@ private:
  /// Removes dead local value instructions after SavedLastLocalvalue.
  void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue);

  struct InstOrderMap {
    DenseMap<MachineInstr *, unsigned> Orders;
    MachineInstr *FirstTerminator = nullptr;
    unsigned FirstTerminatorOrder = std::numeric_limits<unsigned>::max();

    void initialize(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator LastFlushPoint);
  };

  /// Sinks the local value materialization instruction LocalMI to its first use
  /// in the basic block, or deletes it if it is not used.
  void sinkLocalValueMaterialization(MachineInstr &LocalMI, Register DefReg,
                                     InstOrderMap &OrderMap);

  /// Insertion point before trying to select the current instruction.
  MachineBasicBlock::iterator SavedInsertPt;

+34 −136
Original line number Diff line number Diff line
@@ -139,7 +139,6 @@ void FastISel::startNewBlock() {
  LastLocalValue = EmitStartPt;
}

/// Flush the local CSE map and sink anything we can.
void FastISel::finishBasicBlock() { flushLocalValueMap(); }

bool FastISel::lowerArguments() {
@@ -164,48 +163,56 @@ bool FastISel::lowerArguments() {

/// Return the defined register if this instruction defines exactly one
/// virtual register and uses no other virtual registers. Otherwise return 0.
static Register findSinkableLocalRegDef(MachineInstr &MI) {
static Register findLocalRegDef(MachineInstr &MI) {
  Register RegDef;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef()) {
      if (RegDef)
        return 0;
        return Register();
      RegDef = MO.getReg();
    } else if (MO.getReg().isVirtual()) {
      // This is another use of a vreg. Don't try to sink it.
      // This is another use of a vreg. Don't delete it.
      return Register();
    }
  }
  return RegDef;
}

static bool isRegUsedByPhiNodes(Register DefReg,
                                FunctionLoweringInfo &FuncInfo) {
  for (auto &P : FuncInfo.PHINodesToUpdate)
    if (P.second == DefReg)
      return true;
  return false;
}

void FastISel::flushLocalValueMap() {
  // Try to sink local values down to their first use so that we can give them a
  // better debug location. This has the side effect of shrinking local value
  // live ranges, which helps out fast regalloc.
  if (SinkLocalValues && LastLocalValue != EmitStartPt) {
    // Sink local value materialization instructions between EmitStartPt and
    // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
    // avoid inserting into the range that we're iterating over.
  // If FastISel bails out, it could leave local value instructions behind
  // that aren't used for anything.  Detect and erase those.
  if (LastLocalValue != EmitStartPt) {
    MachineBasicBlock::reverse_iterator RE =
        EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
                    : FuncInfo.MBB->rend();
    MachineBasicBlock::reverse_iterator RI(LastLocalValue);

    InstOrderMap OrderMap;
    for (; RI != RE;) {
      MachineInstr &LocalMI = *RI;
      // Increment before erasing what it points to.
      ++RI;
      bool Store = true;
      if (!LocalMI.isSafeToMove(nullptr, Store))
      Register DefReg = findLocalRegDef(LocalMI);
      if (!DefReg)
        continue;
      Register DefReg = findSinkableLocalRegDef(LocalMI);
      if (DefReg == 0)
      if (FuncInfo.RegsWithFixups.count(DefReg))
        continue;

      sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
      bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
      if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
        if (EmitStartPt == &LocalMI)
          EmitStartPt = EmitStartPt->getPrevNode();
        LLVM_DEBUG(dbgs() << "removing dead local value materialization"
                          << LocalMI);
        LocalMI.eraseFromParent();
      }
    }
  }

@@ -216,14 +223,6 @@ void FastISel::flushLocalValueMap() {
  LastFlushPoint = FuncInfo.InsertPt;
}

static bool isRegUsedByPhiNodes(Register DefReg,
                                FunctionLoweringInfo &FuncInfo) {
  for (auto &P : FuncInfo.PHINodesToUpdate)
    if (P.second == DefReg)
      return true;
  return false;
}

static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
  // Ignore non-EH labels.
  if (!MI.isEHLabel())
@@ -239,108 +238,6 @@ static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) {
  return MI.getIterator() != MBB->getFirstNonPHI();
}

/// Build a map of instruction orders. Return the first terminator and its
/// order. Consider EH_LABEL instructions to be terminators as well, since local
/// values for phis after invokes must be materialized before the call.
void FastISel::InstOrderMap::initialize(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) {
  unsigned Order = 0;
  for (MachineInstr &I : *MBB) {
    if (!FirstTerminator &&
        (I.isTerminator() || isTerminatingEHLabel(MBB, I))) {
      FirstTerminator = &I;
      FirstTerminatorOrder = Order;
    }
    Orders[&I] = Order++;

    // We don't need to order instructions past the last flush point.
    if (I.getIterator() == LastFlushPoint)
      break;
  }
}

void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
                                             Register DefReg,
                                             InstOrderMap &OrderMap) {
  // If this register is used by a register fixup, MRI will not contain all
  // the uses until after register fixups, so don't attempt to sink or DCE
  // this instruction. Register fixups typically come from no-op cast
  // instructions, which replace the cast instruction vreg with the local
  // value vreg.
  if (FuncInfo.RegsWithFixups.count(DefReg))
    return;

  // We can DCE this instruction if there are no uses and it wasn't a
  // materialized for a successor PHI node.
  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
    if (EmitStartPt == &LocalMI)
      EmitStartPt = EmitStartPt->getPrevNode();
    LLVM_DEBUG(dbgs() << "removing dead local value materialization "
                      << LocalMI);
    OrderMap.Orders.erase(&LocalMI);
    LocalMI.eraseFromParent();
    return;
  }

  // Number the instructions if we haven't yet so we can efficiently find the
  // earliest use.
  if (OrderMap.Orders.empty())
    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);

  // Find the first user in the BB.
  MachineInstr *FirstUser = nullptr;
  unsigned FirstOrder = std::numeric_limits<unsigned>::max();
  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
    auto I = OrderMap.Orders.find(&UseInst);
    assert(I != OrderMap.Orders.end() &&
           "local value used by instruction outside local region");
    unsigned UseOrder = I->second;
    if (UseOrder < FirstOrder) {
      FirstOrder = UseOrder;
      FirstUser = &UseInst;
    }
  }

  // The insertion point will be the first terminator or the first user,
  // whichever came first. If there was no terminator, this must be a
  // fallthrough block and the insertion point is the end of the block.
  MachineBasicBlock::instr_iterator SinkPos;
  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
    FirstOrder = OrderMap.FirstTerminatorOrder;
    SinkPos = OrderMap.FirstTerminator->getIterator();
  } else if (FirstUser) {
    SinkPos = FirstUser->getIterator();
  } else {
    assert(UsedByPHI && "must be users if not used by a phi");
    SinkPos = FuncInfo.MBB->instr_end();
  }

  // Collect all DBG_VALUEs before the new insertion position so that we can
  // sink them.
  SmallVector<MachineInstr *, 1> DbgValues;
  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
    if (!DbgVal.isDebugValue())
      continue;
    unsigned UseOrder = OrderMap.Orders[&DbgVal];
    if (UseOrder < FirstOrder)
      DbgValues.push_back(&DbgVal);
  }

  // Sink LocalMI before SinkPos and assign it the same DebugLoc.
  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
  FuncInfo.MBB->remove(&LocalMI);
  FuncInfo.MBB->insert(SinkPos, &LocalMI);
  if (SinkPos != FuncInfo.MBB->end())
    LocalMI.setDebugLoc(SinkPos->getDebugLoc());

  // Sink any debug values that we've collected.
  for (MachineInstr *DI : DbgValues) {
    FuncInfo.MBB->remove(DI);
    FuncInfo.MBB->insert(SinkPos, DI);
  }
}

bool FastISel::hasTrivialKill(const Value *V) {
  // Don't consider constants or arguments to have trivial kills.
  const Instruction *I = dyn_cast<Instruction>(V);
@@ -578,12 +475,9 @@ void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
}

FastISel::SavePoint FastISel::enterLocalValueArea() {
  MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
  DebugLoc OldDL = DbgLoc;
  SavePoint OldInsertPt = FuncInfo.InsertPt;
  recomputeInsertPt();
  DbgLoc = DebugLoc();
  SavePoint SP = {OldInsertPt, OldDL};
  return SP;
  return OldInsertPt;
}

void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
@@ -591,8 +485,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
    LastLocalValue = &*std::prev(FuncInfo.InsertPt);

  // Restore the previous insert position.
  FuncInfo.InsertPt = OldInsertPt.InsertPt;
  DbgLoc = OldInsertPt.DL;
  FuncInfo.InsertPt = OldInsertPt;
}

bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
@@ -1657,6 +1550,11 @@ void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
}

bool FastISel::selectInstruction(const Instruction *I) {
  // Flush the local value map before starting each instruction.
  // This improves locality and debugging, and can reduce spills.
  // Reuse of values across IR instructions is relatively uncommon.
  flushLocalValueMap();

  MachineInstr *SavedLastLocalValue = getLastLocalValue();
  // Just before the terminator instruction, insert instructions to
  // feed PHI nodes in successor blocks.
Loading