Commit b0af7ced authored by Chandler Carruth

Merge r330264 for the fix to PR37100, a regression introduced with the new EFLAGS lowering.

llvm-svn: 332938
parent 62761191
+125 −82
@@ -36,6 +36,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -98,6 +99,7 @@ private:
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetRegisterClass *PromoteRC;
  MachineDominatorTree *MDT;

  CondRegArray collectCondsInRegs(MachineBasicBlock &MBB,
                                  MachineInstr &CopyDefI);
@@ -145,6 +147,7 @@ FunctionPass *llvm::createX86FlagsCopyLoweringPass() {
char X86FlagsCopyLoweringPass::ID = 0;

void X86FlagsCopyLoweringPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<MachineDominatorTree>();
  MachineFunctionPass::getAnalysisUsage(AU);
}

@@ -342,6 +345,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  MRI = &MF.getRegInfo();
  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  PromoteRC = &X86::GR8RegClass;

  if (MF.begin() == MF.end())
@@ -416,7 +420,45 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
    // of these up front instead.
    CondRegArray CondRegs = collectCondsInRegs(TestMBB, CopyDefI);

    for (auto MII = std::next(CopyI->getIterator()), MIE = MBB.instr_end();
    // Collect the basic blocks we need to scan. Typically this will just be
    // a single basic block but we may have to scan multiple blocks if the
    // EFLAGS copy lives into successors.
    SmallVector<MachineBasicBlock *, 2> Blocks;
    SmallPtrSet<MachineBasicBlock *, 2> VisitedBlocks;
    Blocks.push_back(&MBB);
    VisitedBlocks.insert(&MBB);

    do {
      MachineBasicBlock &UseMBB = *Blocks.pop_back_val();

      // We currently don't do any PHI insertion and so we require that the
      // test basic block dominates all of the use basic blocks.
      //
      // We could in theory do PHI insertion here if it becomes useful by just
      // taking undef values in along every edge that we don't trace this
      // EFLAGS copy along. This isn't as bad as fully general PHI insertion,
      // but still seems like a great deal of complexity.
      //
      // Because it is theoretically possible that some earlier MI pass or
      // other lowering transformation could induce this to happen, we do
      // a hard check even in non-debug builds here.
      if (&TestMBB != &UseMBB && !MDT->dominates(&TestMBB, &UseMBB)) {
        DEBUG({
          dbgs() << "ERROR: Encountered use that is not dominated by our test "
                    "basic block! Rewriting this would require inserting PHI "
                    "nodes to track the flag state across the CFG.\n\nTest "
                    "block:\n";
          TestMBB.dump();
          dbgs() << "Use block:\n";
          UseMBB.dump();
        });
        report_fatal_error("Cannot lower EFLAGS copy when original copy def "
                           "does not dominate all uses.");
      }

      for (auto MII = &UseMBB == &MBB ? std::next(CopyI->getIterator())
                                      : UseMBB.instr_begin(),
                MIE = UseMBB.instr_end();
           MII != MIE;) {
        MachineInstr &MI = *MII++;
        MachineOperand *FlagUse = MI.findRegisterUseOperand(X86::EFLAGS);
@@ -426,9 +468,9 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
            // scanning here.
            //
            // NB!!! Many instructions only modify some flags. LLVM currently
          // models this as clobbering all flags, but if that ever changes this
          // will need to be carefully updated to handle that more complex
          // logic.
            // models this as clobbering all flags, but if that ever changes
            // this will need to be carefully updated to handle that more
            // complex logic.
            FlagsKilled = true;
            break;
          }
@@ -454,7 +496,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
          do {
            JmpIs.push_back(&*JmpIt);
            ++JmpIt;
        } while (JmpIt != MBB.instr_end() &&
          } while (JmpIt != UseMBB.instr_end() &&
                   X86::getCondFromBranchOpc(JmpIt->getOpcode()) !=
                       X86::COND_INVALID);
          break;
@@ -463,12 +505,14 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMovOpc(MI.getOpcode()) != X86::COND_INVALID) {
          rewriteCMov(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
      } else if (X86::getCondFromSETOpc(MI.getOpcode()) != X86::COND_INVALID) {
        } else if (X86::getCondFromSETOpc(MI.getOpcode()) !=
                   X86::COND_INVALID) {
          rewriteSetCC(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
        // We assume that arithmetic instructions that use flags also def them.
          // We assume that arithmetic instructions that use flags also def
          // them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

@@ -489,30 +533,29 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
          break;
      }

    // If we didn't find a kill (or equivalent) check that the flags don't
    // live-out of the basic block. Currently we don't support lowering copies
    // of flags that live out in this fashion.
    if (!FlagsKilled &&
        llvm::any_of(MBB.successors(), [](MachineBasicBlock *SuccMBB) {
          return SuccMBB->isLiveIn(X86::EFLAGS);
        })) {
      DEBUG({
        dbgs() << "ERROR: Found a copied EFLAGS live-out from basic block:\n"
               << "----\n";
        MBB.dump();
        dbgs() << "----\n"
               << "ERROR: Cannot lower this EFLAGS copy!\n";
      });
      report_fatal_error(
          "Cannot lower EFLAGS copy that lives out of a basic block!");
    }
      // If the flags were killed, we're done with this block.
      if (FlagsKilled)
        break;

      // Otherwise we need to scan successors for ones where the flags live-in
      // and queue those up for processing.
      for (MachineBasicBlock *SuccMBB : UseMBB.successors())
        if (SuccMBB->isLiveIn(X86::EFLAGS) &&
            VisitedBlocks.insert(SuccMBB).second)
          Blocks.push_back(SuccMBB);
    } while (!Blocks.empty());

    // Now rewrite the jumps that use the flags. These we handle specially
    // because if there are multiple jumps we'll have to do surgery on the CFG.
    // because if there are multiple jumps in a single basic block we'll have
    // to do surgery on the CFG.
    MachineBasicBlock *LastJmpMBB = nullptr;
    for (MachineInstr *JmpI : JmpIs) {
      // Past the first jump we need to split the blocks apart.
      if (JmpI != JmpIs.front())
      // Past the first jump within a basic block we need to split the blocks
      // apart.
      if (JmpI->getParent() == LastJmpMBB)
        splitBlock(*JmpI->getParent(), *JmpI, *TII);
      else
        LastJmpMBB = JmpI->getParent();

      rewriteCondJmp(TestMBB, TestPos, TestLoc, *JmpI, CondRegs);
    }
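
The heart of this change is the block worklist above: instead of scanning only the basic block containing the EFLAGS copy, the pass now follows successors where EFLAGS is live-in, and it insists that the test block dominates every block it rewrites (reporting a fatal error otherwise, since no PHI insertion is done). Below is a minimal, self-contained sketch of that traversal pattern. The Block struct and the dominates() and rewriteUsesIn() helpers are hypothetical stand-ins for MachineBasicBlock, MachineDominatorTree::dominates(), and the per-block rewrite loop; they are not the pass's real API.

// Hypothetical stand-in types: just enough structure to show the traversal.
#include <cstdio>
#include <unordered_set>
#include <vector>

struct Block {
  const char *Name;
  std::vector<Block *> Successors;
  bool EFlagsLiveIn = false; // Would be MBB->isLiveIn(X86::EFLAGS).
  bool KillsFlags = false;   // Some instruction here clobbers EFLAGS.
};

// Stand-in for MachineDominatorTree::dominates(); always true in this sketch.
static bool dominates(const Block &Test, const Block &Use) {
  (void)Test;
  (void)Use;
  return true;
}

// Stand-in for the per-block scan that rewrites CMOVs, SETccs, jumps, and
// arithmetic uses of the copied flags. Returns true if the flags died here.
static bool rewriteUsesIn(Block &UseBlock) {
  std::printf("rewriting EFLAGS uses in %s\n", UseBlock.Name);
  return UseBlock.KillsFlags;
}

// Depth-first worklist over the blocks an EFLAGS copy can reach, mirroring
// the Blocks/VisitedBlocks loop added to X86FlagsCopyLoweringPass above.
static void scanCopyUses(Block &TestBlock, Block &CopyBlock) {
  std::vector<Block *> Worklist{&CopyBlock};
  std::unordered_set<Block *> Visited{&CopyBlock};

  while (!Worklist.empty()) {
    Block &UseBlock = *Worklist.back();
    Worklist.pop_back();

    // No PHI insertion is done, so the test block must dominate every block
    // that gets rewritten; the real pass emits a fatal error otherwise.
    if (&TestBlock != &UseBlock && !dominates(TestBlock, UseBlock)) {
      std::printf("error: %s not dominated by the test block\n", UseBlock.Name);
      return;
    }

    // Once an instruction clobbers the flags, the copied value is dead and,
    // as in the pass at this revision, the scan stops.
    if (rewriteUsesIn(UseBlock))
      break;

    // Otherwise the flags may flow out of this block: queue each successor
    // where EFLAGS is live-in and not yet visited.
    for (Block *Succ : UseBlock.Successors)
      if (Succ->EFlagsLiveIn && Visited.insert(Succ).second)
        Worklist.push_back(Succ);
  }
}

int main() {
  // A shape roughly like the PR37100 test: the flags set in a header block
  // are still needed in blocks reached later through the CFG.
  Block Header{"bb1"}, Store{"bb1.store"}, Div{"bb1.div"};
  Store.EFlagsLiveIn = true;
  Div.EFlagsLiveIn = true;
  Div.KillsFlags = true;
  Header.Successors = {&Store};
  Store.Successors = {&Div};
  scanCopyUses(Header, Header);
  return 0;
}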
+1 −0
@@ -37,6 +37,7 @@
; CHECK-NEXT:       X86 PIC Global Base Reg Initialization
; CHECK-NEXT:       Expand ISel Pseudo-instructions
; CHECK-NEXT:       Local Stack Slot Allocation
; CHECK-NEXT:       MachineDominator Tree Construction
; CHECK-NEXT:       X86 EFLAGS copy lowering
; CHECK-NEXT:       X86 WinAlloca Expander
; CHECK-NEXT:       Eliminate PHI nodes for register allocation
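
This pipeline test only gains one line: because the pass now declares addRequired<MachineDominatorTree>(), the dominator tree construction is scheduled immediately before "X86 EFLAGS copy lowering". The RUN line is not part of this hunk; pipeline-structure tests of this kind are typically driven by something along these lines (the triple and -O level here are illustrative assumptions, not copied from the test):

; RUN: llc -mtriple=x86_64-- -O0 -debug-pass=Structure < %s -o /dev/null 2>&1 | FileCheck %s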
+108 −0
@@ -196,3 +196,111 @@ else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2) {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ebp
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %ebp
; X32-NEXT:    negl %ebp
; X32-NEXT:    testb $-1, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebx)
; X32-NEXT:    movl (%edi), %edx
; X32-NEXT:    testb $-1, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %ebp
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    jmp .LBB3_1
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_5: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %sil
; X64-NEXT:    negl %esi
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    jl .LBB3_3
; X64-NEXT:  # %bb.2: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl %ecx, %edi
; X64-NEXT:  .LBB3_3: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    jl .LBB3_5
; X64-NEXT:  # %bb.4: # %bb1
; X64-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X64-NEXT:    movl (%r9), %esi
; X64-NEXT:    jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 0, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}
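
The new @PR37100 function exercises the cross-block rewrite: %tmp6 feeds two selects (with a volatile store and load between them), so after select lowering into control flow the flags from the icmp are still needed in later blocks of the loop. The test's RUN lines sit at the top of the file, outside this hunk; for check prefixes named X32 and X64 they are usually of roughly this form (triples and flags are assumptions, not copied from the file), and assembly CHECK bodies like the ones above are normally generated with utils/update_llc_test_checks.py rather than written by hand:

; RUN: llc -mtriple=i686-unknown-unknown %s -o - | FileCheck %s --check-prefix=X32
; RUN: llc -mtriple=x86_64-unknown-unknown %s -o - | FileCheck %s --check-prefix=X64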