Commit 8d367ae1 authored by Chandler Carruth
Browse files

Merge r332389 to pick up the fix for PR37431, a regression with the new EFLAGS lowering.

Required switching $regname to %regname in the MIR test and regenerating the
CHECKs for the other test.

llvm-svn: 332940
parent 7f374dc9
Loading
Loading
Loading
Loading
+142 −3
Original line number Diff line number Diff line
@@ -127,6 +127,10 @@ private:
                      MachineInstr &JmpI, CondRegArray &CondRegs);
  void rewriteCopy(MachineInstr &MI, MachineOperand &FlagUse,
                   MachineInstr &CopyDefI);
  /// Rewrite a SETB_C* pseudo (\p SetBI) that uses the copied flags into a
  /// subtraction of the saved carry condition from zero, then erase the
  /// pseudo. The X86::COND_B entry of \p CondRegs is reused when present and
  /// otherwise filled in via promoteCondToReg using \p TestMBB, \p TestPos and
  /// \p TestLoc.
  void rewriteSetCarryExtended(MachineBasicBlock &TestMBB,
                               MachineBasicBlock::iterator TestPos,
                               DebugLoc TestLoc, MachineInstr &SetBI,
                               MachineOperand &FlagUse, CondRegArray &CondRegs);
  void rewriteSetCC(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &SetCCI, MachineOperand &FlagUse,
@@ -511,8 +515,7 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
          rewriteCopy(MI, *FlagUse, CopyDefI);
        } else {
          // We assume that arithmetic instructions that use flags also def
          // them.
          // We assume all other instructions that use flags also def them.
          assert(MI.findRegisterDefOperand(X86::EFLAGS) &&
                 "Expected a def of EFLAGS for this instruction!");

@@ -524,7 +527,23 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
          // logic.
          FlagsKilled = true;

          rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
          switch (MI.getOpcode()) {
          case X86::SETB_C8r:
          case X86::SETB_C16r:
          case X86::SETB_C32r:
          case X86::SETB_C64r:
            // Use custom lowering for arithmetic that is merely extending the
            // carry flag. We model this as the SETB_C* pseudo instructions.
            rewriteSetCarryExtended(TestMBB, TestPos, TestLoc, MI, *FlagUse,
                                    CondRegs);
            break;

          default:
            // Generically handle remaining uses as arithmetic instructions.
            rewriteArithmetic(TestMBB, TestPos, TestLoc, MI, *FlagUse,
                              CondRegs);
            break;
          }
          break;
        }

@@ -756,6 +775,126 @@ void X86FlagsCopyLoweringPass::rewriteCopy(MachineInstr &MI,
  MI.eraseFromParent();
}

/// Rewrite a SETB_C* pseudo that consumes the copied EFLAGS so that it is
/// computed from a saved condition register instead.
///
/// The carry condition (X86::COND_B) is taken from \p CondRegs; when no
/// register has been recorded for it yet, one is created with promoteCondToReg
/// at \p TestPos in \p TestMBB (using \p TestLoc) and cached in \p CondRegs.
/// The replacement value is `0 - cond`, computed at the width of the pseudo's
/// result register class. All uses of the pseudo's result are redirected to
/// that value and \p SetBI is erased.
///
/// \p FlagUse is not referenced by this routine.
void X86FlagsCopyLoweringPass::rewriteSetCarryExtended(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &SetBI, MachineOperand &FlagUse,
    CondRegArray &CondRegs) {
  // This routine is only used to handle pseudos for setting a register to zero
  // or all ones based on CF. This is essentially the sign extended from 1-bit
  // form of SETB and modeled with the SETB_C* pseudos. They require special
  // handling as they aren't normal SETcc instructions and are lowered to an
  // EFLAGS clobbering operation (SBB typically). One simplifying aspect is that
  // they are only provided in reg-defining forms. A complicating factor is that
  // they can define many different register widths.
  assert(SetBI.getOperand(0).isReg() &&
         "Cannot have a non-register defined operand to this variant of SETB!");

  // Little helper to do the common final step of replacing the register def'ed
  // by this SETB instruction with a new register and removing the SETB
  // instruction.
  auto RewriteToReg = [&](unsigned Reg) {
    MRI->replaceRegWith(SetBI.getOperand(0).getReg(), Reg);
    SetBI.eraseFromParent();
  };

  // Grab the register class used for this particular instruction.
  auto &SetBRC = *MRI->getRegClass(SetBI.getOperand(0).getReg());

  // Every replacement instruction below is built at the position of the SETB_C*
  // pseudo itself, in its own block, and carries its debug location.
  MachineBasicBlock &MBB = *SetBI.getParent();
  auto SetPos = SetBI.getIterator();
  auto SetLoc = SetBI.getDebugLoc();

  // Helper producing a register that holds the value of `Reg` in the register
  // class of the SETB_C* result (`SetBRC`), emitting whatever conversion is
  // needed at `SetPos`. Returns `Reg` itself when the classes already match
  // (compared by identity).
  auto AdjustReg = [&](unsigned Reg) {
    auto &OrigRC = *MRI->getRegClass(Reg);
    if (&OrigRC == &SetBRC)
      return Reg;

    unsigned NewReg;

    // Both sizes are expressed in bytes from here on.
    int OrigRegSize = TRI->getRegSizeInBits(OrigRC) / 8;
    int TargetRegSize = TRI->getRegSizeInBits(SetBRC) / 8;
    assert(OrigRegSize <= 8 && "No GPRs larger than 64-bits!");
    assert(TargetRegSize <= 8 && "No GPRs larger than 64-bits!");
    // Sub-register index table indexed by register size in bytes. Slots 0 and
    // 3 are placeholders for sizes that have no sub-register index.
    int SubRegIdx[] = {X86::NoSubRegister, X86::sub_8bit, X86::sub_16bit,
                       X86::NoSubRegister, X86::sub_32bit};

    // If the original size is smaller than the target *and* is smaller than 4
    // bytes, we need to explicitly zero extend it. We always extend to 4-bytes
    // to maximize the chance of being able to CSE that operation and to avoid
    // partial dependency stalls extending to 2-bytes.
    // NOTE(review): MOVZX32rr8 takes an 8-bit source, so this path presumably
    // relies on every sub-4-byte input being 8 bits wide -- confirm that no
    // 16-bit register can reach here.
    if (OrigRegSize < TargetRegSize && OrigRegSize < 4) {
      NewReg = MRI->createVirtualRegister(&X86::GR32RegClass);
      BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOVZX32rr8), NewReg)
          .addReg(Reg);
      // The zero-extended value is already in the requested class when the
      // target is GR32, so no further conversion is needed.
      if (&SetBRC == &X86::GR32RegClass)
        return NewReg;
      // Otherwise continue below, treating the extended value as a 4-byte
      // input.
      Reg = NewReg;
      OrigRegSize = 4;
    }

    NewReg = MRI->createVirtualRegister(&SetBRC);
    if (OrigRegSize < TargetRegSize) {
      // Widen: place the narrower register into the sub-register (selected by
      // its size in bytes) of a new register of the target class.
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::SUBREG_TO_REG),
              NewReg)
          .addImm(0)
          .addReg(Reg)
          .addImm(SubRegIdx[OrigRegSize]);
    } else if (OrigRegSize > TargetRegSize) {
      // Narrow: take the sub-register of the target size out of the wider
      // register.
      // NOTE(review): EXTRACT_SUBREG is, as far as I know, normally lowered
      // during instruction selection; confirm that emitting it at this point
      // in the pipeline is valid (a COPY from a sub-register may be the
      // expected form).
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::EXTRACT_SUBREG),
              NewReg)
          .addReg(Reg)
          .addImm(SubRegIdx[TargetRegSize]);
    } else {
      // Same size but a different register class: a plain copy suffices.
      BuildMI(MBB, SetPos, SetLoc, TII->get(TargetOpcode::COPY), NewReg)
          .addReg(Reg);
    }
    return NewReg;
  };

  // Look up the register holding the "below" (carry) condition, creating and
  // caching it through the reference into CondRegs if there is none yet.
  unsigned &CondReg = CondRegs[X86::COND_B];
  if (!CondReg)
    CondReg = promoteCondToReg(TestMBB, TestPos, TestLoc, X86::COND_B);

  // Adjust the condition to have the desired register width by zero-extending
  // as needed.
  // FIXME: We should use a better API to avoid the local reference and using a
  // different variable here.
  unsigned ExtCondReg = AdjustReg(CondReg);

  // Now we need to turn this into a bitmask. We do this by subtracting it from
  // zero.
  // The zero is always created as a 32-bit register with MOV32r0 and then
  // converted to the result width with the same helper used for the condition.
  unsigned ZeroReg = MRI->createVirtualRegister(&X86::GR32RegClass);
  BuildMI(MBB, SetPos, SetLoc, TII->get(X86::MOV32r0), ZeroReg);
  ZeroReg = AdjustReg(ZeroReg);

  // Select the register-register SUB opcode whose width matches the pseudo.
  unsigned Sub;
  switch (SetBI.getOpcode()) {
  case X86::SETB_C8r:
    Sub = X86::SUB8rr;
    break;

  case X86::SETB_C16r:
    Sub = X86::SUB16rr;
    break;

  case X86::SETB_C32r:
    Sub = X86::SUB32rr;
    break;

  case X86::SETB_C64r:
    Sub = X86::SUB64rr;
    break;

  default:
    llvm_unreachable("Invalid SETB_C* opcode!");
  }
  // Emit `zero - cond` into a fresh register of the pseudo's class, then
  // redirect all uses of the pseudo's result to it and erase the pseudo.
  unsigned ResultReg = MRI->createVirtualRegister(&SetBRC);
  BuildMI(MBB, SetPos, SetLoc, TII->get(Sub), ResultReg)
      .addReg(ZeroReg)
      .addReg(ExtCondReg);
  return RewriteToReg(ResultReg);
}

void X86FlagsCopyLoweringPass::rewriteSetCC(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
+58 −0
Original line number Diff line number Diff line
@@ -304,3 +304,61 @@ bb1:
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use an instruction pattern that lowers to the post-RA pseudo which expands
; SETB into an SBB pattern, to make sure that this kind of use of a copied
; EFLAGS continues to work.
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3) {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %esi, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    movl %eax, %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    cmpl %eax, %eax
; X32-NEXT:    sbbl %ecx, %eax
; X32-NEXT:    setb %al
; X32-NEXT:    sbbb %cl, %cl
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movb %cl, (%edx)
; X32-NEXT:    movzbl %al, %eax
; X32-NEXT:    xorl %ecx, %ecx
; X32-NEXT:    subl %eax, %ecx
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    idivl %ecx
; X32-NEXT:    movb %dl, (%esi)
; X32-NEXT:    popl %esi
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rax
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    sbbb %dl, %dl
; X64-NEXT:    cmpq %rax, %rax
; X64-NEXT:    movb %dl, (%rsi)
; X64-NEXT:    sbbl %esi, %esi
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    idivl %esi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 0, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}
+71 −0
Original line number Diff line number Diff line
@@ -66,6 +66,12 @@
    call void @foo()
    ret void
  }

  define void @test_setb_c(i64 %a, i64 %b) {
  entry:
    call void @foo()
    ret void
  }
...
---
name:            test_branch
@@ -482,3 +488,68 @@ body: |
    RET 0

...
---
name:            test_setb_c
# CHECK-LABEL: name: test_setb_c
liveins:
  - { reg: '%rdi', virtual-reg: '%0' }
  - { reg: '%rsi', virtual-reg: '%1' }
body:             |
  bb.0:
    liveins: %rdi, %rsi

    %0:gr64 = COPY %rdi
    %1:gr64 = COPY %rsi
    %2:gr64 = ADD64rr %0, %1, implicit-def %eflags
    %3:gr64 = COPY %eflags
  ; CHECK-NOT:  COPY{{( killed)?}} %eflags
  ; CHECK:      %[[CF_REG:[^:]*]]:gr8 = SETBr implicit %eflags
  ; CHECK-NOT:  COPY{{( killed)?}} %eflags

    ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp
    CALL64pcrel32 @foo, csr_64, implicit %rsp, implicit %ssp, implicit %rdi, implicit-def %rsp, implicit-def %ssp, implicit-def %eax
    ADJCALLSTACKUP64 0, 0, implicit-def dead %rsp, implicit-def dead %eflags, implicit-def dead %ssp, implicit %rsp, implicit %ssp

    %eflags = COPY %3
    %4:gr8 = SETB_C8r implicit-def %eflags, implicit %eflags
    MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %4
  ; CHECK-NOT:     %eflags =
  ; CHECK:         %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr8 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_8bit
  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr8 = SUB8rr %[[ZERO_SUBREG]], %[[CF_REG]]
  ; CHECK-NEXT:    MOV8mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

    %eflags = COPY %3
    %5:gr16 = SETB_C16r implicit-def %eflags, implicit %eflags
    MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %5
  ; CHECK-NOT:     %eflags =
  ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
  ; CHECK-NEXT:    %[[CF_TRUNC:[^:]*]]:gr16 = EXTRACT_SUBREG %[[CF_EXT]], %subreg.sub_16bit
  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
  ; CHECK-NEXT:    %[[ZERO_SUBREG:[^:]*]]:gr16 = EXTRACT_SUBREG %[[ZERO]], %subreg.sub_16bit
  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr16 = SUB16rr %[[ZERO_SUBREG]], %[[CF_TRUNC]]
  ; CHECK-NEXT:    MOV16mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

    %eflags = COPY %3
    %6:gr32 = SETB_C32r implicit-def %eflags, implicit %eflags
    MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %6
  ; CHECK-NOT:     %eflags =
  ; CHECK:         %[[CF_EXT:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr32 = SUB32rr %[[ZERO]], %[[CF_EXT]]
  ; CHECK-NEXT:    MOV32mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

    %eflags = COPY %3
    %7:gr64 = SETB_C64r implicit-def %eflags, implicit %eflags
    MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %7
  ; CHECK-NOT:     %eflags =
  ; CHECK:         %[[CF_EXT1:[^:]*]]:gr32 = MOVZX32rr8 %[[CF_REG]]
  ; CHECK-NEXT:    %[[CF_EXT2:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[CF_EXT1]], %subreg.sub_32bit
  ; CHECK-NEXT:    %[[ZERO:[^:]*]]:gr32 = MOV32r0 implicit-def %eflags
  ; CHECK-NEXT:    %[[ZERO_EXT:[^:]*]]:gr64 = SUBREG_TO_REG 0, %[[ZERO]], %subreg.sub_32bit
  ; CHECK-NEXT:    %[[REPLACEMENT:[^:]*]]:gr64 = SUB64rr %[[ZERO_EXT]], %[[CF_EXT2]]
  ; CHECK-NEXT:    MOV64mr %rsp, 1, %noreg, -16, %noreg, killed %[[REPLACEMENT]]

    RET 0

...