Commit 3fb5b18e authored by Mogball

Revert 24633eac and 760e7d00 "Enable FoldImmediate for X86"

This reverts commits 24633eac
and 760e7d00.

I have confirmed that these commits introduce a new crash in the
peephole optimizer. I have minimized a test case, which you can find
below.

```llvm
; ModuleID = 'bugpoint-reduced-simplified.bc'
source_filename = "/mnt/big/modular/Kernels/mojo/Mogg/MOGG.mojo"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

declare dso_local void @foo({ { ptr, [4 x i64], [4 x i64], i1 }, { ptr, [4 x i64], [4 x i64], i1 } }, { ptr }, { ptr, i64, i8 })

define dso_local void @bad_fn(ptr %0, ptr %1, ptr %2) {
  %4 = load i64, ptr null, align 8
  %5 = insertvalue [4 x i64] poison, i64 12, 1
  %6 = insertvalue [4 x i64] %5, i64 poison, 2
  %7 = insertvalue [4 x i64] %6, i64 poison, 3
  %8 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } poison, [4 x i64] %7, 1
  %9 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } %8, [4 x i64] poison, 2
  %10 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } %9, i1 poison, 3
  %11 = icmp ne i64 %4, 1
  %12 = or i1 false, %11
  %13 = select i1 %12, i64 %4, i64 0
  %14 = zext i1 %12 to i64
  %15 = insertvalue [4 x i64] poison, i64 12, 1
  %16 = insertvalue [4 x i64] %15, i64 poison, 2
  %17 = insertvalue [4 x i64] %16, i64 %13, 3
  %18 = insertvalue [4 x i64] poison, i64 %14, 3
  %19 = icmp eq i64 0, 0
  %20 = icmp eq i64 0, 0
  %21 = icmp eq i64 %13, 0
  %22 = and i1 %20, %19
  %23 = select i1 %22, i1 %21, i1 false
  %24 = select i1 %23, i1 %12, i1 false
  %25 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } poison, [4 x i64] %17, 1
  %26 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } %25, [4 x i64] %18, 2
  %27 = insertvalue { ptr, [4 x i64], [4 x i64], i1 } %26, i1 %24, 3
  %28 = insertvalue { { ptr, [4 x i64], [4 x i64], i1 }, { ptr, [4 x i64], [4 x i64], i1 } } undef, { ptr, [4 x i64], [4 x i64], i1 } %10, 0
  %29 = insertvalue { { ptr, [4 x i64], [4 x i64], i1 }, { ptr, [4 x i64], [4 x i64], i1 } } %28, { ptr, [4 x i64], [4 x i64], i1 } %27, 1
  br label %31

30:                                               ; preds = %3
  br label %softmax_pass

31:                                               ; preds = %31
  %exitcond.not.i = icmp eq i64 poison, 3
  br i1 %exitcond.not.i, label %37, label %31

32:                                               ; preds = %31
  br i1 poison, label %34, label %33

33:                                               ; preds = %32
  br label %34

34:                                               ; preds = %33, %32
  br i1 poison, label %35, label %36

35:                                               ; preds = %34
  br label %softmax_pass

36:                                               ; preds = %34
  br i1 poison, label %37, label %.critedge.i

37:                                               ; preds = %36
  br i1 poison, label %38, label %.critedge.i

38:                                               ; preds = %37
  br i1 poison, label %40, label %39

39:                                               ; preds = %38
  br label %40

40:                                               ; preds = %39, %38
  br i1 poison, label %.lr.ph28.i, label %._crit_edge.i

.lr.ph28.i:                                       ; preds = %40
  br label %41

41:                                               ; preds = %51, %.lr.ph28.i
  br i1 poison, label %.thread, label %42

42:                                               ; preds = %41
  br i1 poison, label %43, label %44

43:                                               ; preds = %42
  br label %45

44:                                               ; preds = %42
  br label %45

45:                                               ; preds = %44, %43
  br i1 poison, label %46, label %.thread

46:                                               ; preds = %45
  br label %47

.thread:                                          ; preds = %45, %41
  br label %47

47:                                               ; preds = %.thread, %46
  br i1 poison, label %51, label %48

48:                                               ; preds = %47
  br i1 poison, label %49, label %50

49:                                               ; preds = %48
  br label %51

50:                                               ; preds = %48
  br label %51

51:                                               ; preds = %50, %49, %47
  call void @foo({ { ptr, [4 x i64], [4 x i64], i1 }, { ptr, [4 x i64], [4 x i64], i1 } } %29, { ptr } poison, { ptr, i64, i8 } poison)
  br i1 poison, label %._crit_edge.i, label %41

._crit_edge.i:                                    ; preds = %51, %40
  br label %softmax_pass

.critedge.i:                                      ; preds = %37, %36
  br i1 poison, label %.lr.ph.i, label %softmax_pass

.lr.ph.i:                                         ; preds = %.lr.ph.i, %.critedge.i
  store { ptr, [4 x i64], [4 x i64], i1 } %10, ptr poison, align 8
  br i1 poison, label %.lr.ph.i, label %softmax_pass

softmax_pass:                                     ; preds = %.lr.ph.i, %.critedge.i, %._crit_edge.i, %35, %30
  ret void
}
```
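For reference, a minimal reproduction sketch, assuming the crash triggers in llc's default codegen pipeline on a tree that still contains the reverted commits (the exact invocation used for the original reproduction is not recorded in this message):

```llvm
; Hypothetical RUN line for the reduced test case above; the flags are an
; assumption, not taken from the original report.
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -O2 %s -o /dev/null
```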
parent 3fe2be75
+16 −46
@@ -202,8 +202,7 @@ namespace {
    bool isMoveImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                         DenseMap<Register, MachineInstr *> &ImmDefMIs);
    bool foldImmediate(MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
                       DenseMap<Register, MachineInstr *> &ImmDefMIs,
                       bool &Deleted);
                       DenseMap<Register, MachineInstr *> &ImmDefMIs);

    /// Finds recurrence cycles, but only ones that formulated around
    /// a def operand and a use operand that are tied. If there is a use
@@ -218,11 +217,8 @@ namespace {
    /// set \p CopyMIs. If this virtual register was previously seen as a
    /// copy, replace the uses of this copy with the previously seen copy's
    /// destination register.
    /// \p LocalMIs contains all previous seen instructions. An optimized away
    /// instruction should be deleted from LocalMIs.
    bool foldRedundantCopy(MachineInstr &MI,
                           DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs,
                           SmallPtrSetImpl<MachineInstr *> &LocalMIs);
                           DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs);

    /// Is the register \p Reg a non-allocatable physical register?
    bool isNAPhysCopy(Register Reg);
@@ -1355,28 +1351,26 @@ bool PeepholeOptimizer::isMoveImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs) {
  const MCInstrDesc &MCID = MI.getDesc();
  if (MCID.getNumDefs() != 1 || !MI.getOperand(0).isReg())
    return false;
  Register Reg = MI.getOperand(0).getReg();
  if (!Reg.isVirtual())
  if (!MI.isMoveImmediate())
    return false;

  int64_t ImmVal;
  if (!MI.isMoveImmediate() && !TII->getConstValDefinedInReg(MI, Reg, ImmVal))
  if (MCID.getNumDefs() != 1)
    return false;

  Register Reg = MI.getOperand(0).getReg();
  if (Reg.isVirtual()) {
    ImmDefMIs.insert(std::make_pair(Reg, &MI));
    ImmDefRegs.insert(Reg);
    return true;
  }

  return false;
}

/// Try folding register operands that are defined by move immediate
/// instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
bool PeepholeOptimizer::foldImmediate(
    MachineInstr &MI, SmallSet<Register, 4> &ImmDefRegs,
    DenseMap<Register, MachineInstr *> &ImmDefMIs, bool &Deleted) {
  Deleted = false;
    DenseMap<Register, MachineInstr *> &ImmDefMIs) {
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isDef())
@@ -1390,19 +1384,6 @@ bool PeepholeOptimizer::foldImmediate(
    assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
    if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
      ++NumImmFold;
      // FoldImmediate can delete ImmDefMI if MI was its only user. If ImmDefMI
      // is not deleted, and we happened to get a same MI, we can delete MI and
      // replace its users.
      if (MRI->getVRegDef(Reg) &&
          MI.isIdenticalTo(*II->second, MachineInstr::IgnoreVRegDefs)) {
        Register DstReg = MI.getOperand(0).getReg();
        if (DstReg.isVirtual() &&
            MRI->getRegClass(DstReg) == MRI->getRegClass(Reg)) {
          MRI->replaceRegWith(DstReg, Reg);
          MI.eraseFromParent();
          Deleted = true;
        }
      }
      return true;
    }
  }
@@ -1424,8 +1405,7 @@ bool PeepholeOptimizer::foldImmediate(
//
// Should replace %2 uses with %1:sub1
bool PeepholeOptimizer::foldRedundantCopy(
    MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs,
    SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    MachineInstr &MI, DenseMap<RegSubRegPair, MachineInstr *> &CopyMIs) {
  assert(MI.isCopy() && "expected a COPY machine instruction");

  Register SrcReg = MI.getOperand(1).getReg();
@@ -1445,10 +1425,6 @@ bool PeepholeOptimizer::foldRedundantCopy(
  }

  MachineInstr *PrevCopy = CopyMIs.find(SrcPair)->second;
  // A COPY instruction can be deleted or changed by other optimizations.
  // Check if the previous COPY instruction is existing and still a COPY.
  if (!LocalMIs.count(PrevCopy) || !PrevCopy->isCopy())
    return false;

  assert(SrcSubReg == PrevCopy->getOperand(1).getSubReg() &&
         "Unexpected mismatching subreg!");
@@ -1756,7 +1732,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
        continue;
      }

      if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs, LocalMIs) ||
      if (MI->isCopy() && (foldRedundantCopy(*MI, CopySrcMIs) ||
                           foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
        LocalMIs.erase(MI);
        LLVM_DEBUG(dbgs() << "Deleting redundant copy: " << *MI << "\n");
@@ -1774,14 +1750,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm) {
          bool Deleted;
          Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs, Deleted);
          if (Deleted) {
            LocalMIs.erase(MI);
            continue;
          }
        }
        if (SeenMoveImm)
          Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
+3 −337
@@ -3867,42 +3867,12 @@ bool X86InstrInfo::verifyInstruction(const MachineInstr &MI,
bool X86InstrInfo::getConstValDefinedInReg(const MachineInstr &MI,
                                           const Register Reg,
                                           int64_t &ImmVal) const {
  Register MovReg = Reg;
  const MachineInstr *MovMI = &MI;

  // Follow use-def for SUBREG_TO_REG to find the real move immediate
  // instruction. It is quite common for x86-64.
  if (MI.isSubregToReg()) {
    // We use following pattern to setup 64b immediate.
    //      %8:gr32 = MOV32r0 implicit-def dead $eflags
    //      %6:gr64 = SUBREG_TO_REG 0, killed %8:gr32, %subreg.sub_32bit
    if (!MI.getOperand(1).isImm())
      return false;
    unsigned FillBits = MI.getOperand(1).getImm();
    unsigned SubIdx = MI.getOperand(3).getImm();
    MovReg = MI.getOperand(2).getReg();
    if (SubIdx != X86::sub_32bit || FillBits != 0)
      return false;
    const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
    MovMI = MRI.getUniqueVRegDef(MovReg);
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() == X86::MOV32r0 &&
      MovMI->getOperand(0).getReg() == MovReg) {
    ImmVal = 0;
    return true;
  }

  if (MovMI->getOpcode() != X86::MOV32ri &&
      MovMI->getOpcode() != X86::MOV64ri &&
      MovMI->getOpcode() != X86::MOV32ri64 && MovMI->getOpcode() != X86::MOV8ri)
  if (MI.getOpcode() != X86::MOV32ri && MI.getOpcode() != X86::MOV64ri)
    return false;
  // Mov Src can be a global address.
  if (!MovMI->getOperand(1).isImm() || MovMI->getOperand(0).getReg() != MovReg)
  if (!MI.getOperand(1).isImm() || MI.getOperand(0).getReg() != Reg)
    return false;
  ImmVal = MovMI->getOperand(1).getImm();
  ImmVal = MI.getOperand(1).getImm();
  return true;
}

@@ -4799,310 +4769,6 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
  return nullptr;
}

/// Convert an ALUrr opcode to corresponding ALUri opcode. Such as
///     ADD32rr  ==>  ADD32ri
/// ShiftRotate will be set to true if the Opcode is shift or rotate.
/// If the ALUri can be further changed to COPY when the immediate is 0, set
/// CanConvert2Copy to true.
static unsigned ConvertALUrr2ALUri(unsigned Opcode, bool &CanConvert2Copy,
                                   bool &ShiftRotate) {
  CanConvert2Copy = false;
  ShiftRotate = false;
  unsigned NewOpcode = 0;
  switch (Opcode) {
    case X86::ADD64rr:
      NewOpcode = X86::ADD64ri32;
      CanConvert2Copy = true;
      break;
    case X86::ADC64rr:
      NewOpcode = X86::ADC64ri32;
      break;
    case X86::SUB64rr:
      NewOpcode = X86::SUB64ri32;
      CanConvert2Copy = true;
      break;
    case X86::SBB64rr:
      NewOpcode = X86::SBB64ri32;
      break;
    case X86::AND64rr:
      NewOpcode = X86::AND64ri32;
      break;
    case X86::OR64rr:
      NewOpcode = X86::OR64ri32;
      CanConvert2Copy = true;
      break;
    case X86::XOR64rr:
      NewOpcode = X86::XOR64ri32;
      CanConvert2Copy = true;
      break;
    case X86::TEST64rr:
      NewOpcode = X86::TEST64ri32;
      break;
    case X86::CMP64rr:
      NewOpcode = X86::CMP64ri32;
      break;
    case X86::SHR64rCL:
      NewOpcode = X86::SHR64ri;
      ShiftRotate = true;
      break;
    case X86::SHL64rCL:
      NewOpcode = X86::SHL64ri;
      ShiftRotate = true;
      break;
    case X86::SAR64rCL:
      NewOpcode = X86::SAR64ri;
      ShiftRotate = true;
      break;
    case X86::ROL64rCL:
      NewOpcode = X86::ROL64ri;
      ShiftRotate = true;
      break;
    case X86::ROR64rCL:
      NewOpcode = X86::ROR64ri;
      ShiftRotate = true;
      break;
    case X86::RCL64rCL:
      NewOpcode = X86::RCL64ri;
      ShiftRotate = true;
      break;
    case X86::RCR64rCL:
      NewOpcode = X86::RCR64ri;
      ShiftRotate = true;
      break;
    case X86::ADD32rr:
      NewOpcode = X86::ADD32ri;
      CanConvert2Copy = true;
      break;
    case X86::ADC32rr:
      NewOpcode = X86::ADC32ri;
      break;
    case X86::SUB32rr:
      NewOpcode = X86::SUB32ri;
      CanConvert2Copy = true;
      break;
    case X86::SBB32rr:
      NewOpcode = X86::SBB32ri;
      break;
    case X86::AND32rr:
      NewOpcode = X86::AND32ri;
      break;
    case X86::OR32rr:
      NewOpcode = X86::OR32ri;
      CanConvert2Copy = true;
      break;
    case X86::XOR32rr:
      NewOpcode = X86::XOR32ri;
      CanConvert2Copy = true;
      break;
    case X86::TEST32rr:
      NewOpcode = X86::TEST32ri;
      break;
    case X86::CMP32rr:
      NewOpcode = X86::CMP32ri;
      break;
    case X86::SHR32rCL:
      NewOpcode = X86::SHR32ri;
      ShiftRotate = true;
      break;
    case X86::SHL32rCL:
      NewOpcode = X86::SHL32ri;
      ShiftRotate = true;
      break;
    case X86::SAR32rCL:
      NewOpcode = X86::SAR32ri;
      ShiftRotate = true;
      break;
    case X86::ROL32rCL:
      NewOpcode = X86::ROL32ri;
      ShiftRotate = true;
      break;
    case X86::ROR32rCL:
      NewOpcode = X86::ROR32ri;
      ShiftRotate = true;
      break;
    case X86::RCL32rCL:
      NewOpcode = X86::RCL32ri;
      ShiftRotate = true;
      break;
    case X86::RCR32rCL:
      NewOpcode = X86::RCR32ri;
      ShiftRotate = true;
      break;
  }
  return NewOpcode;
}

/// Real implementation of FoldImmediate.
/// Reg is assigned ImmVal in DefMI, and is used in UseMI.
/// If MakeChange is true, this function tries to replace Reg by ImmVal in
/// UseMI. If MakeChange is false, just check if folding is possible.
/// Return true if folding is successful or possible.
bool X86InstrInfo::FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI,
                                     Register Reg, int64_t ImmVal,
                                     MachineRegisterInfo *MRI,
                                     bool MakeChange) const {
  bool Modified = false;
  bool ShiftRotate = false;
  // When ImmVal is 0, some instructions can be changed to COPY.
  bool CanChangeToCopy = false;
  unsigned Opc = UseMI.getOpcode();

  // 64 bit operations accept sign extended 32 bit immediates.
  // 32 bit operations accept all 32 bit immediates, so we don't need to check
  // them.
  const TargetRegisterClass *RC = nullptr;
  if (Reg.isVirtual())
    RC = MRI->getRegClass(Reg);
  if ((Reg.isPhysical() && X86::GR64RegClass.contains(Reg)) ||
      (Reg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC))) {
    if (!isInt<32>(ImmVal))
      return false;
  }

  if (UseMI.findRegisterUseOperand(Reg)->getSubReg())
    return false;
  // Immediate has larger code size than register. So avoid folding the
  // immediate if it has more than 1 use and we are optimizing for size.
  if (UseMI.getMF()->getFunction().hasOptSize() && Reg.isVirtual() &&
      !MRI->hasOneNonDBGUse(Reg))
    return false;

  unsigned NewOpc;
  if (Opc == TargetOpcode::COPY) {
    Register ToReg = UseMI.getOperand(0).getReg();
    const TargetRegisterClass *RC = nullptr;
    if (ToReg.isVirtual())
      RC = MRI->getRegClass(ToReg);
    bool GR32Reg = (ToReg.isVirtual() && X86::GR32RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR32RegClass.contains(ToReg));
    bool GR64Reg = (ToReg.isVirtual() && X86::GR64RegClass.hasSubClassEq(RC)) ||
                   (ToReg.isPhysical() && X86::GR64RegClass.contains(ToReg));
    bool GR8Reg = (ToReg.isVirtual() && X86::GR8RegClass.hasSubClassEq(RC)) ||
                  (ToReg.isPhysical() && X86::GR8RegClass.contains(ToReg));

    if (ImmVal == 0) {
      // We have MOV32r0 only.
      if (!GR32Reg)
        return false;
    }

    if (GR64Reg) {
      if (isUInt<32>(ImmVal))
        NewOpc = X86::MOV32ri64;
      else
        NewOpc = X86::MOV64ri;
    } else if (GR32Reg) {
      NewOpc = X86::MOV32ri;
      if (ImmVal == 0) {
        // MOV32r0 clobbers EFLAGS.
        const TargetRegisterInfo *TRI = &getRegisterInfo();
        if (UseMI.getParent()->computeRegisterLiveness(TRI, X86::EFLAGS, UseMI)
            != MachineBasicBlock::LQR_Dead)
          return false;

        // MOV32r0 is different than other cases because it doesn't encode the
        // immediate in the instruction. So we directly modify it here.
        if (!MakeChange)
          return true;
        UseMI.setDesc(get(X86::MOV32r0));
        UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));
        UseMI.addOperand(MachineOperand::CreateReg(X86::EFLAGS, /*isDef=*/ true,
                                                   /*isImp=*/ true,
                                                   /*isKill=*/ false,
                                                   /*isDead=*/ true));
        Modified = true;
      }
    } else if (GR8Reg)
      NewOpc = X86::MOV8ri;
    else
      return false;
  } else
    NewOpc = ConvertALUrr2ALUri(Opc, CanChangeToCopy, ShiftRotate);

  if (!NewOpc)
    return false;

  // For SUB instructions the immediate can only be the second source operand.
  if ((NewOpc == X86::SUB64ri32 || NewOpc == X86::SUB32ri ||
       NewOpc == X86::SBB64ri32 || NewOpc == X86::SBB32ri) &&
      UseMI.findRegisterUseOperandIdx(Reg) != 2)
    return false;
  // For CMP instructions the immediate can only be at index 1.
  if ((NewOpc == X86::CMP64ri32 || NewOpc == X86::CMP32ri) &&
      UseMI.findRegisterUseOperandIdx(Reg) != 1)
    return false;

  if (ShiftRotate) {
    unsigned RegIdx = UseMI.findRegisterUseOperandIdx(Reg);
    if (RegIdx < 2)
      return false;
    if (!isInt<8>(ImmVal))
      return false;
    assert(Reg == X86::CL);

    if (!MakeChange)
      return true;
    UseMI.setDesc(get(NewOpc));
    UseMI.removeOperand(RegIdx);
    UseMI.addOperand(MachineOperand::CreateImm(ImmVal));
    // Reg is physical register $cl, so we don't know if DefMI is dead through
    // MRI. Let the caller handle it, or pass dead-mi-elimination can delete
    // the dead physical register define instruction.
    return true;
  }

  if (!MakeChange)
    return true;

  if (!Modified) {
    // Modify the instruction.
    if (ImmVal == 0 && CanChangeToCopy &&
        UseMI.registerDefIsDead(X86::EFLAGS)) {
      //          %100 = add %101, 0
      //    ==>
      //          %100 = COPY %101
      UseMI.setDesc(get(TargetOpcode::COPY));
      UseMI.removeOperand(UseMI.findRegisterUseOperandIdx(Reg));
      UseMI.removeOperand(UseMI.findRegisterDefOperandIdx(X86::EFLAGS));
      UseMI.untieRegOperand(0);
      UseMI.clearFlag(MachineInstr::MIFlag::NoSWrap);
      UseMI.clearFlag(MachineInstr::MIFlag::NoUWrap);
    } else {
      unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
      unsigned ImmOpNum = 2;
      if (!UseMI.getOperand(0).isDef()) {
        Op1 = 0;                                      // TEST, CMP
        ImmOpNum = 1;
      }
      if (Opc == TargetOpcode::COPY)
        ImmOpNum = 1;
      if (findCommutedOpIndices(UseMI, Op1, Op2) &&
          UseMI.getOperand(Op1).getReg() == Reg)
        commuteInstruction(UseMI);

      assert(UseMI.getOperand(ImmOpNum).getReg() == Reg);
      UseMI.setDesc(get(NewOpc));
      UseMI.getOperand(ImmOpNum).ChangeToImmediate(ImmVal);
    }
  }

  if (Reg.isVirtual() && MRI->use_nodbg_empty(Reg))
    DefMI->eraseFromBundle();

  return true;
}

/// FoldImmediate - 'Reg' is known to be defined by a move immediate
/// instruction, try to fold the immediate into the use instruction.
bool X86InstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                 Register Reg, MachineRegisterInfo *MRI) const {
  int64_t ImmVal;
  if (!getConstValDefinedInReg(DefMI, Reg, ImmVal))
    return false;

  return FoldImmediateImpl(UseMI, &DefMI, Reg, ImmVal, MRI, true);
}

/// Expand a single-def pseudo instruction to a two-addr
/// instruction with two undef reads of the register being defined.
/// This is used for mapping:
+0 −9
@@ -550,15 +550,6 @@ public:
                                  Register &FoldAsLoadDefReg,
                                  MachineInstr *&DefMI) const override;

  bool FoldImmediateImpl(MachineInstr &UseMI, MachineInstr *DefMI, Register Reg,
                         int64_t ImmVal, MachineRegisterInfo *MRI,
                         bool MakeChange) const;

  /// Reg is known to be defined by a move immediate instruction, try to fold
  /// the immediate into the use instruction.
  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const override;

  std::pair<unsigned, unsigned>
  decomposeMachineOperandsTargetFlags(unsigned TF) const override;

+1 −0
@@ -8,6 +8,7 @@ body: |

    ; GCN-LABEL: name: fold_simm_virtual
    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
    ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
    ; GCN-NEXT: SI_RETURN_TO_EPILOG
    %0:sreg_32 = S_MOV_B32 0
    %1:sreg_32 = COPY killed %0
+12 −6
@@ -4,7 +4,8 @@
define i8 @test_i8(i32 %a, i8 %f, i8 %t) {
; ALL-LABEL: test_i8:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %ecx, %ecx
; ALL-NEXT:    cmpl %ecx, %edi
; ALL-NEXT:    setg %cl
; ALL-NEXT:    testb $1, %cl
; ALL-NEXT:    je .LBB0_2
@@ -34,7 +35,8 @@ cond.end: ; preds = %cond.false, %cond.t
define i16 @test_i16(i32 %a, i16 %f, i16 %t) {
; ALL-LABEL: test_i16:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %ecx, %ecx
; ALL-NEXT:    cmpl %ecx, %edi
; ALL-NEXT:    setg %cl
; ALL-NEXT:    testb $1, %cl
; ALL-NEXT:    je .LBB1_2
@@ -65,7 +67,8 @@ define i32 @test_i32(i32 %a, i32 %f, i32 %t) {
; ALL-LABEL: test_i32:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    movl %esi, %eax
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %ecx, %ecx
; ALL-NEXT:    cmpl %ecx, %edi
; ALL-NEXT:    setg %cl
; ALL-NEXT:    testb $1, %cl
; ALL-NEXT:    je .LBB2_1
@@ -93,7 +96,8 @@ define i64 @test_i64(i32 %a, i64 %f, i64 %t) {
; ALL-LABEL: test_i64:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    movq %rsi, %rax
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %ecx, %ecx
; ALL-NEXT:    cmpl %ecx, %edi
; ALL-NEXT:    setg %cl
; ALL-NEXT:    testb $1, %cl
; ALL-NEXT:    je .LBB3_1
@@ -120,7 +124,8 @@ cond.end: ; preds = %cond.false, %cond.t
define float @test_float(i32 %a, float %f, float %t) {
; ALL-LABEL: test_float:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %eax, %eax
; ALL-NEXT:    cmpl %eax, %edi
; ALL-NEXT:    setg %al
; ALL-NEXT:    testb $1, %al
; ALL-NEXT:    je .LBB4_1
@@ -147,7 +152,8 @@ cond.end: ; preds = %cond.false, %cond.t
define double @test_double(i32 %a, double %f, double %t) {
; ALL-LABEL: test_double:
; ALL:       # %bb.0: # %entry
; ALL-NEXT:    cmpl $0, %edi
; ALL-NEXT:    xorl %eax, %eax
; ALL-NEXT:    cmpl %eax, %edi
; ALL-NEXT:    setg %al
; ALL-NEXT:    testb $1, %al
; ALL-NEXT:    je .LBB5_1