Commit 468a0cb5 authored by Craig Topper
Browse files

[X86] Add X87 FCMOV support to X86FlagsCopyLowering.

Fixes PR44396
parent 4d7201e7
Loading
Loading
Loading
Loading
+73 −0
Original line number Diff line number Diff line
@@ -115,6 +115,10 @@ private:
                   MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                   MachineInstr &CMovI, MachineOperand &FlagUse,
                   CondRegArray &CondRegs);
  /// Rewrite the x87 FCMOV pseudo \p CMovI so that it selects on the result
  /// of a fresh test of a saved condition register, and mark \p FlagUse as a
  /// kill. \p TestMBB, \p TestPos, \p TestLoc and \p CondRegs are forwarded to
  /// the lookup of the register holding the condition (or its inverse).
  void rewriteFCMov(MachineBasicBlock &TestMBB,
                    MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                    MachineInstr &CMovI, MachineOperand &FlagUse,
                    CondRegArray &CondRegs);
  void rewriteCondJmp(MachineBasicBlock &TestMBB,
                      MachineBasicBlock::iterator TestPos, DebugLoc TestLoc,
                      MachineInstr &JmpI, CondRegArray &CondRegs);
@@ -334,6 +338,28 @@ static MachineBasicBlock &splitBlock(MachineBasicBlock &MBB,
  return NewMBB;
}

/// Translate an x87 FCMOV pseudo opcode (Fp32, Fp64 or Fp80 flavour) into the
/// X86 condition code it selects on.
///
/// \returns X86::COND_INVALID for every opcode that is not one of the FCMOV
/// pseudos listed below, so the result doubles as an "is this an FCMOV?" test.
static X86::CondCode getCondFromFCMOV(unsigned Opcode) {
  X86::CondCode CC = X86::COND_INVALID;

  switch (Opcode) {
  case X86::CMOVB_Fp32:
  case X86::CMOVB_Fp64:
  case X86::CMOVB_Fp80:
    CC = X86::COND_B;
    break;
  case X86::CMOVBE_Fp32:
  case X86::CMOVBE_Fp64:
  case X86::CMOVBE_Fp80:
    CC = X86::COND_BE;
    break;
  case X86::CMOVE_Fp32:
  case X86::CMOVE_Fp64:
  case X86::CMOVE_Fp80:
    CC = X86::COND_E;
    break;
  case X86::CMOVP_Fp32:
  case X86::CMOVP_Fp64:
  case X86::CMOVP_Fp80:
    CC = X86::COND_P;
    break;
  // The "not below" spellings map onto the above / above-or-equal codes.
  case X86::CMOVNB_Fp32:
  case X86::CMOVNB_Fp64:
  case X86::CMOVNB_Fp80:
    CC = X86::COND_AE;
    break;
  case X86::CMOVNBE_Fp32:
  case X86::CMOVNBE_Fp64:
  case X86::CMOVNBE_Fp80:
    CC = X86::COND_A;
    break;
  case X86::CMOVNE_Fp32:
  case X86::CMOVNE_Fp64:
  case X86::CMOVNE_Fp80:
    CC = X86::COND_NE;
    break;
  case X86::CMOVNP_Fp32:
  case X86::CMOVNP_Fp64:
  case X86::CMOVNP_Fp80:
    CC = X86::COND_NP;
    break;
  default:
    // Not an FCMOV pseudo: leave CC as COND_INVALID.
    break;
  }

  return CC;
}

bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");
@@ -593,6 +619,8 @@ bool X86FlagsCopyLoweringPass::runOnMachineFunction(MachineFunction &MF) {
        // Otherwise we can just rewrite in-place.
        if (X86::getCondFromCMov(MI) != X86::COND_INVALID) {
          rewriteCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (getCondFromFCMOV(MI.getOpcode()) != X86::COND_INVALID) {
          rewriteFCMov(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (X86::getCondFromSETCC(MI) != X86::COND_INVALID) {
          rewriteSetCC(*TestMBB, TestPos, TestLoc, MI, *FlagUse, CondRegs);
        } else if (MI.getOpcode() == TargetOpcode::COPY) {
@@ -852,6 +880,51 @@ void X86FlagsCopyLoweringPass::rewriteCMov(MachineBasicBlock &TestMBB,
  LLVM_DEBUG(dbgs() << "    fixed cmov: "; CMovI.dump());
}

/// Rewrite an x87 FCMOV pseudo so that it selects on a freshly inserted test
/// of a saved condition register.
///
/// \param TestMBB, TestPos, TestLoc  forwarded unchanged to
///        getCondOrInverseInReg when looking up the condition register.
/// \param CMovI     the FCMOV pseudo to rewrite in place.
/// \param FlagUse   the flag operand being rewritten; marked as a kill.
/// \param CondRegs  per-condition register table handed to the lookup helper.
void X86FlagsCopyLoweringPass::rewriteFCMov(MachineBasicBlock &TestMBB,
                                            MachineBasicBlock::iterator TestPos,
                                            DebugLoc TestLoc,
                                            MachineInstr &CMovI,
                                            MachineOperand &FlagUse,
                                            CondRegArray &CondRegs) {
  // Ask for the register holding this FCMOV's condition. The helper may hand
  // back the inverse condition instead, which it reports through IsInverse.
  const X86::CondCode FCMovCond = getCondFromFCMOV(CMovI.getOpcode());
  unsigned SavedCondReg;
  bool IsInverse;
  std::tie(SavedCondReg, IsInverse) =
      getCondOrInverseInReg(TestMBB, TestPos, TestLoc, FCMovCond, CondRegs);

  // Emit a direct test of the saved register at the FCMOV's own position.
  insertTest(*CMovI.getParent(), CMovI.getIterator(), CMovI.getDebugLoc(),
             SavedCondReg);

  // Whatever condition the FCMOV originally used, it now selects on the
  // test's !ZF result (CMOVNE), or on ZF (CMOVE) when the register holds the
  // inverse condition. Only the operand width is carried over from the old
  // opcode.
  unsigned NewOpc;
  switch (CMovI.getOpcode()) {
  case X86::CMOVE_Fp32:
  case X86::CMOVNE_Fp32:
  case X86::CMOVB_Fp32:
  case X86::CMOVNB_Fp32:
  case X86::CMOVBE_Fp32:
  case X86::CMOVNBE_Fp32:
  case X86::CMOVP_Fp32:
  case X86::CMOVNP_Fp32:
    NewOpc = IsInverse ? X86::CMOVE_Fp32 : X86::CMOVNE_Fp32;
    break;
  case X86::CMOVE_Fp64:
  case X86::CMOVNE_Fp64:
  case X86::CMOVB_Fp64:
  case X86::CMOVNB_Fp64:
  case X86::CMOVBE_Fp64:
  case X86::CMOVNBE_Fp64:
  case X86::CMOVP_Fp64:
  case X86::CMOVNP_Fp64:
    NewOpc = IsInverse ? X86::CMOVE_Fp64 : X86::CMOVNE_Fp64;
    break;
  case X86::CMOVE_Fp80:
  case X86::CMOVNE_Fp80:
  case X86::CMOVB_Fp80:
  case X86::CMOVNB_Fp80:
  case X86::CMOVBE_Fp80:
  case X86::CMOVNBE_Fp80:
  case X86::CMOVP_Fp80:
  case X86::CMOVNP_Fp80:
    NewOpc = IsInverse ? X86::CMOVE_Fp80 : X86::CMOVNE_Fp80;
    break;
  default:
    llvm_unreachable("Unexpected opcode!");
  }

  CMovI.setDesc(TII->get(NewOpc));
  // NOTE(review): presumably this FCMOV is the only consumer of the test
  // inserted above, hence the kill flag - confirm against rewriteCMov.
  FlagUse.setIsKill(true);
  LLVM_DEBUG(dbgs() << "    fixed fcmov: "; CMovI.dump());
}

void X86FlagsCopyLoweringPass::rewriteCondJmp(
    MachineBasicBlock &TestMBB, MachineBasicBlock::iterator TestPos,
    DebugLoc TestLoc, MachineInstr &JmpI, CondRegArray &CondRegs) {
+47 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-macosx10.15.0 -mattr=+cmov | FileCheck %s

@b = global i32 0, align 4
@a = global i64 0, align 8

; Regression test for PR44396. The function selects between 0.0 and a
; uitofp result (both double) based on an unsigned 64-bit compare of @a
; against the sign-extended value of @b. On the i386 triple with +cmov given
; in the run line, the expected assembly saves the compare result with setb,
; re-tests it with testb, and performs the select with fcmovne.
define double @c() nounwind {
; CHECK-LABEL: c:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $16, %esp
; CHECK-NEXT:    movl _b, %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    sarl $31, %ecx
; CHECK-NEXT:    movl _a+4, %edx
; CHECK-NEXT:    movl _a, %esi
; CHECK-NEXT:    subl %eax, %esi
; CHECK-NEXT:    sbbl %ecx, %edx
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    testl %edx, %edx
; CHECK-NEXT:    setns %cl
; CHECK-NEXT:    movl %esi, (%esp)
; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; CHECK-NEXT:    fildll (%esp)
; CHECK-NEXT:    fadds LCPI0_0(,%ecx,4)
; CHECK-NEXT:    fstpl {{[0-9]+}}(%esp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%esp)
; CHECK-NEXT:    fldz
; CHECK-NEXT:    testb %al, %al
; CHECK-NEXT:    fxch %st(1)
; CHECK-NEXT:    fcmovne %st(1), %st
; CHECK-NEXT:    fstp %st(1)
; CHECK-NEXT:    addl $16, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
entry:
  %0 = load i32, i32* @b, align 4
  %conv = sext i32 %0 to i64
  %1 = load i64, i64* @a, align 8
  ; The compare result is used by the select only after the sub and uitofp
  ; below.
  %cmp = icmp ult i64 %1, %conv
  %sub = sub i64 %1, %conv
  %conv3 = uitofp i64 %sub to double
  ; Floating-point select on an integer-compare condition.
  %cond = select i1 %cmp, double 0.000000e+00, double %conv3
  ret double %cond
}