Commit 75afc010 authored by Craig Topper's avatar Craig Topper
Browse files

[X86] Sign extend the 8-bit immediate when commuting blend instructions to match isel.

Conversion from ConstantSDNode to MachineInstr sign extends immediates from their APInt representation to int64_t.

This commit makes sure we do the same for commuting. The tests changes show how this improves CSE. This issue was made worse by the MachineCSE using commuteInstruction to undo a commute. So we virtually guarantee the sign extend from isel would be lost.

The improved CSE also occurred with r354363, but that was reverted. I'm working to undo the revert, but wanted to get this fix in while it was easy to see the results.

llvm-svn: 354724
parent 6f1268a4
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
@@ -1462,7 +1462,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
  case X86::VPBLENDWrri:
  case X86::VPBLENDDYrri:
  case X86::VPBLENDWYrri:{
    unsigned Mask;
    int8_t Mask;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::BLENDPDrri:    Mask = 0x03; break;
@@ -1478,7 +1478,9 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
    case X86::VPBLENDWYrri:  Mask = 0xFF; break;
    }
    // Only the least significant bits of Imm are used.
    unsigned Imm = MI.getOperand(3).getImm() & Mask;
    // Using int8_t to ensure it will be sign extended to the int64_t that
    // setImm takes in order to match isel behavior.
    int8_t Imm = MI.getOperand(3).getImm() & Mask;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Mask ^ Imm);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
@@ -1593,7 +1595,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
    // Flip permute source immediate.
    // Imm & 0x02: lo = if set, select Op1.lo/hi else Op0.lo/hi.
    // Imm & 0x20: hi = if set, select Op1.lo/hi else Op0.lo/hi.
    unsigned Imm = MI.getOperand(3).getImm() & 0xFF;
    int8_t Imm = MI.getOperand(3).getImm() & 0xFF;
    auto &WorkingMI = cloneIfNew(MI);
    WorkingMI.getOperand(3).setImm(Imm ^ 0x22);
    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+12 −20
Original line number Diff line number Diff line
@@ -1261,18 +1261,15 @@ define <2 x float> @load_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2
; SSE42-LABEL: load_v2f32_v2i32:
; SSE42:       ## %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm3
; SSE42-NEXT:    pextrb $0, %xmm3, %eax
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    pextrb $0, %xmm0, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    je LBB10_2
; SSE42-NEXT:  ## %bb.1: ## %cond.load
; SSE42-NEXT:    movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm3[0,1],xmm1[2,3,4,5,6,7]
; SSE42-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3,4,5,6,7]
; SSE42-NEXT:  LBB10_2: ## %else
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    pextrb $8, %xmm0, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    je LBB10_4
@@ -1357,18 +1354,15 @@ define <2 x i32> @load_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i3
; SSE42-LABEL: load_v2i32_v2i32:
; SSE42:       ## %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    movdqa %xmm0, %xmm3
; SSE42-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm3
; SSE42-NEXT:    pextrb $0, %xmm3, %eax
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    pextrb $0, %xmm0, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    je LBB11_2
; SSE42-NEXT:  ## %bb.1: ## %cond.load
; SSE42-NEXT:    movl (%rdi), %eax
; SSE42-NEXT:    pinsrq $0, %rax, %xmm1
; SSE42-NEXT:  LBB11_2: ## %else
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    pextrb $8, %xmm0, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    je LBB11_4
@@ -1459,18 +1453,16 @@ define <2 x float> @load_undef_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %add
; SSE42-LABEL: load_undef_v2f32_v2i32:
; SSE42:       ## %bb.0:
; SSE42-NEXT:    movdqa %xmm0, %xmm1
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE42-NEXT:    pextrb $0, %xmm0, %eax
; SSE42-NEXT:    pxor %xmm0, %xmm0
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5],xmm0[6,7]
; SSE42-NEXT:    pcmpeqq %xmm0, %xmm1
; SSE42-NEXT:    pextrb $0, %xmm1, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    ## implicit-def: $xmm0
; SSE42-NEXT:    je LBB12_2
; SSE42-NEXT:  ## %bb.1: ## %cond.load
; SSE42-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE42-NEXT:  LBB12_2: ## %else
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE42-NEXT:    pcmpeqq %xmm2, %xmm1
; SSE42-NEXT:    pextrb $8, %xmm1, %eax
; SSE42-NEXT:    testb $1, %al
; SSE42-NEXT:    je LBB12_4
+6 −12
Original line number Diff line number Diff line
@@ -330,17 +330,14 @@ define void @store_v2f32_v2i32(<2 x i32> %trigger, <2 x float>* %addr, <2 x floa
; SSE4-LABEL: store_v2f32_v2i32:
; SSE4:       ## %bb.0:
; SSE4-NEXT:    pxor %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm3
; SSE4-NEXT:    pextrb $0, %xmm3, %eax
; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE4-NEXT:    pextrb $0, %xmm0, %eax
; SSE4-NEXT:    testb $1, %al
; SSE4-NEXT:    je LBB3_2
; SSE4-NEXT:  ## %bb.1: ## %cond.store
; SSE4-NEXT:    movss %xmm1, (%rdi)
; SSE4-NEXT:  LBB3_2: ## %else
; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE4-NEXT:    pextrb $8, %xmm0, %eax
; SSE4-NEXT:    testb $1, %al
; SSE4-NEXT:    je LBB3_4
@@ -417,17 +414,14 @@ define void @store_v2i32_v2i32(<2 x i32> %trigger, <2 x i32>* %addr, <2 x i32> %
; SSE4-LABEL: store_v2i32_v2i32:
; SSE4:       ## %bb.0:
; SSE4-NEXT:    pxor %xmm2, %xmm2
; SSE4-NEXT:    movdqa %xmm0, %xmm3
; SSE4-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm3
; SSE4-NEXT:    pextrb $0, %xmm3, %eax
; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE4-NEXT:    pextrb $0, %xmm0, %eax
; SSE4-NEXT:    testb $1, %al
; SSE4-NEXT:    je LBB4_2
; SSE4-NEXT:  ## %bb.1: ## %cond.store
; SSE4-NEXT:    movss %xmm1, (%rdi)
; SSE4-NEXT:  LBB4_2: ## %else
; SSE4-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
; SSE4-NEXT:    pcmpeqq %xmm2, %xmm0
; SSE4-NEXT:    pextrb $8, %xmm0, %eax
; SSE4-NEXT:    testb $1, %al
; SSE4-NEXT:    je LBB4_4