Commit 874a8004 authored by QingShan Zhang's avatar QingShan Zhang
Browse files

[PowerPC] Exploit the rlwinm instructions for "and" with constant

Currently, PowerPC uses several instructions to materialize the constant and then "and" with it in the following case:

define i32 @test1(i32 %a) {
  %and = and i32 %a, -2
  ret i32 %and
}

However, we could do this with a single rotate-and-mask instruction (rlwinm).
               MB  ME
+----------------------+
|xxxxxxxxxxx00011111000|
+----------------------+
 0         32         64
Note that we can only do this if MB is at least 32 (MB >= 32) and MB <= ME, because
RLWINM replaces the content of [0 - 32) with [32 - 64) even if we do not rotate.

Differential Revision: https://reviews.llvm.org/D71829
parent 266cd771
Loading
Loading
Loading
Loading
+24 −0
Original line number Diff line number Diff line
@@ -82,6 +82,30 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
  return false;
}

/// Test whether the 64-bit value \p Val consists of a single run of set bits,
/// where the run may also wrap around from the least significant bit back to
/// the most significant bit.
///
/// Positions are expressed as leading-zero counts, i.e. position 0 is the
/// most significant bit and position 63 is the least significant bit.
///
/// \param Val the constant to inspect.
/// \param[out] MB position of the first bit of the run; written only when the
///             function returns true.
/// \param[out] ME position of the last bit of the run; written only when the
///             function returns true. For a wrapping run ME is smaller than MB.
/// \returns true if \p Val is such a run, false otherwise (including Val == 0).
static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
  // Zero has no set bits, hence no run.
  if (Val == 0)
    return false;

  // Plain (non-wrapping) run: zeros, then ones, then zeros.
  if (isShiftedMask_64(Val)) {
    // The number of leading zeros is the position of the first set bit.
    MB = countLeadingZeros(Val);
    // (Val - 1) ^ Val keeps the lowest set bit and everything below it, so
    // its leading-zero count is the position of the last set bit of the run.
    ME = countLeadingZeros((Val - 1) ^ Val);
    return true;
  }

  // Wrapping run: the ones sit at both ends, so the complement must be a
  // plain run of ones describing the gap of zeros in the middle.
  const uint64_t Inverted = ~Val;
  if (!isShiftedMask_64(Inverted))
    return false; // no run present

  // The run ends just before the first zero of the original value ...
  ME = countLeadingZeros(Inverted) - 1;
  // ... and starts again just after the last zero of the original value.
  MB = countLeadingZeros(Inverted ^ (Inverted - 1)) + 1;
  return true;
}

} // end namespace llvm

// Generated files will use "namespace PPC". To avoid symbol clash,
+20 −0
Original line number Diff line number Diff line
@@ -4456,6 +4456,26 @@ bool PPCDAGToDAGISel::tryAndWithMask(SDNode *N) {
      CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
      return true;
    }

    // It is not 16-bit imm that means we need two instructions at least if
    // using "and" instruction. Try to exploit it with rotate mask instructions.
    if (isRunOfOnes64(Imm64, MB, ME)) {
      if (MB >= 32 && MB <= ME) {
        //                MB  ME
        // +----------------------+
        // |xxxxxxxxxxx00011111000|
        // +----------------------+
        //  0         32         64
        // We can only do it if the MB is larger than 32 and MB <= ME
        // as RLWINM will replace the content of [0 - 32) with [32 - 64) even
        // we didn't rotate it.
        SDValue Ops[] = { Val, getI64Imm(0, dl), getI64Imm(MB - 32, dl),
                          getI64Imm(ME - 32, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
        return true;
      }
      // TODO - handle it with rldicl + rldicl
    }
  }

  return false;
+1 −4
Original line number Diff line number Diff line
@@ -5,10 +5,7 @@
define i32 @test1(i32 %a) {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lis 4, 32767
; CHECK-NEXT:    ori 4, 4, 65535
; CHECK-NEXT:    sldi 4, 4, 1
; CHECK-NEXT:    and 3, 3, 4
; CHECK-NEXT:    rlwinm 3, 3, 0, 0, 30
; CHECK-NEXT:    blr
  %and = and i32 %a, -2
  ret i32 %and
+2 −2
Original line number Diff line number Diff line
@@ -7,12 +7,12 @@ define signext i32 @fn1(i32 %baz) {
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 48
  %4 = ashr exact i64 %3, 48
; CHECK: ANDI8o killed {{[^,]+}}, 65520, implicit-def dead $cr0
; CHECK: RLWINM8 killed {{[^,]+}}, 0, 16, 27
; CHECK: CMPLDI
; CHECK: BCC

; CHECK: ANDI8o {{[^,]+}}, 65520, implicit-def $cr0
; CHECK: COPY $cr0
; CHECK: COPY killed $cr0
; CHECK: BCC
  %5 = icmp eq i64 %4, 0
  br i1 %5, label %foo, label %bar
+2 −2
Original line number Diff line number Diff line
@@ -299,7 +299,7 @@ define i64 @popa_i16_i64(i16 %x) {
; FAST:       # %bb.0:
; FAST-NEXT:    clrldi 3, 3, 48
; FAST-NEXT:    popcntd 3, 3
; FAST-NEXT:    andi. 3, 3, 16
; FAST-NEXT:    rlwinm 3, 3, 0, 27, 27
; FAST-NEXT:    blr
;
; SLOW-LABEL: popa_i16_i64:
@@ -325,7 +325,7 @@ define i64 @popa_i16_i64(i16 %x) {
; SLOW-NEXT:    ori 4, 4, 257
; SLOW-NEXT:    mullw 3, 3, 4
; SLOW-NEXT:    srwi 3, 3, 24
; SLOW-NEXT:    andi. 3, 3, 16
; SLOW-NEXT:    rlwinm 3, 3, 0, 27, 27
; SLOW-NEXT:    blr
  %pop = call i16 @llvm.ctpop.i16(i16 %x)
  %z = zext i16 %pop to i64 ; SimplifyDemandedBits may turn zext (or sext) into aext