Unverified Commit a604c4b5 authored by chuongg3's avatar chuongg3 Committed by GitHub
Browse files

[AArch64][GlobalISel] TableGen Selection for G_VECREDUCE_ADD (#70785)

Instruction Selection for G_VECREDUCE_ADD now uses TableGen
parent de58aa83
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -177,6 +177,7 @@ def : GINodeEquiv<G_VECREDUCE_UMIN, vecreduce_umin>;
// Each record below pairs a generic (G_*) opcode with the like-named node.
// NOTE(review): presumably this is what lets patterns written against the
// node (e.g. vecreduce_add) be used when selecting the G_* opcode — confirm
// against the GINodeEquiv class definition.
def : GINodeEquiv<G_VECREDUCE_UMAX, vecreduce_umax>;
def : GINodeEquiv<G_VECREDUCE_SMIN, vecreduce_smin>;
def : GINodeEquiv<G_VECREDUCE_SMAX, vecreduce_smax>;
def : GINodeEquiv<G_VECREDUCE_ADD, vecreduce_add>;

// Strict floating-point add/sub pairings.
def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
+16 −0
Original line number Diff line number Diff line
@@ -6676,6 +6676,22 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
          ssub))>;
}

// Patterns for vecreduce_add, used by GlobalISel not SDAG.
// Each pattern maps an integer add reduction of one vector type to a single
// instruction whose result has the vector's element type.

// 8-bit elements: ADDV on the 64-bit (v8i8) and 128-bit (v16i8) forms.
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))), 
          (i8 (ADDVv8i8v V64:$Rn))>;
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))), 
          (i8 (ADDVv16i8v V128:$Rn))>;
// 16-bit elements: ADDV on the 64-bit (v4i16) and 128-bit (v8i16) forms.
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))), 
          (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), 
          (i16 (ADDVv8i16v V128:$Rn))>;
// v2i32 is special: there is no ADDV form used here. ADDPv2i32 is given $Rn
// as both operands and produces a 64-bit (V64) value, so the i32 result is
// taken from its ssub subregister.
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))), 
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
// v4i32 uses the ADDV form directly.
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), 
          (i32 (ADDVv4i32v V128:$Rn))>;
// v2i64 is selected to ADDPv2i64p rather than an ADDV variant.
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), 
          (i64 (ADDPv2i64p V128:$Rn))>;

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
+0 −45
Original line number Diff line number Diff line
@@ -3558,8 +3558,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
    return selectConcatVectors(I, MRI);
  case TargetOpcode::G_JUMP_TABLE:
    return selectJumpTable(I, MRI);
  case TargetOpcode::G_VECREDUCE_ADD:
    return selectReduction(I, MRI);
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMCPY_INLINE:
  case TargetOpcode::G_MEMMOVE:
@@ -3578,49 +3576,6 @@ bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
  return Success;
}

/// Select a vector reduction instruction.
///
/// Only G_VECREDUCE_ADD is handled; any other opcode yields false.
///
/// \param I   The reduction instruction. Operand 0 is read as the destination
///            register and operand 1 as the source vector register.
/// \param MRI Register info used to query the source vector's type.
/// \returns true when an instruction was selected and its register operands
///          were constrained successfully; false for an unhandled opcode or
///          vector type, or when constraining fails.
bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
                                                 MachineRegisterInfo &MRI) {
  Register SrcReg = I.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  // Nothing other than add reductions is handled here.
  if (I.getOpcode() != TargetOpcode::G_VECREDUCE_ADD)
    return false;

  // <2 x i32>: ADDPv2i32 yields an FPR64 value, so the scalar result has to
  // be pulled out with a subregister copy afterwards.
  if (SrcTy == LLT::fixed_vector(2, 32)) {
    Register ResultReg = I.getOperand(0).getReg();
    auto PairwiseAdd = MIB.buildInstr(
        AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, {SrcReg, SrcReg});
    auto SubRegCopy = MIB.buildInstr(TargetOpcode::COPY, {ResultReg}, {})
                          .addReg(PairwiseAdd.getReg(0), 0, AArch64::ssub);
    Register CopiedReg = SubRegCopy.getReg(0);
    RBI.constrainGenericRegister(CopiedReg, AArch64::FPR32RegClass, MRI);
    // The generic instruction is fully replaced by the ADDP + COPY pair.
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*PairwiseAdd, TII, TRI, RBI);
  }

  // Map the remaining supported vector types to their target opcode; 0 means
  // "no match".
  auto ChooseOpcode = [&SrcTy]() -> unsigned {
    if (SrcTy == LLT::fixed_vector(16, 8))
      return AArch64::ADDVv16i8v;
    if (SrcTy == LLT::fixed_vector(8, 8))
      return AArch64::ADDVv8i8v;
    if (SrcTy == LLT::fixed_vector(8, 16))
      return AArch64::ADDVv8i16v;
    if (SrcTy == LLT::fixed_vector(4, 16))
      return AArch64::ADDVv4i16v;
    if (SrcTy == LLT::fixed_vector(4, 32))
      return AArch64::ADDVv4i32v;
    if (SrcTy == LLT::fixed_vector(2, 64))
      return AArch64::ADDPv2i64p;
    return 0;
  };

  const unsigned TargetOpc = ChooseOpcode();
  if (TargetOpc == 0) {
    LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
    return false;
  }

  // Rewrite the generic instruction in place to the chosen target opcode.
  I.setDesc(TII.get(TargetOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
                                            MachineRegisterInfo &MRI) {
  unsigned Mopcode;