Commit 0f84e3ee authored by Hans Wennborg's avatar Hans Wennborg
Browse files

Merging r323908:

------------------------------------------------------------------------
r323908 | mareko | 2018-01-31 21:18:04 +0100 (Wed, 31 Jan 2018) | 7 lines

AMDGPU: Add intrinsics llvm.amdgcn.cvt.{pknorm.i16, pknorm.u16, pk.i16, pk.u16}

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye

Differential Revision: https://reviews.llvm.org/D41663
------------------------------------------------------------------------

llvm-svn: 324103
parent 9b8da2b9
Loading
Loading
Loading
Loading
+20 −0
Original line number Diff line number Diff line
@@ -238,6 +238,26 @@ def int_amdgcn_cvt_pkrtz : Intrinsic<
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_i16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_cvt_pk_u16 : Intrinsic<
  [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
>;

def int_amdgcn_class : Intrinsic<
  [llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
  [IntrNoMem, IntrSpeculatable]
+4 −0
Original line number Diff line number Diff line
@@ -3957,6 +3957,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
  NODE_NAME_CASE(CVT_F32_UBYTE2)
  NODE_NAME_CASE(CVT_F32_UBYTE3)
  NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
  NODE_NAME_CASE(CVT_PKNORM_I16_F32)
  NODE_NAME_CASE(CVT_PKNORM_U16_F32)
  NODE_NAME_CASE(CVT_PK_I16_I32)
  NODE_NAME_CASE(CVT_PK_U16_U32)
  NODE_NAME_CASE(FP_TO_FP16)
  NODE_NAME_CASE(FP16_ZEXT)
  NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
+4 −0
Original line number Diff line number Diff line
@@ -417,6 +417,10 @@ enum NodeType : unsigned {
  // Convert two float 32 numbers into a single register holding two packed f16
  // with round to zero.
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
+8 −0
Original line number Diff line number Diff line
@@ -35,6 +35,10 @@ def AMDGPUFPPackOp : SDTypeProfile<1, 2,
  [SDTCisFP<1>, SDTCisSameAs<1, 2>]
>;

def AMDGPUIntPackOp : SDTypeProfile<1, 2,
  [SDTCisInt<1>, SDTCisSameAs<1, 2>]
>;

def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
  [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -142,6 +146,10 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;

def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;

+46 −4
Original line number Diff line number Diff line
@@ -205,6 +205,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);

  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -3517,7 +3518,8 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    if (IID == Intrinsic::amdgcn_cvt_pkrtz) {
    switch (IID) {
    case Intrinsic::amdgcn_cvt_pkrtz: {
      SDValue Src0 = N->getOperand(1);
      SDValue Src1 = N->getOperand(2);
      SDLoc SL(N);
@@ -3526,6 +3528,29 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
      Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
      return;
    }
    case Intrinsic::amdgcn_cvt_pknorm_i16:
    case Intrinsic::amdgcn_cvt_pknorm_u16:
    case Intrinsic::amdgcn_cvt_pk_i16:
    case Intrinsic::amdgcn_cvt_pk_u16: {
      SDValue Src0 = N->getOperand(1);
      SDValue Src1 = N->getOperand(2);
      SDLoc SL(N);
      unsigned Opcode;

      if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
        Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
      else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
        Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
      else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
        Opcode = AMDGPUISD::CVT_PK_I16_I32;
      else
        Opcode = AMDGPUISD::CVT_PK_U16_U32;

      SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
      Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
      return;
    }
    }
    break;
  }
  case ISD::SELECT: {
@@ -4424,10 +4449,27 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
  case Intrinsic::amdgcn_ubfe:
    return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
  case Intrinsic::amdgcn_cvt_pkrtz: {
    // FIXME: Stop adding cast if v2f16 legal.
  case Intrinsic::amdgcn_cvt_pkrtz:
  case Intrinsic::amdgcn_cvt_pknorm_i16:
  case Intrinsic::amdgcn_cvt_pknorm_u16:
  case Intrinsic::amdgcn_cvt_pk_i16:
  case Intrinsic::amdgcn_cvt_pk_u16: {
    // FIXME: Stop adding cast if v2f16/v2i16 are legal.
    EVT VT = Op.getValueType();
    SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32,
    unsigned Opcode;

    if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
      Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
      Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
      Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
    else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
      Opcode = AMDGPUISD::CVT_PK_I16_I32;
    else
      Opcode = AMDGPUISD::CVT_PK_U16_U32;

    SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
                               Op.getOperand(1), Op.getOperand(2));
    return DAG.getNode(ISD::BITCAST, DL, VT, Node);
  }
Loading