Commit 97a1d4bc authored by Matt Arsenault's avatar Matt Arsenault
Browse files

AMDGPU: Don't use separate cache arguments for s_buffer_load node

There's not much value to this separate node from the intrinsic. Make
the operand structure the same as the intrinsic, so we can reuse the
same pattern for GlobalISel.
parent 90b5ed99
Loading
Loading
Loading
Loading
+0 −16
Original line number Diff line number Diff line
@@ -1152,22 +1152,6 @@ let SubtargetPredicate = isGFX10Plus in {
// MUBUF Patterns
//===----------------------------------------------------------------------===//

def extract_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
}]>;

def extract_slc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_dlc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// buffer_load/store_format patterns
//===----------------------------------------------------------------------===//
+4 −6
Original line number Diff line number Diff line
@@ -5622,7 +5622,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}

SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
                                       SDValue Offset, SDValue GLC, SDValue DLC,
                                       SDValue Offset, SDValue CachePolicy,
                                       SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

@@ -5640,8 +5640,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
    SDValue Ops[] = {
        Rsrc,
        Offset, // Offset
        GLC,
        DLC,
        CachePolicy
    };

    // Widen vec3 load to vec4.
@@ -5675,7 +5674,6 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
  }

  SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
  unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
  SDValue Ops[] = {
      DAG.getEntryNode(),                               // Chain
      Rsrc,                                             // rsrc
@@ -5683,7 +5681,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
      {},                                               // voffset
      {},                                               // soffset
      {},                                               // offset
      DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
      CachePolicy,                                      // cachepolicy
      DAG.getTargetConstant(0, DL, MVT::i1),            // idxen
  };

@@ -5867,7 +5865,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
    if (!parseCachePolicy(Op.getOperand(3), DAG, &GLC, nullptr,
                          IsGFX10 ? &DLC : nullptr))
      return Op;
    return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), GLC, DLC,
    return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
                        DAG);
  }
  case Intrinsic::amdgcn_fdiv_fast:
+1 −1
Original line number Diff line number Diff line
@@ -60,7 +60,7 @@ private:
  SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr,
                     SelectionDAG &DAG) const;
  SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset,
                       SDValue GLC, SDValue DLC, SelectionDAG &DAG) const;
                       SDValue CachePolicy, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+21 −2
Original line number Diff line number Diff line
@@ -39,8 +39,7 @@ def SIEncodingFamily {
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPUnaryOp>;

def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
  SDTypeProfile<1, 4, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i1>,
                       SDTCisVT<4, i1>]>,
  SDTypeProfile<1, 3, [SDTCisVT<1, v4i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>,
  [SDNPMayLoad, SDNPMemOperand]
>;

@@ -794,6 +793,26 @@ def NegSubInlineConstV216 : PatLeaf<(build_vector), [{
         (isNullConstantOrUndef(Src1) && isNegInlineImmediate(Src0.getNode()));
}], getNegV2I16Imm>;

//===----------------------------------------------------------------------===//
// MUBUF/SMEM Patterns
//===----------------------------------------------------------------------===//

def extract_glc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant(N->getZExtValue() & 1, SDLoc(N), MVT::i8);
}]>;

def extract_slc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 1) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_dlc : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
}]>;

def extract_swz : SDNodeXForm<timm, [{
  return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
}]>;

//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
+8 −8
Original line number Diff line number Diff line
@@ -768,23 +768,23 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
multiclass SMLoad_Pattern <string Instr, ValueType vt> {
  // 1. Offset as an immediate
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1timm $glc),
                                        (as_i1timm $dlc)))
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (extract_glc $cachepolicy),
                                        (extract_dlc $cachepolicy)))
  >;

  // 2. 32-bit IMM offset on CI
  def : GCNPat <
    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)),
    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))> {
    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
    (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (extract_glc $cachepolicy), (extract_dlc $cachepolicy))> {
    let OtherPredicates = [isGFX7Only];
  }

  // 3. Offset loaded in an 32bit SGPR
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1timm $glc),
                                         (as_i1timm $dlc)))
    (SIsbuffer_load v4i32:$sbase, i32:$offset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (extract_glc $cachepolicy),
                                         (extract_dlc $cachepolicy)))
  >;
}