Commit 8a0f335a authored by Marek Olsak's avatar Marek Olsak
Browse files

AMDGPU/SI: Add support for non-void functions

Summary:
Return values can be stored in SGPRs (i32) and VGPRs (f32).

This will be used by functions which expect some bytecode or other binary to
be appended at the end. It allows defining in which registers the return
values will be stored.

v2: don't do this for compute shaders

Reviewers: tstellarAMD, arsenm

Subscribers: arsenm

Differential Revision: http://reviews.llvm.org/D16033

llvm-svn: 257621
parent 9c3bf318
Loading
Loading
Loading
Loading
+31 −0
Original line number Diff line number Diff line
@@ -66,6 +66,37 @@ def CC_SI : CallingConv<[

]>;

def RetCC_SI : CallingConv<[
  CCIfType<[i32] , CCAssignToReg<[
    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
    SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
    SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
    SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
  ]>>,

  // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
  CCIfType<[f32] , CCAssignToReg<[
    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
    VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
    VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
    VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
    VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
    VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
    VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
    VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
    VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
    VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
    VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
    VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
    VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
    VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
  ]>>
]>;

// Calling convention for R600
def CC_R600 : CallingConv<[
  CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+6 −0
Original line number Diff line number Diff line
@@ -572,6 +572,12 @@ void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}

void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
                           const SmallVectorImpl<ISD::OutputArg> &Outs) const {

  State.AnalyzeReturn(Outs, RetCC_SI);
}

SDValue AMDGPUTargetLowering::LowerReturn(
                                     SDValue Chain,
                                     CallingConv::ID CallConv,
+2 −0
Original line number Diff line number Diff line
@@ -115,6 +115,8 @@ protected:
                               SmallVectorImpl<ISD::InputArg> &OrigIns) const;
  void AnalyzeFormalArguments(CCState &State,
                              const SmallVectorImpl<ISD::InputArg> &Ins) const;
  void AnalyzeReturn(CCState &State,
                     const SmallVectorImpl<ISD::OutputArg> &Outs) const;

public:
  AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
+1 −1
Original line number Diff line number Diff line
@@ -242,4 +242,4 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag       : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
    [SDNPHasChain, SDNPOptInGlue]>;
    [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+89 −0
Original line number Diff line number Diff line
@@ -880,6 +880,95 @@ SDValue SITargetLowering::LowerFormalArguments(
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}

SDValue SITargetLowering::LowerReturn(SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                                      const SmallVectorImpl<SDValue> &OutVals,
                                      SDLoc DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (Info->getShaderType() == ShaderType::COMPUTE)
    return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
                                             OutVals, DL, DAG);

  SmallVector<ISD::OutputArg, 48> Splits;
  SmallVector<SDValue, 48> SplitVals;

  // Split vectors into their elements.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    const ISD::OutputArg &Out = Outs[i];

    if (Out.VT.isVector()) {
      MVT VT = Out.VT.getVectorElementType();
      ISD::OutputArg NewOut = Out;
      NewOut.Flags.setSplit();
      NewOut.VT = VT;

      // We want the original number of vector elements here, e.g.
      // three or five, not four or eight.
      unsigned NumElements = Out.ArgVT.getVectorNumElements();

      for (unsigned j = 0; j != NumElements; ++j) {
        SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
                                   DAG.getConstant(j, DL, MVT::i32));
        SplitVals.push_back(Elem);
        Splits.push_back(NewOut);
        NewOut.PartOffset += NewOut.VT.getStoreSize();
      }
    } else {
      SplitVals.push_back(OutVals[i]);
      Splits.push_back(Out);
    }
  }

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 48> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze outgoing return values.
  AnalyzeReturn(CCInfo, Splits);

  SDValue Flag;
  SmallVector<SDValue, 48> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = SplitVals[realRVLocIdx];

    // Copied from other backends.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
}

MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {

Loading