Loading llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +19 −0 Original line number Diff line number Diff line Loading @@ -2324,6 +2324,15 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index return; } case Intrinsic::amdgcn_permlane16: case Intrinsic::amdgcn_permlanex16: { // Doing a waterfall loop over these wouldn't make any sense. substituteSimpleCopyRegs(OpdMapper, 2); substituteSimpleCopyRegs(OpdMapper, 3); constrainOpWithReadfirstlane(MI, MRI, 4); constrainOpWithReadfirstlane(MI, MRI, 5); return; } default: break; } Loading Loading @@ -3334,6 +3343,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } case Intrinsic::amdgcn_permlane16: case Intrinsic::amdgcn_permlanex16: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_f32_4x4x1f32: case Intrinsic::amdgcn_mfma_f32_4x4x4f16: case Intrinsic::amdgcn_mfma_i32_4x4x4i8: Loading llvm/lib/Target/AMDGPU/SIInstrInfo.td +0 −4 Original line number Diff line number Diff line Loading @@ -665,10 +665,6 @@ defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def as_i1imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; def as_i1timm : SDNodeXForm<timm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; Loading llvm/lib/Target/AMDGPU/SMInstructions.td +5 −5 Original line number Diff line number Diff line Loading @@ -769,22 +769,22 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> { // 1. Offset as an immediate def : GCNPat < (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc), (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))) (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))) >; // 2. 32-bit IMM offset on CI def : GCNPat < (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)), (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> { (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))> { let OtherPredicates = [isGFX7Only]; } // 3. Offset loaded in an 32bit SGPR def : GCNPat < (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc), (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))) (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))) >; } Loading llvm/lib/Target/AMDGPU/VOP3Instructions.td +10 −5 Original line number Diff line number Diff line Loading @@ -646,8 +646,8 @@ class PermlanePat<SDPatternOperator permlane, Instruction inst> : GCNPat< (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc), SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in) >; // Permlane intrinsic that has either fetch invalid or bound control Loading @@ -659,13 +659,19 @@ class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> : $src1, node:$src2, node:$fi, node:$bc)> { let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 || N->getConstantOperandVal(6) != 0; }]; let GISelPredicateCode = [{ return MI.getOperand(6).getImm() != 0 || MI.getOperand(7).getImm() != 0; }]; } // Drop the input value if it won't be read. class PermlaneDiscardVDstIn<SDPatternOperator permlane, Instruction inst> : GCNPat< (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc), SCSrc_b32:$src1, 0, SCSrc_b32:$src2, (IMPLICIT_DEF)) >; Loading @@ -688,7 +694,6 @@ let SubtargetPredicate = isGFX10Plus in { def : PermlaneDiscardVDstIn< BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>, V_PERMLANEX16_B32>; } // End SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===// Loading llvm/lib/Target/AMDGPU/VOP3PInstructions.td +1 −1 Original line number Diff line number Diff line Loading @@ -287,7 +287,7 @@ multiclass DotPats<SDPatternOperator dot_op, (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)), (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp), (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>; (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1timm $clamp))>; } defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>; Loading Loading
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +19 −0 Original line number Diff line number Diff line Loading @@ -2324,6 +2324,15 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(MI, MRI, MI.getNumOperands() - 1); // Index return; } case Intrinsic::amdgcn_permlane16: case Intrinsic::amdgcn_permlanex16: { // Doing a waterfall loop over these wouldn't make any sense. substituteSimpleCopyRegs(OpdMapper, 2); substituteSimpleCopyRegs(OpdMapper, 3); constrainOpWithReadfirstlane(MI, MRI, 4); constrainOpWithReadfirstlane(MI, MRI, 5); return; } default: break; } Loading Loading @@ -3334,6 +3343,16 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } case Intrinsic::amdgcn_permlane16: case Intrinsic::amdgcn_permlanex16: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } case Intrinsic::amdgcn_mfma_f32_4x4x1f32: case Intrinsic::amdgcn_mfma_f32_4x4x4f16: case Intrinsic::amdgcn_mfma_i32_4x4x4i8: Loading
llvm/lib/Target/AMDGPU/SIInstrInfo.td +0 −4 Original line number Diff line number Diff line Loading @@ -665,10 +665,6 @@ defm atomic_load_fadd : SIAtomicM0Glue2 <"LOAD_FADD", 0, SDTAtomic2_f32, 0>; defm atomic_load_fmin : SIAtomicM0Glue2 <"LOAD_FMIN", 1, SDTAtomic2_f32, 0>; defm atomic_load_fmax : SIAtomicM0Glue2 <"LOAD_FMAX", 1, SDTAtomic2_f32, 0>; def as_i1imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; def as_i1timm : SDNodeXForm<timm, [{ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1); }]>; Loading
llvm/lib/Target/AMDGPU/SMInstructions.td +5 −5 Original line number Diff line number Diff line Loading @@ -769,22 +769,22 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> { // 1. Offset as an immediate def : GCNPat < (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), i1:$glc, i1:$dlc), (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))) (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))) >; // 2. 32-bit IMM offset on CI def : GCNPat < (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), i1:$glc, i1:$dlc)), (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))> { (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))> { let OtherPredicates = [isGFX7Only]; } // 3. Offset loaded in an 32bit SGPR def : GCNPat < (SIsbuffer_load v4i32:$sbase, i32:$offset, i1:$glc, i1:$dlc), (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1imm $glc), (as_i1imm $dlc))) (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, (as_i1timm $glc), (as_i1timm $dlc))) >; } Loading
llvm/lib/Target/AMDGPU/VOP3Instructions.td +10 −5 Original line number Diff line number Diff line Loading @@ -646,8 +646,8 @@ class PermlanePat<SDPatternOperator permlane, Instruction inst> : GCNPat< (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc), SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in) >; // Permlane intrinsic that has either fetch invalid or bound control Loading @@ -659,13 +659,19 @@ class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> : $src1, node:$src2, node:$fi, node:$bc)> { let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 || N->getConstantOperandVal(6) != 0; }]; let GISelPredicateCode = [{ return MI.getOperand(6).getImm() != 0 || MI.getOperand(7).getImm() != 0; }]; } // Drop the input value if it won't be read. class PermlaneDiscardVDstIn<SDPatternOperator permlane, Instruction inst> : GCNPat< (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc), SCSrc_b32:$src1, 0, SCSrc_b32:$src2, (IMPLICIT_DEF)) >; Loading @@ -688,7 +694,6 @@ let SubtargetPredicate = isGFX10Plus in { def : PermlaneDiscardVDstIn< BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>, V_PERMLANEX16_B32>; } // End SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===// Loading
llvm/lib/Target/AMDGPU/VOP3PInstructions.td +1 −1 Original line number Diff line number Diff line Loading @@ -287,7 +287,7 @@ multiclass DotPats<SDPatternOperator dot_op, (dot_op (dot_inst.Pfl.Src0VT (VOP3PMods0 dot_inst.Pfl.Src0VT:$src0, i32:$src0_modifiers)), (dot_inst.Pfl.Src1VT (VOP3PMods dot_inst.Pfl.Src1VT:$src1, i32:$src1_modifiers)), (dot_inst.Pfl.Src2VT (VOP3PMods dot_inst.Pfl.Src2VT:$src2, i32:$src2_modifiers)), i1:$clamp), (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1imm $clamp))>; (dot_inst $src0_modifiers, $src0, $src1_modifiers, $src1, $src2_modifiers, $src2, (as_i1timm $clamp))>; } defm : DotPats<AMDGPUfdot2, V_DOT2_F32_F16>; Loading