Commit 3c8e0551 authored by Stanislav Mekhanoshin's avatar Stanislav Mekhanoshin
Browse files

[AMDGPU] Fix mfma scheduling crash

An SUnit can be neither intruction not SDNode. It is all
null if represents a nop. Fixed a crash on using SU->getInstr().

Differential Revision: https://reviews.llvm.org/D69395
parent 5da6d4ec
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -773,6 +773,11 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
    return MI && TII->isSALU(*MI) && !MI->isTerminator();
  }

  bool isVALU(const SUnit *SU) const {
    const MachineInstr *MI = SU->getInstr();
    return MI && TII->isVALU(*MI);
  }

  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
    if (Pred->NodeNum < Succ->NodeNum)
      return true;
@@ -821,7 +826,7 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {

      for (SDep &SI : From->Succs) {
        SUnit *SUv = SI.getSUnit();
        if (SUv != From && TII->isVALU(*SUv->getInstr()) && canAddEdge(SUv, SU))
        if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
          SUv->addPred(SDep(SU, SDep::Artificial), false);
      }

+34 −0
Original line number Diff line number Diff line
@@ -76,5 +76,39 @@ exit:
  ret void
}

; GCN-LABEL: {{^}}test_mfma_loop_mfma_forward_init:

; GCN-COUNT32: v_accvgpr_write_b32 a{{[0-9]+}}, 0
; GCN: v_mfma_f32_32x32x1f32
; GCN-NOT: v_accvgpr

; GCN: [[LOOP:BB[0-9_]+]]:
; GCN-NOT: v_accvgpr
; GCN: v_mfma_f32_32x32x1f32
; GCN-NOT: v_accvgpr
; GCN: s_cbranch_scc1 [[LOOP]]

; GCN-COUNT32: v_accvgpr_read_b32
define amdgpu_kernel void @test_mfma_loop_mfma_forward_init(<32 x float> addrspace(1)* %arg) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %init = bitcast i32 %tid to float
  %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> zeroinitializer, i32 0, i32 0, i32 0)

  br label %for.cond.preheader

for.cond.preheader:
  %phi = phi <32 x float> [ %mai.0, %entry ], [ %mai.1, %for.cond.preheader ]
  %c = phi i32 [ 0, %entry ], [ %inc, %for.cond.preheader ]
  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0)
  %inc = add nuw nsw i32 %c, 1
  %cc = icmp eq i32 %inc, 16
  br i1 %cc, label %exit, label %for.cond.preheader

exit:
  store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
  ret void
}

declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
declare i32 @llvm.amdgcn.workitem.id.x()