Commit 59644662 authored by Jay Foad
Browse files

[AMDGPU][ConstantFolding] Fold llvm.amdgcn.cube* intrinsics

Summary:
This folds the following family of intrinsics:
llvm.amdgcn.cubeid (face id)
llvm.amdgcn.cubema (major axis)
llvm.amdgcn.cubesc (S coordinate)
llvm.amdgcn.cubetc (T coordinate)

Reviewers: nhaehnle, arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75187
parent 11d1573b
Loading
Loading
Loading
Loading
+68 −0
Original line number Diff line number Diff line
@@ -1447,6 +1447,10 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
  case Intrinsic::convert_from_fp16:
  case Intrinsic::convert_to_fp16:
  case Intrinsic::bitreverse:
  case Intrinsic::amdgcn_cubeid:
  case Intrinsic::amdgcn_cubema:
  case Intrinsic::amdgcn_cubesc:
  case Intrinsic::amdgcn_cubetc:
  case Intrinsic::amdgcn_fmul_legacy:
  case Intrinsic::amdgcn_fract:
  case Intrinsic::x86_sse_cvtss2si:
@@ -2305,6 +2309,61 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
  return nullptr;
}

/// Constant-fold one member of the llvm.amdgcn.cube* intrinsic family.
///
/// The three operands are treated as a direction vector. The operand with the
/// largest magnitude becomes the major axis; because the comparisons use >=,
/// \p S2 wins ties against both other operands and \p S1 wins ties against
/// \p S0. Each axis owns two face ids (S0: 0/1, S1: 2/3, S2: 4/5); the odd id
/// is used when the major operand is strictly negative. Negative zero and NaN
/// are not considered negative.
///
/// \param IntrinsicID one of amdgcn_cubeid, amdgcn_cubema, amdgcn_cubesc or
///        amdgcn_cubetc; any other value hits llvm_unreachable.
/// \param S0 first operand of the intrinsic call.
/// \param S1 second operand of the intrinsic call.
/// \param S2 third operand of the intrinsic call.
/// \returns the face id (built with the semantics of \p S0) for cubeid, twice
///          the major operand for cubema, the S coordinate for cubesc, or the
///          T coordinate for cubetc.
static APFloat ConstantFoldAMDGCNCubeIntrinsic(Intrinsic::ID IntrinsicID,
                                               const APFloat &S0,
                                               const APFloat &S1,
                                               const APFloat &S2) {
  // "Strictly below zero": negative zero and NaN both fail this test.
  auto IsBelowZero = [](const APFloat &V) {
    return V.isNegative() && V.isNonZero() && !V.isNaN();
  };

  // Select the major axis index (0 = S0, 1 = S1, 2 = S2).
  const APFloat Mag0 = abs(S0);
  const APFloat Mag1 = abs(S1);
  const APFloat Mag2 = abs(S2);
  unsigned Axis = 0;
  if (Mag2 >= Mag0 && Mag2 >= Mag1)
    Axis = 2;
  else if (Mag1 >= Mag0)
    Axis = 1;

  const APFloat &Major = Axis == 2 ? S2 : (Axis == 1 ? S1 : S0);
  const bool Flipped = IsBelowZero(Major);

  switch (IntrinsicID) {
  default:
    llvm_unreachable("unhandled amdgcn cube intrinsic");
  case Intrinsic::amdgcn_cubeid: {
    // Two faces per axis; the odd one is the strictly negative side.
    const unsigned FaceID = 2 * Axis + (Flipped ? 1 : 0);
    return APFloat(S0.getSemantics(), FaceID);
  }
  case Intrinsic::amdgcn_cubema:
    return Major + Major;
  case Intrinsic::amdgcn_cubesc:
    if (Axis == 2) {
      // S2 major: S follows S0, mirrored on the negative face.
      if (Flipped)
        return -S0;
      return S0;
    }
    if (Axis == 1)
      return S0;
    // S0 major: S follows S2, mirrored on the non-negative face.
    if (Flipped)
      return S2;
    return -S2;
  case Intrinsic::amdgcn_cubetc:
    if (Axis == 1) {
      // S1 major: T follows S2, mirrored on the negative face.
      if (Flipped)
        return -S2;
      return S2;
    }
    // S0 or S2 major: T is always the negated S1.
    return -S1;
  }
}

static Constant *ConstantFoldScalarCall3(StringRef Name,
                                         Intrinsic::ID IntrinsicID,
                                         Type *Ty,
@@ -2325,6 +2384,15 @@ static Constant *ConstantFoldScalarCall3(StringRef Name,
                             APFloat::rmNearestTiesToEven);
          return ConstantFP::get(Ty->getContext(), V);
        }
        case Intrinsic::amdgcn_cubeid:
        case Intrinsic::amdgcn_cubema:
        case Intrinsic::amdgcn_cubesc:
        case Intrinsic::amdgcn_cubetc: {
          APFloat V = ConstantFoldAMDGCNCubeIntrinsic(
              IntrinsicID, Op1->getValueAPF(), Op2->getValueAPF(),
              Op3->getValueAPF());
          return ConstantFP::get(Ty->getContext(), V);
        }
        }
      }
    }
+155 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s

; Intrinsic under test: llvm.amdgcn.cubeid (cube face id). It takes three
; float operands and returns a float.
declare float @llvm.amdgcn.cubeid(float, float, float)

; Exercises constant folding of llvm.amdgcn.cubeid.
; The call operands are every ordering of the magnitudes 3.0, 4.0 and 5.0
; combined with every sign pattern (6 orderings x 8 sign patterns = 48 calls).
; Value names spell out the operands in order: 'p' or 'n' for the sign and a
; digit for the magnitude, so %p3n4p5 is cubeid(+3.0, -4.0, +5.0).
; Every result is written to %p with a volatile store. The expected output
; consists only of stores of constant face ids (0.0 through 5.0); no call to
; the intrinsic remains.
define void @test(float* %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P:%.*]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 0.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 2.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 4.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 5.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 3.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+00, float* [[P]]
; CHECK-NEXT:    ret void
;
  %p3p4p5 = call float @llvm.amdgcn.cubeid(float +3.0, float +4.0, float +5.0)
  store volatile float %p3p4p5, float* %p
  %p3p5p4 = call float @llvm.amdgcn.cubeid(float +3.0, float +5.0, float +4.0)
  store volatile float %p3p5p4, float* %p
  %p4p3p5 = call float @llvm.amdgcn.cubeid(float +4.0, float +3.0, float +5.0)
  store volatile float %p4p3p5, float* %p
  %p4p5p3 = call float @llvm.amdgcn.cubeid(float +4.0, float +5.0, float +3.0)
  store volatile float %p4p5p3, float* %p
  %p5p3p4 = call float @llvm.amdgcn.cubeid(float +5.0, float +3.0, float +4.0)
  store volatile float %p5p3p4, float* %p
  %p5p4p3 = call float @llvm.amdgcn.cubeid(float +5.0, float +4.0, float +3.0)
  store volatile float %p5p4p3, float* %p
  %p3p4n5 = call float @llvm.amdgcn.cubeid(float +3.0, float +4.0, float -5.0)
  store volatile float %p3p4n5, float* %p
  %p3p5n4 = call float @llvm.amdgcn.cubeid(float +3.0, float +5.0, float -4.0)
  store volatile float %p3p5n4, float* %p
  %p4p3n5 = call float @llvm.amdgcn.cubeid(float +4.0, float +3.0, float -5.0)
  store volatile float %p4p3n5, float* %p
  %p4p5n3 = call float @llvm.amdgcn.cubeid(float +4.0, float +5.0, float -3.0)
  store volatile float %p4p5n3, float* %p
  %p5p3n4 = call float @llvm.amdgcn.cubeid(float +5.0, float +3.0, float -4.0)
  store volatile float %p5p3n4, float* %p
  %p5p4n3 = call float @llvm.amdgcn.cubeid(float +5.0, float +4.0, float -3.0)
  store volatile float %p5p4n3, float* %p
  %p3n4p5 = call float @llvm.amdgcn.cubeid(float +3.0, float -4.0, float +5.0)
  store volatile float %p3n4p5, float* %p
  %p3n5p4 = call float @llvm.amdgcn.cubeid(float +3.0, float -5.0, float +4.0)
  store volatile float %p3n5p4, float* %p
  %p4n3p5 = call float @llvm.amdgcn.cubeid(float +4.0, float -3.0, float +5.0)
  store volatile float %p4n3p5, float* %p
  %p4n5p3 = call float @llvm.amdgcn.cubeid(float +4.0, float -5.0, float +3.0)
  store volatile float %p4n5p3, float* %p
  %p5n3p4 = call float @llvm.amdgcn.cubeid(float +5.0, float -3.0, float +4.0)
  store volatile float %p5n3p4, float* %p
  %p5n4p3 = call float @llvm.amdgcn.cubeid(float +5.0, float -4.0, float +3.0)
  store volatile float %p5n4p3, float* %p
  %p3n4n5 = call float @llvm.amdgcn.cubeid(float +3.0, float -4.0, float -5.0)
  store volatile float %p3n4n5, float* %p
  %p3n5n4 = call float @llvm.amdgcn.cubeid(float +3.0, float -5.0, float -4.0)
  store volatile float %p3n5n4, float* %p
  %p4n3n5 = call float @llvm.amdgcn.cubeid(float +4.0, float -3.0, float -5.0)
  store volatile float %p4n3n5, float* %p
  %p4n5n3 = call float @llvm.amdgcn.cubeid(float +4.0, float -5.0, float -3.0)
  store volatile float %p4n5n3, float* %p
  %p5n3n4 = call float @llvm.amdgcn.cubeid(float +5.0, float -3.0, float -4.0)
  store volatile float %p5n3n4, float* %p
  %p5n4n3 = call float @llvm.amdgcn.cubeid(float +5.0, float -4.0, float -3.0)
  store volatile float %p5n4n3, float* %p
  %n3p4p5 = call float @llvm.amdgcn.cubeid(float -3.0, float +4.0, float +5.0)
  store volatile float %n3p4p5, float* %p
  %n3p5p4 = call float @llvm.amdgcn.cubeid(float -3.0, float +5.0, float +4.0)
  store volatile float %n3p5p4, float* %p
  %n4p3p5 = call float @llvm.amdgcn.cubeid(float -4.0, float +3.0, float +5.0)
  store volatile float %n4p3p5, float* %p
  %n4p5p3 = call float @llvm.amdgcn.cubeid(float -4.0, float +5.0, float +3.0)
  store volatile float %n4p5p3, float* %p
  %n5p3p4 = call float @llvm.amdgcn.cubeid(float -5.0, float +3.0, float +4.0)
  store volatile float %n5p3p4, float* %p
  %n5p4p3 = call float @llvm.amdgcn.cubeid(float -5.0, float +4.0, float +3.0)
  store volatile float %n5p4p3, float* %p
  %n3p4n5 = call float @llvm.amdgcn.cubeid(float -3.0, float +4.0, float -5.0)
  store volatile float %n3p4n5, float* %p
  %n3p5n4 = call float @llvm.amdgcn.cubeid(float -3.0, float +5.0, float -4.0)
  store volatile float %n3p5n4, float* %p
  %n4p3n5 = call float @llvm.amdgcn.cubeid(float -4.0, float +3.0, float -5.0)
  store volatile float %n4p3n5, float* %p
  %n4p5n3 = call float @llvm.amdgcn.cubeid(float -4.0, float +5.0, float -3.0)
  store volatile float %n4p5n3, float* %p
  %n5p3n4 = call float @llvm.amdgcn.cubeid(float -5.0, float +3.0, float -4.0)
  store volatile float %n5p3n4, float* %p
  %n5p4n3 = call float @llvm.amdgcn.cubeid(float -5.0, float +4.0, float -3.0)
  store volatile float %n5p4n3, float* %p
  %n3n4p5 = call float @llvm.amdgcn.cubeid(float -3.0, float -4.0, float +5.0)
  store volatile float %n3n4p5, float* %p
  %n3n5p4 = call float @llvm.amdgcn.cubeid(float -3.0, float -5.0, float +4.0)
  store volatile float %n3n5p4, float* %p
  %n4n3p5 = call float @llvm.amdgcn.cubeid(float -4.0, float -3.0, float +5.0)
  store volatile float %n4n3p5, float* %p
  %n4n5p3 = call float @llvm.amdgcn.cubeid(float -4.0, float -5.0, float +3.0)
  store volatile float %n4n5p3, float* %p
  %n5n3p4 = call float @llvm.amdgcn.cubeid(float -5.0, float -3.0, float +4.0)
  store volatile float %n5n3p4, float* %p
  %n5n4p3 = call float @llvm.amdgcn.cubeid(float -5.0, float -4.0, float +3.0)
  store volatile float %n5n4p3, float* %p
  %n3n4n5 = call float @llvm.amdgcn.cubeid(float -3.0, float -4.0, float -5.0)
  store volatile float %n3n4n5, float* %p
  %n3n5n4 = call float @llvm.amdgcn.cubeid(float -3.0, float -5.0, float -4.0)
  store volatile float %n3n5n4, float* %p
  %n4n3n5 = call float @llvm.amdgcn.cubeid(float -4.0, float -3.0, float -5.0)
  store volatile float %n4n3n5, float* %p
  %n4n5n3 = call float @llvm.amdgcn.cubeid(float -4.0, float -5.0, float -3.0)
  store volatile float %n4n5n3, float* %p
  %n5n3n4 = call float @llvm.amdgcn.cubeid(float -5.0, float -3.0, float -4.0)
  store volatile float %n5n3n4, float* %p
  %n5n4n3 = call float @llvm.amdgcn.cubeid(float -5.0, float -4.0, float -3.0)
  store volatile float %n5n4n3, float* %p
  ret void
}
+155 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s

; Intrinsic under test: llvm.amdgcn.cubema (cube major axis). It takes three
; float operands and returns a float.
declare float @llvm.amdgcn.cubema(float, float, float)

; Exercises constant folding of llvm.amdgcn.cubema.
; The call operands are every ordering of the magnitudes 3.0, 4.0 and 5.0
; combined with every sign pattern (6 orderings x 8 sign patterns = 48 calls).
; Value names spell out the operands in order: 'p' or 'n' for the sign and a
; digit for the magnitude, so %p3n4p5 is cubema(+3.0, -4.0, +5.0).
; Every result is written to %p with a volatile store. The expected output
; consists only of stores of +10.0 or -10.0, i.e. twice the operand whose
; magnitude is 5.0 with its sign kept; no call to the intrinsic remains.
define void @test(float* %p) {
; CHECK-LABEL: @test(
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P:%.*]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float 1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    store volatile float -1.000000e+01, float* [[P]]
; CHECK-NEXT:    ret void
;
  %p3p4p5 = call float @llvm.amdgcn.cubema(float +3.0, float +4.0, float +5.0)
  store volatile float %p3p4p5, float* %p
  %p3p5p4 = call float @llvm.amdgcn.cubema(float +3.0, float +5.0, float +4.0)
  store volatile float %p3p5p4, float* %p
  %p4p3p5 = call float @llvm.amdgcn.cubema(float +4.0, float +3.0, float +5.0)
  store volatile float %p4p3p5, float* %p
  %p4p5p3 = call float @llvm.amdgcn.cubema(float +4.0, float +5.0, float +3.0)
  store volatile float %p4p5p3, float* %p
  %p5p3p4 = call float @llvm.amdgcn.cubema(float +5.0, float +3.0, float +4.0)
  store volatile float %p5p3p4, float* %p
  %p5p4p3 = call float @llvm.amdgcn.cubema(float +5.0, float +4.0, float +3.0)
  store volatile float %p5p4p3, float* %p
  %p3p4n5 = call float @llvm.amdgcn.cubema(float +3.0, float +4.0, float -5.0)
  store volatile float %p3p4n5, float* %p
  %p3p5n4 = call float @llvm.amdgcn.cubema(float +3.0, float +5.0, float -4.0)
  store volatile float %p3p5n4, float* %p
  %p4p3n5 = call float @llvm.amdgcn.cubema(float +4.0, float +3.0, float -5.0)
  store volatile float %p4p3n5, float* %p
  %p4p5n3 = call float @llvm.amdgcn.cubema(float +4.0, float +5.0, float -3.0)
  store volatile float %p4p5n3, float* %p
  %p5p3n4 = call float @llvm.amdgcn.cubema(float +5.0, float +3.0, float -4.0)
  store volatile float %p5p3n4, float* %p
  %p5p4n3 = call float @llvm.amdgcn.cubema(float +5.0, float +4.0, float -3.0)
  store volatile float %p5p4n3, float* %p
  %p3n4p5 = call float @llvm.amdgcn.cubema(float +3.0, float -4.0, float +5.0)
  store volatile float %p3n4p5, float* %p
  %p3n5p4 = call float @llvm.amdgcn.cubema(float +3.0, float -5.0, float +4.0)
  store volatile float %p3n5p4, float* %p
  %p4n3p5 = call float @llvm.amdgcn.cubema(float +4.0, float -3.0, float +5.0)
  store volatile float %p4n3p5, float* %p
  %p4n5p3 = call float @llvm.amdgcn.cubema(float +4.0, float -5.0, float +3.0)
  store volatile float %p4n5p3, float* %p
  %p5n3p4 = call float @llvm.amdgcn.cubema(float +5.0, float -3.0, float +4.0)
  store volatile float %p5n3p4, float* %p
  %p5n4p3 = call float @llvm.amdgcn.cubema(float +5.0, float -4.0, float +3.0)
  store volatile float %p5n4p3, float* %p
  %p3n4n5 = call float @llvm.amdgcn.cubema(float +3.0, float -4.0, float -5.0)
  store volatile float %p3n4n5, float* %p
  %p3n5n4 = call float @llvm.amdgcn.cubema(float +3.0, float -5.0, float -4.0)
  store volatile float %p3n5n4, float* %p
  %p4n3n5 = call float @llvm.amdgcn.cubema(float +4.0, float -3.0, float -5.0)
  store volatile float %p4n3n5, float* %p
  %p4n5n3 = call float @llvm.amdgcn.cubema(float +4.0, float -5.0, float -3.0)
  store volatile float %p4n5n3, float* %p
  %p5n3n4 = call float @llvm.amdgcn.cubema(float +5.0, float -3.0, float -4.0)
  store volatile float %p5n3n4, float* %p
  %p5n4n3 = call float @llvm.amdgcn.cubema(float +5.0, float -4.0, float -3.0)
  store volatile float %p5n4n3, float* %p
  %n3p4p5 = call float @llvm.amdgcn.cubema(float -3.0, float +4.0, float +5.0)
  store volatile float %n3p4p5, float* %p
  %n3p5p4 = call float @llvm.amdgcn.cubema(float -3.0, float +5.0, float +4.0)
  store volatile float %n3p5p4, float* %p
  %n4p3p5 = call float @llvm.amdgcn.cubema(float -4.0, float +3.0, float +5.0)
  store volatile float %n4p3p5, float* %p
  %n4p5p3 = call float @llvm.amdgcn.cubema(float -4.0, float +5.0, float +3.0)
  store volatile float %n4p5p3, float* %p
  %n5p3p4 = call float @llvm.amdgcn.cubema(float -5.0, float +3.0, float +4.0)
  store volatile float %n5p3p4, float* %p
  %n5p4p3 = call float @llvm.amdgcn.cubema(float -5.0, float +4.0, float +3.0)
  store volatile float %n5p4p3, float* %p
  %n3p4n5 = call float @llvm.amdgcn.cubema(float -3.0, float +4.0, float -5.0)
  store volatile float %n3p4n5, float* %p
  %n3p5n4 = call float @llvm.amdgcn.cubema(float -3.0, float +5.0, float -4.0)
  store volatile float %n3p5n4, float* %p
  %n4p3n5 = call float @llvm.amdgcn.cubema(float -4.0, float +3.0, float -5.0)
  store volatile float %n4p3n5, float* %p
  %n4p5n3 = call float @llvm.amdgcn.cubema(float -4.0, float +5.0, float -3.0)
  store volatile float %n4p5n3, float* %p
  %n5p3n4 = call float @llvm.amdgcn.cubema(float -5.0, float +3.0, float -4.0)
  store volatile float %n5p3n4, float* %p
  %n5p4n3 = call float @llvm.amdgcn.cubema(float -5.0, float +4.0, float -3.0)
  store volatile float %n5p4n3, float* %p
  %n3n4p5 = call float @llvm.amdgcn.cubema(float -3.0, float -4.0, float +5.0)
  store volatile float %n3n4p5, float* %p
  %n3n5p4 = call float @llvm.amdgcn.cubema(float -3.0, float -5.0, float +4.0)
  store volatile float %n3n5p4, float* %p
  %n4n3p5 = call float @llvm.amdgcn.cubema(float -4.0, float -3.0, float +5.0)
  store volatile float %n4n3p5, float* %p
  %n4n5p3 = call float @llvm.amdgcn.cubema(float -4.0, float -5.0, float +3.0)
  store volatile float %n4n5p3, float* %p
  %n5n3p4 = call float @llvm.amdgcn.cubema(float -5.0, float -3.0, float +4.0)
  store volatile float %n5n3p4, float* %p
  %n5n4p3 = call float @llvm.amdgcn.cubema(float -5.0, float -4.0, float +3.0)
  store volatile float %n5n4p3, float* %p
  %n3n4n5 = call float @llvm.amdgcn.cubema(float -3.0, float -4.0, float -5.0)
  store volatile float %n3n4n5, float* %p
  %n3n5n4 = call float @llvm.amdgcn.cubema(float -3.0, float -5.0, float -4.0)
  store volatile float %n3n5n4, float* %p
  %n4n3n5 = call float @llvm.amdgcn.cubema(float -4.0, float -3.0, float -5.0)
  store volatile float %n4n3n5, float* %p
  %n4n5n3 = call float @llvm.amdgcn.cubema(float -4.0, float -5.0, float -3.0)
  store volatile float %n4n5n3, float* %p
  %n5n3n4 = call float @llvm.amdgcn.cubema(float -5.0, float -3.0, float -4.0)
  store volatile float %n5n3n4, float* %p
  %n5n4n3 = call float @llvm.amdgcn.cubema(float -5.0, float -4.0, float -3.0)
  store volatile float %n5n4n3, float* %p
  ret void
}
+155 −0

File added.

Preview size limit exceeded, changes collapsed.

+155 −0

File added.

Preview size limit exceeded, changes collapsed.