Commit dfa9420f authored by Matt Arsenault's avatar Matt Arsenault Committed by Matt Arsenault
Browse files

AMDGPU/GlobalISel: Don't use legal v2s16 G_BUILD_VECTOR

If we have s_pack_* instructions, legalize this to
G_BUILD_VECTOR_TRUNC from s32 elements. This is closer to how the
s_pack_* instructions really behave.

If we don't have s_pack_* instructions, expand this by creating a merge
to s32 and bitcasting. This expands to the expected bit operations. I
think this eventually should go in a new bitcast legalize action type
in LegalizerHelper.

We already directly emit the shift operations in RegBankSelect for the
vector case. This could possibly be cleaned up, but I also may want to
defer doing this expansion to selection anyway. I'll see about that
when I try to actually match VOP3P instructions.

This breaks the selection of the build_vector since tablegen doesn't
know how to match G_BUILD_VECTOR_TRUNC yet, so just xfail it for now.
parent 2b7f3289
Loading
Loading
Loading
Loading
+37 −8
Original line number Diff line number Diff line
@@ -1011,23 +1011,29 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
    .clampNumElements(0, V2S64, V16S64)
    .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));

  if (ST.hasScalarPackInsts())
    BuildVector.legalFor({V2S16, S32});

  if (ST.hasScalarPackInsts()) {
    BuildVector
    .minScalarSameAs(1, 0)
    .legalIf(isRegisterType(0))
    .minScalarOrElt(0, S32);
      // FIXME: Should probably widen s1 vectors straight to s32
      .minScalarOrElt(0, S16)
      // Widen source elements and produce a G_BUILD_VECTOR_TRUNC
      .minScalar(1, S32);

  if (ST.hasScalarPackInsts()) {
    getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
      .legalFor({V2S16, S32})
      .lower();
    BuildVector.minScalarOrElt(0, S32);
  } else {
    BuildVector.customFor({V2S16, S16});
    BuildVector.minScalarOrElt(0, S32);

    getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC)
      .customFor({V2S16, S32})
      .lower();
  }

  BuildVector.legalIf(isRegisterType(0));

  // FIXME: Clamp maximum size
  getActionDefinitionsBuilder(G_CONCAT_VECTORS)
    .legalIf(isRegisterType(0));

@@ -1229,6 +1235,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
    return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f);
  case TargetOpcode::G_FEXP:
    return legalizeFExp(MI, B);
  case TargetOpcode::G_BUILD_VECTOR:
    return legalizeBuildVector(MI, MRI, B);
  default:
    return false;
  }
@@ -1947,6 +1955,27 @@ bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
  auto K = B.buildFConstant(Ty, numbers::log2e);
  auto Mul = B.buildFMul(Ty, Src, K, Flags);
  B.buildFExp2(Dst, Mul, Flags);
  MI.eraseFromParent();
  return true;
}

// Turn an illegal packed v2s16 build vector into bit operations.
//
// The two s16 source operands are merged into a single s32 value, which is
// then bitcast to the <2 x s16> destination. The original instruction is
// erased and the function always reports success.
//
// \param MI  The G_BUILD_VECTOR being legalized; operand 0 is the result and
//            operands 1 and 2 are the two source elements.
// \param MRI Used to query the register types checked by the asserts.
// \param B   Builder used to emit the replacement instructions at MI.
// \returns true unconditionally.
//
// TODO: This should probably be a bitcast action in LegalizerHelper.
bool AMDGPULegalizerInfo::legalizeBuildVector(
  MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
  Register Dst = MI.getOperand(0).getReg();
  const LLT S32 = LLT::scalar(32);
  // The type queries live directly inside the asserts so that no local is
  // left unused (and warned about) when asserts are compiled out.
  assert(MRI.getType(Dst) == LLT::vector(2, 16));

  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  assert(MRI.getType(Src0) == LLT::scalar(16));

  // Insert the replacement sequence at the position of the original
  // instruction before removing it.
  B.setInstr(MI);
  auto Merge = B.buildMerge(S32, {Src0, Src1});
  B.buildBitcast(Dst, Merge);

  MI.eraseFromParent();
  return true;
+2 −0
Original line number Diff line number Diff line
@@ -82,6 +82,8 @@ public:
  bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
                    double Log2BaseInverted) const;
  bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
  bool legalizeBuildVector(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) const;

  Register getLiveInRegister(MachineRegisterInfo &MRI,
                             Register Reg, LLT Ty) const;
+9 −4
Original line number Diff line number Diff line
@@ -59,12 +59,17 @@ body: |
    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
    ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
    ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32)
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
    ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]]
    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]]
    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32)
    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
    ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; CHECK: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
    %1:_(<2 x s8>) = G_TRUNC %0
    %2:_(<2 x s16>) = G_SEXT %1
+7 −3
Original line number Diff line number Diff line
@@ -51,10 +51,14 @@ body: |

    ; CHECK-LABEL: name: test_zext_trunc_v2s32_to_v2s8_to_v2s16
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32)
    ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL]]
    ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
    ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[BITCAST]]
    ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>)
    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
    %1:_(<2 x s8>) = G_TRUNC %0
+41 −60
Original line number Diff line number Diff line
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# XFAIL: *
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs  -o - %s | FileCheck -check-prefix=GFX9  %s

---
name: test_build_vector_s_v2s16_s_s16_s_s16
name: test_build_vector_trunc_s_v2s16_s_s32_s_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -11,7 +12,7 @@ body: |
  bb.0:
    liveins: $sgpr0, $sgpr1

    ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_s16
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_s32
    ; GFX9: liveins: $sgpr0, $sgpr1
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
@@ -19,16 +20,12 @@ body: |
    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:sgpr(s32) = COPY $sgpr1

    %2:sgpr(s16) = G_TRUNC %0
    %3:sgpr(s16) = G_TRUNC %1

    %4:sgpr(<2 x s16>) = G_BUILD_VECTOR %2, %3
    %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
    S_ENDPGM 0, implicit %4
...

---
name: test_build_vector_s_pack_lh
name: test_build_vector_trunc_s_pack_lh
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -37,7 +34,7 @@ body: |
  bb.0:
    liveins: $sgpr0, $sgpr1

    ; GFX9-LABEL: name: test_build_vector_s_pack_lh
    ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh
    ; GFX9: liveins: $sgpr0, $sgpr1
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
@@ -49,16 +46,13 @@ body: |
    %2:sgpr(s32) = G_CONSTANT i32 16
    %3:sgpr(s32) = G_LSHR %1, %2

    %4:sgpr(s16) = G_TRUNC %0
    %5:sgpr(s16) = G_TRUNC %3

    %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %4, %5
    S_ENDPGM 0, implicit %6
    %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %3
    S_ENDPGM 0, implicit %4
...

# There is no s_pack_hl_b32
---
name: test_build_vector_s_pack_lh_swapped
name: test_build_vector_trunc_s_pack_lh_swapped
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -67,7 +61,7 @@ body: |
  bb.0:
    liveins: $sgpr0, $sgpr1

    ; GFX9-LABEL: name: test_build_vector_s_pack_lh_swapped
    ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_lh_swapped
    ; GFX9: liveins: $sgpr0, $sgpr1
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
@@ -81,15 +75,12 @@ body: |
    %2:sgpr(s32) = G_CONSTANT i32 16
    %3:sgpr(s32) = G_LSHR %1, %2

    %4:sgpr(s16) = G_TRUNC %0
    %5:sgpr(s16) = G_TRUNC %3

    %6:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %4
    S_ENDPGM 0, implicit %6
    %4:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %0
    S_ENDPGM 0, implicit %4
...

---
name: test_build_vector_s_pack_hh
name: test_build_vector_trunc_s_pack_hh
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -98,7 +89,7 @@ body: |
  bb.0:
    liveins: $sgpr0, $sgpr1

    ; GFX9-LABEL: name: test_build_vector_s_pack_hh
    ; GFX9-LABEL: name: test_build_vector_trunc_s_pack_hh
    ; GFX9: liveins: $sgpr0, $sgpr1
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
@@ -111,16 +102,13 @@ body: |
    %3:sgpr(s32) = G_LSHR %0, %2
    %4:sgpr(s32) = G_LSHR %1, %2

    %5:sgpr(s16) = G_TRUNC %3
    %6:sgpr(s16) = G_TRUNC %4

    %7:sgpr(<2 x s16>) = G_BUILD_VECTOR %5, %6
    S_ENDPGM 0, implicit %7
    %5:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %3, %4
    S_ENDPGM 0, implicit %5
...

# TODO: Should this use an and instead?
---
name: test_build_vector_s_v2s16_s_s16_s_0_s16
name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -129,23 +117,20 @@ body: |
  bb.0:
    liveins: $sgpr0

    ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_0_s16
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_0_s32
    ; GFX9: liveins: $sgpr0
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
    ; GFX9: [[S_PACK_LL_B32_B16_:%[0-9]+]]:sreg_32 = S_PACK_LL_B32_B16 [[COPY]], [[S_MOV_B32_]]
    ; GFX9: S_ENDPGM 0, implicit [[S_PACK_LL_B32_B16_]]
    %0:sgpr(s32) = COPY $sgpr0

    %1:sgpr(s16) = G_TRUNC %0
    %2:sgpr(s16) = G_CONSTANT i16 0

    %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
    S_ENDPGM 0, implicit %3
    %1:sgpr(s32) = G_CONSTANT i32 0
    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
    S_ENDPGM 0, implicit %2
...

---
name: test_build_vector_s_v2s16_s_0_s16_s_s16
name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -154,22 +139,21 @@ body: |
  bb.0:
    liveins: $sgpr0

    ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_0_s16_s_s16
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_0_s32_s_s32
    ; GFX9: liveins: $sgpr0
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
    ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
    %0:sgpr(s32) = COPY $sgpr0

    %1:sgpr(s16) = G_CONSTANT i16 0
    %2:sgpr(s16) = G_TRUNC %0
    %1:sgpr(s32) = G_CONSTANT i32 0

    %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
    S_ENDPGM 0, implicit %3
    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0
    S_ENDPGM 0, implicit %2
...

---
name: test_build_vector_v_v2s16_v_s16_s_undef_s16
name: test_build_vector_v_v2s16_v_s32_s_undef_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -178,21 +162,20 @@ body: |
  bb.0:
    liveins: $vgpr0

    ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s16_s_undef_s16
    ; GFX9-LABEL: name: test_build_vector_v_v2s16_v_s32_s_undef_s32
    ; GFX9: liveins: $vgpr0
    ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9: S_ENDPGM 0, implicit [[COPY]]
    %0:vgpr(s32) = COPY $vgpr0

    %1:vgpr(s16) = G_TRUNC %0
    %2:sgpr(s16) = G_IMPLICIT_DEF
    %1:sgpr(s32) = G_IMPLICIT_DEF

    %3:vgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
    S_ENDPGM 0, implicit %3
    %2:vgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
    S_ENDPGM 0, implicit %2
...

---
name: test_build_vector_s_v2s16_s_s16_s_undef_s16
name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -201,21 +184,20 @@ body: |
  bb.0:
    liveins: $sgpr0

    ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_s16_s_undef_s16
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_s32_s_undef_s32
    ; GFX9: liveins: $sgpr0
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: S_ENDPGM 0, implicit [[COPY]]
    %0:sgpr(s32) = COPY $sgpr0

    %1:sgpr(s16) = G_TRUNC %0
    %2:sgpr(s16) = G_IMPLICIT_DEF
    %1:sgpr(s32) = G_IMPLICIT_DEF

    %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
    S_ENDPGM 0, implicit %3
    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %0, %1
    S_ENDPGM 0, implicit %2
...

---
name: test_build_vector_s_v2s16_s_undef_s16_s_s16
name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32
legalized:       true
regBankSelected: true
tracksRegLiveness: true
@@ -224,16 +206,15 @@ body: |
  bb.0:
    liveins: $sgpr0

    ; GFX9-LABEL: name: test_build_vector_s_v2s16_s_undef_s16_s_s16
    ; GFX9-LABEL: name: test_build_vector_trunc_s_v2s16_s_undef_s32_s_s32
    ; GFX9: liveins: $sgpr0
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY]], 16, implicit-def $scc
    ; GFX9: S_ENDPGM 0, implicit [[S_LSHL_B32_]]
    %0:sgpr(s32) = COPY $sgpr0

    %1:sgpr(s16) = G_IMPLICIT_DEF
    %2:sgpr(s16) = G_TRUNC %0
    %1:sgpr(s32) = G_IMPLICIT_DEF

    %3:sgpr(<2 x s16>) = G_BUILD_VECTOR %1, %2
    S_ENDPGM 0, implicit %3
    %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %1, %0
    S_ENDPGM 0, implicit %2
...
Loading