Commit 304abde0 authored by Quentin Colombet's avatar Quentin Colombet
Browse files

[GISel][CombinerHelper] Add support for scalar type for the result of shuffle vector

In GISel, 1-element vectors from LLVM IR get lowered to scalars. As a
result, a shuffle vector may also produce a scalar.

This patch teaches the shuffle combiner how to deal with scalars when
they are in the destination type of a shuffle vector.

For now, we just support the easy case where this can be lowered to
a plain copy. For other cases, we leave the shuffle vector as is.

This type of IR is seen in O0 pipelines, e.g., as produced with
SingleSource/UnitTests/Vector/AArch64/aarch64_neon_intrinsics.c.

rdar://problem/57198904
parent 47bd7c57
Loading
Loading
Loading
Loading
+17 −3
Original line number Diff line number Diff line
@@ -189,7 +189,10 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
  LLT DstType = MRI.getType(MI.getOperand(0).getReg());
  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcType = MRI.getType(Src1);
  unsigned DstNumElts = DstType.getNumElements();
  // As bizarre as it may look, a shuffle vector can actually produce
  // a scalar! This is because, at the IR level, a <1 x ty> shuffle
  // vector is perfectly valid.
  unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
  unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;

  // If the resulting vector is smaller than the size of the source
@@ -199,7 +202,15 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  if (DstNumElts < 2 * SrcNumElts)
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter: we will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size, but
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match,
  //       we could still emit an extract vector element in that case.
  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
    return false;

  // Check that the shuffle mask can be broken evenly between the
@@ -254,6 +265,9 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
  Builder.setInsertPt(*MI.getParent(), MI);
  Register NewDstReg = MRI.cloneVirtualRegister(DstReg);

  if (Ops.size() == 1)
    Builder.buildCopy(NewDstReg, Ops[0]);
  else
    Builder.buildMerge(NewDstReg, Ops);

  MI.eraseFromParent();
+42 −0
Original line number Diff line number Diff line
@@ -414,3 +414,45 @@ body: |
    %6:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,-1,-1,1)
    RET_ReallyLR implicit %6
...

# Check that shuffle_vector on scalars gets combined into a plain
# copy when the resulting type is a scalar as well and the sizes
# are compatible.
---
name: shuffle_vector_on_scalars_to_copy_ptr
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0

    ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_copy_ptr
    ; CHECK: liveins: $x0
    ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
    ; CHECK: RET_ReallyLR implicit [[COPY]](p0)
    %0:_(p0) = COPY $x0
    %6:_(p0) = G_SHUFFLE_VECTOR %0, %0, shufflemask(0)
    RET_ReallyLR implicit %6
...

# Check that shuffle_vector on vectors doesn't get combined
# when the resulting type is a scalar.
# We should be able to replace this with an extract vector element,
# but that's not implemented yet.
---
name: shuffle_vector_to_copy_neg
tracksRegLiveness: true
body:             |
  bb.1:
    liveins: $x0, $x1

    ; CHECK-LABEL: name: shuffle_vector_to_copy_neg
    ; CHECK: liveins: $x0, $x1
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0
    ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $x1
    ; CHECK: [[SHUF:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[COPY]](<2 x s32>), [[COPY1]], shufflemask(1)
    ; CHECK: RET_ReallyLR implicit [[SHUF]](s32)
    %0:_(<2 x s32>) = COPY $x0
    %1:_(<2 x s32>) = COPY $x1
    %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1)
    RET_ReallyLR implicit %6
...