Commit 0e49c54a authored by Jon Roelofs
Browse files

[AArch64] Fix selection of G_UNMERGE <2 x s16>

Differential revision: https://reviews.llvm.org/D106007
parent df538fda
Loading
Loading
Loading
Loading
+8 −1
Original line number Diff line number Diff line
@@ -3988,6 +3988,13 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
  } else {
    // No. We have to perform subregister inserts. For each insert, create an
    // implicit def and a subregister insert, and save the register we create.
    const TargetRegisterClass *RC =
        getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
                              WideTy.getScalarSizeInBits() * NumElts);
    unsigned SubReg = 0;
    bool Found = getSubRegForClass(RC, TRI, SubReg);
    (void)Found;
    assert(Found && "expected to find last operand's subeg idx");
    for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
      Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
      MachineInstr &ImpDefMI =
@@ -4001,7 +4008,7 @@ bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
                   TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
               .addUse(ImpDefReg)
               .addUse(SrcReg)
               .addImm(AArch64::dsub);
               .addImm(SubReg);

      constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
      constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
+47 −0
Original line number Diff line number Diff line
@@ -11,6 +11,10 @@
    ret <4 x float> %a
  }

  ; IR stub for the MIR function named test_v2s16_unmerge in this file.
  ; It takes a <2 x half> argument and returns it unchanged.
  define <2 x half> @test_v2s16_unmerge(<2 x half> %a) {
    ret <2 x half> %a
  }

  ; IR stub for the MIR function named test_v4s16_unmerge in this file.
  ; It takes a <4 x half> argument and returns it unchanged.
  define <4 x half> @test_v4s16_unmerge(<4 x half> %a) {
    ret <4 x half> %a
  }
@@ -87,6 +91,49 @@ body: |
    RET_ReallyLR implicit $q0
...
---
# Selection test for G_UNMERGE_VALUES with a <2 x s16> source.
#
# The body copies $s0 into a <2 x s16> value, unmerges it into two s16
# scalars, rebuilds a <2 x s16> with G_BUILD_VECTOR, and returns it in $s0.
#
# The CHECK lines expect the 32-bit source to be widened to fpr128 through
# IMPLICIT_DEF + INSERT_SUBREG using the ssub subregister index. Element 0 is
# then expected as a COPY of the hsub subregister, and element 1 as CPYi16
# with lane 1, both taken from that same INSERT_SUBREG result.
name:            test_v2s16_unmerge
legalized:       true
regBankSelected: true
tracksRegLiveness: true
# NOTE(review): ids 4 and 5 are declared here but the body below only
# references %0 through %3.
registers:
  - { id: 0, class: fpr }
  - { id: 1, class: fpr }
  - { id: 2, class: fpr }
  - { id: 3, class: fpr }
  - { id: 4, class: fpr }
  - { id: 5, class: fpr }
body:             |
  bb.1 (%ir-block.0):
    liveins: $s0
    ; CHECK-LABEL: name: test_v2s16_unmerge

    ; CHECK: [[COPY:%[0-9]+]]:fpr32 = COPY $s0
    %0:fpr(<2 x s16>) = COPY $s0

    ; Since 2 * 16 != 128, we need to widen using implicit defs.
    ; Note that we expect to reuse one of the INSERT_SUBREG results, as CPYi16
    ; expects a lane > 0.
    ; CHECK: [[DEF:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.ssub
    ; CHECK: [[COPY1:%[0-9]+]]:fpr16 = COPY [[INSERT_SUBREG]].hsub
    ; CHECK: [[CPYi16_:%[0-9]+]]:fpr16 = CPYi16 [[INSERT_SUBREG]], 1
    %2:fpr(s16), %3:fpr(s16) = G_UNMERGE_VALUES %0(<2 x s16>)

    ; CHECK: [[DEF1:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG1:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF1]], [[COPY1]], %subreg.hsub
    ; CHECK: [[DEF2:%[0-9]+]]:fpr128 = IMPLICIT_DEF
    ; CHECK: [[INSERT_SUBREG2:%[0-9]+]]:fpr128 = INSERT_SUBREG [[DEF2]], [[CPYi16_]], %subreg.hsub
    ; CHECK: [[INSvi16lane:%[0-9]+]]:fpr128 = INSvi16lane [[INSERT_SUBREG1]], 1, [[INSERT_SUBREG2]], 0
    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY [[INSvi16lane]].ssub
    %1:fpr(<2 x s16>) = G_BUILD_VECTOR %2(s16), %3(s16)

    ; CHECK: $s0 = COPY [[COPY2]]
    $s0 = COPY %1(<2 x s16>)

    ; CHECK: RET_ReallyLR implicit $s0
    RET_ReallyLR implicit $s0
...
---
name:            test_v4s16_unmerge
alignment:       4
legalized:       true