Commit a08c0ade authored by Sjoerd Meijer's avatar Sjoerd Meijer
Browse files

[ARM][MVE] VPT Block Pass fix

Fix a broken case that was also missing a test: 2 VPT blocks predicated on the
same VCMP instruction that can be folded. The problem was that for each VPT
block, we record the predicate statements in a list, but the same instruction
was added twice. Thus, we were running into an assert when trying to remove the
same instruction twice. To avoid this, the instructions are now recorded in a set.

Differential Revision: https://reviews.llvm.org/D72699
parent 013c07f6
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -96,7 +96,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
  bool Modified = false;
  MachineBasicBlock::instr_iterator MBIter = Block.instr_begin();
  MachineBasicBlock::instr_iterator EndIter = Block.instr_end();
  SmallVector<MachineInstr *, 4> RemovedVCMPs;
  SmallSet<MachineInstr *, 4> RemovedVCMPs;

  while (MBIter != EndIter) {
    MachineInstr *MI = &*MBIter;
@@ -154,7 +154,7 @@ bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) {
      // and deleting all instructions in this list in one go after we have
      // created the VPT blocks. We do this in order not to invalidate the
      // ReachingDefAnalysis that is queried by 'findVCMPToFoldIntoVPST'.
      RemovedVCMPs.push_back(VCMP);
      RemovedVCMPs.insert(VCMP);
    } else {
      MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST));
      MIBuilder.addImm(BlockMask);
+88 −0
Original line number Diff line number Diff line
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass arm-mve-vpt %s -o - | FileCheck %s

--- |
  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
  target triple = "thumbv8.1m.main-arm-none-eabi"

  define hidden arm_aapcs_vfpcc <4 x float> @vpt_2_blocks_1_pred(<4 x float> %inactive1, <4 x float> %a, <4 x float> %b, i16 zeroext %p1, i16 zeroext %p2) local_unnamed_addr #0 {
  entry:
    ;
    ; Intentionally left blank, see the MIR sequence below.
    ;
    ret <4 x float> %inactive1
  }

  attributes #0 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="128" "frame-pointer"="none" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+armv8.1-m.main,+hwdiv,+mve.fp,+ras,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" }
  attributes #1 = { nounwind readnone }
  attributes #2 = { nounwind }

...
---
name:            vpt_2_blocks_1_pred
alignment:       4
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
failedISel:      false
tracksRegLiveness: true
hasWinCFI:       false
registers:       []
liveins:
  - { reg: '$q0', virtual-reg: '' }
  - { reg: '$q1', virtual-reg: '' }
  - { reg: '$q2', virtual-reg: '' }
  - { reg: '$q5', virtual-reg: '' }
  - { reg: '$r0', virtual-reg: '' }
  - { reg: '$r1', virtual-reg: '' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  stackProtector:  ''
  maxCallFrameSize: 0
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
  localFrameSize:  0
  savePoint:       ''
  restorePoint:    ''
fixedStack:      []
stack:           []
constants:       []
body:             |
  bb.0:
  liveins: $lr, $q0, $q1, $q2, $q3, $q4, $q5, $r0, $r1, $r2, $r7, $r8, $r9, $r10, $r11, $r12

  ; CHECK-LABEL: name: vpt_2_blocks_1_pred
  ; CHECK: renamable $r4 = t2ADDrr renamable $r2, renamable $r10, 14, $noreg, $noreg
  ; CHECK:   BUNDLE implicit-def $vpr, implicit-def $q6, implicit-def $d12, implicit-def $s24, implicit-def $s25, implicit-def $d13, implicit-def $s26, implicit-def $s27, implicit $q1, implicit $q5, implicit killed $r4 {
  ; CHECK:     MVE_VPTv4u32 8, renamable $q1, renamable $q5, 2, implicit-def $vpr
  ; CHECK:     renamable $q6 = MVE_VLDRBU32 killed renamable $r4, 0, 1, internal renamable $vpr
  ; CHECK:   }
  ; CHECK:   renamable $r4 = t2ADDrr renamable $r11, renamable $r10, 14, $noreg, $noreg
  ; CHECK:   BUNDLE implicit-def dead $vpr, implicit-def $q7, implicit-def $d14, implicit-def $s28, implicit-def $s29, implicit-def $d15, implicit-def $s30, implicit-def $s31, implicit $q1, implicit $q5, implicit killed $r4 {
  ; CHECK:     MVE_VPTv4u32 8, renamable $q1, renamable $q5, 2, implicit-def $vpr
  ; CHECK:     renamable $q7 = MVE_VLDRBU32 killed renamable $r4, 0, 1, internal killed renamable $vpr
  ; CHECK:   }
  ; CHECK:   t2LoopEnd renamable $lr, %bb.0, implicit-def dead $cpsr
  ; CHECK:   t2B %bb.0, 14, $noreg


  ; Test input: a single MVE_VCMPu32 defines $vpr, and that same $vpr value is
  ; then used as an operand of two separate MVE_VLDRBU32 loads, each preceded
  ; by its own t2ADDrr address computation. The expected output above has one
  ; VPT block (BUNDLE) per load, each starting with an MVE_VPTv4u32 on the same
  ; $q1/$q5 operands, and no standalone MVE_VCMPu32 left over.
  renamable $vpr = MVE_VCMPu32 renamable $q1, renamable $q5, 2, 0, $noreg
  renamable $r4 = t2ADDrr renamable $r2, renamable $r10, 14, $noreg, $noreg
  renamable $q6 = MVE_VLDRBU32 killed renamable $r4, 0, 1, renamable $vpr
  renamable $r4 = t2ADDrr renamable $r11, renamable $r10, 14, $noreg, $noreg
  ; Second load predicated on the same $vpr; this use is marked 'killed'.
  renamable $q7 = MVE_VLDRBU32 killed renamable $r4, 0, 1, killed renamable $vpr
  t2LoopEnd renamable $lr, %bb.0, implicit-def dead $cpsr
  t2B %bb.0, 14, $noreg

...