Commit 0ed4cf4b authored by Chen Zheng's avatar Chen Zheng
Browse files

[PowerPC] support register pressure reduction in machine combiner.

Reassociate some patterns to generate more FMA instructions in order to
reduce register pressure.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D92071
parent f4537935
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -29,6 +29,11 @@ enum class MachineCombinerPattern {
  REASSOC_XY_AMM_BMM,
  REASSOC_XMM_AMM_BMM,

  // These are patterns matched by the PowerPC machine combiner to reassociate
  // FMA and FSUB in order to reduce register pressure.
  REASSOC_XY_BCA,
  REASSOC_XY_BAC,

  // These are multiply-add patterns matched by the AArch64 machine combiner.
  MULADDW_OP1,
  MULADDW_OP2,
+3 −0
Original line number Diff line number Diff line
@@ -279,6 +279,9 @@ static CombinerObjective getCombinerObjective(MachineCombinerPattern P) {
  case MachineCombinerPattern::REASSOC_XY_AMM_BMM:
  case MachineCombinerPattern::REASSOC_XMM_AMM_BMM:
    return CombinerObjective::MustReduceDepth;
  case MachineCombinerPattern::REASSOC_XY_BCA:
  case MachineCombinerPattern::REASSOC_XY_BAC:
    return CombinerObjective::MustReduceRegisterPressure;
  default:
    return CombinerObjective::Default;
  }
+473 −44

File changed.

Preview size limit exceeded, changes collapsed.

+21 −1
Original line number Diff line number Diff line
@@ -252,6 +252,11 @@ class PPCInstrInfo : public PPCGenInstrInfo {
                      SmallVectorImpl<MachineInstr *> &InsInstrs,
                      SmallVectorImpl<MachineInstr *> &DelInstrs,
                      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
  bool isLoadFromConstantPool(MachineInstr *I) const;
  Register
  generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty,
                          SmallVectorImpl<MachineInstr *> &InsInstrs) const;
  const Constant *getConstantFromConstantPool(MachineInstr *I) const;
  virtual void anchor();

protected:
@@ -343,7 +348,8 @@ public:
  /// chain ending in \p Root. All potential patterns are output in the \p
  /// P array.
  bool getFMAPatterns(MachineInstr &Root,
                      SmallVectorImpl<MachineCombinerPattern> &P) const;
                      SmallVectorImpl<MachineCombinerPattern> &P,
                      bool DoRegPressureReduce) const;

  /// Return true when there is potentially a faster code sequence
  /// for an instruction chain ending in <Root>. All potential patterns are
@@ -352,6 +358,20 @@ public:
                                  SmallVectorImpl<MachineCombinerPattern> &P,
                                  bool DoRegPressureReduce) const override;

  /// On PowerPC, we leverage the machine combiner pass to reduce register
  /// pressure when the register pressure of a basic block is high.
  /// Return true if the register pressure for \p MBB is high and register
  /// pressure reduction is supported for the current ABI. Otherwise return
  /// false.
  bool
  shouldReduceRegisterPressure(MachineBasicBlock *MBB,
                               RegisterClassInfo *RegClassInfo) const override;

  /// Fix up the placeholders we put in genAlternativeCodeSequence() for
  /// MachineCombiner.
  void
  finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P,
                    SmallVectorImpl<MachineInstr *> &InsInstrs) const override;

  bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;

  /// On PowerPC, we try to reassociate FMA chain which will increase
+135 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -O3 < %s \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -O3 < %s \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-fma-rp-factor=0.0 -O3 < %s \
; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 | FileCheck %s --check-prefix=CHECK-FMA

@global_val = external global float, align 4

; foo_float computes (%2 - %3) * K + (%1 * %0) on float, where K is a float
; constant and every operation carries the reassoc and nsz fast-math flags.
; The two default invocations (pwr9 and pwr8) expect the subtract to be kept,
; followed by one multiply-add that needs a single constant-pool load. The
; invocation that passes -ppc-fma-rp-factor=0.0 expects no subtract at all:
; two multiply-adds fed by two constant-pool loads.
define float @foo_float(float %0, float %1, float %2, float %3) {
; CHECK-LABEL: foo_float:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-NEXT:    xsmulsp f1, f2, f1
; CHECK-NEXT:    xssubsp f0, f3, f4
; CHECK-NEXT:    lfs f2, .LCPI0_0@toc@l(r3)
; CHECK-NEXT:    xsmaddasp f1, f0, f2
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: foo_float:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
; CHECK-P8-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT:    xssubsp f0, f3, f4
; CHECK-P8-NEXT:    lfs f2, .LCPI0_0@toc@l(r3)
; CHECK-P8-NEXT:    xsmaddasp f1, f0, f2
; CHECK-P8-NEXT:    blr
;
; CHECK-FMA-LABEL: foo_float:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
; CHECK-FMA-NEXT:    xsmulsp f1, f2, f1
; CHECK-FMA-NEXT:    lfs f0, .LCPI0_0@toc@l(r3)
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
; CHECK-FMA-NEXT:    lfs f2, .LCPI0_1@toc@l(r3)
; CHECK-FMA-NEXT:    xsmaddasp f1, f4, f2
; CHECK-FMA-NEXT:    xsmaddasp f1, f3, f0
; CHECK-FMA-NEXT:    blr
  %5 = fmul reassoc nsz float %1, %0
  %6 = fsub reassoc nsz float %2, %3
  %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000
  %8 = fadd reassoc nsz float %7, %5
  ret float %8
}

; foo_double computes (%2 - %3) * K + (%1 * %0) on double, where K is a double
; constant and every operation carries the reassoc and nsz fast-math flags.
; The two default invocations (pwr9 and pwr8) expect the subtract to be kept,
; followed by one multiply-add that needs a single constant-pool load. The
; invocation that passes -ppc-fma-rp-factor=0.0 expects no subtract at all:
; two multiply-adds fed by two constant-pool loads.
define double @foo_double(double %0, double %1, double %2, double %3) {
; CHECK-LABEL: foo_double:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xsmuldp f1, f2, f1
; CHECK-NEXT:    xssubdp f0, f3, f4
; CHECK-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-NEXT:    lfd f2, .LCPI1_0@toc@l(r3)
; CHECK-NEXT:    xsmaddadp f1, f0, f2
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: foo_double:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    xsmuldp f1, f2, f1
; CHECK-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT:    xssubdp f0, f3, f4
; CHECK-P8-NEXT:    lfd f2, .LCPI1_0@toc@l(r3)
; CHECK-P8-NEXT:    xsmaddadp f1, f0, f2
; CHECK-P8-NEXT:    blr
;
; CHECK-FMA-LABEL: foo_double:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-FMA-NEXT:    xsmuldp f1, f2, f1
; CHECK-FMA-NEXT:    lfd f0, .LCPI1_0@toc@l(r3)
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI1_1@toc@ha
; CHECK-FMA-NEXT:    lfd f2, .LCPI1_1@toc@l(r3)
; CHECK-FMA-NEXT:    xsmaddadp f1, f4, f2
; CHECK-FMA-NEXT:    xsmaddadp f1, f3, f0
; CHECK-FMA-NEXT:    blr
  %5 = fmul reassoc nsz double %1, %0
  %6 = fsub reassoc nsz double %2, %3
  %7 = fmul reassoc nsz double %6, 0x3DB2533FE68CADDE
  %8 = fadd reassoc nsz double %7, %5
  ret double %8
}

; foo_float_reuse_const returns the same (%2 - %3) * K + (%1 * %0) float
; expression as foo_float, and additionally stores %1 * C to @global_val, where
; the bit pattern of C differs from that of K only in the sign bit (C is -K).
; The two default invocations (pwr9 and pwr8) expect the subtract to be kept,
; with one constant-pool load feeding the multiply-add and another feeding the
; extra multiply. The invocation that passes -ppc-fma-rp-factor=0.0 expects no
; subtract, two multiply-adds, and still only two constant-pool loads: the
; value loaded from .LCPI2_1 feeds both a multiply-add and the extra multiply.
define float @foo_float_reuse_const(float %0, float %1, float %2, float %3) {
; CHECK-LABEL: foo_float_reuse_const:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-NEXT:    xsmulsp f1, f2, f1
; CHECK-NEXT:    xssubsp f0, f3, f4
; CHECK-NEXT:    lfs f3, .LCPI2_0@toc@l(r3)
; CHECK-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
; CHECK-NEXT:    xsmaddasp f1, f0, f3
; CHECK-NEXT:    lfs f0, .LCPI2_1@toc@l(r3)
; CHECK-NEXT:    addis r3, r2, .LC0@toc@ha
; CHECK-NEXT:    ld r3, .LC0@toc@l(r3)
; CHECK-NEXT:    xsmulsp f0, f2, f0
; CHECK-NEXT:    stfs f0, 0(r3)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: foo_float_reuse_const:
; CHECK-P8:       # %bb.0:
; CHECK-P8-NEXT:    xsmulsp f1, f2, f1
; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
; CHECK-P8-NEXT:    xssubsp f0, f3, f4
; CHECK-P8-NEXT:    lfs f3, .LCPI2_0@toc@l(r3)
; CHECK-P8-NEXT:    lfs f4, .LCPI2_1@toc@l(r4)
; CHECK-P8-NEXT:    addis r3, r2, .LC0@toc@ha
; CHECK-P8-NEXT:    ld r3, .LC0@toc@l(r3)
; CHECK-P8-NEXT:    xsmaddasp f1, f0, f3
; CHECK-P8-NEXT:    xsmulsp f0, f2, f4
; CHECK-P8-NEXT:    stfsx f0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-FMA-LABEL: foo_float_reuse_const:
; CHECK-FMA:       # %bb.0:
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-FMA-NEXT:    xsmulsp f1, f2, f1
; CHECK-FMA-NEXT:    lfs f0, .LCPI2_0@toc@l(r3)
; CHECK-FMA-NEXT:    addis r3, r2, .LCPI2_1@toc@ha
; CHECK-FMA-NEXT:    lfs f5, .LCPI2_1@toc@l(r3)
; CHECK-FMA-NEXT:    addis r3, r2, .LC0@toc@ha
; CHECK-FMA-NEXT:    ld r3, .LC0@toc@l(r3)
; CHECK-FMA-NEXT:    xsmaddasp f1, f4, f5
; CHECK-FMA-NEXT:    xsmaddasp f1, f3, f0
; CHECK-FMA-NEXT:    xsmulsp f0, f2, f5
; CHECK-FMA-NEXT:    stfs f0, 0(r3)
; CHECK-FMA-NEXT:    blr
  %5 = fmul reassoc nsz float %1, %0
  %6 = fsub reassoc nsz float %2, %3
  %7 = fmul reassoc nsz float %6, 0x3DB2533FE0000000
  %8 = fadd reassoc nsz float %7, %5
  %9 = fmul reassoc nsz float %1, 0xBDB2533FE0000000
  store float %9, float* @global_val, align 4
  ret float %8
}