Commit 757384ab authored by Usman Nadeem's avatar Usman Nadeem
Browse files

[AArch64][SVE][InstCombine] Fold redundant zip1/2(uzp1/2) operations

    zip1(uzp1(A, B), uzp2(A, B)) --> A
    zip2(uzp1(A, B), uzp2(A, B)) --> B

Differential Revision: https://reviews.llvm.org/D109666

Change-Id: I4a6578db2fcef9ff71ad0e77b9fe08354e6dbfcd
parent 0db94812
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -792,6 +792,21 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
  return IC.replaceInstUsesWith(II, VectorSplat);
}

static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
                                                 IntrinsicInst &II) {
  // zip1(uzp1(A, B), uzp2(A, B)) --> A
  // zip2(uzp1(A, B), uzp2(A, B)) --> B
  Value *A, *B;
  if (match(II.getArgOperand(0),
            m_Intrinsic<Intrinsic::aarch64_sve_uzp1>(m_Value(A), m_Value(B))) &&
      match(II.getArgOperand(1), m_Intrinsic<Intrinsic::aarch64_sve_uzp2>(
                                     m_Specific(A), m_Specific(B))))
    return IC.replaceInstUsesWith(
        II, (II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ? A : B));

  return None;
}

Optional<Instruction *>
AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                     IntrinsicInst &II) const {
@@ -835,6 +850,9 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
  case Intrinsic::aarch64_sve_sunpkhi:
  case Intrinsic::aarch64_sve_sunpklo:
    return instCombineSVEUnpack(IC, II);
  case Intrinsic::aarch64_sve_zip1:
  case Intrinsic::aarch64_sve_zip2:
    return instCombineSVEZip(IC, II);
  }

  return None;
+24 −0
Original line number Diff line number Diff line
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define <vscale x 4 x i32> @redundant_zip_unzip(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
; CHECK-LABEL: @redundant_zip_unzip(
; CHECK-NEXT:    [[RET:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret <vscale x 4 x i32> [[RET]]
;
  %uzp1 = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %uzp2 = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  %zip1 = call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> %uzp1, <vscale x 4 x i32> %uzp2)
  %zip2 = call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> %uzp1, <vscale x 4 x i32> %uzp2)
  %ret = add <vscale x 4 x i32> %zip1, %zip2
  ret <vscale x 4 x i32> %ret
}

declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)

attributes #0 = { "target-features"="+sve" }