Commit dcf11c5e authored by Victor Campos's avatar Victor Campos
Browse files

[ARM][AArch64] Complex addition Neon intrinsics for Armv8.3-A

Summary:
Add support for the vcadd_* family of intrinsics. This set of intrinsics is
available in Armv8.3-A.

The fp16 versions require the FP16 extension, which has been available
(opt-in) since Armv8.2-A.

Reviewers: t.p.northover

Reviewed By: t.p.northover

Subscribers: t.p.northover, kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D70862
parent af0babc9
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -1673,3 +1673,21 @@ let ArchGuard = "defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__)" in {
  def VFMLAL_LANEQ_HIGH : SOpInst<"vfmlal_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLAL_LN_Hi>;
  def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>;
}

// v8.3-A Vector complex addition intrinsics
//
// Half-precision ("h") variants. Besides __ARM_FEATURE_COMPLEX, the guard also
// requires __ARM_FEATURE_FP16_VECTOR_ARITHMETIC, so these records are only
// exposed when FP16 vector arithmetic is available as well.
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)" in {
  // Non-"q" forms. NOTE(review): the "..." prototype presumably means the
  // result and both operands share the base vector type -- confirm against the
  // prototype-string key used by the NEON TableGen emitter.
  def VCADD_ROT90_FP16   : SInst<"vcadd_rot90", "...", "h">;
  def VCADD_ROT270_FP16  : SInst<"vcadd_rot270", "...", "h">;
  // "q" forms. NOTE(review): "QQQ" presumably selects the quad-register
  // (128-bit) vector for the result and both operands -- confirm.
  def VCADDQ_ROT90_FP16  : SInst<"vcaddq_rot90", "QQQ", "h">;
  def VCADDQ_ROT270_FP16 : SInst<"vcaddq_rot270", "QQQ", "h">;
}
// Complex-addition intrinsics for the "f" element type (presumably 32-bit
// float -- confirm against the type-string key). Guarded only by
// __ARM_FEATURE_COMPLEX, with no architecture-state restriction in the guard.
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX)" in {
  // Rotation by 90 and 270 degrees, non-"q" vector forms.
  def VCADD_ROT90   : SInst<"vcadd_rot90", "...", "f">;
  def VCADD_ROT270  : SInst<"vcadd_rot270", "...", "f">;
  // Same operations, "q" vector forms.
  def VCADDQ_ROT90  : SInst<"vcaddq_rot90", "QQQ", "f">;
  def VCADDQ_ROT270 : SInst<"vcaddq_rot270", "QQQ", "f">;
}
// FP64 ("d") complex-addition intrinsics. The guard adds __aarch64__, so these
// records are restricted to AArch64; only "q" forms are defined here.
let ArchGuard = "defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__)" in {
  def VCADDQ_ROT90_FP64  : SInst<"vcaddq_rot90", "QQQ", "d">;
  def VCADDQ_ROT270_FP64 : SInst<"vcaddq_rot270", "QQQ", "d">;
}
 No newline at end of file
+1 −0
Original line number Diff line number Diff line
@@ -158,6 +158,7 @@ void AArch64TargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,

void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
                                                MacroBuilder &Builder) const {
  Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1");
  Builder.defineMacro("__ARM_FEATURE_JCVT", "1");
  // Also include the Armv8.2 defines
  getTargetDefinesARMV82A(Opts, Builder);
+12 −0
Original line number Diff line number Diff line
@@ -580,6 +580,13 @@ void ARMTargetInfo::getTargetDefinesARMV82A(const LangOptions &Opts,
  getTargetDefinesARMV81A(Opts, Builder);
}

// Emits the predefined macros for the Armv8.3-A architecture level: the
// complex-number feature macro, followed by everything the Armv8.2-A level
// defines.
//
// \param Opts    Language options, forwarded unchanged to the Armv8.2-A helper.
// \param Builder Receives the macro definitions.
void ARMTargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts,
                                            MacroBuilder &Builder) const {
  // New at this level: __ARM_FEATURE_COMPLEX, the macro the vcadd_* intrinsic
  // definitions use as their architecture guard.
  Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1");
  // Also include the ARMv8.2-A defines
  getTargetDefinesARMV82A(Opts, Builder);
}

void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
                                     MacroBuilder &Builder) const {
  // Target identification.
@@ -809,6 +816,11 @@ void ARMTargetInfo::getTargetDefines(const LangOptions &Opts,
  case llvm::ARM::ArchKind::ARMV8_2A:
    getTargetDefinesARMV82A(Opts, Builder);
    break;
  case llvm::ARM::ArchKind::ARMV8_3A:
  case llvm::ARM::ArchKind::ARMV8_4A:
  case llvm::ARM::ArchKind::ARMV8_5A:
    getTargetDefinesARMV83A(Opts, Builder);
    break;
  }
}

+2 −1
Original line number Diff line number Diff line
@@ -148,9 +148,10 @@ public:

  void getTargetDefinesARMV81A(const LangOptions &Opts,
                               MacroBuilder &Builder) const;

  void getTargetDefinesARMV82A(const LangOptions &Opts,
                               MacroBuilder &Builder) const;
  // Defines __ARM_FEATURE_COMPLEX in Builder and then adds the Armv8.2-A
  // defines via getTargetDefinesARMV82A.
  void getTargetDefinesARMV83A(const LangOptions &Opts,
                                 MacroBuilder &Builder) const;
  void getTargetDefines(const LangOptions &Opts,
                        MacroBuilder &Builder) const override;

+8 −0
Original line number Diff line number Diff line
@@ -4454,6 +4454,10 @@ static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
  NEONMAP1(vcadd_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_v, arm_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_v, arm_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, arm_neon_vacge, 0),
  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
@@ -4727,6 +4731,10 @@ static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
  NEONMAP1(vcadd_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcadd_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcaddq_rot270_v, aarch64_neon_vcadd_rot270, Add1ArgType),
  NEONMAP1(vcaddq_rot90_v, aarch64_neon_vcadd_rot90, Add1ArgType),
  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
Loading