Commit 3f91705c authored by Tim Northover's avatar Tim Northover
Browse files

ARM-NEON: make type modifiers orthogonal and allow multiple modifiers.

The modifier system used to mutate types on NEON intrinsic definitions had a
separate letter for all kinds of transformations that might be needed, and we
were quite quickly running out of letters to use. This patch converts to a much
smaller set of orthogonal modifiers that can be applied together to achieve the
desired effect.

When merging with downstream it is likely to cause a conflict with any local
modifications to the .td files. There is a new script in
utils/convert_arm_neon.py that was used to convert all .td definitions and I
would suggest running it on the last downstream version of those files before
this commit rather than resolving conflicts manually.
parent e23d6f31
Loading
Loading
Loading
Loading
+83 −83
Original line number Diff line number Diff line
@@ -17,118 +17,118 @@ include "arm_neon_incl.td"
let ArchGuard = "defined(__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) && defined(__aarch64__)" in {

  // Negate
  def VNEGSH          : SInst<"vneg", "ss", "Sh">;
  def VNEGSH          : SInst<"vneg", "11", "Sh">;

  // Reciprocal/Sqrt
  def SCALAR_FRECPSH  : IInst<"vrecps", "sss", "Sh">;
  def FSQRTSH         : SInst<"vsqrt", "ss", "Sh">;
  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "sss", "Sh">;
  def SCALAR_FRECPSH  : IInst<"vrecps", "111", "Sh">;
  def FSQRTSH         : SInst<"vsqrt", "11", "Sh">;
  def SCALAR_FRSQRTSH : IInst<"vrsqrts", "111", "Sh">;

  // Reciprocal Estimate
  def SCALAR_FRECPEH  : IInst<"vrecpe", "ss", "Sh">;
  def SCALAR_FRECPEH  : IInst<"vrecpe", "11", "Sh">;

  // Reciprocal Exponent
  def SCALAR_FRECPXH  : IInst<"vrecpx", "ss", "Sh">;
  def SCALAR_FRECPXH  : IInst<"vrecpx", "11", "Sh">;

  // Reciprocal Square Root Estimate
  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "ss", "Sh">;
  def SCALAR_FRSQRTEH : IInst<"vrsqrte", "11", "Sh">;

  // Rounding
  def FRINTZ_S64H     : SInst<"vrnd", "ss", "Sh">;
  def FRINTA_S64H     : SInst<"vrnda", "ss", "Sh">;
  def FRINTI_S64H     : SInst<"vrndi", "ss", "Sh">;
  def FRINTM_S64H     : SInst<"vrndm", "ss", "Sh">;
  def FRINTN_S64H     : SInst<"vrndn", "ss", "Sh">;
  def FRINTP_S64H     : SInst<"vrndp", "ss", "Sh">;
  def FRINTX_S64H     : SInst<"vrndx", "ss", "Sh">;
  def FRINTZ_S64H     : SInst<"vrnd", "11", "Sh">;
  def FRINTA_S64H     : SInst<"vrnda", "11", "Sh">;
  def FRINTI_S64H     : SInst<"vrndi", "11", "Sh">;
  def FRINTM_S64H     : SInst<"vrndm", "11", "Sh">;
  def FRINTN_S64H     : SInst<"vrndn", "11", "Sh">;
  def FRINTP_S64H     : SInst<"vrndp", "11", "Sh">;
  def FRINTX_S64H     : SInst<"vrndx", "11", "Sh">;

  // Conversion
  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "Ys", "sUs">;
  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "Ys", "iUi">;
  def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "Ys", "lUl">;
  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "$s", "Sh">;
  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "Is", "Sh">;
  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "Ls", "Sh">;
  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "bs", "Sh">;
  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "Us", "Sh">;
  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "Os", "Sh">;
  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "$s", "Sh">;
  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "Is", "Sh">;
  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "Ls", "Sh">;
  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "bs", "Sh">;
  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "Us", "Sh">;
  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "Os", "Sh">;
  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "$s", "Sh">;
  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "Is", "Sh">;
  def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "Ls", "Sh">;
  def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "bs", "Sh">;
  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "Us", "Sh">;
  def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "Os", "Sh">;
  def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "$s", "Sh">;
  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "Is", "Sh">;
  def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "Ls", "Sh">;
  def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "bs", "Sh">;
  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "Us", "Sh">;
  def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "Os", "Sh">;
  def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "$s", "Sh">;
  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "Is", "Sh">;
  def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "Ls", "Sh">;
  def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "bs", "Sh">;
  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "Us", "Sh">;
  def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "Os", "Sh">;
  def SCALAR_SCVTFSH  : SInst<"vcvth_f16", "(1F)(1!)", "sUs">;
  def SCALAR_SCVTFSH1 : SInst<"vcvth_f16", "(1F<)(1!)", "iUi">;
  def SCALAR_SCVTFSH2 : SInst<"vcvth_f16", "(1F<<)(1!)", "lUl">;
  def SCALAR_FCVTZSH  : SInst<"vcvt_s16", "(1S)1", "Sh">;
  def SCALAR_FCVTZSH1 : SInst<"vcvt_s32", "(1S>)1", "Sh">;
  def SCALAR_FCVTZSH2 : SInst<"vcvt_s64", "(1S>>)1", "Sh">;
  def SCALAR_FCVTZUH  : SInst<"vcvt_u16", "(1U)1", "Sh">;
  def SCALAR_FCVTZUH1 : SInst<"vcvt_u32", "(1U>)1", "Sh">;
  def SCALAR_FCVTZUH2 : SInst<"vcvt_u64", "(1U>>)1", "Sh">;
  def SCALAR_FCVTASH  : SInst<"vcvta_s16", "(1S)1", "Sh">;
  def SCALAR_FCVTASH1 : SInst<"vcvta_s32", "(1S>)1", "Sh">;
  def SCALAR_FCVTASH2 : SInst<"vcvta_s64", "(1S>>)1", "Sh">;
  def SCALAR_FCVTAUH  : SInst<"vcvta_u16", "(1U)1", "Sh">;
  def SCALAR_FCVTAUH1 : SInst<"vcvta_u32", "(1U>)1", "Sh">;
  def SCALAR_FCVTAUH2 : SInst<"vcvta_u64", "(1U>>)1", "Sh">;
  def SCALAR_FCVTMSH  : SInst<"vcvtm_s16", "(1S)1", "Sh">;
  def SCALAR_FCVTMSH1 : SInst<"vcvtm_s32", "(1S>)1", "Sh">;
  def SCALAR_FCVTMSH2 : SInst<"vcvtm_s64", "(1S>>)1", "Sh">;
  def SCALAR_FCVTMUH  : SInst<"vcvtm_u16", "(1U)1", "Sh">;
  def SCALAR_FCVTMUH1 : SInst<"vcvtm_u32", "(1U>)1", "Sh">;
  def SCALAR_FCVTMUH2 : SInst<"vcvtm_u64", "(1U>>)1", "Sh">;
  def SCALAR_FCVTNSH  : SInst<"vcvtn_s16", "(1S)1", "Sh">;
  def SCALAR_FCVTNSH1 : SInst<"vcvtn_s32", "(1S>)1", "Sh">;
  def SCALAR_FCVTNSH2 : SInst<"vcvtn_s64", "(1S>>)1", "Sh">;
  def SCALAR_FCVTNUH  : SInst<"vcvtn_u16", "(1U)1", "Sh">;
  def SCALAR_FCVTNUH1 : SInst<"vcvtn_u32", "(1U>)1", "Sh">;
  def SCALAR_FCVTNUH2 : SInst<"vcvtn_u64", "(1U>>)1", "Sh">;
  def SCALAR_FCVTPSH  : SInst<"vcvtp_s16", "(1S)1", "Sh">;
  def SCALAR_FCVTPSH1 : SInst<"vcvtp_s32", "(1S>)1", "Sh">;
  def SCALAR_FCVTPSH2 : SInst<"vcvtp_s64", "(1S>>)1", "Sh">;
  def SCALAR_FCVTPUH  : SInst<"vcvtp_u16", "(1U)1", "Sh">;
  def SCALAR_FCVTPUH1 : SInst<"vcvtp_u32", "(1U>)1", "Sh">;
  def SCALAR_FCVTPUH2 : SInst<"vcvtp_u64", "(1U>>)1", "Sh">;
  let isVCVT_N = 1 in {
    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "Ysi", "sUs">;
    def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "Ysi", "iUi">;
    def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "Ysi", "lUl">;
    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "$si", "Sh">;
    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "Isi", "Sh">;
    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "Lsi", "Sh">;
    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "bsi", "Sh">;
    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "Usi", "Sh">;
    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "Osi", "Sh">;
    def SCALAR_SCVTFSHO : SInst<"vcvth_n_f16", "(1F)(1!)I", "sUs">;
    def SCALAR_SCVTFSH1O: SInst<"vcvth_n_f16", "(1F<)(1!)I", "iUi">;
    def SCALAR_SCVTFSH2O: SInst<"vcvth_n_f16", "(1F<<)(1!)I", "lUl">;
    def SCALAR_FCVTZSHO : SInst<"vcvt_n_s16", "(1S)1I", "Sh">;
    def SCALAR_FCVTZSH1O: SInst<"vcvt_n_s32", "(1S>)1I", "Sh">;
    def SCALAR_FCVTZSH2O: SInst<"vcvt_n_s64", "(1S>>)1I", "Sh">;
    def SCALAR_FCVTZUHO : SInst<"vcvt_n_u16", "(1U)1I", "Sh">;
    def SCALAR_FCVTZUH1O: SInst<"vcvt_n_u32", "(1U>)1I", "Sh">;
    def SCALAR_FCVTZUH2O: SInst<"vcvt_n_u64", "(1U>>)1I", "Sh">;
  }
  // Comparison
  def SCALAR_CMEQRH   : SInst<"vceq", "bss", "Sh">;
  def SCALAR_CMEQZH   : SInst<"vceqz", "bs", "Sh">;
  def SCALAR_CMGERH   : SInst<"vcge", "bss", "Sh">;
  def SCALAR_CMGEZH   : SInst<"vcgez", "bs", "Sh">;
  def SCALAR_CMGTRH   : SInst<"vcgt", "bss", "Sh">;
  def SCALAR_CMGTZH   : SInst<"vcgtz", "bs", "Sh">;
  def SCALAR_CMLERH   : SInst<"vcle", "bss", "Sh">;
  def SCALAR_CMLEZH   : SInst<"vclez", "bs", "Sh">;
  def SCALAR_CMLTH    : SInst<"vclt", "bss", "Sh">;
  def SCALAR_CMLTZH   : SInst<"vcltz", "bs", "Sh">;
  def SCALAR_CMEQRH   : SInst<"vceq", "(1U)11", "Sh">;
  def SCALAR_CMEQZH   : SInst<"vceqz", "(1U)1", "Sh">;
  def SCALAR_CMGERH   : SInst<"vcge", "(1U)11", "Sh">;
  def SCALAR_CMGEZH   : SInst<"vcgez", "(1U)1", "Sh">;
  def SCALAR_CMGTRH   : SInst<"vcgt", "(1U)11", "Sh">;
  def SCALAR_CMGTZH   : SInst<"vcgtz", "(1U)1", "Sh">;
  def SCALAR_CMLERH   : SInst<"vcle", "(1U)11", "Sh">;
  def SCALAR_CMLEZH   : SInst<"vclez", "(1U)1", "Sh">;
  def SCALAR_CMLTH    : SInst<"vclt", "(1U)11", "Sh">;
  def SCALAR_CMLTZH   : SInst<"vcltz", "(1U)1", "Sh">;

  // Absolute Compare Mask Greater Than Or Equal
  def SCALAR_FACGEH   : IInst<"vcage", "bss", "Sh">;
  def SCALAR_FACLEH   : IInst<"vcale", "bss", "Sh">;
  def SCALAR_FACGEH   : IInst<"vcage", "(1U)11", "Sh">;
  def SCALAR_FACLEH   : IInst<"vcale", "(1U)11", "Sh">;

  // Absolute Compare Mask Greater Than
  def SCALAR_FACGT    : IInst<"vcagt", "bss", "Sh">;
  def SCALAR_FACLT    : IInst<"vcalt", "bss", "Sh">;
  def SCALAR_FACGT    : IInst<"vcagt", "(1U)11", "Sh">;
  def SCALAR_FACLT    : IInst<"vcalt", "(1U)11", "Sh">;

  // Scalar Absolute Value
  def SCALAR_ABSH     : SInst<"vabs", "ss", "Sh">;
  def SCALAR_ABSH     : SInst<"vabs", "11", "Sh">;

  // Scalar Absolute Difference
  def SCALAR_ABDH: IInst<"vabd", "sss", "Sh">;
  def SCALAR_ABDH: IInst<"vabd", "111", "Sh">;

  // Add/Sub
  def VADDSH          : SInst<"vadd", "sss", "Sh">;
  def VSUBHS          : SInst<"vsub", "sss", "Sh">;
  def VADDSH          : SInst<"vadd", "111", "Sh">;
  def VSUBHS          : SInst<"vsub", "111", "Sh">;

  // Max/Min
  def VMAXHS          : SInst<"vmax", "sss", "Sh">;
  def VMINHS          : SInst<"vmin", "sss", "Sh">;
  def FMAXNMHS        : SInst<"vmaxnm", "sss", "Sh">;
  def FMINNMHS        : SInst<"vminnm", "sss", "Sh">;
  def VMAXHS          : SInst<"vmax", "111", "Sh">;
  def VMINHS          : SInst<"vmin", "111", "Sh">;
  def FMAXNMHS        : SInst<"vmaxnm", "111", "Sh">;
  def FMINNMHS        : SInst<"vminnm", "111", "Sh">;

  // Multiplication/Division
  def VMULHS          : SInst<"vmul", "sss", "Sh">;
  def MULXHS          : SInst<"vmulx", "sss", "Sh">;
  def FDIVHS          : SInst<"vdiv", "sss",  "Sh">;
  def VMULHS          : SInst<"vmul", "111", "Sh">;
  def MULXHS          : SInst<"vmulx", "111", "Sh">;
  def FDIVHS          : SInst<"vdiv", "111",  "Sh">;

  // Vector fused multiply-add operations
  def VFMAHS          : SInst<"vfma", "ssss", "Sh">;
  def VFMSHS          : SInst<"vfms", "ssss", "Sh">;
  def VFMAHS          : SInst<"vfma", "1111", "Sh">;
  def VFMSHS          : SInst<"vfms", "1111", "Sh">;
}
+715 −715

File changed.

Preview size limit exceeded, changes collapsed.

+30 −39
Original line number Diff line number Diff line
@@ -198,10 +198,8 @@ def OP_UNAVAILABLE : Operation {
//
// The prototype is a string that defines the return type of the intrinsic
// and the type of each argument. The return type and every argument gets a
// "modifier" that can change in some way the "base type" of the intrinsic.
//
// The modifier 'd' means "default" and does not modify the base type in any
// way. The available modifiers are given below.
// set of "modifiers" that can change the "base type" of the intrinsic in
// some way.
//
// Typespecs
// ---------
@@ -226,41 +224,34 @@ def OP_UNAVAILABLE : Operation {
// -------------------
// prototype: return (arg, arg, ...)
//
// v: void
// t: best-fit integer (int/poly args)
// x: signed integer   (int/float args)
// u: unsigned integer (int/float args)
// f: float (int args)
// F: double (int args)
// H: half (int args)
// 0: half (int args), ignore 'Q' size modifier.
// 1: half (int args), force 'Q' size modifier.
// d: default
// g: default, ignore 'Q' size modifier.
// j: default, force 'Q' size modifier.
// w: double width elements, same num elts
// n: double width elements, half num elts
// h: half width elements, double num elts
// q: half width elements, quad num elts
// e: half width elements, double num elts, unsigned
// m: half width elements, same num elts
// i: constant int
// l: constant uint64
// s: scalar of element type
// z: scalar of half width element type, signed
// r: scalar of double width element type, signed
// b: scalar of unsigned integer/long type (int/float args)
// $: scalar of signed integer/long type (int/float args)
// y: scalar of float
// o: scalar of double
// k: default elt width, double num elts
// 2,3,4: array of default vectors
// B,C,D: array of default elts, force 'Q' size modifier.
// p: pointer type
// c: const pointer type
// 7: vector of 8-bit elements, ignore 'Q' size modifier
// 8: vector of 8-bit elements, same width as default type
// 9: vector of 8-bit elements, force 'Q' size modifier
// Each type modifier is either a single character, or a group surrounded by
// parentheses.
//
// .: default
// v: change to void category.
// S: change to signed integer category.
// U: change to unsigned integer category.
// F: change to floating category.
// P: change to polynomial category.
// p: change polynomial to equivalent integer category. Otherwise nop.
//
// >: double element width (vector size unchanged).
// <: half element width (vector size unchanged).
//
// 1: change to scalar.
// 2: change to struct of two vectors.
// 3: change to struct of three vectors.
// 4: change to struct of four vectors.
//
// *: make a pointer argument.
// c: make a constant argument (for pointers).
//
// Q: force 128-bit width.
// q: force 64-bit width.
//
// I: make a 32-bit signed scalar immediate.
// !: make this the key type passed to CGBuiltin.cpp in a polymorphic call.


// Every intrinsic subclasses Inst.
class Inst <string n, string p, string t, Operation o> {
+0 −4
Original line number Diff line number Diff line
@@ -17756,8 +17756,6 @@ float32_t test_vminnmv_f32(float32x2_t a) {
}
// CHECK-LABEL: @test_vpaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
@@ -17766,8 +17764,6 @@ int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
}
// CHECK-LABEL: @test_vpaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VPADDQ_V2_I]]
+6 −10
Original line number Diff line number Diff line
@@ -407,12 +407,10 @@ int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
}

// CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 {
// CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0
// CHECK:   [[VGET_LANE7:%.*]] = extractelement <1 x double> <double 0x3FEE211E215AEEF3>, i32 0
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]])
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0
// CHECK:   ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_lane_f64_0() {
      float64x1_t arg1;
@@ -426,13 +424,11 @@ float64x1_t test_vmulx_lane_f64_0() {
}

// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
// CHECK:   [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK:   [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x double> <double 0x3FD6304BC43AB5C2>, <1 x double> <double 0x3FEE211E215AEEF3>, <2 x i32> <i32 0, i32 1>
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x double> <double 0x3FD6304BC43AB5C2>, i32 0
// CHECK:   [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1
// CHECK:   [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]])
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x double> <double 0x3FD6304BC43AB5C2>, double [[VMULXD_F64_I]], i32 0
// CHECK:   ret <1 x double> [[VSET_LANE]]
float64x1_t test_vmulx_laneq_f64_2() {
      float64x1_t arg1;
Loading