Unverified Commit 22bb938f authored by Jiahao Guo's avatar Jiahao Guo Committed by GitHub
Browse files

[CIR][AArch64] Lower NEON vminv intrinsics (#192901)

### Summary

Part of: https://github.com/llvm/llvm-project/issues/185382

Lower all intrinsics in
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#minimum-across-vector
and
https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#minimum-across-vector-(IEEE754)

Add ClangIR (CIR) codegen support for the NEON minimum-across-vector
builtins on AArch64. This covers the ACLE sections 2.1.1.13.4 Minimum
across vector (signed/unsigned integer + float) and 2.1.1.13.6 Minimum
across vector (IEEE 754, NaN-ignoring). Corresponding
classic-CodeGen-only tests are migrated into the shared CIR+LLVM test
file so both pipelines are checked from the same RUN lines.

These builtins are already registered in AArch64SISDIntrinsicMap
(clang/include/clang/Basic/AArch64CodeGenUtils.h) with the correct
LLVMIntrinsic IDs and TypeModifiers, mirroring the classic CodeGen flow
in
clang/lib/CodeGen/TargetBuiltins/ARM.cpp::EmitCommonNeonSISDBuiltinExpr.
In CIR the same map drives emitCommonNeonSISDBuiltinExpr, so the only
missing piece was adding these builtin IDs to the dispatch switch — no
intrinsic-specific lowering is needed.
parent 83b4c5cd
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -283,6 +283,24 @@ static mlir::Value emitCommonNeonSISDBuiltinExpr(
                     std::string("unimplemented AArch64 builtin call: ") +
                         cgf.getContext().BuiltinInfo.getName(info.BuiltinID));
    break;
  case NEON::BI__builtin_neon_vminv_s8:
  case NEON::BI__builtin_neon_vminvq_s8:
  case NEON::BI__builtin_neon_vminv_s16:
  case NEON::BI__builtin_neon_vminvq_s16:
  case NEON::BI__builtin_neon_vminv_s32:
  case NEON::BI__builtin_neon_vminvq_s32:
  case NEON::BI__builtin_neon_vminv_u8:
  case NEON::BI__builtin_neon_vminvq_u8:
  case NEON::BI__builtin_neon_vminv_u16:
  case NEON::BI__builtin_neon_vminvq_u16:
  case NEON::BI__builtin_neon_vminv_u32:
  case NEON::BI__builtin_neon_vminvq_u32:
  case NEON::BI__builtin_neon_vminv_f32:
  case NEON::BI__builtin_neon_vminvq_f32:
  case NEON::BI__builtin_neon_vminvq_f64:
  case NEON::BI__builtin_neon_vminnmv_f32:
  case NEON::BI__builtin_neon_vminnmvq_f32:
  case NEON::BI__builtin_neon_vminnmvq_f64:
  case NEON::BI__builtin_neon_vabdd_f64:
  case NEON::BI__builtin_neon_vabds_f32:
  case NEON::BI__builtin_neon_vshld_s64:
+0 −119
Original line number Diff line number Diff line
@@ -210,106 +210,6 @@ uint32_t test_vmaxvq_u32(uint32x4_t a) {
  return vmaxvq_u32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminv_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]])
// CHECK-NEXT:    ret i8 [[VMINV_S8_I]]
//
int8_t test_vminv_s8(int8x8_t a) {
  return vminv_s8(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminv_s16
// CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]])
// CHECK-NEXT:    ret i16 [[VMINV_S16_I]]
//
int16_t test_vminv_s16(int16x4_t a) {
  return vminv_s16(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminv_u8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]])
// CHECK-NEXT:    ret i8 [[VMINV_U8_I]]
//
uint8_t test_vminv_u8(uint8x8_t a) {
  return vminv_u8(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminv_u16
// CHECK-SAME: (<4 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]])
// CHECK-NEXT:    ret i16 [[VMINV_U16_I]]
//
uint16_t test_vminv_u16(uint16x4_t a) {
  return vminv_u16(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_s8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]])
// CHECK-NEXT:    ret i8 [[VMINVQ_S8_I]]
//
int8_t test_vminvq_s8(int8x16_t a) {
  return vminvq_s8(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_s16
// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]])
// CHECK-NEXT:    ret i16 [[VMINVQ_S16_I]]
//
int16_t test_vminvq_s16(int16x8_t a) {
  return vminvq_s16(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_s32
// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])
// CHECK-NEXT:    ret i32 [[VMINVQ_S32_I]]
//
int32_t test_vminvq_s32(int32x4_t a) {
  return vminvq_s32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_u8
// CHECK-SAME: (<16 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]])
// CHECK-NEXT:    ret i8 [[VMINVQ_U8_I]]
//
uint8_t test_vminvq_u8(uint8x16_t a) {
  return vminvq_u8(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_u16
// CHECK-SAME: (<8 x i16> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]])
// CHECK-NEXT:    ret i16 [[VMINVQ_U16_I]]
//
uint16_t test_vminvq_u16(uint16x8_t a) {
  return vminvq_u16(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_u32
// CHECK-SAME: (<4 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])
// CHECK-NEXT:    ret i32 [[VMINVQ_U32_I]]
//
uint32_t test_vminvq_u32(uint32x4_t a) {
  return vminvq_u32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vaddv_s8
// CHECK-SAME: (<8 x i8> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
@@ -420,16 +320,6 @@ float32_t test_vmaxvq_f32(float32x4_t a) {
  return vmaxvq_f32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminvq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[A]])
// CHECK-NEXT:    ret float [[VMINVQ_F32_I]]
//
float32_t test_vminvq_f32(float32x4_t a) {
  return vminvq_f32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vmaxnmvq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
@@ -440,13 +330,4 @@ float32_t test_vmaxnmvq_f32(float32x4_t a) {
  return vmaxnmvq_f32(a);
}

// CHECK-LABEL: define {{[^@]+}}@test_vminnmvq_f32
// CHECK-SAME: (<4 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[A]])
// CHECK-NEXT:    ret float [[VMINNMVQ_F32_I]]
//
float32_t test_vminnmvq_f32(float32x4_t a) {
  return vminnmvq_f32(a);
}
+0 −60
Original line number Diff line number Diff line
@@ -20437,26 +20437,6 @@ float64_t test_vmaxvq_f64(float64x2_t a) {
  return vmaxvq_f64(a);
}
// CHECK-LABEL: define dso_local float @test_vminv_f32(
// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
// CHECK-NEXT:    ret float [[VMINV_F32_I]]
//
float32_t test_vminv_f32(float32x2_t a) {
  return vminv_f32(a);
}
// CHECK-LABEL: define dso_local double @test_vminvq_f64(
// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
// CHECK-NEXT:    ret double [[VMINVQ_F64_I]]
//
float64_t test_vminvq_f64(float64x2_t a) {
  return vminvq_f64(a);
}
// CHECK-LABEL: define dso_local double @test_vmaxnmvq_f64(
// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
@@ -20477,26 +20457,6 @@ float32_t test_vmaxnmv_f32(float32x2_t a) {
  return vmaxnmv_f32(a);
}
// CHECK-LABEL: define dso_local double @test_vminnmvq_f64(
// CHECK-SAME: <2 x double> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
// CHECK-NEXT:    ret double [[VMINNMVQ_F64_I]]
//
float64_t test_vminnmvq_f64(float64x2_t a) {
  return vminnmvq_f64(a);
}
// CHECK-LABEL: define dso_local float @test_vminnmv_f32(
// CHECK-SAME: <2 x float> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
// CHECK-NEXT:    ret float [[VMINNMV_F32_I]]
//
float32_t test_vminnmv_f32(float32x2_t a) {
  return vminnmv_f32(a);
}
// CHECK-LABEL: define dso_local <2 x i64> @test_vpaddq_s64(
// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
@@ -21045,26 +21005,6 @@ float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
  return vrsqrts_f64(a, b);
}
// CHECK-LABEL: define dso_local i32 @test_vminv_s32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]])
// CHECK-NEXT:    ret i32 [[VMINV_S32_I]]
//
int32_t test_vminv_s32(int32x2_t a) {
  return vminv_s32(a);
}
// CHECK-LABEL: define dso_local i32 @test_vminv_u32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
// CHECK-NEXT:    [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]])
// CHECK-NEXT:    ret i32 [[VMINV_U32_I]]
//
uint32_t test_vminv_u32(uint32x2_t a) {
  return vminv_u32(a);
}
// CHECK-LABEL: define dso_local i32 @test_vmaxv_s32(
// CHECK-SAME: <2 x i32> noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT:  [[ENTRY:.*:]]
+226 −0
Original line number Diff line number Diff line
@@ -3939,3 +3939,229 @@ uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) {
  // LLVM: ret i64 [[RES]]
  return (uint64_t)vrsrad_n_u64(a, b, 63);
}

//===------------------------------------------------------===//
// 2.1.1.13.4. Minimum across vector
// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#minimum-across-vector
//===------------------------------------------------------===//

// LLVM-LABEL: @test_vminv_s8(
// CIR-LABEL: @vminv_s8(
int8_t test_vminv_s8(int8x8_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<8 x !s8i>) -> !s8i
// CIR: cir.return %{{.*}} : !s8i

// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[A]])
// LLVM-NEXT:    ret i8 [[VMINV_S8_I]]
  return vminv_s8(a);
}

// LLVM-LABEL: @test_vminvq_s8(
// CIR-LABEL: @vminvq_s8(
int8_t test_vminvq_s8(int8x16_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<16 x !s8i>) -> !s8i
// CIR: cir.return %{{.*}} : !s8i

// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_S8_I:%.*]] = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> [[A]])
// LLVM-NEXT:    ret i8 [[VMINVQ_S8_I]]
  return vminvq_s8(a);
}

// LLVM-LABEL: @test_vminv_s16(
// CIR-LABEL: @vminv_s16(
int16_t test_vminv_s16(int16x4_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<4 x !s16i>) -> !s16i
// CIR: cir.return %{{.*}} : !s16i

// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[A]])
// LLVM-NEXT:    ret i16 [[VMINV_S16_I]]
  return vminv_s16(a);
}

// LLVM-LABEL: @test_vminvq_s16(
// CIR-LABEL: @vminvq_s16(
int16_t test_vminvq_s16(int16x8_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<8 x !s16i>) -> !s16i
// CIR: cir.return %{{.*}} : !s16i

// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_S16_I:%.*]] = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> [[A]])
// LLVM-NEXT:    ret i16 [[VMINVQ_S16_I]]
  return vminvq_s16(a);
}

// LLVM-LABEL: @test_vminv_s32(
// CIR-LABEL: @vminv_s32(
int32_t test_vminv_s32(int32x2_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<2 x !s32i>) -> !s32i
// CIR: cir.return %{{.*}} : !s32i

// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> [[A]])
// LLVM-NEXT:    ret i32 [[VMINV_S32_I]]
  return vminv_s32(a);
}

// LLVM-LABEL: @test_vminvq_s32(
// CIR-LABEL: @vminvq_s32(
int32_t test_vminvq_s32(int32x4_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.smin" %{{.*}} : (!cir.vector<4 x !s32i>) -> !s32i
// CIR: cir.return %{{.*}} : !s32i

// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_S32_I:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[A]])
// LLVM-NEXT:    ret i32 [[VMINVQ_S32_I]]
  return vminvq_s32(a);
}

// LLVM-LABEL: @test_vminv_u8(
// CIR-LABEL: @vminv_u8(
uint8_t test_vminv_u8(uint8x8_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<8 x !u8i>) -> !u8i
// CIR: cir.return %{{.*}} : !u8i

// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[A]])
// LLVM-NEXT:    ret i8 [[VMINV_U8_I]]
  return vminv_u8(a);
}

// LLVM-LABEL: @test_vminvq_u8(
// CIR-LABEL: @vminvq_u8(
uint8_t test_vminvq_u8(uint8x16_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<16 x !u8i>) -> !u8i
// CIR: cir.return %{{.*}} : !u8i

// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_U8_I:%.*]] = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> [[A]])
// LLVM-NEXT:    ret i8 [[VMINVQ_U8_I]]
  return vminvq_u8(a);
}

// LLVM-LABEL: @test_vminv_u16(
// CIR-LABEL: @vminv_u16(
uint16_t test_vminv_u16(uint16x4_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<4 x !u16i>) -> !u16i
// CIR: cir.return %{{.*}} : !u16i

// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[A]])
// LLVM-NEXT:    ret i16 [[VMINV_U16_I]]
  return vminv_u16(a);
}

// LLVM-LABEL: @test_vminvq_u16(
// CIR-LABEL: @vminvq_u16(
uint16_t test_vminvq_u16(uint16x8_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<8 x !u16i>) -> !u16i
// CIR: cir.return %{{.*}} : !u16i

// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_U16_I:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[A]])
// LLVM-NEXT:    ret i16 [[VMINVQ_U16_I]]
  return vminvq_u16(a);
}

// LLVM-LABEL: @test_vminv_u32(
// CIR-LABEL: @vminv_u32(
uint32_t test_vminv_u32(uint32x2_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<2 x !u32i>) -> !u32i
// CIR: cir.return %{{.*}} : !u32i

// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> [[A]])
// LLVM-NEXT:    ret i32 [[VMINV_U32_I]]
  return vminv_u32(a);
}

// LLVM-LABEL: @test_vminvq_u32(
// CIR-LABEL: @vminvq_u32(
uint32_t test_vminvq_u32(uint32x4_t a) {
// CIR: cir.call_llvm_intrinsic "vector.reduce.umin" %{{.*}} : (!cir.vector<4 x !u32i>) -> !u32i
// CIR: cir.return %{{.*}} : !u32i

// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_U32_I:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[A]])
// LLVM-NEXT:    ret i32 [[VMINVQ_U32_I]]
  return vminvq_u32(a);
}

// LLVM-LABEL: @test_vminv_f32(
// CIR-LABEL: @vminv_f32(
float32_t test_vminv_f32(float32x2_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
// CIR: cir.return %{{.*}} : !cir.float

// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
// LLVM:    [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> [[A]])
// LLVM-NEXT:    ret float [[VMINV_F32_I]]
  return vminv_f32(a);
}

// LLVM-LABEL: @test_vminvq_f32(
// CIR-LABEL: @vminvq_f32(
float32_t test_vminvq_f32(float32x4_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminv" %{{.*}} : (!cir.vector<4 x !cir.float>) -> !cir.float
// CIR: cir.return %{{.*}} : !cir.float

// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> [[A]])
// LLVM-NEXT:    ret float [[VMINVQ_F32_I]]
  return vminvq_f32(a);
}

// LLVM-LABEL: @test_vminvq_f64(
// CIR-LABEL: @vminvq_f64(
float64_t test_vminvq_f64(float64x2_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
// CIR: cir.return %{{.*}} : !cir.double

// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
// LLVM:    [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> [[A]])
// LLVM-NEXT:    ret double [[VMINVQ_F64_I]]
  return vminvq_f64(a);
}

//===------------------------------------------------------===//
// 2.1.1.13.6. Minimum across vector (IEEE754)
// https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#minimum-across-vector-(IEEE754)
//===------------------------------------------------------===//

// LLVM-LABEL: @test_vminnmv_f32(
// CIR-LABEL: @vminnmv_f32(
float32_t test_vminnmv_f32(float32x2_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmv" %{{.*}} : (!cir.vector<2 x !cir.float>) -> !cir.float
// CIR: cir.return %{{.*}} : !cir.float

// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]])
// LLVM:    [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> [[A]])
// LLVM-NEXT:    ret float [[VMINNMV_F32_I]]
  return vminnmv_f32(a);
}

// LLVM-LABEL: @test_vminnmvq_f32(
// CIR-LABEL: @vminnmvq_f32(
float32_t test_vminnmvq_f32(float32x4_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmv" %{{.*}} : (!cir.vector<4 x !cir.float>) -> !cir.float
// CIR: cir.return %{{.*}} : !cir.float

// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]])
// LLVM:    [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> [[A]])
// LLVM-NEXT:    ret float [[VMINNMVQ_F32_I]]
  return vminnmvq_f32(a);
}

// LLVM-LABEL: @test_vminnmvq_f64(
// CIR-LABEL: @vminnmvq_f64(
float64_t test_vminnmvq_f64(float64x2_t a) {
// CIR: cir.call_llvm_intrinsic "aarch64.neon.fminnmv" %{{.*}} : (!cir.vector<2 x !cir.double>) -> !cir.double
// CIR: cir.return %{{.*}} : !cir.double

// LLVM-SAME: <2 x double> {{.*}} [[A:%.*]])
// LLVM:    [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> [[A]])
// LLVM-NEXT:    ret double [[VMINNMVQ_F64_I]]
  return vminnmvq_f64(a);
}