Commit b720dcba authored by David Green
Browse files

[AArch64][GISel] Split large f64 vectors for fcmp.

This adds some very basic f64 handling for larger fcmp vectors, which seemed to
be missing.
parent b56ab41d
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -486,7 +486,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
                   Ty.getElementType() != SrcTy.getElementType();
          },
          0, 1)
      .clampNumElements(0, v2s32, v4s32);
      .clampNumElements(0, v2s32, v4s32)
      .clampMaxNumElements(1, s64, 2);

  // Extensions
  auto ExtLegalFunc = [=](const LegalityQuery &Query) {
+42 −0
Original line number Diff line number Diff line
@@ -5449,3 +5449,45 @@ define <4 x i1> @fcmule4xfloat_fast_aext(<4 x float> %A, <4 x float> %B) {
  %tmp3 = fcmp fast ule <4 x float> %A, %B
  ret <4 x i1> %tmp3
}

; Ordered-equal fcmp of two <4 x double> vectors, with the <4 x i1> result
; sign-extended to <4 x i64>. Both sets of expected output below perform the
; compare as two fcmeq instructions on .2d registers (v0/v2 and v1/v3). The GI
; expectations additionally contain a shl #63 / sshr #63 pair on each half
; (presumably the in-register sign extension of the i1 lanes -- confirm).
define <4 x i64> @fcmoeq4xdouble(<4 x double> %A, <4 x double> %B) {
; CHECK-SD-LABEL: fcmoeq4xdouble:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    fcmeq v0.2d, v0.2d, v2.2d
; CHECK-SD-NEXT:    fcmeq v1.2d, v1.2d, v3.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: fcmoeq4xdouble:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fcmeq v0.2d, v0.2d, v2.2d
; CHECK-GI-NEXT:    fcmeq v1.2d, v1.2d, v3.2d
; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #63
; CHECK-GI-NEXT:    shl v1.2d, v1.2d, #63
; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #63
; CHECK-GI-NEXT:    sshr v1.2d, v1.2d, #63
; CHECK-GI-NEXT:    ret
  %tmp3 = fcmp oeq <4 x double> %A, %B
  %tmp4 = sext <4 x i1> %tmp3 to <4 x i64>
  ret <4 x i64> %tmp4
}

; Ordered-equal fcmp of two <8 x float> vectors, with the <8 x i1> result
; sign-extended to <8 x i32>. Both sets of expected output below perform the
; compare as two fcmeq instructions on .4s registers (v0/v2 and v1/v3). The GI
; expectations additionally contain a shl #31 / sshr #31 pair on each half
; (presumably the in-register sign extension of the i1 lanes -- confirm).
define <8 x i32> @fcmoeq8xfloat(<8 x float> %A, <8 x float> %B) {
; CHECK-SD-LABEL: fcmoeq8xfloat:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    fcmeq v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    fcmeq v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: fcmoeq8xfloat:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fcmeq v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    fcmeq v1.4s, v1.4s, v3.4s
; CHECK-GI-NEXT:    shl v0.4s, v0.4s, #31
; CHECK-GI-NEXT:    shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
; CHECK-GI-NEXT:    sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT:    ret
  %tmp3 = fcmp oeq <8 x float> %A, %B
  %tmp4 = sext <8 x i1> %tmp3 to <8 x i32>
  ret <8 x i32> %tmp4
}