Commit 62129878 authored by Matt Arsenault's avatar Matt Arsenault
Browse files

AMDGPU/GlobalISel: Fix tablegen selection for scalar bin ops

Fixes selection for scalar G_SMULH/G_UMULH. Also switches to using
tablegen selected add/sub, which switch to the signed version of the
opcode. This matches the current DAG behavior. We can't drop the
manual selection for add/sub yet, because it's still used both for VALU
add/sub and for G_PTR_ADD.
parent b136238b
Loading
Loading
Loading
Loading
+8 −2
Original line number Diff line number Diff line
@@ -412,8 +412,14 @@ class UniformUnaryFrag<SDPatternOperator Op> : PatFrag <
class UniformBinFrag<SDPatternOperator Op> : PatFrag <
  (ops node:$src0, node:$src1),
  (Op $src0, $src1),
  [{ return !N->isDivergent(); }]
>;
  [{ return !N->isDivergent(); }]> {
  // This check is unnecessary as it's captured by the result register
  // bank constraint.
  //
  // FIXME: Should add a way for the emitter to recognize this is a
  // trivially true predicate to eliminate the check.
  let GISelPredicateCode = [{return true;}];
}

let Defs = [SCC] in { // Carry out goes to SCC
let isCommutable = 1 in {
+4 −4
Original line number Diff line number Diff line
@@ -979,7 +979,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec,
; GPRIDX:       ; %bb.0: ; %entry
; GPRIDX-NEXT:    s_mov_b32 s0, s2
; GPRIDX-NEXT:    s_mov_b32 s1, s3
; GPRIDX-NEXT:    s_add_u32 m0, s18, -1
; GPRIDX-NEXT:    s_add_i32 m0, s18, -1
; GPRIDX-NEXT:    s_mov_b32 s2, s4
; GPRIDX-NEXT:    s_mov_b32 s3, s5
; GPRIDX-NEXT:    s_mov_b32 s4, s6
@@ -1001,7 +1001,7 @@ define amdgpu_ps double @dyn_extract_v8f64_s_s_offsetm1(<8 x double> inreg %vec,
; MOVREL:       ; %bb.0: ; %entry
; MOVREL-NEXT:    s_mov_b32 s0, s2
; MOVREL-NEXT:    s_mov_b32 s1, s3
; MOVREL-NEXT:    s_add_u32 m0, s18, -1
; MOVREL-NEXT:    s_add_i32 m0, s18, -1
; MOVREL-NEXT:    s_mov_b32 s2, s4
; MOVREL-NEXT:    s_mov_b32 s3, s5
; MOVREL-NEXT:    s_mov_b32 s4, s6
@@ -1031,7 +1031,7 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; GPRIDX-NEXT:    s_mov_b64 s[4:5], exec
; GPRIDX-NEXT:  BB22_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s6, v16
; GPRIDX-NEXT:    s_add_u32 s7, s6, 3
; GPRIDX-NEXT:    s_add_i32 s7, s6, 3
; GPRIDX-NEXT:    s_lshl_b32 s7, s7, 1
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
; GPRIDX-NEXT:    s_set_gpr_idx_on s7, gpr_idx(SRC0)
@@ -1056,7 +1056,7 @@ define double @dyn_extract_v8f64_v_v_offset3(<8 x double> %vec, i32 %sel) {
; MOVREL-NEXT:  BB22_1: ; =>This Inner Loop Header: Depth=1
; MOVREL-NEXT:    v_readfirstlane_b32 s6, v16
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc, s6, v16
; MOVREL-NEXT:    s_add_u32 s6, s6, 3
; MOVREL-NEXT:    s_add_i32 s6, s6, 3
; MOVREL-NEXT:    s_lshl_b32 m0, s6, 1
; MOVREL-NEXT:    v_movrels_b32_e32 v17, v0
; MOVREL-NEXT:    v_movrels_b32_e32 v18, v1
+2 −2
Original line number Diff line number Diff line
@@ -2093,7 +2093,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
; GPRIDX-NEXT:    s_mov_b64 s[0:1], exec
; GPRIDX-NEXT:  BB32_1: ; =>This Inner Loop Header: Depth=1
; GPRIDX-NEXT:    v_readfirstlane_b32 s2, v18
; GPRIDX-NEXT:    s_add_u32 s3, s2, 1
; GPRIDX-NEXT:    s_add_i32 s3, s2, 1
; GPRIDX-NEXT:    s_lshl_b32 s3, s3, 1
; GPRIDX-NEXT:    v_cmp_eq_u32_e32 vcc, s2, v18
; GPRIDX-NEXT:    s_set_gpr_idx_on s3, gpr_idx(DST)
@@ -2139,7 +2139,7 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
; MOVREL-NEXT:    v_mov_b32_e32 v19, v0
; MOVREL-NEXT:    v_mov_b32_e32 v33, v14
; MOVREL-NEXT:    v_mov_b32_e32 v32, v13
; MOVREL-NEXT:    s_add_u32 s2, s1, 1
; MOVREL-NEXT:    s_add_i32 s2, s1, 1
; MOVREL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, s1, v18
; MOVREL-NEXT:    v_mov_b32_e32 v31, v12
; MOVREL-NEXT:    v_mov_b32_e32 v30, v11
+12 −12
Original line number Diff line number Diff line
@@ -17,20 +17,20 @@ body: |
    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
    ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
    ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_U32_]], %7, 0, implicit $exec
    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX6: %7:vgpr_32, dead %12:sreg_64_xexec = V_ADD_I32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
    ; GFX6: %8:vgpr_32, dead %11:sreg_64_xexec = V_ADD_I32_e64 [[S_ADD_I32_]], %7, 0, implicit $exec
    ; GFX6: %9:vgpr_32, dead %10:sreg_64_xexec = V_ADD_I32_e64 %8, [[COPY2]], 0, implicit $exec
    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit %7, implicit %8, implicit %9
    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit %7, implicit %8, implicit %9
    ; GFX9-LABEL: name: add_s32
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
    ; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_U32_]], 0, implicit $exec
    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_U32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY2]], [[S_ADD_I32_]], 0, implicit $exec
    ; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
    ; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]]
    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]], implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:sgpr(s32) = COPY $sgpr1
    %2:vgpr(s32) = COPY $vgpr0
@@ -123,14 +123,14 @@ body: |
    ; GFX6: liveins: $sgpr0
    ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
    ; GFX6: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_U32_]]
    ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
    ; GFX6: S_ENDPGM 0, implicit [[S_ADD_I32_]]
    ; GFX9-LABEL: name: add_neg_inline_const_16_to_sub_s32_s
    ; GFX9: liveins: $sgpr0
    ; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; GFX9: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16
    ; GFX9: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_U32_]]
    ; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
    ; GFX9: S_ENDPGM 0, implicit [[S_ADD_I32_]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:sgpr(s32) = G_CONSTANT i32 16
    %2:sgpr(s32) = G_ADD %0, %1
+2 −2
Original line number Diff line number Diff line
@@ -123,8 +123,8 @@ body: |
    ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
    ; CHECK: [[S_BCNT1_I32_B32_:%[0-9]+]]:sreg_32 = S_BCNT1_I32_B32 [[COPY]], implicit-def $scc
    ; CHECK: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc
    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_U32_]]
    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[S_BCNT1_I32_B32_]], [[COPY1]], implicit-def $scc
    ; CHECK: S_ENDPGM 0, implicit [[S_ADD_I32_]]
    %0:sgpr(s32) = COPY $sgpr0
    %1:sgpr(s32) = COPY $sgpr1
    %2:sgpr(s32) = G_CTPOP %0
Loading