Commit 7b0c4184 authored by Craig Topper's avatar Craig Topper
Browse files

[RISCV] Move compressible registers to the beginning of the FP allocation order.

We don't have very many compressible FP instructions, just load and store.
These instructions require the FP register to be f8-f15.

This patch changes the FP allocation order to prioritize f10-f15 first.
These are also the FP argument registers. So I allocated them in reverse
order starting at f15 to avoid taking the first argument registers.
This appears to match gcc allocation order.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D146488
parent b0f02cee
Loading
Loading
Loading
Loading
+14 −9
Original line number Diff line number Diff line
@@ -223,39 +223,44 @@ let RegAltNameIndices = [ABIRegAltName] in {

// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
// We start by allocating argument registers in reverse order since they are
// compressible.
def FPR16 : RegisterClass<"RISCV", [f16], 16, (add
    (sequence "F%u_H", 0, 7),
    (sequence "F%u_H", 10, 17),
    (sequence "F%u_H", 28, 31),
    (sequence "F%u_H", 8, 9),
    (sequence "F%u_H", 18, 27)
    (sequence "F%u_H", 15, 10), // fa5-fa0
    (sequence "F%u_H", 0, 7),   // ft0-ft7
    (sequence "F%u_H", 16, 17), // fa6-fa7
    (sequence "F%u_H", 28, 31), // ft8-ft11
    (sequence "F%u_H", 8, 9),   // fs0-fs1
    (sequence "F%u_H", 18, 27)  // fs2-fs11
)>;

// Register class for f32 values. The order of registers represents the
// preferred allocation sequence; ABI names are noted beside each range.
def FPR32 : RegisterClass<"RISCV", [f32], 32, (add
    (sequence "F%u_F", 15, 10), // fa5-fa0
    (sequence "F%u_F", 0, 7),   // ft0-ft7
    (sequence "F%u_F", 10, 17), // fa0-fa7
    (sequence "F%u_F", 16, 17), // fa6-fa7
    (sequence "F%u_F", 28, 31), // ft8-ft11
    (sequence "F%u_F", 8, 9),   // fs0-fs1
    (sequence "F%u_F", 18, 27)  // fs2-fs11
)>;

// Register class for f32 values restricted to f8-f15, the range required by
// the compressible FP load and store instructions.
def FPR32C : RegisterClass<"RISCV", [f32], 32, (add
  (sequence "F%u_F", 10, 15), // fa0-fa5
  (sequence "F%u_F", 15, 10), // fa5-fa0
  (sequence "F%u_F", 8, 9)    // fs0-fs1
)>;

// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
def FPR64 : RegisterClass<"RISCV", [f64], 64, (add
    (sequence "F%u_D", 15, 10), // fa5-fa0
    (sequence "F%u_D", 0, 7),   // ft0-ft7
    (sequence "F%u_D", 10, 17), // fa0-fa7
    (sequence "F%u_D", 16, 17), // fa6-fa7
    (sequence "F%u_D", 28, 31), // ft8-ft11
    (sequence "F%u_D", 8, 9),   // fs0-fs1
    (sequence "F%u_D", 18, 27)  // fs2-fs11
)>;

// Register class for f64 values restricted to f8-f15, the range required by
// the compressible FP load and store instructions.
def FPR64C : RegisterClass<"RISCV", [f64], 64, (add
  (sequence "F%u_D", 10, 15), // fa0-fa5
  (sequence "F%u_D", 15, 10), // fa5-fa0
  (sequence "F%u_D", 8, 9)    // fs0-fs1
)>;

+744 −744

File changed.

Preview size limit exceeded, changes collapsed.

+528 −528

File changed.

Preview size limit exceeded, changes collapsed.

+28 −28
Original line number Diff line number Diff line
@@ -53,8 +53,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV32IF-NEXT:    mv s0, a0
; RV32IF-NEXT:    mv a0, a1
; RV32IF-NEXT:    call __extendhfsf2@plt
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
; RV32IF-NEXT:    fmv.w.x fa5, a0
; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
; RV32IF-NEXT:    add a0, s0, a0
; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -69,8 +69,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
; RV64IF-NEXT:    mv s0, a0
; RV64IF-NEXT:    mv a0, a1
; RV64IF-NEXT:    call __extendhfsf2@plt
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT:    fmv.w.x fa5, a0
; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
; RV64IF-NEXT:    addw a0, s0, a0
; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
@@ -109,15 +109,15 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
;
; RV32-ILP32ZFHMIN-LABEL: callee_half_in_regs:
; RV32-ILP32ZFHMIN:       # %bb.0:
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a1, ft0, rtz
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
; RV32-ILP32ZFHMIN-NEXT:    add a0, a0, a1
; RV32-ILP32ZFHMIN-NEXT:    ret
;
; RV64-LP64ZFHMIN-LABEL: callee_half_in_regs:
; RV64-LP64ZFHMIN:       # %bb.0:
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a1, ft0, rtz
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a1, fa5, rtz
; RV64-LP64ZFHMIN-NEXT:    addw a0, a0, a1
; RV64-LP64ZFHMIN-NEXT:    ret
  %b_fptosi = fptosi half %b to i32
@@ -164,8 +164,8 @@ define i32 @caller_half_in_regs() nounwind {
; RV64IF-NEXT:    addi sp, sp, -16
; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT:    lui a0, 1048564
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fmv.x.w a1, ft0
; RV64IF-NEXT:    fmv.w.x fa5, a0
; RV64IF-NEXT:    fmv.x.w a1, fa5
; RV64IF-NEXT:    li a0, 1
; RV64IF-NEXT:    call callee_half_in_regs@plt
; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -262,8 +262,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV32IF-NEXT:    lhu a0, 16(sp)
; RV32IF-NEXT:    mv s0, a7
; RV32IF-NEXT:    call __extendhfsf2@plt
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
; RV32IF-NEXT:    fmv.w.x fa5, a0
; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
; RV32IF-NEXT:    add a0, s0, a0
; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -278,8 +278,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
; RV64IF-NEXT:    lhu a0, 16(sp)
; RV64IF-NEXT:    mv s0, a7
; RV64IF-NEXT:    call __extendhfsf2@plt
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT:    fmv.w.x fa5, a0
; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
; RV64IF-NEXT:    addw a0, s0, a0
; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
@@ -318,15 +318,15 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
;
; RV32-ILP32ZFHMIN-LABEL: callee_half_on_stack:
; RV32-ILP32ZFHMIN:       # %bb.0:
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a0, ft0, rtz
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a0, fa5, rtz
; RV32-ILP32ZFHMIN-NEXT:    add a0, a7, a0
; RV32-ILP32ZFHMIN-NEXT:    ret
;
; RV64-LP64ZFHMIN-LABEL: callee_half_on_stack:
; RV64-LP64ZFHMIN:       # %bb.0:
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a0, ft0, rtz
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a0, fa5, rtz
; RV64-LP64ZFHMIN-NEXT:    addw a0, a7, a0
; RV64-LP64ZFHMIN-NEXT:    ret
  %1 = fptosi half %i to i32
@@ -516,8 +516,8 @@ define half @callee_half_ret() nounwind {
; RV64IF-LABEL: callee_half_ret:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    lui a0, %hi(.LCPI4_0)
; RV64IF-NEXT:    flw ft0, %lo(.LCPI4_0)(a0)
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    flw fa5, %lo(.LCPI4_0)(a0)
; RV64IF-NEXT:    fmv.x.w a0, fa5
; RV64IF-NEXT:    ret
;
; RV32-ILP32F-LABEL: callee_half_ret:
@@ -579,8 +579,8 @@ define i32 @caller_half_ret() nounwind {
; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32IF-NEXT:    call callee_half_ret@plt
; RV32IF-NEXT:    call __extendhfsf2@plt
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
; RV32IF-NEXT:    fmv.w.x fa5, a0
; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32IF-NEXT:    addi sp, sp, 16
; RV32IF-NEXT:    ret
@@ -591,8 +591,8 @@ define i32 @caller_half_ret() nounwind {
; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64IF-NEXT:    call callee_half_ret@plt
; RV64IF-NEXT:    call __extendhfsf2@plt
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
; RV64IF-NEXT:    fmv.w.x fa5, a0
; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64IF-NEXT:    addi sp, sp, 16
; RV64IF-NEXT:    ret
@@ -626,8 +626,8 @@ define i32 @caller_half_ret() nounwind {
; RV32-ILP32ZFHMIN-NEXT:    addi sp, sp, -16
; RV32-ILP32ZFHMIN-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-ILP32ZFHMIN-NEXT:    call callee_half_ret@plt
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a0, ft0, rtz
; RV32-ILP32ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV32-ILP32ZFHMIN-NEXT:    fcvt.w.s a0, fa5, rtz
; RV32-ILP32ZFHMIN-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-ILP32ZFHMIN-NEXT:    addi sp, sp, 16
; RV32-ILP32ZFHMIN-NEXT:    ret
@@ -637,8 +637,8 @@ define i32 @caller_half_ret() nounwind {
; RV64-LP64ZFHMIN-NEXT:    addi sp, sp, -16
; RV64-LP64ZFHMIN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-LP64ZFHMIN-NEXT:    call callee_half_ret@plt
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h ft0, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a0, ft0, rtz
; RV64-LP64ZFHMIN-NEXT:    fcvt.s.h fa5, fa0
; RV64-LP64ZFHMIN-NEXT:    fcvt.w.s a0, fa5, rtz
; RV64-LP64ZFHMIN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-LP64ZFHMIN-NEXT:    addi sp, sp, 16
; RV64-LP64ZFHMIN-NEXT:    ret
+6 −6
Original line number Diff line number Diff line
@@ -79,9 +79,9 @@ define i32 @callee_double_in_gpr_exhausted_fprs(double %a, double %b, double %c,
; RV32-ILP32D-NEXT:    addi sp, sp, -16
; RV32-ILP32D-NEXT:    sw a0, 8(sp)
; RV32-ILP32D-NEXT:    sw a1, 12(sp)
; RV32-ILP32D-NEXT:    fld ft0, 8(sp)
; RV32-ILP32D-NEXT:    fld fa5, 8(sp)
; RV32-ILP32D-NEXT:    fcvt.w.d a0, fa7, rtz
; RV32-ILP32D-NEXT:    fcvt.w.d a1, ft0, rtz
; RV32-ILP32D-NEXT:    fcvt.w.d a1, fa5, rtz
; RV32-ILP32D-NEXT:    add a0, a0, a1
; RV32-ILP32D-NEXT:    addi sp, sp, 16
; RV32-ILP32D-NEXT:    ret
@@ -132,8 +132,8 @@ define i32 @callee_double_in_gpr_and_stack_almost_exhausted_gprs_fprs(i64 %a, do
; RV32-ILP32D-NEXT:    lw a0, 16(sp)
; RV32-ILP32D-NEXT:    sw a7, 8(sp)
; RV32-ILP32D-NEXT:    sw a0, 12(sp)
; RV32-ILP32D-NEXT:    fld ft0, 8(sp)
; RV32-ILP32D-NEXT:    fcvt.w.d a0, ft0, rtz
; RV32-ILP32D-NEXT:    fld fa5, 8(sp)
; RV32-ILP32D-NEXT:    fcvt.w.d a0, fa5, rtz
; RV32-ILP32D-NEXT:    add a0, a6, a0
; RV32-ILP32D-NEXT:    addi sp, sp, 16
; RV32-ILP32D-NEXT:    ret
@@ -188,8 +188,8 @@ define i32 @caller_double_in_gpr_and_stack_almost_exhausted_gprs_fprs() nounwind
define i32 @callee_double_on_stack_exhausted_gprs_fprs(i64 %a, double %b, i64 %c, double %d, i64 %e, double %f, i64 %g, double %h, double %i, double %j, double %k, double %l, double %m) nounwind {
; RV32-ILP32D-LABEL: callee_double_on_stack_exhausted_gprs_fprs:
; RV32-ILP32D:       # %bb.0:
; RV32-ILP32D-NEXT:    fld ft0, 0(sp)
; RV32-ILP32D-NEXT:    fcvt.w.d a0, ft0, rtz
; RV32-ILP32D-NEXT:    fld fa5, 0(sp)
; RV32-ILP32D-NEXT:    fcvt.w.d a0, fa5, rtz
; RV32-ILP32D-NEXT:    add a0, a6, a0
; RV32-ILP32D-NEXT:    ret
  %g_trunc = trunc i64 %g to i32
Loading