Unverified Commit e3533648 authored by Stanislav Mekhanoshin's avatar Stanislav Mekhanoshin Committed by GitHub
Browse files

[AMDGPU] Refactor setreg handling in the VGPR MSB lowering (#186491)

Previously the lowering could skip inserting S_SET_VGPR_MSB when the mode was
set via piggybacking. That relies on the HW bug for correct behavior; if/when
the bug is fixed, the lowering will be incorrect.

SETREG is not a piggybacking target anymore. Instead piggybacking is
disabled if we have seen a SETREG since the last mode change.
parent ea012e4a
Loading
Loading
Loading
Loading
+14 −34
Original line number Diff line number Diff line
@@ -238,23 +238,13 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
  LLVM_DEBUG(dbgs() << "    Rewritten=" << Rewritten << " after update\n");

  if (MostRecentModeSet && !Rewritten) {
    // Update MostRecentModeSet with the new mode. It can be either
    // S_SET_VGPR_MSB or S_SETREG_IMM32_B32 (with Size <= 12).
    if (MostRecentModeSet->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
    // Update MostRecentModeSet with the new mode.
    MachineOperand &Op = MostRecentModeSet->getOperand(0);
    // Carry old mode bits from the existing instruction.
    int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
    Op.setImm(CurrentMode.encode() | OldModeBits);
    LLVM_DEBUG(dbgs() << "    -> piggybacked onto S_SET_VGPR_MSB: "
                      << *MostRecentModeSet);
    } else {
      assert(MostRecentModeSet->getOpcode() == AMDGPU::S_SETREG_IMM32_B32 &&
             "unexpected MostRecentModeSet opcode");
      updateSetregModeImm(*MostRecentModeSet, CurrentMode.encode());
      LLVM_DEBUG(dbgs() << "    -> piggybacked onto S_SETREG_IMM32_B32: "
                        << *MostRecentModeSet);
    }

    return true;
  }

@@ -506,6 +496,9 @@ bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
    return false;
  }

  // MostRecentModeSet is clobbered by SETREG and not relevant anymore.
  MostRecentModeSet = nullptr;

  int64_t ModeValue = CurrentMode.encode();
  LLVM_DEBUG({
    dbgs() << "    CurrentMode=";
@@ -520,16 +513,9 @@ bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
  if (!Offset || Size <= VGPRMSBShift) {
    // Set imm32[12:19] to the correct VGPR MSBs.
    LLVM_DEBUG(dbgs() << "    Case 1: Size(" << Size << ") <= VGPRMSBShift("
                      << VGPRMSBShift
                      << "), treating as mode scope boundary\n");
    // This instruction is at the boundary of the old mode's control range.
    // Reset CurrentMode so that the next setMode call can freely piggyback
    // the required mode into bits[12:19] without triggering Rewritten.
    MostRecentModeSet = &MI;
    CurrentMode = {};
    bool Changed = updateSetregModeImm(MI, 0);
    LLVM_DEBUG(dbgs() << "    -> reset CurrentMode, cleared bits[12:19]: "
                      << MI);
                      << VGPRMSBShift << "), update mode bits[12:19]\n");
    bool Changed = updateSetregModeImm(MI, ModeValue);
    LLVM_DEBUG(dbgs() << "    -> " << MI);
    return Changed;
  }

@@ -546,13 +532,7 @@ bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
                    << " SetregModeValue=0x"
                    << Twine::utohexstr(SetregModeValue) << '\n');
  if (ImmBits12To19 == SetregModeValue) {
    // Already correct, but we must invalidate MostRecentModeSet because this
    // instruction will overwrite mode[12:19]. We can't update this instruction
    // via piggybacking (bits[12:19] are meaningful), so if CurrentMode changes,
    // a new s_set_vgpr_msb will be inserted after this instruction.
    MostRecentModeSet = nullptr;
    LLVM_DEBUG(dbgs() << "    -> bits[12:19] already correct, "
                         "invalidated MostRecentModeSet\n");
    LLVM_DEBUG(dbgs() << "    -> bits[12:19] already correct\n");
    return false;
  }

+9 −3
Original line number Diff line number Diff line
@@ -19,7 +19,9 @@ body: |
    ; CHECK-LABEL: name: setreg_mode_size_gt_12_mismatch
    ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 2748, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 6844, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16384, implicit-def $mode
    ; CHECK-NEXT: SI_RETURN
    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
@@ -39,7 +41,9 @@ body: |
    ; CHECK-LABEL: name: setreg_mode_size_gt_12_matches_next
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
    ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
@@ -96,7 +100,9 @@ body: |
  bb.0:
    ; CHECK-LABEL: name: setreg_mode_size_gt_12_high_vgpr_after
    ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 6844, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 2748, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+119 −50
Original line number Diff line number Diff line
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -start-before=amdgpu-lower-vgpr-encoding -o - %s | FileCheck --check-prefix=ASM %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -start-before=amdgpu-lower-vgpr-encoding -o - %s | llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1250 -filetype=obj -o - | llvm-objdump -d --mcpu=gfx1250 - | FileCheck --check-prefix=DIS %s
@@ -8,11 +7,11 @@
# S_SET_VGPR_MSB format: (src0_msb[0-1], src1_msb[2-3], src2_msb[4-5], dst_msb[6-7])
# MODE register format:  (dst_msb[0-1], src0_msb[2-3], src1_msb[4-5], src2_msb[6-7])
# vgpr256/257 (both MSB=1): S_SET_VGPR_MSB mode = (1 << 0) | (1 << 6) = 65
# Setreg (Size=4 <= 12) resets the mode scope and clears bits[12:19] to 0.
# No VGPR instruction follows, so bits[12:19] remain 0. Setreg imm = 5.
#                           MODE register mode = (1 << 0) | (1 << 2) = 5
# New setreg imm = 0x5 | (5 << 12) = 0x5005 = 20485

# ASM-LABEL: {{^}}setreg_size_lt_12:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 5 ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005 ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_lt_12>:

@@ -23,7 +22,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_lt_12
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
@@ -34,10 +33,11 @@ body: |
---
# Case 1b: Size == 12 (boundary), imm32[12:19]=0
# Setreg (Size=12 <= 12) resets the mode scope and clears bits[12:19] to 0.
# No VGPR instruction follows, so bits[12:19] remain 0. Setreg imm = 0xABC = 2748.
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# New setreg imm = 0xABC | (5 << 12) = 0x5ABC = 23228

# ASM-LABEL: {{^}}setreg_size_eq_12:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 12), 0xabc ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 12), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_eq_12>:

@@ -48,7 +48,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_eq_12
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 2748, 22529, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 22529, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=12, offset=0, hwreg=MODE: simm16 = 0x5801 = 22529
@@ -58,11 +58,11 @@ body: |

---
# Case 1c: Size <= 12 with existing non-zero bits in imm32[12:19]
# imm32 = 0x23005 (bits 12:19 = 0x23). Setreg resets mode scope and clears
# bits[12:19] to 0. No VGPR instruction follows, so result = 0x00005 = 5.
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x23005 (bits 12:19 = 0x23), result = 0x5005 = 20485 (bits 12:19 replaced with 5)

# ASM-LABEL: {{^}}setreg_size_lt_12_nonzero_upper:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 5 ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005 ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_lt_12_nonzero_upper>:

@@ -73,7 +73,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_lt_12_nonzero_upper
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
@@ -88,7 +88,7 @@ body: |
# imm32 = 0x5ABC = 23228 (bits 12:19 = 5), no modification needed

# ASM-LABEL: {{^}}setreg_size_gt_12_match:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 16), 0xabc ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_gt_12_match>:

@@ -99,7 +99,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_gt_12_match
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 2748, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=16, offset=0, hwreg=MODE: simm16 = 0x7801 = 30721
@@ -113,6 +113,8 @@ body: |
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x5ABC = 23228 (bits 12:19 = 5), no modification needed

# ASM-LABEL: {{^}}setreg_size_gt_12_offset_1_match:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_gt_12_offset_1_match>:

@@ -137,6 +139,8 @@ body: |
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23 != 5), update mode bits

# ASM-LABEL: {{^}}setreg_size_gt_12_mismatch:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_gt_12_mismatch>:

@@ -147,7 +151,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_gt_12_mismatch
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 2748, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=16, offset=0, hwreg=MODE: simm16 = 0x7801 = 30721
@@ -161,6 +165,8 @@ body: |
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23 != 5), must insert s_set_vgpr_msb after

# ASM-LABEL: {{^}}setreg_size_gt_12_mismatch_offset_1:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x23abc ;  msbs: dst=3 src0=0 src1=2 src2=0

# DIS-LABEL: <setreg_size_gt_12_mismatch_offset_1>:

@@ -208,12 +214,12 @@ body: |
...

---
# Case 5: Size <= 12 with VGPR MSBs already present in imm32[12:19]
# imm32 = 0x5005 = 20485 (bits 12:19 = 5). Setreg resets mode scope and clears
# bits[12:19] to 0, regardless of prior content. Result = 5.
# Case 5: Size <= 12 but VGPR MSBs already present (no change needed)
# vgpr256/257: S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# imm32 = 0x5005 = 20485 (bits 12:19 = 5 = MODE register mode)

# ASM-LABEL: {{^}}setreg_size_lt_12_already_correct:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 5 ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005 ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_lt_12_already_correct>:

@@ -224,7 +230,7 @@ body: |
    ; CHECK-LABEL: name: setreg_size_lt_12_already_correct
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
@@ -236,10 +242,11 @@ body: |
---
# Case 6: Different VGPR MSB value (using different high VGPRs)
# vgpr512/513 (both MSB=2): S_SET_VGPR_MSB mode = (2 << 0) | (2 << 6) = 130
# Setreg resets mode scope and clears bits[12:19] to 0. No VGPR follows. Result = 5.
#                           MODE register mode = (2 << 0) | (2 << 2) = 10
# New setreg imm = 0x5 | (10 << 12) = 0xA005 = 40965

# ASM-LABEL: {{^}}setreg_different_vgpr_msb:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 5 ;  msbs: dst=0 src0=0 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0xa005 ;  msbs: dst=2 src0=2 src1=0 src2=0

# DIS-LABEL: <setreg_different_vgpr_msb>:

@@ -250,7 +257,7 @@ body: |
    ; CHECK-LABEL: name: setreg_different_vgpr_msb
    ; CHECK: S_SET_VGPR_MSB 130, implicit-def $mode
    ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 40965, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
    ; size=4, offset=0, hwreg=MODE: simm16 = 0x1801 = 6145
@@ -259,29 +266,27 @@ body: |
...

---
# Case 7: Piggybacking successfully updates s_setreg_imm32_b32 (Size <= 12)
# First VGPR (V_MOV vgpr256, vgpr257): S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# Second VGPR (V_ADD_U32 vgpr256, vgpr257, vgpr512):
#   S_SET_VGPR_MSB mode = (1 << 0) | (2 << 2) | (1 << 6) = 73 (src0=1, src1=2, dst=1)
#   MODE register mode = (1 << 0) | (1 << 2) | (2 << 4) = 37 (dst=1, src0=1, src1=2)
# Piggybacking updates setreg imm32[12:19] from 0 to 37.
# Final setreg imm = 5 | (37 << 12) = 151557

# ASM-LABEL: {{^}}setreg_size_le_12_piggyback_superset:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x25005 ;  msbs: dst=1 src0=1 src1=2 src2=0

# DIS-LABEL: <setreg_size_le_12_piggyback_superset>:
# DIS: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x25005
# Case 7: Piggybacking blocked by s_setreg_imm32_b32 in between.

# ASM-LABEL: {{^}}setreg_size_le_12_piggyback_superset_blocked:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005 ; msbs: dst=1 src0=1 src1=0 src2=0
# ASM: s_set_vgpr_msb 0x4149

# DIS-LABEL: <setreg_size_le_12_piggyback_superset_blocked>:
# DIS: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005
# DIS: s_set_vgpr_msb 0x4149
# DIS-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1 /*v257*/, v0 /*v512*/

name:            setreg_size_le_12_piggyback_superset
name:            setreg_size_le_12_piggyback_superset_blocked
tracksRegLiveness: true
body:             |
  bb.0:
    ; CHECK-LABEL: name: setreg_size_le_12_piggyback_superset
    ; CHECK-LABEL: name: setreg_size_le_12_piggyback_superset_blocked
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 151557, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16713, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_ADD_U32_e32 $vgpr257, $vgpr512, implicit $exec
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -295,16 +300,15 @@ body: |
---
# Case 8: s_setreg_imm32_b32 (Size <= 12) followed by VGPR with different mode bits
# First VGPR (V_MOV vgpr256, vgpr0): S_SET_VGPR_MSB mode = 64, MODE register mode = 1
# Second VGPR (V_MOV vgpr256, vgpr256): needs mode = 65, MODE register mode = 5
# The setreg (Size=4 <= 12) resets the mode scope. Its bits[12:19] are cleared to 0.
# The second VGPR's setMode piggybacks mode = 65 into the setreg's bits[12:19],
# giving imm32 = 5 | (5 << 12) = 20485 = 0x5005. No separate S_SET_VGPR_MSB needed.
# Second VGPR (V_MOV vgpr256, vgpr256): S_SET_VGPR_MSB mode = 65, MODE register mode = 5
# Setreg gets MODE mode = 1 from first VGPR. Second VGPR needs different src0 bits,
# so a new S_SET_VGPR_MSB is inserted. The new S_SET_VGPR_MSB has mode = 65 | (64 << 8) = 16449.

# ASM-LABEL: {{^}}setreg_size_le_12_then_different_vgpr:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005 ;  msbs: dst=1 src0=1 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x1005 ;  msbs: dst=1 src0=0 src1=0 src2=0

# DIS-LABEL: <setreg_size_le_12_then_different_vgpr>:
# DIS: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x5005
# DIS: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 4), 0x1005
# DIS: v_mov_b32_e32 v0 /*v256*/, v0 /*v256*/

name:            setreg_size_le_12_then_different_vgpr
@@ -318,7 +322,9 @@ body: |
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr0, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 4101, 6145, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16449, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr256, implicit $exec
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -338,7 +344,7 @@ body: |
# New s_set_vgpr_msb imm = NewMode | (OldMode << 8) = 130 | (65 << 8) = 16770

# ASM-LABEL: {{^}}setreg_size_gt_12_match_then_different_vgpr:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 16), 0xaabc ;  msbs: dst=2 src0=2 src1=0 src2=0
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 0, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_size_gt_12_match_then_different_vgpr>:

@@ -349,7 +355,9 @@ body: |
    ; CHECK-LABEL: name: setreg_size_gt_12_match_then_different_vgpr
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
    ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -367,6 +375,8 @@ body: |
#                           MODE register mode = (1 << 0) | (1 << 2) = 5
# New setreg imm = 0x5 | (5 << 12) = 0x5005 = 20485

# ASM-LABEL: {{^}}setreg_offset_8:
# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 8, 4), 0x5005 ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_offset_8>:

@@ -377,7 +387,7 @@ body: |
    ; CHECK-LABEL: name: setreg_offset_8
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 5, 6657, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20485, 6657, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=4, offset=8, hwreg=MODE: simm16 = 0x1A01 = 6657
@@ -387,8 +397,10 @@ body: |

---

# ASM-LABEL: {{^}}setreg_offset_25:
# New imm = 1 | (5 << 12) = 0x5001 = 20481

# ASM: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 0x5001 ;  msbs: dst=1 src0=1 src1=0 src2=0

# DIS-LABEL: <setreg_offset_25>:

@@ -399,7 +411,7 @@ body: |
    ; CHECK-LABEL: name: setreg_offset_25
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 1, 1601, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_SETREG_IMM32_B32 20481, 1601, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_ENDPGM 0
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=1, offset=25, hwreg=MODE: simm16 = 0x641 = 1601
@@ -448,3 +460,60 @@ body: |
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    S_ENDPGM 0
...

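# Piggybacking blocked by s_setreg_imm32_b32: once a SETREG has been seen since
# the last mode change, a later mode change must insert a new s_set_vgpr_msb
# instead of updating an earlier instruction.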

# ASM-LABEL: {{^}}setreg_size_gt_12_offset_1_match_then_mismatch:
# ASM:      s_set_vgpr_msb 0x41                     ;  msbs: dst=1 src0=1 src1=0 src2=0
# ASM-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 /*v257*/
# ASM-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0
# ASM-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 /*v257*/
# ASM-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0
# ASM-NEXT: s_nop 0
# ASM-NEXT: s_set_vgpr_msb 0x4149                   ;  msbs: dst=1 src0=1 src1=2 src2=0
# ASM-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1 /*v257*/, v0 /*v512*/
# ASM-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc ;  msbs: dst=1 src0=1 src1=0 src2=0
# ASM-NEXT: s_nop 0
# ASM-NEXT: s_set_vgpr_msb 0x4949                   ;  msbs: dst=1 src0=1 src1=2 src2=0

# DIS-LABEL: <setreg_size_gt_12_offset_1_match_then_mismatch>:
# DIS-NEXT: s_set_vgpr_msb 0x41
# DIS-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 /*v257*/
# DIS-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc
# DIS-NEXT: v_mov_b32_e32 v0 /*v256*/, v1 /*v257*/
# DIS-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc
# DIS-NEXT: s_nop 0
# DIS-NEXT: s_set_vgpr_msb 0x4149
# DIS-NEXT: v_add_nc_u32_e32 v0 /*v256*/, v1 /*v257*/, v0 /*v512*/
# DIS-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 1, 16), 0x5abc
# DIS-NEXT: s_nop 0
# DIS-NEXT: s_set_vgpr_msb 0x49

---
name:            setreg_size_gt_12_offset_1_match_then_mismatch
tracksRegLiveness: true
body:             |
  bb.0:
    ; CHECK-LABEL: name: setreg_size_gt_12_offset_1_match_then_mismatch
    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 16713, implicit-def $mode
    ; CHECK-NEXT: $vgpr256 = V_ADD_U32_e32 $vgpr257, $vgpr512, implicit $exec
    ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    ; CHECK-NEXT: S_NOP 0
    ; CHECK-NEXT: S_SET_VGPR_MSB 18761, implicit-def $mode
    ; CHECK-NEXT: S_ENDPGM 0
    ;
    ; Test input: three identical setregs interleaved with VGPR instructions.
    ; The first high-VGPR use is expected to get S_SET_VGPR_MSB 65 in front of it.
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; size=16, offset=1, hwreg=MODE: simm16 = 0x7841 = 30785
    ; imm32 = 0x5ABC = 23228 (bits 12:19 = 5 = MODE register mode for vgpr256/257)
    S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    ; Same registers as the first V_MOV: the expected output adds no new
    ; S_SET_VGPR_MSB before this instruction.
    $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
    ; Second setreg: expected to stay unmodified (imm32 remains 23228).
    S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    ; vgpr512 as src1 needs a different mode. The expected output inserts
    ; S_NOP 0 + S_SET_VGPR_MSB 16713 (0x4149) after the preceding setreg rather
    ; than rewriting an earlier instruction.
    $vgpr256 = V_ADD_U32_e32 $vgpr257, $vgpr512, implicit $exec
    ; Third setreg: imm32 is expected to stay 23228, followed by an inserted
    ; S_NOP 0 + S_SET_VGPR_MSB 18761 (0x4949).
    ; NOTE(review): presumably because bits 12:19 (= 5) no longer match the mode
    ; set for the V_ADD and this offset=1 setreg cannot be fixed in place - confirm.
    S_SETREG_IMM32_B32 23228, 30785, implicit-def $mode, implicit $mode
    S_ENDPGM 0
...