Loading llvm/test/CodeGen/AMDGPU/permute.ll +217 −49 Original line number Diff line number Diff line ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}lsh8_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh8_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -15,10 +28,22 @@ bb: ret void } ; GCN-LABEL: {{^}}lsr24_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsr24_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, s0, v2, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -30,10 +55,23 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_lsr24: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -46,10 +84,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -61,9 +111,21 @@ bb: ret void } ; GCN-LABEL: {{^}}lsh8_or_lsr24: ; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24 define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh8_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -75,10 +137,22 @@ bb: ret void } ; GCN-LABEL: {{^}}lsh16_or_lsr24: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh16_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -90,10 +164,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_xor_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_xor_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -105,13 +191,25 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_or_and: ; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00 ; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000 ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}} ; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]] ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_and_b32 s0, s0, 0xff00 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2 ; GCN-NEXT: v_or_b32_e32 v2, s0, v2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -124,10 +222,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_and_shl: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_and_shl: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -140,10 +250,22 @@ bb: ret void } ; GCN-LABEL: {{^}}or_and_or: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: or_and_or: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -155,16 +277,31 @@ bb: ret void } ; GCN-LABEL: {{^}}known_ffff0500: ; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 ; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00 ; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000 ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]] ; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]] ; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}} ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_ffff0500: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_bitset1_b32 s0, 15 ; GCN-NEXT: s_and_b32 s0, s0, 0xff00 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v4, 4, v4 ; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4 ; GCN-NEXT: v_or_b32_e32 v4, s0, v4 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v5 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -181,12 +318,27 @@ bb: ret void } ; GCN-LABEL: {{^}}known_050c0c00: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}} ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_050c0c00: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00 ; GCN-NEXT: v_mov_b32_e32 v6, 4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_or_b32 s0, s0, 4 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v6 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -202,12 +354,28 @@ bb: ret void } ; GCN-LABEL: {{^}}known_ffff8004: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_ffff8004: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500 ; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_or_b32 s0, s0, 4 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4 ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v6 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading Loading
llvm/test/CodeGen/AMDGPU/permute.ll +217 −49 Original line number Diff line number Diff line ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; GCN-LABEL: {{^}}lsh8_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh8_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x6050400 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -15,10 +28,22 @@ bb: ret void } ; GCN-LABEL: {{^}}lsr24_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsr24_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, s0, v2, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -30,10 +55,23 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_lsr24: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7060503 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -46,10 +84,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020500 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -61,9 +111,21 @@ bb: ret void } ; GCN-LABEL: {{^}}lsh8_or_lsr24: ; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24 define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh8_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_alignbit_b32 v2, v2, s0, 24 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -75,10 +137,22 @@ bb: ret void } ; GCN-LABEL: {{^}}lsh16_or_lsr24: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: lsh16_or_lsr24: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x5040c03 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -90,10 +164,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_xor_and: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_xor_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -105,13 +191,25 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_or_and: ; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00 ; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000 ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}} ; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]] ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_or_and: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_and_b32 s0, s0, 0xff00 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_and_b32_e32 v2, 0xff00ff, v2 ; GCN-NEXT: v_or_b32_e32 v2, s0, v2 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -124,10 +222,22 @@ bb: ret void } ; GCN-LABEL: {{^}}and_or_and_shl: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: and_or_and_shl: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x50c0c00 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -140,10 +250,22 @@ bb: ret void } ; GCN-LABEL: {{^}}or_and_or: ; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: or_and_or: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v3, 0x7020104 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v2, v[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v2, v2, s0, v3 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -155,16 +277,31 @@ bb: ret void } ; GCN-LABEL: {{^}}known_ffff0500: ; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 ; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00 ; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000 ; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]] ; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]] ; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}} ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} ; FIXME here should have been "v_perm_b32" with 0xffff0500 mask. define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_ffff0500: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff8004 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_bitset1_b32 s0, 15 ; GCN-NEXT: s_and_b32 s0, s0, 0xff00 ; GCN-NEXT: s_or_b32 s0, s0, 0xffff0000 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v4, 4, v4 ; GCN-NEXT: v_and_b32_e32 v4, 0xff00ff, v4 ; GCN-NEXT: v_or_b32_e32 v4, s0, v4 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v5 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -181,12 +318,27 @@ bb: ret void } ; GCN-LABEL: {{^}}known_050c0c00: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}} ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_050c0c00: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0x50c0c00 ; GCN-NEXT: v_mov_b32_e32 v6, 4 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_or_b32 s0, s0, 4 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v6 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading @@ -202,12 +354,28 @@ bb: ret void } ; GCN-LABEL: {{^}}known_ffff8004: ; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500 ; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004 ; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]] ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}} define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) { ; GCN-LABEL: known_ffff8004: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x2c ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_mov_b32_e32 v5, 0xffff0500 ; GCN-NEXT: v_mov_b32_e32 v6, 0xffff8004 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v1, s3 ; GCN-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GCN-NEXT: flat_load_dword v4, v[0:1] ; GCN-NEXT: s_or_b32 s0, s0, 4 ; GCN-NEXT: v_mov_b32_e32 v2, s2 ; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v4, 0x8000, v4 ; GCN-NEXT: v_perm_b32 v4, v4, s0, v5 ; GCN-NEXT: flat_store_dword v[0:1], v4 ; GCN-NEXT: flat_store_dword v[2:3], v6 ; GCN-NEXT: s_endpgm bb: %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id Loading