Commit b27e5459 authored by David Green
Browse files

[DAG] Convert truncstore(extend(x)) back to store(x)

Pulled out of D106237, this folds truncstore(extend(x)) back to store(x)
if the original store was legal. This can come up due to the order in which
we fold nodes. A fold from X86 needs to be adjusted to prevent infinite
loops, so that it picks the operand of the trunc directly.

Differential Revision: https://reviews.llvm.org/D117901
parent 0283b077
......@@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
// Convert a truncating store of an extension into a standard store.
if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
Value.getOpcode() == ISD::SIGN_EXTEND ||
Value.getOpcode() == ISD::ANY_EXTEND) &&
Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
ST->getMemOperand());
APInt TruncDemandedBits =
APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits());
......
......@@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
St->getValue().getOperand(0));
return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
MVT::v16i8, St->getMemOperand());
}
......@@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s8, s2
; VI-NEXT: s_mov_b32 s9, s3
; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1
; VI-NEXT: buffer_load_ubyte v2, off, s[8:11], 0 offset:2
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:2
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:1
; VI-NEXT: s_waitcnt vmcnt(2)
; VI-NEXT: buffer_store_byte v2, off, s[4:7], 0 offset:2
; VI-NEXT: s_waitcnt vmcnt(1)
; VI-NEXT: buffer_store_byte v1, off, s[4:7], 0 offset:2
; VI-NEXT: v_lshrrev_b16_e32 v0, 8, v0
; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 offset:1
; VI-NEXT: s_endpgm
%val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment