Loading clang/include/clang/Basic/arm_neon.td +28 −1 Original line number Diff line number Diff line Loading @@ -526,7 +526,7 @@ let isA64 = 1 in { //////////////////////////////////////////////////////////////////////////////// // Load/Store // With additional QUl, Ql, Qd, Pl, QPl type. // With additional QUl, Ql, d, Qd, Pl, QPl type. def LD1 : WInst<"vld1", "dc", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def LD2 : WInst<"vld2", "2c", Loading Loading @@ -558,6 +558,33 @@ def ST1_X3 : WInst<"vst1_x3", "vp3", def ST1_X4 : WInst<"vst1_x4", "vp4", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; // With additional QUl, Ql, d, Qd, Pl, QPl type. def LD1_LANE : WInst<"vld1_lane", "dcdi", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD2_LANE : WInst<"vld2_lane", "2c2i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD3_LANE : WInst<"vld3_lane", "3c3i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD4_LANE : WInst<"vld4_lane", "4c4i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST1_LANE : WInst<"vst1_lane", "vpdi", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST2_LANE : WInst<"vst2_lane", "vp2i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST3_LANE : WInst<"vst3_lane", "vp3i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST4_LANE : WInst<"vst4_lane", "vp4i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD1_DUP : WInst<"vld1_dup", "dc", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD2_DUP : WInst<"vld2_dup", "2c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD3_DUP : WInst<"vld3_dup", "3c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD4_DUP : WInst<"vld4_dup", "4c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; //////////////////////////////////////////////////////////////////////////////// // Addition // With additional Qd type. Loading clang/lib/CodeGen/CGBuiltin.cpp +127 −0 Original line number Diff line number Diff line Loading @@ -2760,6 +2760,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case AArch64::BI__builtin_neon_vst1q_x3_v: case AArch64::BI__builtin_neon_vst1_x4_v: case AArch64::BI__builtin_neon_vst1q_x4_v: // Handle ld1/st1 lane in this function a little different from ARM. case AArch64::BI__builtin_neon_vld1_lane_v: case AArch64::BI__builtin_neon_vld1q_lane_v: case AArch64::BI__builtin_neon_vst1_lane_v: case AArch64::BI__builtin_neon_vst1q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. std::pair<llvm::Value *, unsigned> Src = Loading @@ -2777,6 +2782,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case AArch64::BI__builtin_neon_vld1q_x3_v: case AArch64::BI__builtin_neon_vld1_x4_v: case AArch64::BI__builtin_neon_vld1q_x4_v: // Handle ld1/st1 dup lane in this function a little different from ARM. case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: case AArch64::BI__builtin_neon_vld2_lane_v: case AArch64::BI__builtin_neon_vld2q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. std::pair<llvm::Value *, unsigned> Src = Loading Loading @@ -3170,6 +3184,119 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); } case AArch64::BI__builtin_neon_vld1_lane_v: case AArch64::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); LoadInst *Ld = Builder.CreateLoad(Ops[0]); Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } case AArch64::BI__builtin_neon_vld2_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); case AArch64::BI__builtin_neon_vld2q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); case AArch64::BI__builtin_neon_vld3_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E); case AArch64::BI__builtin_neon_vld3q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E); case AArch64::BI__builtin_neon_vld4_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E); case AArch64::BI__builtin_neon_vld4q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E); case AArch64::BI__builtin_neon_vst1_lane_v: case AArch64::BI__builtin_neon_vst1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); StoreInst *St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return St; } case AArch64::BI__builtin_neon_vst2_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E); case AArch64::BI__builtin_neon_vst2q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E); case AArch64::BI__builtin_neon_vst3_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E); case AArch64::BI__builtin_neon_vst3q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E); case AArch64::BI__builtin_neon_vst4_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E); case AArch64::BI__builtin_neon_vst4q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E); case AArch64::BI__builtin_neon_vld1_dup_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E); case AArch64::BI__builtin_neon_vld1q_dup_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E); case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: { // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && VTy->getNumElements() == 1) { switch (BuiltinID) { case AArch64::BI__builtin_neon_vld2_dup_v: Int = Intrinsic::arm_neon_vld2; break; case AArch64::BI__builtin_neon_vld3_dup_v: Int = Intrinsic::arm_neon_vld3; break; case AArch64::BI__builtin_neon_vld4_dup_v: Int = Intrinsic::arm_neon_vld4; break; default: llvm_unreachable("unknown vld_dup intrinsic?"); } Function *F = CGM.getIntrinsic(Int, Ty); Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } switch (BuiltinID) { case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: Int = Intrinsic::arm_neon_vld2lane; break; case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: Int = Intrinsic::arm_neon_vld3lane; break; case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: Int = Intrinsic::arm_neon_vld4lane; break; } Function *F = CGM.getIntrinsic(Int, Ty); llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); SmallVector<Value *, 6> Args; Args.push_back(Ops[1]); Args.append(STy->getNumElements(), UndefValue::get(Ty)); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Args.push_back(CI); Args.push_back(Align); Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); // splat lane 0 to all elts in each vector of the result. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Value *Val = Builder.CreateExtractValue(Ops[1], i); Value *Elt = Builder.CreateBitCast(Val, Ty); Elt = EmitNeonSplat(Elt, CI); Elt = Builder.CreateBitCast(Elt, Val->getType()); Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); } Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } // Crypto case AArch64::BI__builtin_neon_vaeseq_v: Loading clang/test/CodeGen/aarch64-neon-ldst-one.c 0 → 100644 +2047 −0 File added.Preview size limit exceeded, changes collapsed. Show changes Loading
clang/include/clang/Basic/arm_neon.td +28 −1 Original line number Diff line number Diff line Loading @@ -526,7 +526,7 @@ let isA64 = 1 in { //////////////////////////////////////////////////////////////////////////////// // Load/Store // With additional QUl, Ql, Qd, Pl, QPl type. // With additional QUl, Ql, d, Qd, Pl, QPl type. def LD1 : WInst<"vld1", "dc", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; def LD2 : WInst<"vld2", "2c", Loading Loading @@ -558,6 +558,33 @@ def ST1_X3 : WInst<"vst1_x3", "vp3", def ST1_X4 : WInst<"vst1_x4", "vp4", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; // With additional QUl, Ql, d, Qd, Pl, QPl type. def LD1_LANE : WInst<"vld1_lane", "dcdi", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD2_LANE : WInst<"vld2_lane", "2c2i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD3_LANE : WInst<"vld3_lane", "3c3i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD4_LANE : WInst<"vld4_lane", "4c4i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST1_LANE : WInst<"vst1_lane", "vpdi", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST2_LANE : WInst<"vst2_lane", "vp2i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST3_LANE : WInst<"vst3_lane", "vp3i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def ST4_LANE : WInst<"vst4_lane", "vp4i", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD1_DUP : WInst<"vld1_dup", "dc", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD2_DUP : WInst<"vld2_dup", "2c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD3_DUP : WInst<"vld3_dup", "3c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; def LD4_DUP : WInst<"vld4_dup", "4c", "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; //////////////////////////////////////////////////////////////////////////////// // Addition // With additional Qd type. Loading
clang/lib/CodeGen/CGBuiltin.cpp +127 −0 Original line number Diff line number Diff line Loading @@ -2760,6 +2760,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case AArch64::BI__builtin_neon_vst1q_x3_v: case AArch64::BI__builtin_neon_vst1_x4_v: case AArch64::BI__builtin_neon_vst1q_x4_v: // Handle ld1/st1 lane in this function a little different from ARM. case AArch64::BI__builtin_neon_vld1_lane_v: case AArch64::BI__builtin_neon_vld1q_lane_v: case AArch64::BI__builtin_neon_vst1_lane_v: case AArch64::BI__builtin_neon_vst1q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. std::pair<llvm::Value *, unsigned> Src = Loading @@ -2777,6 +2782,15 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, case AArch64::BI__builtin_neon_vld1q_x3_v: case AArch64::BI__builtin_neon_vld1_x4_v: case AArch64::BI__builtin_neon_vld1q_x4_v: // Handle ld1/st1 dup lane in this function a little different from ARM. case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: case AArch64::BI__builtin_neon_vld2_lane_v: case AArch64::BI__builtin_neon_vld2q_lane_v: // Get the alignment for the argument in addition to the value; // we'll use it later. std::pair<llvm::Value *, unsigned> Src = Loading Loading @@ -3170,6 +3184,119 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, ""); } case AArch64::BI__builtin_neon_vld1_lane_v: case AArch64::BI__builtin_neon_vld1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ty = llvm::PointerType::getUnqual(VTy->getElementType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); LoadInst *Ld = Builder.CreateLoad(Ops[0]); Ld->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane"); } case AArch64::BI__builtin_neon_vld2_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); case AArch64::BI__builtin_neon_vld2q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld2q_lane_v, E); case AArch64::BI__builtin_neon_vld3_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3_lane_v, E); case AArch64::BI__builtin_neon_vld3q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld3q_lane_v, E); case AArch64::BI__builtin_neon_vld4_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4_lane_v, E); case AArch64::BI__builtin_neon_vld4q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld4q_lane_v, E); case AArch64::BI__builtin_neon_vst1_lane_v: case AArch64::BI__builtin_neon_vst1q_lane_v: { Ops[1] = Builder.CreateBitCast(Ops[1], Ty); Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); StoreInst *St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(Ops[0], Ty)); St->setAlignment(cast<ConstantInt>(Align)->getZExtValue()); return St; } case AArch64::BI__builtin_neon_vst2_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2_lane_v, E); case AArch64::BI__builtin_neon_vst2q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst2q_lane_v, E); case AArch64::BI__builtin_neon_vst3_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3_lane_v, E); case AArch64::BI__builtin_neon_vst3q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst3q_lane_v, E); case AArch64::BI__builtin_neon_vst4_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4_lane_v, E); case AArch64::BI__builtin_neon_vst4q_lane_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_lane_v, E); case AArch64::BI__builtin_neon_vld1_dup_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1_dup_v, E); case AArch64::BI__builtin_neon_vld1q_dup_v: return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vld1q_dup_v, E); case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: { // Handle 64-bit x 1 elements as a special-case. There is no "dup" needed. if (VTy->getElementType()->getPrimitiveSizeInBits() == 64 && VTy->getNumElements() == 1) { switch (BuiltinID) { case AArch64::BI__builtin_neon_vld2_dup_v: Int = Intrinsic::arm_neon_vld2; break; case AArch64::BI__builtin_neon_vld3_dup_v: Int = Intrinsic::arm_neon_vld3; break; case AArch64::BI__builtin_neon_vld4_dup_v: Int = Intrinsic::arm_neon_vld4; break; default: llvm_unreachable("unknown vld_dup intrinsic?"); } Function *F = CGM.getIntrinsic(Int, Ty); Ops[1] = Builder.CreateCall2(F, Ops[1], Align, "vld_dup"); Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } switch (BuiltinID) { case AArch64::BI__builtin_neon_vld2_dup_v: case AArch64::BI__builtin_neon_vld2q_dup_v: Int = Intrinsic::arm_neon_vld2lane; break; case AArch64::BI__builtin_neon_vld3_dup_v: case AArch64::BI__builtin_neon_vld3q_dup_v: Int = Intrinsic::arm_neon_vld3lane; break; case AArch64::BI__builtin_neon_vld4_dup_v: case AArch64::BI__builtin_neon_vld4q_dup_v: Int = Intrinsic::arm_neon_vld4lane; break; } Function *F = CGM.getIntrinsic(Int, Ty); llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType()); SmallVector<Value *, 6> Args; Args.push_back(Ops[1]); Args.append(STy->getNumElements(), UndefValue::get(Ty)); llvm::Constant *CI = ConstantInt::get(Int32Ty, 0); Args.push_back(CI); Args.push_back(Align); Ops[1] = Builder.CreateCall(F, Args, "vld_dup"); // splat lane 0 to all elts in each vector of the result. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { Value *Val = Builder.CreateExtractValue(Ops[1], i); Value *Elt = Builder.CreateBitCast(Val, Ty); Elt = EmitNeonSplat(Elt, CI); Elt = Builder.CreateBitCast(Elt, Val->getType()); Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i); } Ty = llvm::PointerType::getUnqual(Ops[1]->getType()); Ops[0] = Builder.CreateBitCast(Ops[0], Ty); return Builder.CreateStore(Ops[1], Ops[0]); } // Crypto case AArch64::BI__builtin_neon_vaeseq_v: Loading
clang/test/CodeGen/aarch64-neon-ldst-one.c 0 → 100644 +2047 −0 File added.Preview size limit exceeded, changes collapsed. Show changes