From a0d065cd22d1d43c417f6d3db88a04bf57b67ed0 Mon Sep 17 00:00:00 2001
From: Arseny Kapoulkine <arseny.kapoulkine@gmail.com>
Date: Sun, 12 Apr 2015 03:03:56 -0700
Subject: [PATCH] Implment copyless copy for attributes

Previously attributes that were copied with their node used string sharing,
but standalone attributes that were copied using xml_node::*_copy(xml_attribute)
were not.
---
 src/pugixml.cpp           | 57 +++++++++++++++++++++++++----------
 tests/test_dom_modify.cpp | 63 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 16 deletions(-)

diff --git a/src/pugixml.cpp b/src/pugixml.cpp
index 619cc7b7..65854e7c 100644
--- a/src/pugixml.cpp
+++ b/src/pugixml.cpp
@@ -557,11 +557,11 @@ PUGI__NS_BEGIN
 		xml_extra_buffer* extra_buffers;
 	};
 
-	inline xml_allocator& get_allocator(const xml_node_struct* node)
+	template <typename Object> inline xml_allocator& get_allocator(const Object* object)
 	{
-		assert(node);
+		assert(object);
 
-		return *reinterpret_cast<xml_memory_page*>(node->header & xml_memory_page_pointer_mask)->allocator;
+		return *reinterpret_cast<xml_memory_page*>(object->header & xml_memory_page_pointer_mask)->allocator;
 	}
 
 	template <typename Object> inline xml_document_struct& get_document(const Object* object)
@@ -3824,6 +3824,15 @@ PUGI__NS_BEGIN
 		}
 	}
 
+	PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
+	{
+		xml_allocator& alloc = get_allocator(da);
+		xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
+
+		node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
+		node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
+	}
+
 	inline bool is_text_node(xml_node_struct* node)
 	{
 		xml_node_type type = PUGI__NODETYPE(node);
@@ -4986,41 +4995,59 @@ namespace pugi
 	PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
 	{
 		if (!proto) return xml_attribute();
+		if (!impl::allow_insert_attribute(type())) return xml_attribute();
 
-		xml_attribute result = append_attribute(proto.name());
-		result.set_value(proto.value());
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
 
-		return result;
+		impl::append_attribute(a._attr, _root);
+		impl::node_copy_attribute(a._attr, proto._attr);
+
+		return a;
 	}
 
 	PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
 	{
 		if (!proto) return xml_attribute();
+		if (!impl::allow_insert_attribute(type())) return xml_attribute();
 
-		xml_attribute result = prepend_attribute(proto.name());
-		result.set_value(proto.value());
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
 
-		return result;
+		impl::prepend_attribute(a._attr, _root);
+		impl::node_copy_attribute(a._attr, proto._attr);
+
+		return a;
 	}
 
 	PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
 	{
 		if (!proto) return xml_attribute();
+		if (!impl::allow_insert_attribute(type())) return xml_attribute();
+		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
 
-		xml_attribute result = insert_attribute_after(proto.name(), attr);
-		result.set_value(proto.value());
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
 
-		return result;
+		impl::insert_attribute_after(a._attr, attr._attr, _root);
+		impl::node_copy_attribute(a._attr, proto._attr);
+
+		return a;
 	}
 
 	PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
 	{
 		if (!proto) return xml_attribute();
+		if (!impl::allow_insert_attribute(type())) return xml_attribute();
+		if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
+
+		xml_attribute a(impl::allocate_attribute(impl::get_allocator(_root)));
+		if (!a) return xml_attribute();
 
-		xml_attribute result = insert_attribute_before(proto.name(), attr);
-		result.set_value(proto.value());
+		impl::insert_attribute_before(a._attr, attr._attr, _root);
+		impl::node_copy_attribute(a._attr, proto._attr);
 
-		return result;
+		return a;
 	}
 
 	PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
diff --git a/tests/test_dom_modify.cpp b/tests/test_dom_modify.cpp
index fc6dd590..41120e58 100644
--- a/tests/test_dom_modify.cpp
+++ b/tests/test_dom_modify.cpp
@@ -771,6 +771,14 @@ TEST_XML(dom_node_copy_crossdoc, "<node/>")
 	CHECK_NODE(newdoc, STR("<node />"));
 }
 
+TEST_XML(dom_node_copy_crossdoc_attribute, "<node attr='value'/>")
+{
+	xml_document newdoc;
+	newdoc.append_child(STR("copy")).append_copy(doc.child(STR("node")).attribute(STR("attr")));
+	CHECK_NODE(doc, STR("<node attr=\"value\" />"));
+	CHECK_NODE(newdoc, STR("<copy attr=\"value\" />"));
+}
+
 TEST_XML_FLAGS(dom_node_copy_types, "<?xml version='1.0'?><!DOCTYPE id><root><?pi value?><!--comment--><node id='1'>pcdata<![CDATA[cdata]]></node></root>", parse_full)
 {
 	doc.append_copy(doc.child(STR("root")));
@@ -1409,7 +1417,7 @@ TEST(dom_node_copy_copyless_mix)
 	CHECK_NODE(copy2, dataxml.c_str());
 }
 
-TEST_XML(dom_node_copyless_taint, "<node attr=\"value\" />")
+TEST_XML(dom_node_copy_copyless_taint, "<node attr=\"value\" />")
 {
 	xml_node node = doc.child(STR("node"));
 	xml_node copy = doc.append_copy(node);
@@ -1433,6 +1441,59 @@ TEST_XML(dom_node_copyless_taint, "<node attr=\"value\" />")
 	CHECK_NODE(doc, STR("<nod1 attr=\"value\" /><node attr=\"valu2\" /><node att3=\"value\" />"));
 }
 
+TEST(dom_node_copy_attribute_copyless)
+{
+	std::basic_string<char_t> data;
+	data += STR("<node attr=\"");
+	for (int i = 0; i < 10000; ++i)
+		data += STR("data");
+	data += STR("\" />");
+
+	std::basic_string<char_t> datacopy = data;
+
+	// the document is parsed in-place so there should only be 1 page worth of allocations
+	test_runner::_memory_fail_threshold = 32768 + 128;
+
+	xml_document doc;
+	CHECK(doc.load_buffer_inplace(&datacopy[0], datacopy.size() * sizeof(char_t), parse_full));
+
+	// this copy should share all string storage; since there are not a lot of nodes we should not have *any* allocations here (everything will fit in the same page in the document)
+	xml_node copy1 = doc.append_child(STR("node"));
+	copy1.append_copy(doc.first_child().first_attribute());
+
+	xml_node copy2 = doc.append_child(STR("node"));
+	copy2.append_copy(copy1.first_attribute());
+
+	CHECK_NODE(copy1, data.c_str());
+	CHECK_NODE(copy2, data.c_str());
+}
+
+TEST_XML(dom_node_copy_attribute_copyless_taint, "<node attr=\"value\" />")
+{
+	xml_node node = doc.child(STR("node"));
+	xml_attribute attr = node.first_attribute();
+
+	xml_node copy1 = doc.append_child(STR("copy1"));
+	xml_node copy2 = doc.append_child(STR("copy2"));
+	xml_node copy3 = doc.append_child(STR("copy3"));
+
+	CHECK_NODE(doc, STR("<node attr=\"value\" /><copy1 /><copy2 /><copy3 />"));
+
+	copy1.append_copy(attr);
+
+	CHECK_NODE(doc, STR("<node attr=\"value\" /><copy1 attr=\"value\" /><copy2 /><copy3 />"));
+
+	attr.set_name(STR("att1"));
+	copy2.append_copy(attr);
+
+	CHECK_NODE(doc, STR("<node att1=\"value\" /><copy1 attr=\"value\" /><copy2 att1=\"value\" /><copy3 />"));
+
+	copy1.first_attribute().set_value(STR("valu2"));
+	copy3.append_copy(copy1.first_attribute());
+
+	CHECK_NODE(doc, STR("<node att1=\"value\" /><copy1 attr=\"valu2\" /><copy2 att1=\"value\" /><copy3 attr=\"valu2\" />"));
+}
+
 TEST_XML(dom_node_copy_out_of_memory_node, "<node><child1 /><child2 /><child3>text1<child4 />text2</child3></node>")
 {
 	test_runner::_memory_fail_threshold = 32768 * 2 + 4096;
-- 
GitLab