Skip to content
Snippets Groups Projects
test_parse.cpp 43.9 KiB
Newer Older

	CHECK_OFFSET("<node a=2>", parse_default, status_bad_attribute, 8);
	CHECK_OFFSET("<node a='2>", parse_default, status_bad_attribute, 9);

	CHECK_OFFSET("<n></n $>", parse_default, status_bad_end_element, 7);
	CHECK_OFFSET("<n></n", parse_default, status_bad_end_element, 5);

	CHECK_OFFSET("<no></na>", parse_default, status_end_element_mismatch, 8);
	CHECK_OFFSET("<no></nod>", parse_default, status_end_element_mismatch, 9);
}

// A default-constructed xml_parse_result must convert to false and carry
// status_internal_error, a zero offset and encoding_auto.
TEST(parse_result_default)
{
	xml_parse_result result;

	CHECK(!result);
	CHECK(result.status == status_internal_error);
	CHECK(result.offset == 0);
	CHECK(result.encoding == encoding_auto);
}

// For each listed encoding, loads a BOM-prefixed buffer (with and without payload)
// as a fragment, checks the resulting document text, and checks that saving the
// document with format_write_bom reproduces the input bytes exactly.
TEST(parse_bom_fragment)
{
	struct bom_case
	{
		xml_encoding encoding;
		const char* bytes;
		size_t length;
		const char_t* expected;
	};

	const bom_case cases[] =
	{
		{ encoding_utf8, "\xef\xbb\xbf", 3, STR("") },
		{ encoding_utf8, "\xef\xbb\xbftest", 7, STR("test") },
		{ encoding_utf16_be, "\xfe\xff", 2, STR("") },
		{ encoding_utf16_be, "\xfe\xff\x00t\x00o\x00s\x00t", 10, STR("tost") },
		{ encoding_utf16_le, "\xff\xfe", 2, STR("") },
		{ encoding_utf16_le, "\xff\xfet\x00o\x00s\x00t\x00", 10, STR("tost") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff", 4, STR("") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t", 20, STR("tost") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00", 4, STR("") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t\x00\x00\x00", 20, STR("tost") },
	};

	const size_t case_count = sizeof(cases) / sizeof(cases[0]);

	for (size_t index = 0; index < case_count; ++index)
	{
		const bom_case& item = cases[index];

		xml_document doc;
		CHECK(doc.load_buffer(item.bytes, item.length, parse_fragment, item.encoding));
		CHECK_STRING(doc.text().get(), item.expected);

		// Round trip: the serialized output must match the original buffer byte for byte.
		const std::string source_bytes(item.bytes, item.length);
		CHECK(save_narrow(doc, format_no_declaration | format_raw | format_write_bom, item.encoding) == source_bytes);
	}
}

// Bytes EF BB BB (not the UTF-8 BOM EF BB BF) loaded as a UTF-8 fragment must
// load successfully and come back as document text.
TEST(parse_bom_fragment_invalid_utf8)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xef\xbb\xbb", 3, parse_fragment, encoding_utf8));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char mode: the three input bytes are expected unchanged
	CHECK_STRING(text, "\xef\xbb\xbb");
#else
	// wchar mode: a single character 0xfefb followed by the terminator
	CHECK(text[0] == wchar_cast(0xfefb) && text[1] == 0);
#endif
}

// Bytes FF FE loaded as a big-endian UTF-16 fragment must load successfully and
// produce the single character 0xfffe as document text.
TEST(parse_bom_fragment_invalid_utf16)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xff\xfe", 2, parse_fragment, encoding_utf16_be));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char mode: expected text is the byte sequence EF BF BE
	CHECK_STRING(text, "\xef\xbf\xbe");
#else
	// wchar mode: a single character 0xfffe followed by the terminator
	CHECK(text[0] == wchar_cast(0xfffe) && text[1] == 0);
#endif
}

// Bytes FF FF 00 00 loaded as a little-endian UTF-32 fragment must load
// successfully and produce the single character 0xffff as document text.
TEST(parse_bom_fragment_invalid_utf32)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xff\xff\x00\x00", 4, parse_fragment, encoding_utf32_le));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// char mode: expected text is the byte sequence EF BF BF
	CHECK_STRING(text, "\xef\xbf\xbf");
#else
	// wchar mode: a single character 0xffff followed by the terminator
	CHECK(text[0] == wchar_cast(0xffff) && text[1] == 0);
#endif
}

// Loads the fragment "a&amp;b" with parse_escapes enabled and checks that the
// document text reads back as "a&b".
TEST(parse_pcdata_gap_fragment)
{
	xml_document doc;
	CHECK(doc.load_string(STR("a&amp;b"), parse_fragment | parse_escapes));
	CHECK_STRING(doc.text().get(), STR("a&b"));
}

// Parses "<node>" in place, passing a size that covers the six characters but not
// the terminating null. The load must report status_end_element_mismatch and the
// element must still be present in the document under the name "node".
TEST(parse_name_end_eof)
{
	char_t buffer[] = STR("<node>");

	xml_document doc;
	xml_parse_result result = doc.load_buffer_inplace(buffer, 6 * sizeof(char_t));

	CHECK(result.status == status_end_element_mismatch);
	CHECK_STRING(doc.first_child().name(), STR("node"));
}

// Parses two 12-character in-place buffers that end inside the closing tag
// (terminating null excluded from the size):
//  - "<node></node" must yield status_bad_end_element;
//  - "<node></nodx" must yield status_end_element_mismatch.
// In both cases the first child must still be named "node".
TEST(parse_close_tag_eof)
{
	char_t truncated_close[] = STR("<node></node");
	char_t mismatched_close[] = STR("<node></nodx");

	xml_document doc;

	xml_parse_result truncated_result = doc.load_buffer_inplace(truncated_close, 12 * sizeof(char_t));
	CHECK(truncated_result.status == status_bad_end_element);
	CHECK_STRING(doc.first_child().name(), STR("node"));

	// The same document object is reused for the second load.
	xml_parse_result mismatched_result = doc.load_buffer_inplace(mismatched_close, 12 * sizeof(char_t));
	CHECK(mismatched_result.status == status_end_element_mismatch);
	CHECK_STRING(doc.first_child().name(), STR("node"));
}

// Regression input (presumably fuzzer-generated, judging by the test name):
// the bytes spell ";<!DOCTYPE" followed by EF BB BF, an XML declaration and a
// comment containing multi-byte sequences. Loading must report status_bad_doctype.
TEST(parse_fuzz_doctype)
{
	unsigned char input[] =
	{
		0x3b, 0x3c, 0x21, 0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45, 0xef, 0xbb, 0xbf, 0x3c, 0x3f, 0x78,
		0x6d, 0x6c, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x3d, 0x22, 0x31, 0x2e, 0x30, 0x22,
		0x3f, 0x3e, 0x3c, 0x21, 0x2d, 0x2d, 0x20, 0xe9, 0x80, 0xb1, 0xe5, 0xa0, 0xb1, 0xe3, 0x82, 0xb4,
		0xe3, 0x83, 0xb3, 0x20, 0xef, 0x83, 0x97, 0xe3, 0xa9, 0x2a, 0x20, 0x2d, 0x2d, 0x3e
	};

	xml_document doc;
	xml_parse_result result = doc.load_buffer(input, sizeof(input));

	CHECK(result.status == status_bad_doctype);
}

// Parses the same document with and without parse_embed_pcdata and checks that:
//  - the low-level DOM differs (<key>'s text is either its own value or a child node's value);
//  - text(), child_value(), XPath and serialization give the same results in both modes.
TEST(parse_embed_pcdata)
{
	// parse twice - once with default and once with embed_pcdata flags
	for (int i = 0; i < 2; ++i)
	{
		unsigned int flags = (i == 0) ? parse_default : parse_default | parse_embed_pcdata;

		xml_document doc;
		xml_parse_result res = doc.load_string(STR("<node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data /></two></node>"), flags);
		CHECK(res);

		xml_node child = doc.child(STR("node")).child(STR("child"));

		// parse_embed_pcdata omits PCDATA nodes so DOM is different
		if (flags & parse_embed_pcdata)
		{
			// text is stored on the element itself; <key> has no children
			CHECK_STRING(doc.child(STR("node")).child(STR("key")).value(), STR("value"));
			CHECK(!doc.child(STR("node")).child(STR("key")).first_child());
		}
		else
		{
			// element value is empty; the text lives in <key>'s first child
			CHECK_STRING(doc.child(STR("node")).child(STR("key")).value(), STR(""));
			CHECK(doc.child(STR("node")).child(STR("key")).first_child());
			CHECK_STRING(doc.child(STR("node")).child(STR("key")).first_child().value(), STR("value"));
		}

		// higher-level APIs work the same though
		CHECK_STRING(child.text().get(), STR("outer"));
		CHECK_STRING(child.child(STR("inner1")).text().get(), STR("value1"));

		CHECK_STRING(child.child_value(), STR("outer"));
		CHECK_STRING(child.child_value(STR("inner2")), STR("value2"));

	#ifndef PUGIXML_NO_XPATH
		CHECK_XPATH_NUMBER(doc, STR("count(node/child/*[starts-with(., 'value')])"), 2);
	#endif

		// serialized output is identical in both modes: default, newline-separated, and indented
		CHECK_NODE(doc, STR("<node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data/></two></node>"));
		CHECK_NODE_EX(doc, STR("<node>\n<key>value</key>\n<child>\n<inner1>value1</inner1>\n<inner2>value2</inner2>outer</child>\n<two>text<data />\n</two>\n</node>\n"), STR("\t"), 0);
		CHECK_NODE_EX(doc, STR("<node>\n\t<key>value</key>\n\t<child>\n\t\t<inner1>value1</inner1>\n\t\t<inner2>value2</inner2>outer</child>\n\t<two>text<data />\n\t</two>\n</node>\n"), STR("\t"), format_indent);
	}
}

// Loads a document whose declaration names encoding 'utf-8' without passing an
// explicit encoding argument; the load must succeed.
TEST(parse_encoding_detect)
{
	char input[] = "<?xml version='1.0' encoding='utf-8'?><n/>";

	xml_document doc;

	// sizeof(input) counts the terminating null of the array as part of the buffer
	xml_parse_result result = doc.load_buffer(input, sizeof(input));
	CHECK(result);
}

TEST(parse_encoding_detect_latin1)
{
	char test0[] = "<?xml version='1.0' encoding='utf-8'?><n/>";
	char test1[] = "<?xml version='1.0' encoding='iso-8859-1'?><n/>";
	char test2[] = "<?xml version='1.0' encoding = \"latin1\"?><n/>";
	char test3[] = "<?xml version='1.0' encoding='ISO-8859-1'?><n/>";
	char test4[] = "<?xml version='1.0' encoding = \"LATIN1\"?><n/>";

	xml_document doc;
	CHECK(doc.load_buffer(test0, sizeof(test0)).encoding == encoding_utf8);
	CHECK(doc.load_buffer(test1, sizeof(test1)).encoding == encoding_latin1);
	CHECK(doc.load_buffer(test2, sizeof(test2)).encoding == encoding_latin1);
	CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1);
	CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1);