Newer
Older
#include "common.hpp"
TEST(parse_pi_skip)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<?pi?><?pi value?>"), flags));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR("<?pi <tag/> value?>"), flags));
CHECK(!doc.first_child());
}
}
TEST(parse_pi_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<?pi1?><?pi2 value?>"), parse_fragment | parse_pi));
xml_node pi1 = doc.first_child();
xml_node pi2 = doc.last_child();
CHECK(pi1 != pi2);
CHECK(pi1.type() == node_pi);
CHECK_STRING(pi1.name(), STR("pi1"));
CHECK_STRING(pi1.value(), STR(""));
CHECK(pi2.type() == node_pi);
CHECK_STRING(pi2.name(), STR("pi2"));
CHECK_STRING(pi2.value(), STR("value"));
}
TEST(parse_pi_parse_spaces)
{
xml_document doc;
CHECK(doc.load_string(STR("<?target \r\n\t value ?>"), parse_fragment | parse_pi));
xml_node pi = doc.first_child();
CHECK(pi.type() == node_pi);
CHECK_STRING(pi.name(), STR("target"));
CHECK_STRING(pi.value(), STR("value "));
}
TEST(parse_pi_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<??"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?#?>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name ?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name? "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name? "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value ?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value ? "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value ? >"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name value ? > "), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name&"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name&?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?xx#?>"), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name&?>"), parse_fragment | parse_pi).status == status_bad_pi);
CHECK(doc.load_string(STR("<?name& x?>"), parse_fragment | parse_pi).status == status_bad_pi);
}
TEST(parse_comments_skip)
{
xml_document doc;
CHECK(doc.load_string(STR("<!----><!--value-->"), parse_fragment));
CHECK(!doc.first_child());
}
TEST(parse_comments_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<!----><!--value-->"), parse_fragment | parse_comments));
xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child();
CHECK(c1 != c2);
CHECK(c1.type() == node_comment);
CHECK_STRING(c1.name(), STR(""));
CHECK_STRING(c1.value(), STR(""));
CHECK(c2.type() == node_comment);
CHECK_STRING(c2.name(), STR(""));
CHECK_STRING(c2.value(), STR("value"));
}
TEST(parse_comments_parse_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_fragment | parse_comments));
xml_node c = doc.first_child();
CHECK(c.type() == node_comment);
CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_comments_parse_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<!--\r\rval1\rval2\r\nval3\nval4\r\r-->"), parse_fragment | parse_comments | parse_eol));
xml_node c = doc.first_child();
CHECK(c.type() == node_comment);
CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_comments_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_comments, parse_fragment | parse_comments | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<!-"), flags).status == status_bad_comment);
CHECK(doc.load_string(STR("<!--"), flags).status == status_bad_comment);
CHECK(doc.load_string(STR("<!--v"), flags).status == status_bad_comment);
CHECK(doc.load_string(STR("<!-->"), flags).status == status_bad_comment);
CHECK(doc.load_string(STR("<!--->"), flags).status == status_bad_comment);
CHECK(doc.load_string(STR("<!-- <!-- --><!- -->"), flags).status == status_bad_comment);
}
}
TEST(parse_cdata_skip)
{
xml_document doc;
CHECK(doc.load_string(STR("<![CDATA[]]><![CDATA[value]]>"), parse_fragment));
CHECK(!doc.first_child());
}
TEST(parse_cdata_skip_contents)
{
xml_document doc;
CHECK(doc.load_string(STR("<node><![CDATA[]]>hello<![CDATA[value]]>, world!</node>"), parse_fragment));
CHECK_NODE(doc, STR("<node>hello, world!</node>"));
}
TEST(parse_cdata_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<![CDATA[]]><![CDATA[value]]>"), parse_fragment | parse_cdata));
xml_node c1 = doc.first_child();
xml_node c2 = doc.last_child();
CHECK(c1 != c2);
CHECK(c1.type() == node_cdata);
CHECK_STRING(c1.name(), STR(""));
CHECK_STRING(c1.value(), STR(""));
CHECK(c2.type() == node_cdata);
CHECK_STRING(c2.name(), STR(""));
CHECK_STRING(c2.value(), STR("value"));
}
TEST(parse_cdata_parse_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_fragment | parse_cdata));
xml_node c = doc.first_child();
CHECK(c.type() == node_cdata);
CHECK_STRING(c.value(), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_cdata_parse_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<![CDATA[\r\rval1\rval2\r\nval3\nval4\r\r]]>"), parse_fragment | parse_cdata | parse_eol));
xml_node c = doc.first_child();
CHECK(c.type() == node_cdata);
CHECK_STRING(c.value(), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_cdata_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_cdata, parse_fragment | parse_cdata | parse_eol};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<!["), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![C"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CD"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDA"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDAT"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA["), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[]"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[data"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[data]"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[data]]"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[>"), flags).status == status_bad_cdata);
CHECK(doc.load_string(STR("<![CDATA[ <![CDATA[]]><![CDATA ]]>"), flags).status == status_bad_cdata);
}
}
TEST(parse_ws_pcdata_skip)
{
xml_document doc;
CHECK(doc.load_string(STR(" "), parse_fragment));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR("<root> <node> </node> </root>"), parse_minimal));
xml_node root = doc.child(STR("root"));
CHECK(root.first_child() == root.last_child());
CHECK(!root.first_child().first_child());
}
TEST(parse_ws_pcdata_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<root> <node> </node> </root>"), parse_minimal | parse_ws_pcdata));
xml_node root = doc.child(STR("root"));
xml_node c1 = root.first_child();
xml_node c2 = c1.next_sibling();
xml_node c3 = c2.next_sibling();
CHECK(c3 == root.last_child());
CHECK(c1.type() == node_pcdata);
CHECK_STRING(c1.value(), STR(" "));
CHECK(c3.type() == node_pcdata);
CHECK_STRING(c3.value(), STR(" "));
CHECK(c2.first_child() == c2.last_child());
CHECK(c2.first_child().type() == node_pcdata);
CHECK_STRING(c2.first_child().value(), STR(" "));
}
arseny.kapoulkine
committed
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
static int get_tree_node_count(xml_node n)
{
int result = 1;
for (xml_node c = n.first_child(); c; c = c.next_sibling())
result += get_tree_node_count(c);
return result;
}
TEST(parse_ws_pcdata_permutations)
{
struct test_data_t
{
unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
const pugi::char_t* source;
const pugi::char_t* result;
int nodes; // negative if parsing should fail
};
test_data_t test_data[] =
{
// external pcdata should be discarded (whitespace or not)
{7, STR("ext1<node/>"), STR("<node />"), 2},
{7, STR("ext1<node/>ext2"), STR("<node />"), 2},
{7, STR(" <node/>"), STR("<node />"), 2},
{7, STR("<node/> "), STR("<node />"), 2},
{7, STR(" <node/> "), STR("<node />"), 2},
// inner pcdata should be preserved
{7, STR("<node>inner</node>"), STR("<node>inner</node>"), 3},
{7, STR("<node>inner1<child/>inner2</node>"), STR("<node>inner1<child />inner2</node>"), 5},
{7, STR("<node>inner1<child>deep</child>inner2</node>"), STR("<node>inner1<child>deep</child>inner2</node>"), 6},
// empty pcdata nodes should never be created
{7, STR("<node>inner1<child></child>inner2</node>"), STR("<node>inner1<child />inner2</node>"), 5},
{7, STR("<node><child></child>inner2</node>"), STR("<node><child />inner2</node>"), 4},
{7, STR("<node>inner1<child></child></node>"), STR("<node>inner1<child /></node>"), 4},
{7, STR("<node><child></child></node>"), STR("<node><child /></node>"), 3},
// comments, pi or other nodes should not cause pcdata creation either
{7, STR("<node><!----><child><?pi?></child><![CDATA[x]]></node>"), STR("<node><child /><![CDATA[x]]></node>"), 4},
// leading/trailing pcdata whitespace should be preserved (note: this will change if parse_ws_pcdata_trim is introduced)
{7, STR("<node>\t \tinner1<child> deep </child>\t\ninner2\n\t</node>"), STR("<node>\t \tinner1<child> deep </child>\t\ninner2\n\t</node>"), 6},
// whitespace-only pcdata preservation depends on the parsing mode
{1, STR("<node>\n\t<child> </child>\n\t<child> <deep> </deep> </child>\n\t<!---->\n\t</node>"), STR("<node><child /><child><deep /></child></node>"), 5},
{2, STR("<node>\n\t<child> </child>\n\t<child> <deep> </deep> </child>\n\t<!---->\n\t</node>"), STR("<node>\n\t<child> </child>\n\t<child> <deep> </deep> </child>\n\t\n\t</node>"), 13},
{4, STR("<node>\n\t<child> </child>\n\t<child> <deep> </deep> </child>\n\t<!---->\n\t</node>"), STR("<node><child> </child><child><deep> </deep></child></node>"), 7},
// current implementation of parse_ws_pcdata_single has an unfortunate bug; reproduce it here
{4, STR("<node>\t\t<!---->\n\n</node>"), STR("<node>\n\n</node>"), 3},
// error case: terminate PCDATA in the middle
{7, STR("<node>abcdef"), STR("<node>abcdef</node>"), -3},
{5, STR("<node> "), STR("<node />"), -2},
{2, STR("<node> "), STR("<node> </node>"), -3},
arseny.kapoulkine
committed
// error case: terminate PCDATA as early as possible
{7, STR("<node>"), STR("<node />"), -2},
{7, STR("<node>a"), STR("<node>a</node>"), -3},
{5, STR("<node> "), STR("<node />"), -2},
{2, STR("<node> "), STR("<node> </node>"), -3},
arseny.kapoulkine
committed
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
for (int flag = 0; flag < 3; ++flag)
{
if (td.mask & (1 << flag))
{
unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};
xml_document doc;
CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag]));
arseny.kapoulkine
committed
CHECK_NODE(doc, td.result);
int nodes = get_tree_node_count(doc);
CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
}
}
}
}
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
TEST(parse_ws_pcdata_fragment_permutations)
{
struct test_data_t
{
unsigned int mask; // 1 = default flags, 2 = parse_ws_pcdata, 4 = parse_ws_pcdata_single
const pugi::char_t* source;
const pugi::char_t* result;
int nodes; // negative if parsing should fail
};
test_data_t test_data[] =
{
// external pcdata should be preserved
{7, STR("ext1"), STR("ext1"), 2},
{5, STR(" "), STR(""), 1},
{2, STR(" "), STR(" "), 2},
{7, STR("ext1<node/>"), STR("ext1<node />"), 3},
{7, STR("<node/>ext2"), STR("<node />ext2"), 3},
{7, STR("ext1<node/>ext2"), STR("ext1<node />ext2"), 4},
{7, STR("ext1<node1/>ext2<node2/>ext3"), STR("ext1<node1 />ext2<node2 />ext3"), 6},
{5, STR(" <node/>"), STR("<node />"), 2},
{2, STR(" <node/>"), STR(" <node />"), 3},
{5, STR("<node/> "), STR("<node />"), 2},
{2, STR("<node/> "), STR("<node /> "), 3},
{5, STR(" <node/> "), STR("<node />"), 2},
{2, STR(" <node/> "), STR(" <node /> "), 4},
{5, STR(" <node1/> <node2/> "), STR("<node1 /><node2 />"), 3},
{2, STR(" <node1/> <node2/> "), STR(" <node1 /> <node2 /> "), 6},
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
for (int flag = 0; flag < 3; ++flag)
{
if (td.mask & (1 << flag))
{
unsigned int flags[] = {parse_default, parse_default | parse_ws_pcdata, parse_default | parse_ws_pcdata_single};
xml_document doc;
CHECK((td.nodes > 0) == doc.load_string(td.source, flags[flag] | parse_fragment));
CHECK_NODE(doc, td.result);
int nodes = get_tree_node_count(doc);
CHECK((td.nodes < 0 ? -td.nodes : td.nodes) == nodes);
}
}
}
}
TEST(parse_pcdata_no_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<root>\r\rval1\rval2\r\nval3\nval4\r\r</root>"), parse_minimal));
CHECK_STRING(doc.child_value(STR("root")), STR("\r\rval1\rval2\r\nval3\nval4\r\r"));
}
TEST(parse_pcdata_eol)
{
xml_document doc;
CHECK(doc.load_string(STR("<root>\r\rval1\rval2\r\nval3\nval4\r\r</root>"), parse_minimal | parse_eol));
CHECK_STRING(doc.child_value(STR("root")), STR("\n\nval1\nval2\nval3\nval4\n\n"));
}
TEST(parse_pcdata_skip_ext)
{
xml_document doc;
CHECK(doc.load_string(STR("pre<root/>post"), parse_minimal));
CHECK(doc.first_child() == doc.last_child());
CHECK(doc.first_child().type() == node_element);
}
TEST(parse_pcdata_error)
{
xml_document doc;
CHECK(doc.load_string(STR("<root>pcdata"), parse_minimal).status == status_end_element_mismatch);
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
TEST(parse_pcdata_trim)
{
struct test_data_t
{
const pugi::char_t* source;
const pugi::char_t* result;
unsigned int flags;
};
test_data_t test_data[] =
{
{ STR("<node> text</node>"), STR("text"), 0 },
{ STR("<node>\t\n text</node>"), STR("text"), 0 },
{ STR("<node>text </node>"), STR("text"), 0 },
{ STR("<node>text \t\n</node>"), STR("text"), 0 },
{ STR("<node>\r\n\t text \t\n\r</node>"), STR("text"), 0 },
{ STR(" text"), STR("text"), parse_fragment },
{ STR("\t\n text"), STR("text"), parse_fragment },
{ STR("text "), STR("text"), parse_fragment },
{ STR("text \t\n"), STR("text"), parse_fragment },
{ STR("\r\n\t text \t\n\r"), STR("text"), parse_fragment },
{ STR("<node>\r\n\t text \t\n\r more \r\n\t</node>"), STR("text \t\n\r more"), 0 },
{ STR("<node>\r\n\t text \t\n\r more \r\n\t</node>"), STR("text \t\n\n more"), parse_eol },
{ STR("<node>\r\n\t text \r\n\r\n\r\n\r\n\r\n\r\n\r\n more \r\n\t</node>"), STR("text \n\n\n\n\n\n\n more"), parse_eol },
{ STR("<node> test&&&&&&& </node>"), STR("test&&&&&&&"), 0 },
{ STR("<node> test&&&&&&& </node>"), STR("test&&&&&&&"), parse_escapes },
{ STR(" test&&&&&&& "), STR("test&&&&&&&"), parse_fragment | parse_escapes },
{ STR("<node>\r\n\t text \t\n\r m&&e \r\n\t</node>"), STR("text \t\n\n m&&e"), parse_eol | parse_escapes }
};
for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i)
{
const test_data_t& td = test_data[i];
xml_document doc;
CHECK(doc.load_string(td.source, td.flags | parse_trim_pcdata));
const pugi::char_t* value = doc.child(STR("node")) ? doc.child_value(STR("node")) : doc.text().get();
CHECK_STRING(value, td.result);
}
}
TEST(parse_pcdata_trim_empty)
{
unsigned int flags[] = { 0, parse_ws_pcdata, parse_ws_pcdata_single, parse_ws_pcdata | parse_ws_pcdata_single };
for (size_t i = 0; i < sizeof(flags) / sizeof(flags[0]); ++i)
{
xml_document doc;
CHECK(doc.load_string(STR("<node> </node>"), flags[i] | parse_trim_pcdata));
xml_node node = doc.child(STR("node"));
CHECK(node);
CHECK(!node.first_child());
}
}
TEST(parse_escapes_skip)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id='<>&'"'><>&'"</node>"), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'""));
}
TEST(parse_escapes_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id='<>&'"'><>&'"</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("<>&'\""));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("<>&'\""));
}
TEST(parse_escapes_code)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>  </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("\01 "));
}
TEST(parse_escapes_code_exhaustive_dec)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>&#/;	&#:;&#a;&#A;
</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&#/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#:;&#a;&#A;
"));
}
TEST(parse_escapes_code_exhaustive_hex)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>&#x/;	&#x:;&#x@;

&#xG;&#x`;

&#xg;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&#x/;\x1\x2\x3\x4\x5\x6\x7\x8\x9&#x:;&#x@;\xa\xb\xc\xd\xe\xf&#xG;&#x`;\xa\xb\xc\xd\xe\xf&#xg;"));
}
TEST(parse_escapes_code_restore)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>  - - </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("  - - "));
}
TEST(parse_escapes_char_restore)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>&q &qu &quo " </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&q &qu &quo " "));
CHECK(doc.load_string(STR("<node>&a &ap &apo &apos </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &ap &apo &apos "));
CHECK(doc.load_string(STR("<node>&a &am & </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&a &am & "));
CHECK(doc.load_string(STR("<node>&l < </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&l < "));
CHECK(doc.load_string(STR("<node>&g > </node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&g > "));
}
TEST(parse_escapes_unicode)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>γγ𤭢</node>"), parse_minimal | parse_escapes));
#ifdef PUGIXML_WCHAR_MODE
const pugi::char_t* v = doc.child_value(STR("node"));
size_t wcharsize = sizeof(wchar_t);
CHECK(v[0] == 0x3b3 && v[1] == 0x3b3 && (wcharsize == 2 ? v[2] == wchar_cast(0xd852) && v[3] == wchar_cast(0xdf62) : v[2] == wchar_cast(0x24b62)));
#else
CHECK_STRING(doc.child_value(STR("node")), "\xce\xb3\xce\xb3\xf0\xa4\xad\xa2");
#endif
}
TEST(parse_escapes_error)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>g;&#ab;"</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("g;&#ab;""));
CHECK(!doc.load_string(STR("<node id='")));
CHECK(!doc.load_string(STR("<node id='&g")));
CHECK(!doc.load_string(STR("<node id='>")));
CHECK(!doc.load_string(STR("<node id='&l")));
CHECK(!doc.load_string(STR("<node id='<")));
CHECK(!doc.load_string(STR("<node id='&a")));
CHECK(!doc.load_string(STR("<node id='&")));
CHECK(!doc.load_string(STR("<node id='&apos")));
}
TEST(parse_escapes_code_invalid)
{
xml_document doc;
CHECK(doc.load_string(STR("<node>&#;&#x;&;&#x-;&#-;</node>"), parse_minimal | parse_escapes));
CHECK_STRING(doc.child_value(STR("node")), STR("&#;&#x;&;&#x-;&#-;"));
}
TEST(parse_escapes_attribute)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_escapes;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR("<node id='"'/>"), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("\""));
}
}
TEST(parse_attribute_spaces)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id1='v1' id2 ='v2' id3= 'v3' id4 = 'v4' id5 \n\r\t = \r\t\n 'v5' />"), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id3")).value(), STR("v3"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id4")).value(), STR("v4"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id5")).value(), STR("v5"));
}
TEST(parse_attribute_quot)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id1='v1' id2=\"v2\"/>"), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("v1"));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("v2"));
}
TEST(parse_attribute_no_eol_no_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id=' \t\r\rval1 \rval2\r\nval3\nval4\r\r'/>"), parse_minimal));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\r\rval1 \rval2\r\nval3\nval4\r\r"));
}
TEST(parse_attribute_eol_no_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id=' \t\r\rval1 \rval2\r\nval3\nval4\r\r'/>"), parse_minimal | parse_eol));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" \t\n\nval1 \nval2\nval3\nval4\n\n"));
}
TEST(parse_attribute_no_eol_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id=' \t\r\rval1 \rval2\r\nval3\nval4\r\r'/>"), parse_minimal | parse_wconv_attribute));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}
TEST(parse_attribute_eol_wconv)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id=' \t\r\rval1 \rval2\r\nval3\nval4\r\r'/>"), parse_minimal | parse_eol | parse_wconv_attribute));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR(" val1 val2 val3 val4 "));
}
TEST(parse_attribute_wnorm)
{
xml_document doc;
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_minimal | parse_wnorm_attribute | (eol ? parse_eol : 0) | (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR("<node id=' \t\r\rval1 \rval2\r\nval3\nval4\r\r'/>"), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("val1 val2 val3 val4"));
}
}
TEST(parse_attribute_variations)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
for (int escapes = 0; escapes < 2; ++escapes)
{
unsigned int flags = parse_minimal;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
flags |= (escapes ? parse_escapes : 0);
CHECK(doc.load_string(STR("<node id='1'/>"), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id")).value(), STR("1"));
}
}
TEST(parse_attribute_error)
{
xml_document doc;
CHECK(doc.load_string(STR("<node id"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id "), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id "), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id "), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id/"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id?/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id=/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id='/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id=\"/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id=\"'/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id='\"/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id='\"/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node #/>"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node#/>"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node id1='1'id2='2'/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node id&='1'/>"), parse_minimal).status == status_bad_attribute);
CHECK(doc.load_string(STR("<node &='1'/>"), parse_minimal).status == status_bad_start_element);
TEST(parse_attribute_termination_error)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_minimal;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR("<node id='value"), flags).status == status_bad_attribute);
}
}
TEST(parse_attribute_quot_inside)
{
xml_document doc;
for (int wnorm = 0; wnorm < 2; ++wnorm)
for (int eol = 0; eol < 2; ++eol)
for (int wconv = 0; wconv < 2; ++wconv)
{
unsigned int flags = parse_escapes;
flags |= (wnorm ? parse_wnorm_attribute : 0);
flags |= (eol ? parse_eol : 0);
flags |= (wconv ? parse_wconv_attribute : 0);
CHECK(doc.load_string(STR("<node id1='\"' id2=\"'\"/>"), flags));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id1")).value(), STR("\""));
CHECK_STRING(doc.child(STR("node")).attribute(STR("id2")).value(), STR("'"));
}
}
TEST(parse_tag_single)
{
xml_document doc;
CHECK(doc.load_string(STR("<node/><node /><node\n/>"), parse_minimal));
CHECK_NODE(doc, STR("<node /><node /><node />"));
}
TEST(parse_tag_hierarchy)
{
xml_document doc;
CHECK(doc.load_string(STR("<node><n1><n2/></n1><n3><n4><n5></n5></n4></n3 \r\n></node>"), parse_minimal));
CHECK_NODE(doc, STR("<node><n1><n2 /></n1><n3><n4><n5 /></n4></n3></node>"));
}
TEST(parse_tag_error)
{
xml_document doc;
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
CHECK(doc.load_string(STR("<"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR("<!"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR("<!D"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR("<#"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR("<node#"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node/"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node /"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node / "), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node / >"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node/ >"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("</ node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("</node"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("</node "), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<node></ node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<node></node"), parse_minimal).status == status_bad_end_element);
CHECK(doc.load_string(STR("<node></node "), parse_minimal).status == status_bad_end_element);
CHECK(doc.load_string(STR("<node></nodes>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<node/><"), parse_minimal).status == status_unrecognized_tag);
CHECK(doc.load_string(STR("<node attr='value'>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("</></node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("</node>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("</>"), parse_minimal).status == status_end_element_mismatch);
CHECK(doc.load_string(STR("<node></node v>"), parse_minimal).status == status_bad_end_element);
CHECK(doc.load_string(STR("<node&/>"), parse_minimal).status == status_bad_start_element);
CHECK(doc.load_string(STR("<node& v='1'/>"), parse_minimal).status == status_bad_start_element);
}
TEST(parse_declaration_cases)
{
xml_document doc;
CHECK(doc.load_string(STR("<?xml?><?xmL?><?xMl?><?xML?><?Xml?><?XmL?><?XMl?><?XML?>"), parse_fragment | parse_pi));
CHECK(!doc.first_child());
}
TEST(parse_declaration_attr_cases)
{
xml_document doc;
CHECK(doc.load_string(STR("<?xml ?><?xmL ?><?xMl ?><?xML ?><?Xml ?><?XmL ?><?XMl ?><?XML ?>"), parse_fragment | parse_pi));
CHECK(!doc.first_child());
}
TEST(parse_declaration_skip)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_pi};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<?xml?><?xml version='1.0'?>"), flags));
CHECK(!doc.first_child());
CHECK(doc.load_string(STR("<?xml <tag/> ?>"), flags));
CHECK(!doc.first_child());
}
}
TEST(parse_declaration_parse)
{
xml_document doc;
CHECK(doc.load_string(STR("<?xml?><?xml version='1.0'?>"), parse_fragment | parse_declaration));
xml_node d1 = doc.first_child();
xml_node d2 = doc.last_child();
CHECK(d1 != d2);
CHECK(d1.type() == node_declaration);
CHECK_STRING(d1.name(), STR("xml"));
CHECK(d2.type() == node_declaration);
CHECK_STRING(d2.name(), STR("xml"));
CHECK_STRING(d2.attribute(STR("version")).value(), STR("1.0"));
}
TEST(parse_declaration_error)
{
xml_document doc;
unsigned int flag_sets[] = {parse_fragment, parse_fragment | parse_declaration};
for (unsigned int i = 0; i < sizeof(flag_sets) / sizeof(flag_sets[0]); ++i)
{
unsigned int flags = flag_sets[i];
CHECK(doc.load_string(STR("<?xml"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?xml?"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?xml>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?xml version='1>"), flags).status == status_bad_pi);
CHECK(doc.load_string(STR("<?xml version='1?>"), parse_fragment | parse_declaration).status == status_bad_attribute);
CHECK(doc.load_string(STR("<foo><?xml version='1'?></foo>"), parse_fragment | parse_declaration).status == status_bad_pi);
}
TEST(parse_empty)
{
xml_document doc;
Arseny Kapoulkine
committed
CHECK(doc.load_string(STR("")).status == status_no_document_element && !doc.first_child());
CHECK(doc.load_string(STR(""), parse_fragment) && !doc.first_child());
}
TEST(parse_out_of_memory)
{
test_runner::_memory_fail_threshold = 256;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_string(STR("<foo a='1'/>")).status == status_out_of_memory));
CHECK(!doc.first_child());
}
const unsigned int count = 10000;
static char_t text[count * 4];
for (unsigned int i = 0; i < count; ++i)
{
text[4*i + 0] = '<';
text[4*i + 1] = 'n';
text[4*i + 2] = '/';
text[4*i + 3] = '>';
}
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory));
CHECK_NODE(doc.first_child(), STR("<n />"));
}
TEST(parse_out_of_memory_halfway_attr)
{
const unsigned int count = 10000;
static char_t text[count * 5 + 4];
text[0] = '<';
text[1] = 'n';
for (unsigned int i = 0; i < count; ++i)
{
text[5*i + 2] = ' ';
text[5*i + 3] = 'a';
text[5*i + 4] = '=';
text[5*i + 5] = '"';
text[5*i + 6] = '"';
}
text[5 * count + 2] = '/';
text[5 * count + 3] = '>';
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 5 + 4).status == status_out_of_memory));
CHECK_STRING(doc.first_child().name(), STR("n"));
CHECK_STRING(doc.first_child().first_attribute().name(), STR("a"));
CHECK_STRING(doc.first_child().last_attribute().name(), STR("a"));
}
TEST(parse_out_of_memory_conversion)
{
test_runner::_memory_fail_threshold = 256;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer("<foo\x90/>", 7, parse_default, encoding_latin1).status == status_out_of_memory));
CHECK(!doc.first_child());
}
TEST(parse_out_of_memory_allocator_state_sync)
{
const unsigned int count = 10000;
static char_t text[count * 4];
for (unsigned int i = 0; i < count; ++i)
{
text[4*i + 0] = '<';
text[4*i + 1] = 'n';
text[4*i + 2] = '/';
text[4*i + 3] = '>';
}
test_runner::_memory_fail_threshold = 65536;
xml_document doc;
CHECK_ALLOC_FAIL(CHECK(doc.load_buffer_inplace(text, count * 4).status == status_out_of_memory));
CHECK_NODE(doc.first_child(), STR("<n />"));
test_runner::_memory_fail_threshold = 0;
for (unsigned int i = 0; i < count; ++i)
CHECK(doc.append_child(STR("n")));
}
static bool test_offset(const char_t* contents, unsigned int options, pugi::xml_parse_status status, ptrdiff_t offset)
{
xml_document doc;
xml_parse_result res = doc.load_string(contents, options);
return res.status == status && res.offset == offset;
}
#define CHECK_OFFSET(contents, options, status, offset) CHECK(test_offset(STR(contents), options, status, offset))
TEST(parse_error_offset)
{
CHECK_OFFSET("<node/>", parse_default, status_ok, 0);
test_runner::_memory_fail_threshold = 1;
CHECK_ALLOC_FAIL(CHECK_OFFSET("<node/>", parse_default, status_out_of_memory, 0));
test_runner::_memory_fail_threshold = 0;
CHECK_OFFSET("<3d/>", parse_default, status_unrecognized_tag, 1);
CHECK_OFFSET(" <3d/>", parse_default, status_unrecognized_tag, 2);
CHECK_OFFSET(" <", parse_default, status_unrecognized_tag, 1);
CHECK_OFFSET("<?pi", parse_default, status_bad_pi, 3);
CHECK_OFFSET("<?pi", parse_default | parse_pi, status_bad_pi, 3);
CHECK_OFFSET("<?xml", parse_default | parse_declaration, status_bad_pi, 4);
CHECK_OFFSET("<!----", parse_default, status_bad_comment, 5);
CHECK_OFFSET("<!----", parse_default | parse_comments, status_bad_comment, 4);
CHECK_OFFSET("<![CDA", parse_default, status_bad_cdata, 5);
CHECK_OFFSET("<![CDATA[non-terminated]]", parse_default, status_bad_cdata, 9);
CHECK_OFFSET("<!DOCTYPE doc", parse_default, status_bad_doctype, 12);
CHECK_OFFSET("<!DOCTYPE greeting [ <!ATTLIST list type (bullets|ordered|glossary) \"orde", parse_default, status_bad_doctype, 76);
CHECK_OFFSET("<node", parse_default, status_bad_start_element, 4);
CHECK_OFFSET("<node ", parse_default, status_bad_start_element, 5);
CHECK_OFFSET("<nod%>", parse_default, status_bad_start_element, 5);