Newer
Older
CHECK_OFFSET("<node a=2>", parse_default, status_bad_attribute, 8);
CHECK_OFFSET("<node a='2>", parse_default, status_bad_attribute, 9);
CHECK_OFFSET("<n></n $>", parse_default, status_bad_end_element, 7);
CHECK_OFFSET("<n></n", parse_default, status_bad_end_element, 5);
CHECK_OFFSET("<no></na>", parse_default, status_end_element_mismatch, 8);
CHECK_OFFSET("<no></nod>", parse_default, status_end_element_mismatch, 9);
}
// Verifies the state of a default-constructed xml_parse_result, i.e. one that
// was never produced by an actual parse.
TEST(parse_result_default)
{
	xml_parse_result result;

	// With no parse behind it, the result must convert to false...
	CHECK(!result);

	// ...and carry the placeholder status, a zero offset and no specific encoding.
	CHECK(result.status == status_internal_error);
	CHECK(result.offset == 0);
	CHECK(result.encoding == encoding_auto);
}
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
// Round-trips fragments that start with a byte order mark: for every listed
// encoding the BOM must not show up in the parsed text, and saving the document
// with format_write_bom must reproduce the input bytes exactly.
TEST(parse_bom_fragment)
{
	struct bom_case
	{
		xml_encoding encoding; // encoding passed to both load and save
		const char* data;      // raw input bytes, BOM included
		size_t size;           // number of bytes in data
		const char_t* text;    // text expected after parsing
	};

	// The payload is "tost" rather than "test" for the wide encodings: an 'e'
	// directly after "\x00" would be absorbed into the hex escape sequence.
	const bom_case cases[] =
	{
		{ encoding_utf8, "\xef\xbb\xbf", 3, STR("") },
		{ encoding_utf8, "\xef\xbb\xbftest", 7, STR("test") },
		{ encoding_utf16_be, "\xfe\xff", 2, STR("") },
		{ encoding_utf16_be, "\xfe\xff\x00t\x00o\x00s\x00t", 10, STR("tost") },
		{ encoding_utf16_le, "\xff\xfe", 2, STR("") },
		{ encoding_utf16_le, "\xff\xfet\x00o\x00s\x00t\x00", 10, STR("tost") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff", 4, STR("") },
		{ encoding_utf32_be, "\x00\x00\xfe\xff\x00\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t", 20, STR("tost") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00", 4, STR("") },
		{ encoding_utf32_le, "\xff\xfe\x00\x00t\x00\x00\x00o\x00\x00\x00s\x00\x00\x00t\x00\x00\x00", 20, STR("tost") },
	};

	const size_t case_count = sizeof(cases) / sizeof(cases[0]);

	for (size_t index = 0; index < case_count; ++index)
	{
		const bom_case& current = cases[index];

		xml_document doc;
		CHECK(doc.load_buffer(current.data, current.size, parse_fragment, current.encoding));
		CHECK_STRING(doc.text().get(), current.text);

		// Saving with a BOM in the same encoding must give back the original bytes.
		CHECK(save_narrow(doc, format_no_declaration | format_raw | format_write_bom, current.encoding) == std::string(current.data, current.size));
	}
}
// The input shares the first two bytes of the UTF-8 BOM (EF BB BF) but ends in
// BB, so it is not a BOM and must come through as text.
TEST(parse_bom_fragment_invalid_utf8)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xef\xbb\xbb", 3, parse_fragment, encoding_utf8));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// Narrow build: the three bytes are kept verbatim.
	CHECK_STRING(text, "\xef\xbb\xbb");
#else
	// Wide build: EF BB BB decodes to the single code point U+FEFB.
	CHECK(text[0] == wchar_cast(0xfefb));
	CHECK(text[1] == 0);
#endif
}
// Feeds the bytes FF FE to a parser that was told the data is big-endian UTF-16.
// Read as big-endian they form the code unit 0xFFFE, which is not the BOM, so
// the character must be preserved as text.
TEST(parse_bom_fragment_invalid_utf16)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xff\xfe", 2, parse_fragment, encoding_utf16_be));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// Narrow build: U+FFFE encoded as UTF-8.
	CHECK_STRING(text, "\xef\xbf\xbe");
#else
	// Wide build: a single U+FFFE character followed by the terminator.
	CHECK(text[0] == wchar_cast(0xfffe));
	CHECK(text[1] == 0);
#endif
}
// Feeds FF FF 00 00 as little-endian UTF-32. It differs from the little-endian
// BOM (FF FE 00 00) in the second byte, so the resulting code point 0xFFFF must
// be preserved as text.
TEST(parse_bom_fragment_invalid_utf32)
{
	xml_document doc;
	CHECK(doc.load_buffer("\xff\xff\x00\x00", 4, parse_fragment, encoding_utf32_le));

	const char_t* text = doc.text().get();

#ifndef PUGIXML_WCHAR_MODE
	// Narrow build: U+FFFF encoded as UTF-8.
	CHECK_STRING(text, "\xef\xbf\xbf");
#else
	// Wide build: a single U+FFFF character followed by the terminator.
	CHECK(text[0] == wchar_cast(0xffff));
	CHECK(text[1] == 0);
#endif
}
// Parses a fragment whose text contains an escape sequence and checks the
// unescaped result.
TEST(parse_pcdata_gap_fragment)
{
	xml_document doc;

	// The input has to contain a real entity: "&amp;" is five characters that
	// collapse into one, so the parsed text ends up shorter than the source.
	// A bare '&' would leave parse_escapes with nothing to do.
	// NOTE(review): presumably this shrinkage is the "gap" the test name refers
	// to - confirm against the parser implementation.
	CHECK(doc.load_string(STR("a&amp;b"), parse_fragment | parse_escapes));
	CHECK_STRING(doc.text().get(), STR("a&b"));
}
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
// In-place parse of a buffer that ends right after an opening tag: the element
// is never closed, yet its name must still be readable afterwards.
TEST(parse_name_end_eof)
{
	char_t buffer[] = STR("<node>");

	// Pass only the six characters of the tag, leaving out the terminator.
	const size_t length = sizeof(buffer) - sizeof(char_t);

	xml_document doc;
	xml_parse_result result = doc.load_buffer_inplace(buffer, length);

	CHECK(result.status == status_end_element_mismatch);
	CHECK_STRING(doc.first_child().name(), STR("node"));
}
// In-place parse of buffers that end in the middle of a closing tag. A matching
// but unterminated name reports a bad end element; a different name reports a
// mismatch. In both cases the opening element's name must stay intact.
TEST(parse_close_tag_eof)
{
	char_t matching[] = STR("<node></node");
	char_t differing[] = STR("<node></nodx");

	// Both inputs are twelve characters long once the terminator is excluded.
	const size_t matching_length = sizeof(matching) - sizeof(char_t);
	const size_t differing_length = sizeof(differing) - sizeof(char_t);

	xml_document doc;

	xml_parse_result matching_result = doc.load_buffer_inplace(matching, matching_length);
	CHECK(matching_result.status == status_bad_end_element);
	CHECK_STRING(doc.first_child().name(), STR("node"));

	xml_parse_result differing_result = doc.load_buffer_inplace(differing, differing_length);
	CHECK(differing_result.status == status_end_element_mismatch);
	CHECK_STRING(doc.first_child().name(), STR("node"));
}
// Regression input (named as fuzzer-derived): the parser must reject it with
// status_bad_doctype.
TEST(parse_fuzz_doctype)
{
	// Bytes spell ";<!DOCTYPE" immediately followed by a UTF-8 BOM, an XML
	// declaration and a comment holding multi-byte sequences; the DOCTYPE is
	// never terminated properly.
	unsigned char fuzz_input[] =
	{
		0x3b, 0x3c, 0x21, 0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45, 0xef, 0xbb, 0xbf, 0x3c, 0x3f, 0x78,
		0x6d, 0x6c, 0x20, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x3d, 0x22, 0x31, 0x2e, 0x30, 0x22,
		0x3f, 0x3e, 0x3c, 0x21, 0x2d, 0x2d, 0x20, 0xe9, 0x80, 0xb1, 0xe5, 0xa0, 0xb1, 0xe3, 0x82, 0xb4,
		0xe3, 0x83, 0xb3, 0x20, 0xef, 0x83, 0x97, 0xe3, 0xa9, 0x2a, 0x20, 0x2d, 0x2d, 0x3e
	};

	xml_document doc;
	xml_parse_result result = doc.load_buffer(fuzz_input, sizeof(fuzz_input));

	CHECK(result.status == status_bad_doctype);
}
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
// Parses the same document with and without parse_embed_pcdata and checks that
// only the low-level DOM shape differs: text access helpers, XPath and
// serialization must give identical results in both modes.
TEST(parse_embed_pcdata)
{
	// parse twice - once with default and once with embed_pcdata flags
	for (int pass = 0; pass < 2; ++pass)
	{
		unsigned int flags = (pass == 0) ? parse_default : parse_default | parse_embed_pcdata;

		xml_document doc;
		xml_parse_result res = doc.load_string(STR("<node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data /></two></node>"), flags);
		CHECK(res);

		xml_node root = doc.child(STR("node"));
		xml_node key = root.child(STR("key"));
		xml_node child = root.child(STR("child"));

		// parse_embed_pcdata omits PCDATA nodes so DOM is different
		if (flags & parse_embed_pcdata)
		{
			// the text lives directly in the element's value, with no child node
			CHECK_STRING(key.value(), STR("value"));
			CHECK(!key.first_child());
		}
		else
		{
			// the element's own value is empty and the text sits in a child node
			CHECK_STRING(key.value(), STR(""));
			CHECK(key.first_child());
			CHECK_STRING(key.first_child().value(), STR("value"));
		}

		// higher-level APIs work the same though
		CHECK_STRING(child.text().get(), STR("outer"));
		CHECK_STRING(child.child(STR("inner1")).text().get(), STR("value1"));

		CHECK_STRING(child.child_value(), STR("outer"));
		CHECK_STRING(child.child_value(STR("inner2")), STR("value2"));

#ifndef PUGIXML_NO_XPATH
		CHECK_XPATH_NUMBER(doc, STR("count(node/child/*[starts-with(., 'value')])"), 2);
#endif

		// serialization output is the same for both parse modes
		CHECK_NODE(doc, STR("<node><key>value</key><child><inner1>value1</inner1><inner2>value2</inner2>outer</child><two>text<data/></two></node>"));
		CHECK_NODE_EX(doc, STR("<node>\n<key>value</key>\n<child>\n<inner1>value1</inner1>\n<inner2>value2</inner2>outer</child>\n<two>text<data />\n</two>\n</node>\n"), STR("\t"), 0);
		CHECK_NODE_EX(doc, STR("<node>\n\t<key>value</key>\n\t<child>\n\t\t<inner1>value1</inner1>\n\t\t<inner2>value2</inner2>outer</child>\n\t<two>text<data />\n\t</two>\n</node>\n"), STR("\t"), format_indent);
	}
}
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
// Loads a document that declares its encoding as utf-8, without passing an
// explicit encoding to load_buffer, and expects the parse to succeed.
TEST(parse_encoding_detect)
{
	char input[] = "<?xml version='1.0' encoding='utf-8'?><n/>";

	xml_document doc;

	// sizeof covers the whole array, so the trailing terminator is part of the buffer.
	xml_parse_result result = doc.load_buffer(input, sizeof(input));

	CHECK(result);
}
TEST(parse_encoding_detect_latin1)
{
char test0[] = "<?xml version='1.0' encoding='utf-8'?><n/>";
char test1[] = "<?xml version='1.0' encoding='iso-8859-1'?><n/>";
char test2[] = "<?xml version='1.0' encoding = \"latin1\"?><n/>";
char test3[] = "<?xml version='1.0' encoding='ISO-8859-1'?><n/>";
char test4[] = "<?xml version='1.0' encoding = \"LATIN1\"?><n/>";
xml_document doc;
CHECK(doc.load_buffer(test0, sizeof(test0)).encoding == encoding_utf8);
CHECK(doc.load_buffer(test1, sizeof(test1)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test2, sizeof(test2)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test3, sizeof(test3)).encoding == encoding_latin1);
CHECK(doc.load_buffer(test4, sizeof(test4)).encoding == encoding_latin1);