Newer
Older
arseny.kapoulkine@gmail.com
committed
PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
cursor->value = s; // Save the offset.
arseny.kapoulkine@gmail.com
committed
if (PUGI__OPTSET(parse_eol))
{
s = strconv_cdata(s, endch);
if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
}
else
{
// Scan for terminating ']]>'.
PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
arseny.kapoulkine@gmail.com
committed
PUGI__CHECK_ERROR(status_bad_cdata, s);
*s++ = 0; // Zero-terminate this segment.
}
}
else // Flagged for discard, but we still have to scan for the terminator.
{
// Scan for terminating ']]>'.
PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
arseny.kapoulkine@gmail.com
committed
PUGI__CHECK_ERROR(status_bad_cdata, s);
++s;
}
s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
}
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_cdata, s);
else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);
arseny.kapoulkine
committed
char_t* mark = s + 9;
arseny.kapoulkine
committed
s = parse_doctype_group(s, endch);
if (!s) return s;
arseny.kapoulkine
committed
assert((*s == 0 && endch == '>') || *s == '>');
if (*s) *s++ = 0;
if (PUGI__OPTSET(parse_doctype))
{
while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;
arseny.kapoulkine
committed
PUGI__PUSHNODE(node_doctype);
arseny.kapoulkine
committed
arseny.kapoulkine@gmail.com
committed
else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
else PUGI__THROW_ERROR(status_unrecognized_tag, s);
arseny.kapoulkine
committed
return s;
arseny.kapoulkine
committed
char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
{
// load into registers
xml_node_struct* cursor = ref_cursor;
char_t ch = 0;
// parse node contents, starting with question mark
++s;
// read PI target
char_t* target = s;
arseny.kapoulkine@gmail.com
committed
if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);
arseny.kapoulkine@gmail.com
committed
PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
PUGI__CHECK_ERROR(status_bad_pi, s);
// determine node type; stricmp / strcasecmp is not portable
bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;
arseny.kapoulkine@gmail.com
committed
if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
{
if (declaration)
{
// disallow non top-level declarations
arseny.kapoulkine@gmail.com
committed
if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);
arseny.kapoulkine@gmail.com
committed
PUGI__PUSHNODE(node_declaration);
arseny.kapoulkine@gmail.com
committed
PUGI__PUSHNODE(node_pi);
arseny.kapoulkine@gmail.com
committed
PUGI__ENDSEG();
// parse value/attributes
if (ch == '?')
{
// empty node
if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
s += (*s == '>');
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE();
arseny.kapoulkine@gmail.com
committed
else if (PUGI__IS_CHARTYPE(ch, ct_space))
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS();
// scan for tag end
char_t* value = s;
PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
arseny.kapoulkine@gmail.com
committed
PUGI__CHECK_ERROR(status_bad_pi, s);
if (declaration)
{
// replace ending ? with / so that 'element' terminates properly
*s = '/';
// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
s = value;
}
else
{
// store value and step over >
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE();
arseny.kapoulkine@gmail.com
committed
PUGI__ENDSEG();
s += (*s == '>');
}
}
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_pi, s);
}
else
{
// scan for tag end
PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
arseny.kapoulkine@gmail.com
committed
PUGI__CHECK_ERROR(status_bad_pi, s);
s += (s[1] == '>' ? 2 : 1);
}
// store from registers
ref_cursor = cursor;
arseny.kapoulkine
committed
return s;
Arseny Kapoulkine
committed
char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
{
strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
char_t ch = 0;
arseny.kapoulkine@gmail.com
committed
xml_node_struct* cursor = root;
char_t* mark = s;
while (*s != 0)
{
if (*s == '<')
{
++s;
LOC_TAG:
arseny.kapoulkine@gmail.com
committed
if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
arseny.kapoulkine@gmail.com
committed
PUGI__PUSHNODE(node_element); // Append a new node to the tree.
PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
arseny.kapoulkine@gmail.com
committed
PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
if (ch == '>')
{
// end of tag
}
arseny.kapoulkine@gmail.com
committed
else if (PUGI__IS_CHARTYPE(ch, ct_space))
{
LOC_ATTRIBUTES:
while (true)
{
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS(); // Eat any whitespace.
arseny.kapoulkine@gmail.com
committed
if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
xml_attribute_struct* a = append_new_attribute(cursor, alloc); // Make space for this attribute.
arseny.kapoulkine@gmail.com
committed
if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
a->name = s; // Save the offset.
PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
arseny.kapoulkine@gmail.com
committed
PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
arseny.kapoulkine@gmail.com
committed
if (PUGI__IS_CHARTYPE(ch, ct_space))
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS(); // Eat any whitespace.
ch = *s;
++s;
}
if (ch == '=') // '<... #=...'
{
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS(); // Eat any whitespace.
if (*s == '"' || *s == '\'') // '<... #="...'
{
ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
++s; // Step over the quote.
a->value = s; // Save the offset.
s = strconv_attribute(s, ch);
arseny.kapoulkine@gmail.com
committed
if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
// After this line the loop continues from the start;
// Whitespaces, / and > are ok, symbols and EOF are wrong,
// everything else will be detected
arseny.kapoulkine@gmail.com
committed
if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_attribute, s);
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_attribute, s);
}
else if (*s == '/')
{
++s;
if (*s == '>')
{
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE();
s++;
break;
}
else if (*s == 0 && endch == '>')
{
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE();
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_start_element, s);
}
else if (*s == '>')
{
++s;
break;
}
else if (*s == 0 && endch == '>')
{
break;
}
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_start_element, s);
}
// !!!
}
else if (ch == '/') // '<#.../'
{
if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE(); // Pop.
s += (*s == '>');
}
else if (ch == 0)
{
// we stepped over null terminator, backtrack & handle closing tag
--s;
arseny.kapoulkine@gmail.com
committed
if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
arseny.kapoulkine@gmail.com
committed
else PUGI__THROW_ERROR(status_bad_start_element, s);
}
else if (*s == '/')
{
++s;
char_t* name = cursor->name;
arseny.kapoulkine@gmail.com
committed
if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, s);
arseny.kapoulkine@gmail.com
committed
while (PUGI__IS_CHARTYPE(*s, ct_symbol))
arseny.kapoulkine@gmail.com
committed
if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, s);
}
if (*name)
{
arseny.kapoulkine@gmail.com
committed
if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
else PUGI__THROW_ERROR(status_end_element_mismatch, s);
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE(); // Pop.
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS();
if (*s == 0)
{
arseny.kapoulkine@gmail.com
committed
if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
arseny.kapoulkine@gmail.com
committed
if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
++s;
}
}
else if (*s == '?') // '<?...'
{
arseny.kapoulkine
committed
s = parse_question(s, cursor, optmsk, endch);
if (!s) return s;
assert(cursor);
if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
}
else if (*s == '!') // '<!...'
{
arseny.kapoulkine
committed
s = parse_exclamation(s, cursor, optmsk, endch);
if (!s) return s;
arseny.kapoulkine@gmail.com
committed
else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
else PUGI__THROW_ERROR(status_unrecognized_tag, s);
}
else
{
mark = s; // Save this offset while searching for a terminator.
arseny.kapoulkine@gmail.com
committed
PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
{
// We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
assert(mark != s);
if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
{
continue;
}
else if (PUGI__OPTSET(parse_ws_pcdata_single))
{
if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
}
}
if (!PUGI__OPTSET(parse_trim_pcdata))
s = mark;
if (cursor->parent || PUGI__OPTSET(parse_fragment))
arseny.kapoulkine@gmail.com
committed
PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
cursor->value = s; // Save the offset.
s = strconv_pcdata(s);
arseny.kapoulkine@gmail.com
committed
PUGI__POPNODE(); // Pop since this is a standalone.
if (!*s) break;
}
else
{
arseny.kapoulkine@gmail.com
committed
PUGI__SCANFOR(*s == '<'); // '...<'
if (!*s) break;
++s;
}
// We're after '<'
goto LOC_TAG;
}
}
// check that last tag is closed
arseny.kapoulkine@gmail.com
committed
if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
arseny.kapoulkine@gmail.com
committed
return s;
#ifdef PUGIXML_WCHAR_MODE
// Returns the position just past a leading byte order mark, or s itself when
// the buffer does not start with one (wide build: a single 0xfeff unit).
static char_t* parse_skip_bom(char_t* s)
{
	const unsigned int bom_unit = 0xfeff;

	if (s[0] != static_cast<wchar_t>(bom_unit)) return s;

	return s + 1;
}
#else
// Returns the position just past a leading byte order mark, or s itself when
// the buffer does not start with one (byte build: the sequence EF BB BF).
static char_t* parse_skip_bom(char_t* s)
{
	// each unit is inspected only after the previous one matched
	const bool starts_with_bom = s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf';

	return starts_with_bom ? s + 3 : s;
}
#endif
// Returns true when node, or any node reachable from it through next_sibling
// links, has type node_element; a null node yields false.
static bool has_element_node_siblings(xml_node_struct* node)
{
	for (xml_node_struct* it = node; it; it = it->next_sibling)
	{
		if (PUGI__NODETYPE(it) == node_element)
			return true;
	}

	return false;
}
arseny.kapoulkine@gmail.com
committed
// Parses `length` characters starting at `buffer` into the tree below `root`.
// The buffer is modified: its last character is replaced with a zero terminator
// and the original value is handed to parse_tree as `endch`.
// Returns the parse result; on failure its offset is relative to `buffer`.
static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
{
	// early-out for empty documents
	if (length == 0)
		return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);

	// get last child of the root before parsing
	xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;

	// create parser on stack
	xml_parser parser(static_cast<xml_allocator*>(xmldoc));

	// save last character and make buffer zero-terminated (speeds up parsing)
	char_t endch = buffer[length - 1];
	buffer[length - 1] = 0;

	// skip BOM to make sure it does not end up as part of parse output
	char_t* buffer_data = parse_skip_bom(buffer);

	// perform actual parsing
	parser.parse_tree(buffer_data, root, optmsk, endch);

	xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
	assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);

	if (result)
	{
		// since we removed last character, we have to handle the only possible false positive (stray <)
		if (endch == '<')
			return make_parse_result(status_unrecognized_tag, length - 1);

		// check if there are any element nodes parsed
		xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child + 0;

		if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
			return make_parse_result(status_no_document_element, length - 1);
	}
	else
	{
		// roll back offset if it occurs on a null terminator in the source buffer
		if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
			result.offset--;
	}

	return result;
}
};
// Output facilities
arseny.kapoulkine@gmail.com
committed
// Returns the encoding for which flush() can write char_t data without conversion:
// the wchar_t encoding in PUGIXML_WCHAR_MODE builds, utf8 otherwise.
PUGI__FN xml_encoding get_write_native_encoding()
{
#ifdef PUGIXML_WCHAR_MODE
	const xml_encoding native = get_wchar_encoding();
#else
	const xml_encoding native = encoding_utf8;
#endif

	return native;
}
arseny.kapoulkine@gmail.com
committed
// Resolves a user-requested output encoding to a concrete one.
// Generic requests (wchar, utf16, utf32, auto) are replaced as described below;
// every other value is returned unchanged.
PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
{
	switch (encoding)
	{
	// wchar is replaced with the utf implementation reported by get_wchar_encoding()
	case encoding_wchar:
		return get_wchar_encoding();

	// utf16 without endianness is replaced with the variant chosen by is_little_endian()
	case encoding_utf16:
		return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;

	// utf32 without endianness is replaced with the variant chosen by is_little_endian()
	case encoding_utf32:
		return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;

	// no explicit encoding requested: assume utf8
	case encoding_auto:
		return encoding_utf8;

	// explicit encodings are used as is
	default:
		return encoding;
	}
}
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
// Runs D::process over `length` units of `data` with a T() writer targeting `dest`.
// Returns the output size in bytes, computed from the distance between `dest`
// and the position returned by D::process.
template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
{
	PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));

	typedef typename D::type source_unit;

	// the static assert above keeps this reinterpretation size-compatible
	const source_unit* source = reinterpret_cast<const source_unit*>(data);

	typename T::value_type finish = D::process(source, length, dest, T());

	size_t units_written = static_cast<size_t>(finish - dest);

	return units_written * sizeof(*dest);
}
// Same conversion as the overload without opt_swap; when opt_swap is true every
// produced unit is additionally passed through endian_swap in place.
// Returns the output size in bytes.
template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
{
	PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));

	typedef typename D::type source_unit;
	typedef typename T::value_type output_ptr;

	const source_unit* source = reinterpret_cast<const source_unit*>(data);

	output_ptr finish = D::process(source, length, dest, T());

	if (opt_swap)
	{
		output_ptr cur = dest;

		while (cur != finish)
		{
			*cur = endian_swap(*cur);
			++cur;
		}
	}

	size_t units_written = static_cast<size_t>(finish - dest);

	return units_written * sizeof(*dest);
}
#ifdef PUGIXML_WCHAR_MODE
arseny.kapoulkine@gmail.com
committed
// Returns how many of the first `length` units of `data` can be processed as a chunk
// without ending on the lead of a surrogate pair (wide-character build).
// Returns 0 for an empty range.
PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
{
	if (length < 1) return 0;

	// discard last character if it's the lead of a surrogate pair
	// (only relevant when wchar_t is two bytes wide; 0xD800..0xDBFF is the checked range)
	return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
}
Arseny Kapoulkine
committed
// Converts `length` wide characters from `data` into `encoding` (wide-character build).
// The r_* pointers are the destination viewed with different element types; the one
// matching the target encoding is written. Returns the number of bytes produced,
// or 0 when `encoding` matches none of the handled cases.
PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
{
	// only endian-swapping is required
	if (need_endian_swap_utf(encoding, get_wchar_encoding()))
	{
		convert_wchar_endian_swap(r_char, data, length);

		return length * sizeof(char_t);
	}

	// convert to utf8
	if (encoding == encoding_utf8)
		return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());

	// convert to utf16
	if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;

		// swap units when the requested byte order differs from the native one
		return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
	}

	// convert to utf32
	if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;

		// swap units when the requested byte order differs from the native one
		return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
	}

	// convert to latin1
	if (encoding == encoding_latin1)
		return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());

	return 0;
}
#else
arseny.kapoulkine@gmail.com
committed
// Returns a prefix length of `data` (at most `length`) that does not end in the middle
// of a multi-byte sequence (byte build). Ranges shorter than 5 units yield 0.
PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
{
	if (length < 5) return 0;

	// look at the last four units, starting from the end
	for (size_t i = 1; i <= 4; ++i)
	{
		uint8_t ch = static_cast<uint8_t>(data[length - i]);

		// either a standalone character or a leading one; cut the chunk right before it
		if ((ch & 0xc0) != 0x80) return length - i;
	}

	// there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
	return length;
}
Arseny Kapoulkine
committed
// Converts `length` units from `data` into `encoding` (byte build; input is decoded with utf8_decoder).
// The r_* pointers are the destination viewed with different element types; the one
// matching the target encoding is written. Returns the number of bytes produced,
// or 0 when `encoding` is not utf16, utf32 or latin1.
PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
{
	if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;

		// swap units when the requested byte order differs from the native one
		return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
	}

	if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
	{
		xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;

		// swap units when the requested byte order differs from the native one
		return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
	}

	if (encoding == encoding_latin1)
		return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());

	return 0;
}
#endif
class xml_buffered_writer
{
xml_buffered_writer(const xml_buffered_writer&);
xml_buffered_writer& operator=(const xml_buffered_writer&);
public:
xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
PUGI__STATIC_ASSERT(bufcapacity >= 8);
{
flush(buffer, bufsize);
bufsize = 0;
}
void flush(const char_t* data, size_t size)
{
if (size == 0) return;
// fast path, just write data
if (encoding == get_write_native_encoding())
writer.write(data, size * sizeof(char_t));
else
{
// convert chunk
Arseny Kapoulkine
committed
size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
assert(result <= sizeof(scratch));
// write data
arseny.kapoulkine@gmail.com
committed
writer.write(scratch.data_u8, result);
void write_direct(const char_t* data, size_t length)
// flush the remaining buffer contents
flush();
// handle large chunks
if (length > bufcapacity)
{
if (encoding == get_write_native_encoding())
// fast path, can just write data chunk
writer.write(data, length * sizeof(char_t));
return;
}
// need to convert in suitable chunks
while (length > bufcapacity)
{
// get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
// and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
size_t chunk_size = get_valid_length(data, bufcapacity);
assert(chunk_size);
// convert chunk and write
flush(data, chunk_size);
// iterate
data += chunk_size;
length -= chunk_size;
// small tail is copied below
bufsize = 0;
}
memcpy(buffer + bufsize, data, length * sizeof(char_t));
bufsize += length;
}
void write_buffer(const char_t* data, size_t length)
{
size_t offset = bufsize;
if (offset + length <= bufcapacity)
{
memcpy(buffer + offset, data, length * sizeof(char_t));
bufsize = offset + length;
}
else
{
write_direct(data, length);
}
}
void write_string(const char_t* data)
3688
3689
3690
3691
3692
3693
3694
3695
3696
3697
3698
3699
3700
3701
3702
3703
3704
3705
3706
3707
3708
// write the part of the string that fits in the buffer
size_t offset = bufsize;
while (*data && offset < bufcapacity)
buffer[offset++] = *data++;
// write the rest
if (offset < bufcapacity)
{
bufsize = offset;
}
else
{
// backtrack a bit if we have split the codepoint
size_t length = offset - bufsize;
size_t extra = length - get_valid_length(data - length, length);
bufsize = offset - extra;
write_direct(data - extra, strlength(data) + extra);
}
}
void write(char_t d0)
{
size_t offset = bufsize;
if (offset > bufcapacity - 1) offset = flush();
buffer[offset + 0] = d0;
bufsize = offset + 1;
}
void write(char_t d0, char_t d1)
{
size_t offset = bufsize;
if (offset > bufcapacity - 2) offset = flush();
buffer[offset + 0] = d0;
buffer[offset + 1] = d1;
bufsize = offset + 2;
}
void write(char_t d0, char_t d1, char_t d2)
{
size_t offset = bufsize;
if (offset > bufcapacity - 3) offset = flush();
buffer[offset + 0] = d0;
buffer[offset + 1] = d1;
buffer[offset + 2] = d2;
bufsize = offset + 3;
}
void write(char_t d0, char_t d1, char_t d2, char_t d3)
{
size_t offset = bufsize;
if (offset > bufcapacity - 4) offset = flush();
buffer[offset + 0] = d0;
buffer[offset + 1] = d1;
buffer[offset + 2] = d2;
buffer[offset + 3] = d3;
bufsize = offset + 4;
}
void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
{
size_t offset = bufsize;
if (offset > bufcapacity - 5) offset = flush();
buffer[offset + 0] = d0;
buffer[offset + 1] = d1;
buffer[offset + 2] = d2;
buffer[offset + 3] = d3;
buffer[offset + 4] = d4;
bufsize = offset + 5;
}
void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
{
size_t offset = bufsize;
if (offset > bufcapacity - 6) offset = flush();
buffer[offset + 0] = d0;
buffer[offset + 1] = d1;
buffer[offset + 2] = d2;
buffer[offset + 3] = d3;
buffer[offset + 4] = d4;
buffer[offset + 5] = d5;
bufsize = offset + 6;
}
// utf8 maximum expansion: x4 (-> utf32)
// utf16 maximum expansion: x2 (-> utf32)
// utf32 maximum expansion: x1
arseny.kapoulkine@gmail.com
committed
enum
{
bufcapacitybytes =
#ifdef PUGIXML_MEMORY_OUTPUT_STACK
PUGIXML_MEMORY_OUTPUT_STACK
#else
10240
#endif
,
bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
};
char_t buffer[bufcapacity];
arseny.kapoulkine@gmail.com
committed
union
{
uint8_t data_u8[4 * bufcapacity];
uint16_t data_u16[2 * bufcapacity];
uint32_t data_u32[bufcapacity];
char_t data_char[bufcapacity];
} scratch;
xml_writer& writer;
size_t bufsize;
xml_encoding encoding;
};
arseny.kapoulkine@gmail.com
committed
// Writes the zero-terminated string `s` to `writer`, replacing characters flagged by
// `type` with escapes: & < > " become named entities, anything else flagged is
// written as a two-digit decimal character reference.
PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type)
{
	while (*s)
	{
		const char_t* prev = s;

		// While *s is a usual symbol
		PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));

		// copy the unescaped run in one go
		writer.write_buffer(prev, static_cast<size_t>(s - prev));

		switch (*s)
		{
			case 0: break;
			case '&':
				writer.write('&', 'a', 'm', 'p', ';');
				++s;
				break;
			case '<':
				writer.write('&', 'l', 't', ';');
				++s;
				break;
			case '>':
				writer.write('&', 'g', 't', ';');
				++s;
				break;
			case '"':
				writer.write('&', 'q', 'u', 'o', 't', ';');
				++s;
				break;
			default: // s is not a usual symbol
			{
				unsigned int ch = static_cast<unsigned int>(*s++);
				assert(ch < 32);

				// two decimal digits are enough because ch is below 32
				writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
			}
		}
	}
}
arseny.kapoulkine@gmail.com
committed
// Writes `s` to `writer`: verbatim when format_no_escapes is set in `flags`,
// otherwise through text_output_escaped with the given character class.
PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
{
	const bool escape = !(flags & format_no_escapes);

	if (escape)
	{
		text_output_escaped(writer, s, type);
		return;
	}

	writer.write_string(s);
}
arseny.kapoulkine@gmail.com
committed
// Writes `s` as one or more CDATA sections. A "]]>" inside the text is split so that
// "]]" closes one section and ">" opens the next; an empty string still produces
// one empty section.
PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
{
	for (;;)
	{
		// section opener, emitted in two writes
		writer.write('<', '!', '[', 'C', 'D');
		writer.write('A', 'T', 'A', '[');

		const char_t* chunk = s;

		// advance to the end of the text or to the next ]]> sequence,
		// which can't be output as is since it terminates CDATA
		for (; *s; ++s)
		{
			if (s[0] == ']' && s[1] == ']' && s[2] == '>') break;
		}

		// stopped at ]]>: keep ]] in this section, > goes to the next one
		if (*s) s += 2;

		writer.write_buffer(chunk, static_cast<size_t>(s - chunk));
		writer.write(']', ']', '>');

		if (!*s) break;
	}
}
// Writes the indentation string `indent` (of `indent_length` characters) `depth` times.
// Lengths 1 to 4 use the fixed-arity write overloads; any other length goes
// through write_buffer.
PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
{
	switch (indent_length)
	{
	case 1:
	{
		for (unsigned int i = 0; i < depth; ++i)
			writer.write(indent[0]);
		break;
	}

	case 2:
	{
		for (unsigned int i = 0; i < depth; ++i)
			writer.write(indent[0], indent[1]);
		break;
	}

	case 3:
	{
		for (unsigned int i = 0; i < depth; ++i)
			writer.write(indent[0], indent[1], indent[2]);
		break;
	}

	case 4:
	{
		for (unsigned int i = 0; i < depth; ++i)
			writer.write(indent[0], indent[1], indent[2], indent[3]);
		break;
	}

	default:
	{
		for (unsigned int i = 0; i < depth; ++i)
			writer.write_buffer(indent, indent_length);
	}
	}
}
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
// Writes `s` as a comment node. A '-' that is followed by another '-' or by the end
// of the string is written as "- " so the body never contains "--" and never ends with '-'.
PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
{
	writer.write('<', '!', '-', '-');

	while (*s)
	{
		const char_t* run = s;

		// advance to the next -\0 or -- sequence, which can't be output since -- is illegal in comment body
		for (; *s; ++s)
		{
			const bool dash_needs_space = s[0] == '-' && (s[1] == '-' || s[1] == 0);

			if (dash_needs_space) break;
		}

		writer.write_buffer(run, static_cast<size_t>(s - run));

		// reached the end of the text
		if (!*s) break;

		assert(*s == '-');

		writer.write('-', ' ');
		++s;
	}

	writer.write('-', '-', '>');
}
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
// Writes `s` as a processing instruction value. Every "?>" inside the text is written
// as "? >" so that the value cannot terminate the PI early.
PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
{
	while (*s)
	{
		const char_t* run = s;

		// advance to the next ?> sequence, which can't be output since it terminates PI
		for (; *s; ++s)
		{
			if (s[0] == '?' && s[1] == '>') break;
		}

		writer.write_buffer(run, static_cast<size_t>(s - run));

		// reached the end of the text
		if (!*s) break;

		assert(s[0] == '?' && s[1] == '>');

		writer.write('?', ' ', '>');
		s += 2;
	}
}
// Writes every attribute of `node` as name="value".
// Each attribute is preceded by a newline plus indentation at depth + 1 when
// format_indent_attributes is set and format_raw is not; otherwise by a single space.
// Attributes without a name are written as ":anonymous"; a missing value produces "".
PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
{
	const char_t* default_name = PUGIXML_TEXT(":anonymous");

	for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
	{
		if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
		{
			writer.write('\n');

			text_output_indent(writer, indent, indent_length, depth + 1);
		}
		else
		{
			writer.write(' ');
		}

		writer.write_string(a->name ? a->name + 0 : default_name);
		writer.write('=', '"');

		if (a->value)
			text_output(writer, a->value, ctx_special_attr, flags);

		writer.write('"');
	}
}
PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
{
const char_t* default_name = PUGIXML_TEXT(":anonymous");
const char_t* name = node->name ? node->name + 0 : default_name;
writer.write_string(name);