Newer
Older
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
while (*s)
{
const char_t* prev = s;
// look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
writer.write_buffer(prev, static_cast<size_t>(s - prev));
if (*s)
{
assert(*s == '-');
writer.write('-', ' ');
++s;
}
}
writer.write('-', '-', '>');
}
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041
4042
4043
// Writes a processing instruction value, emitting every "?>" pair found in
// the text as "? >" so that the value cannot terminate the PI prematurely.
//   writer - destination buffered writer
//   s      - zero-terminated value text
PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
{
    while (*s != 0)
    {
        const char_t* chunk = s;

        // advance to the next "?>" pair or to the end of the string
        while (*s != 0 && (s[0] != '?' || s[1] != '>'))
            s++;

        // flush everything scanned so far in one call
        writer.write_buffer(chunk, static_cast<size_t>(s - chunk));

        if (*s == 0) break;

        // we stopped on "?>": split it with a space and skip past both characters
        assert(s[0] == '?' && s[1] == '>');

        writer.write('?', ' ', '>');
        s += 2;
    }
}
// Writes all attributes of node as name="value" pairs.
//   writer        - destination buffered writer
//   node          - node whose attribute list is written
//   indent        - indentation string, used only when attributes go on their own lines
//   indent_length - length of indent, forwarded to text_output_indent
//   flags         - format_* flags; format_indent_attributes without format_raw puts
//                   each attribute on a new line indented one level deeper than depth
//   depth         - nesting depth of node
// Attributes without a name are written as ":anonymous"; attributes without a
// value are written with an empty quoted value.
PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
{
    const char_t* default_name = PUGIXML_TEXT(":anonymous");

    for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
    {
        // separator before the attribute: newline + indent, or a single space
        if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
        {
            writer.write('\n');

            text_output_indent(writer, indent, indent_length, depth + 1);
        }
        else
        {
            writer.write(' ');
        }

        writer.write_string(a->name ? a->name + 0 : default_name);
        writer.write('=', '"');

        if (a->value)
            text_output(writer, a->value, ctx_special_attr, flags);

        writer.write('"');
    }
}
PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
{
const char_t* default_name = PUGIXML_TEXT(":anonymous");
const char_t* name = node->name ? node->name + 0 : default_name;
writer.write_string(name);
if (node->first_attribute)
node_output_attributes(writer, node, indent, indent_length, flags, depth);
// element nodes can have value if parse_embed_pcdata was used
if (!node->value)
if (!node->first_child)
{
if (flags & format_no_empty_element_tags)
{
writer.write('>', '<', '/');
writer.write_string(name);
writer.write('>');
return false;
}
else
{
if ((flags & format_raw) == 0)
writer.write(' ');
writer.write('/', '>');
return false;
}
}
else
{
writer.write('>');
return true;
}
else
writer.write('>');
text_output(writer, node->value, ctx_special_pcdata, flags);
if (!node->first_child)
{
writer.write('<', '/');
writer.write_string(name);
writer.write('>');
return false;
}
else
{
return true;
}
PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
{
const char_t* default_name = PUGIXML_TEXT(":anonymous");
const char_t* name = node->name ? node->name + 0 : default_name;
writer.write_string(name);
writer.write('>');
PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
{
const char_t* default_name = PUGIXML_TEXT(":anonymous");
switch (PUGI__NODETYPE(node))
text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
break;
case node_cdata:
text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
break;
case node_comment:
node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
break;
case node_pi:
writer.write('<', '?');
writer.write_string(node->name ? node->name + 0 : default_name);
node_output_pi_value(writer, node->value);
}
writer.write('?', '>');
break;
case node_declaration:
writer.write('<', '?');
writer.write_string(node->name ? node->name + 0 : default_name);
node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
writer.write('?', '>');
break;
case node_doctype:
writer.write('<', '!', 'D', 'O', 'C');
writer.write('T', 'Y', 'P', 'E');
writer.write_string(node->value);
assert(false && "Invalid node type"); // unreachable
// Bit flags kept by node_output between nodes to decide which whitespace to
// emit before the next piece of output.
enum indent_flags_t
{
// when set (and format_raw is not requested), node_output writes '\n' first
indent_newline = 1,
// when set (and the indent length is non-zero), node_output writes the indentation
indent_indent = 2
};
PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
unsigned int indent_flags = indent_indent;
xml_node_struct* node = root;
if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
{
node_output_simple(writer, node, flags);
indent_flags = 0;
}
else
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
writer.write('\n');
if ((indent_flags & indent_indent) && indent_length)
text_output_indent(writer, indent, indent_length, depth);
if (PUGI__NODETYPE(node) == node_element)
indent_flags = indent_newline | indent_indent;
if (node_output_start(writer, node, indent, indent_length, flags, depth))
// element nodes can have value if parse_embed_pcdata was used
if (node->value)
indent_flags = 0;
node = node->first_child;
depth++;
continue;
}
else if (PUGI__NODETYPE(node) == node_document)
indent_flags = indent_indent;
if (node->first_child)
{
node = node->first_child;
continue;
}
}
else
{
node_output_simple(writer, node, flags);
indent_flags = indent_newline | indent_indent;
arseny.kapoulkine
committed
// continue to the next node
while (node != root)
{
if (node->next_sibling)
node = node->next_sibling;
arseny.kapoulkine
committed
if (PUGI__NODETYPE(node) == node_element)
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
writer.write('\n');
if ((indent_flags & indent_indent) && indent_length)
text_output_indent(writer, indent, indent_length, depth);
node_output_end(writer, node);
indent_flags = indent_newline | indent_indent;
if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
writer.write('\n');
// Returns true if a declaration node appears among the children of node
// before the first element child; returns false once an element is reached
// or when the child list is exhausted.
PUGI__FN bool has_declaration(xml_node_struct* node)
{
    for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
    {
        xml_node_type type = PUGI__NODETYPE(child);

        if (type == node_declaration) return true;

        // the scan stops at the first element child
        if (type == node_element) return false;
    }

    return false;
}
// Returns true if attr is one of the entries in node's attribute list
// (compared by pointer identity).
PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
{
    xml_attribute_struct* cur = node->first_attribute;

    // stop either on the attribute we are looking for or at the end of the list
    while (cur && cur != attr)
        cur = cur->next_attribute;

    return cur != 0;
}
// Returns true if a node of type parent may carry attributes:
// only element and declaration nodes qualify.
PUGI__FN bool allow_insert_attribute(xml_node_type parent)
{
    switch (parent)
    {
    case node_element:
    case node_declaration:
        return true;

    default:
        return false;
    }
}
// Returns true if a node of type child may be inserted under a node of type parent.
// - only document and element nodes can have children
// - document and null nodes can never be children
// - declaration and doctype nodes are only allowed directly under the document
PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
{
    const bool parent_is_document = (parent == node_document);
    const bool parent_accepts_children = parent_is_document || parent == node_element;
    const bool child_is_insertable = child != node_document && child != node_null;
    const bool child_is_document_only = child == node_declaration || child == node_doctype;

    return parent_accepts_children && child_is_insertable && (parent_is_document || !child_is_document_only);
}
// Returns true if child may be moved so that it becomes a child of parent.
PUGI__FN bool allow_move(xml_node parent, xml_node child)
{
    // the node types must be compatible
    if (!allow_insert_child(parent.type(), child.type()))
        return false;

    // both nodes must share a root; moving between documents is rejected
    if (parent.root() != child.root())
        return false;

    // walk from the new parent towards the root: meeting child on the way
    // means the new parent lies inside the subtree being moved
    for (xml_node ancestor = parent; ancestor; ancestor = ancestor.parent())
        if (ancestor == child)
            return false;

    return true;
}
template <typename String, typename Header>
PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
assert(!dest && (header & header_mask) == 0);
if (alloc && (source_header & header_mask) == 0)
{
dest = source;
// since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
header |= xml_memory_page_contents_shared_mask;
source_header |= xml_memory_page_contents_shared_mask;
strcpy_insitu(dest, header, header_mask, source, strlength(source));
// Copies the name, value and attributes of sn into dn (children are not copied).
//   dn           - destination node
//   sn           - source node
//   shared_alloc - forwarded to node_copy_string for every copied string
// Attributes are appended to dn one by one; an attribute that fails to
// allocate is skipped.
PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
{
    node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
    node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);

    for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
    {
        xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));

        if (da)
        {
            node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
            node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
        }
    }
}
PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
xml_allocator& alloc = get_allocator(dn);
xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
node_copy_contents(dn, sn, shared_alloc);
xml_node_struct* dit = dn;
xml_node_struct* sit = sn->first_child;
while (sit && sit != sn)
xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
node_copy_contents(copy, sit, shared_alloc);
if (sit->first_child)
dit = copy;
sit = sit->first_child;
// continue to the next node
do
{
if (sit->next_sibling)
sit = sit->next_sibling;
sit = sit->parent;
dit = dit->parent;
while (sit != sn);
// Copies the name and value of attribute sa into attribute da.
// The destination allocator is passed on as the shared allocator only when
// both attributes resolve to the same allocator; otherwise null is passed.
PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
{
    xml_allocator& dest_alloc = get_allocator(da);

    xml_allocator* shared_alloc = 0;
    if (&dest_alloc == &get_allocator(sa))
        shared_alloc = &dest_alloc;

    node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
    node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
}
// Returns true for nodes that hold character data (pcdata or cdata).
inline bool is_text_node(xml_node_struct* node)
{
    xml_node_type kind = PUGI__NODETYPE(node);

    switch (kind)
    {
    case node_pcdata:
    case node_cdata:
        return true;

    default:
        return false;
    }
}
// get value with conversion functions
// Parses an integer from value, accumulating the magnitude in unsigned type U.
//   value - zero-terminated text; leading ct_space characters are skipped, then an
//           optional '+' or '-', then either "0x"/"0X" followed by hex digits or
//           decimal digits. Parsing stops at the first character that is not a digit.
//   minv  - result returned when a negative number is out of range
//   maxv  - result returned when a non-negative number is out of range
// Negative results are returned as 0 - magnitude in U's modular arithmetic.
template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
{
    U result = 0;
    const char_t* s = value;

    while (PUGI__IS_CHARTYPE(*s, ct_space))
        s++;

    bool negative = (*s == '-');

    s += (*s == '+' || *s == '-');

    bool overflow = false;

    // (c | ' ') folds an ASCII uppercase letter to lowercase
    if (s[0] == '0' && (s[1] | ' ') == 'x')
    {
        s += 2;

        // since overflow detection relies on length of the sequence skip leading zeros
        while (*s == '0')
            s++;

        const char_t* start = s;

        for (;;)
        {
            if (static_cast<unsigned>(*s - '0') < 10)
                result = result * 16 + (*s - '0');
            else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
                result = result * 16 + ((*s | ' ') - 'a' + 10);
            else
                break;

            s++;
        }

        size_t digits = static_cast<size_t>(s - start);

        // each hex digit is 4 bits, so U holds at most sizeof(U) * 2 significant digits
        overflow = digits > sizeof(U) * 2;
    }
    else
    {
        // since overflow detection relies on length of the sequence skip leading zeros
        while (*s == '0')
            s++;

        const char_t* start = s;

        for (;;)
        {
            if (static_cast<unsigned>(*s - '0') < 10)
                result = result * 10 + (*s - '0');
            else
                break;

            s++;
        }

        size_t digits = static_cast<size_t>(s - start);

        PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);

        const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
        const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
        const size_t high_bit = sizeof(U) * 8 - 1;

        // fewer than max_digits10 digits always fit; exactly max_digits10 digits are accepted
        // if the leading digit is below max_lead, or equals max_lead while the accumulated
        // value still has its top bit set (presumably meaning the accumulation did not wrap)
        overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
    }

    if (negative)
    {
        // Workaround for crayc++ CC-3059: Expected no overflow in routine.
    #ifdef _CRAYC
        return (overflow || result > ~minv + 1) ? minv : ~result + 1;
    #else
        return (overflow || result > 0 - minv) ? minv : 0 - result;
    #endif
    }
    else
        return (overflow || result > maxv) ? maxv : result;
}
// Parses value as an int; out-of-range input yields INT_MIN or INT_MAX.
PUGI__FN int get_value_int(const char_t* value)
{
    // parsing is done in unsigned arithmetic, so the bounds are passed as unsigned
    const unsigned int lowest = static_cast<unsigned int>(INT_MIN);
    const unsigned int highest = INT_MAX;

    return string_to_integer<unsigned int>(value, lowest, highest);
}
// Parses value as an unsigned int using the bounds 0 and UINT_MAX.
PUGI__FN unsigned int get_value_uint(const char_t* value)
{
    const unsigned int lowest = 0;
    const unsigned int highest = UINT_MAX;

    return string_to_integer<unsigned int>(value, lowest, highest);
}
// Parses value as a double with the C library converter that matches the
// character type (strtod, or wcstod when PUGIXML_WCHAR_MODE is defined).
PUGI__FN double get_value_double(const char_t* value)
{
#ifndef PUGIXML_WCHAR_MODE
    const double parsed = strtod(value, 0);
#else
    const double parsed = wcstod(value, 0);
#endif

    return parsed;
}
// Parses value as a double (strtod, or wcstod when PUGIXML_WCHAR_MODE is
// defined) and narrows the result to float.
PUGI__FN float get_value_float(const char_t* value)
{
#ifndef PUGIXML_WCHAR_MODE
    const double parsed = strtod(value, 0);
#else
    const double parsed = wcstod(value, 0);
#endif

    return static_cast<float>(parsed);
}
// Interprets value as a boolean by looking at its first character only:
// '1', 't', 'T', 'y' and 'Y' mean true, anything else means false.
PUGI__FN bool get_value_bool(const char_t* value)
{
    switch (*value)
    {
    case '1':
    case 't':
    case 'T':
    case 'y':
    case 'Y':
        return true;

    default:
        return false;
    }
}
Arseny Kapoulkine
committed
#ifdef PUGIXML_HAS_LONG_LONG
// Parses value as a long long; out-of-range input yields LLONG_MIN or LLONG_MAX.
PUGI__FN long long get_value_llong(const char_t* value)
{
    // parsing is done in unsigned arithmetic, so the bounds are passed as unsigned
    const unsigned long long lowest = static_cast<unsigned long long>(LLONG_MIN);
    const unsigned long long highest = LLONG_MAX;

    return string_to_integer<unsigned long long>(value, lowest, highest);
}
// Parses value as an unsigned long long using the bounds 0 and ULLONG_MAX.
PUGI__FN unsigned long long get_value_ullong(const char_t* value)
{
    const unsigned long long lowest = 0;
    const unsigned long long highest = ULLONG_MAX;

    return string_to_integer<unsigned long long>(value, lowest, highest);
}
#endif
// Formats an integer in decimal, writing backwards so that the last digit
// lands at end[-1].
//   begin    - start of the output buffer (used only for the bounds assertion)
//   end      - one past the last writable character
//   value    - number to format, stored in unsigned type U
//   negative - when true, value is negated in U's modular arithmetic to get the
//              magnitude, and the result starts with '-'
// Returns a pointer to the first character of the text; the text runs up to end
// and no terminating zero is written.
template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
{
    char_t* result = end - 1;
    U rest = negative ? 0 - value : value;

    // do-while so that zero still produces a single '0'
    do
    {
        *result-- = static_cast<char_t>('0' + (rest % 10));
        rest /= 10;
    }
    while (rest);

    assert(result >= begin);
    (void)begin;

    // the sign is stored unconditionally; it is skipped below for non-negative values
    *result = '-';

    return result + !negative;
}
// set value with conversion functions
// Stores the zero-terminated ASCII string buf into dest via strcpy_insitu and
// returns its result.
// In PUGIXML_WCHAR_MODE the text is first widened character by character into a
// 128-element local buffer; buf must be shorter than that (checked by assert only).
template <typename String, typename Header>
PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
{
#ifdef PUGIXML_WCHAR_MODE
    char_t wbuf[128];
    assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));

    size_t offset = 0;
    for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];

    return strcpy_insitu(dest, header, header_mask, wbuf, offset);
#else
    return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
#endif
}
arseny.kapoulkine@gmail.com
committed
// Formats value (negated first when negative is true) as decimal text and
// stores it into dest via strcpy_insitu, returning its result.
template <typename U, typename String, typename Header>
PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
{
    const size_t capacity = 64;
    char_t digits[capacity];

    // integer_to_string fills the buffer from the back and returns where the text starts
    char_t* const last = digits + capacity;
    char_t* const first = integer_to_string(digits, last, value, negative);

    return strcpy_insitu(dest, header, header_mask, first, last - first);
}
template <typename String, typename Header>
PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value)
#if defined(PUGI__HAVE_SNPRINTF)
snprintf(buf, 128, "%.9g", value);
#elif defined(PUGI__MSVC_CRT_VERSION)
buf[127] = '\0';
return set_value_ascii(dest, header, header_mask, buf);
template <typename String, typename Header>
PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value)
arseny.kapoulkine@gmail.com
committed
{
char buf[128];
#if defined(PUGI__HAVE_SNPRINTF)
snprintf(buf, 128, "%.17g", value);
#elif defined (PUGI__MSVC_CRT_VERSION)
buf[127] = '\0';
arseny.kapoulkine@gmail.com
committed
return set_value_ascii(dest, header, header_mask, buf);
arseny.kapoulkine@gmail.com
committed
}
// Stores the text "true" or "false" into dest via strcpy_insitu and returns
// its result.
template <typename String, typename Header>
PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
{
    const char_t* text = value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false");
    const size_t length = value ? 4 : 5;

    return strcpy_insitu(dest, header, header_mask, text, length);
}
4673
4674
4675
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
4686
4687
4688
4689
4690
4691
4692
4693
4694
4695
4696
4697
4698
4699
4700
4701
4702
4703
4704
// Converts a raw buffer to the internal character type and parses it into root.
//   doc        - document that records the converted buffer pointer
//   root       - node that receives the parsed content
//   contents   - input bytes (may be null only when size is 0)
//   size       - input size in bytes
//   options    - parse options forwarded to the parser
//   encoding   - requested encoding; the actual one is determined by get_buffer_encoding
//   is_mutable - forwarded to convert_buffer
//   own        - true if ownership of contents is passed to this function
//   out_buffer - receives the converted buffer when it is owned or was newly produced
// Returns the parse result with its encoding field set to the detected encoding.
PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
{
    // check input buffer
    if (!contents && size) return make_parse_result(status_io_error);

    // get actual encoding
    xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);

    // get private buffer
    char_t* buffer = 0;
    size_t length = 0;

    if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable))
    {
        // the caller handed over ownership and will never see the buffer again,
        // so it has to be released here to avoid leaking it
        if (own && contents) impl::xml_memory::deallocate(contents);

        return impl::make_parse_result(status_out_of_memory);
    }

    // delete original buffer if we performed a conversion
    if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);

    // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
    if (own || buffer != contents) *out_buffer = buffer;

    // store buffer for offset_debug
    doc->buffer = buffer;

    // parse
    xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);

    // remember encoding
    res.encoding = buffer_encoding;

    return res;
}
// we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
arseny.kapoulkine@gmail.com
committed
// Determines the size of file by seeking to its end, reading the position and
// seeking back to the start.
//   file       - open file
//   out_result - receives the size on success
// Returns status_io_error if the reported position is negative,
// status_out_of_memory if the size does not fit into size_t, status_ok otherwise.
PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
{
#if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 && !defined(_WIN32_WCE)
    // there are 64-bit versions of fseek/ftell, let's use them
    typedef __int64 length_type;

    _fseeki64(file, 0, SEEK_END);
    length_type length = _ftelli64(file);
    _fseeki64(file, 0, SEEK_SET);
#elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
    // there are 64-bit versions of fseek/ftell, let's use them
    typedef off64_t length_type;

    fseeko64(file, 0, SEEK_END);
    length_type length = ftello64(file);
    fseeko64(file, 0, SEEK_SET);
#else
    // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
    typedef long length_type;

    fseek(file, 0, SEEK_END);
    length_type length = ftell(file);
    fseek(file, 0, SEEK_SET);
#endif

    // check for I/O errors
    if (length < 0) return status_io_error;

    // check for overflow: the value must survive a round trip through size_t
    size_t result = static_cast<size_t>(length);

    if (static_cast<length_type>(result) != length) return status_out_of_memory;

    // finalize
    out_result = result;

    return status_ok;
}
// Writes a terminating zero after the buffer contents when the encoding is one
// that is handled here, and returns the resulting size in bytes; for any other
// encoding the buffer is left alone and size is returned unchanged.
// The caller must provide sizeof(char_t) extra writable bytes past size.
PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
{
#ifdef PUGIXML_WCHAR_MODE
    const xml_encoding native = get_wchar_encoding();

    // nothing to do unless the data is wchar-encoded (possibly with swapped endianness)
    if (!(encoding == native || need_endian_swap_utf(encoding, native)))
        return size;

    char_t* text = static_cast<char_t*>(buffer);
    const size_t count = size / sizeof(char_t);

    text[count] = 0;

    return (count + 1) * sizeof(char_t);
#else
    // nothing to do unless the data is utf8
    if (encoding != encoding_utf8)
        return size;

    static_cast<char*>(buffer)[size] = 0;

    return size + 1;
#endif
}
// Reads the whole of file into memory and parses it into doc.
//   file       - open file; null yields status_file_not_found
//   options    - parse options forwarded to load_buffer_impl
//   encoding   - requested encoding; the actual one is determined by get_buffer_encoding
//   out_buffer - forwarded to load_buffer_impl
// Returns status_out_of_memory if the buffer cannot be allocated and
// status_io_error if fewer bytes than the file size could be read.
PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
{
    if (!file) return make_parse_result(status_file_not_found);

    // get file size (can result in I/O errors)
    size_t size = 0;
    xml_parse_status size_status = get_file_size(file, size);
    if (size_status != status_ok) return make_parse_result(size_status);

    // room for the terminator that zero_terminate_buffer may append
    size_t max_suffix_size = sizeof(char_t);

    // allocate buffer for the whole file
    char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
    if (!contents) return make_parse_result(status_out_of_memory);

    // read file in memory
    size_t read_size = fread(contents, 1, size, file);

    if (read_size != size)
    {
        xml_memory::deallocate(contents);
        return make_parse_result(status_io_error);
    }

    xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);

    // the buffer is passed as mutable and owned
    return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
}
// Closes file with fclose; the return value of fclose is not inspected.
PUGI__FN void close_file(FILE* file)
{
    (void)fclose(file);
}
#ifndef PUGIXML_NO_STL
// One link of a singly linked list of fixed-size data chunks, used to read a
// stream of unknown length piece by piece.
template <typename T> struct xml_stream_chunk
{
    // Allocates and constructs an empty chunk; returns null if allocation fails.
    static xml_stream_chunk* create()
    {
        void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
        if (!memory) return 0;

        return new (memory) xml_stream_chunk();
    }

    // Releases chunk and every chunk linked after it.
    static void destroy(xml_stream_chunk* chunk)
    {
        // free chunk chain
        while (chunk)
        {
            // read the link before the memory goes away
            xml_stream_chunk* next_ = chunk->next;

            xml_memory::deallocate(chunk);

            chunk = next_;
        }
    }

    xml_stream_chunk(): next(0), size(0)
    {
    }

    // next chunk in the chain, or null for the last one
    xml_stream_chunk* next;

    // number of bytes of data that are in use
    size_t size;

    T data[xml_memory_page_size / sizeof(T)];
};
arseny.kapoulkine@gmail.com
committed
// Reads stream to its end without seeking: data is collected in a chain of
// chunks and then copied into one contiguous buffer.
//   stream     - input stream
//   out_buffer - receives the allocated buffer (with sizeof(char_t) spare bytes at the end)
//   out_size   - receives the number of bytes read
// Returns status_out_of_memory on allocation failure or size overflow,
// status_io_error on a stream error, status_ok otherwise.
template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
{
    // the holder releases the chunk chain on every return path
    auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);

    // read file to a chunk list
    size_t total = 0;
    xml_stream_chunk<T>* last = 0;

    while (!stream.eof())
    {
        // allocate new chunk
        xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
        if (!chunk) return status_out_of_memory;

        // append chunk to list
        if (last) last = last->next = chunk;
        else chunks.data = last = chunk;

        // read data to chunk
        stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
        chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);

        // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
        if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;

        // guard against huge files (chunk size is small enough to make this overflow check work)
        if (total + chunk->size < total) return status_out_of_memory;
        total += chunk->size;
    }

    // room for the terminator that may be appended later
    size_t max_suffix_size = sizeof(char_t);

    // copy chunk list to a contiguous buffer
    char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
    if (!buffer) return status_out_of_memory;

    char* write = buffer;

    for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
    {
        assert(write + chunk->size <= buffer + total);
        memcpy(write, chunk->data, chunk->size);
        write += chunk->size;
    }

    assert(write == buffer + total);

    // return buffer
    *out_buffer = buffer;
    *out_size = total;

    return status_ok;
}
arseny.kapoulkine@gmail.com
committed
// Reads the rest of a seekable stream with a single read: the remaining length
// is measured with tellg/seekg, then one buffer of that size is filled.
//   stream     - input stream
//   out_buffer - receives the allocated buffer (with sizeof(char_t) spare bytes at the end)
//   out_size   - receives the number of bytes actually read
// Returns status_io_error on a stream error, status_out_of_memory when the
// length cannot be represented or allocated, status_ok otherwise.
template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
{
    // get length of remaining data in stream
    typename std::basic_istream<T>::pos_type pos = stream.tellg();
    stream.seekg(0, std::ios::end);
    std::streamoff length = stream.tellg() - pos;
    stream.seekg(pos);

    if (stream.fail() || pos < 0) return status_io_error;

    // guard against huge files
    size_t read_length = static_cast<size_t>(length);

    if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;

    // room for the terminator that may be appended later
    size_t max_suffix_size = sizeof(char_t);

    // guard against wrap-around in the allocation size computed below
    if (read_length > (static_cast<size_t>(-1) - max_suffix_size) / sizeof(T)) return status_out_of_memory;

    // read stream data into memory (guard against stream exceptions with buffer holder)
    auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
    if (!buffer.data) return status_out_of_memory;

    stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));

    // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
    if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;

    // return buffer
    size_t actual_length = static_cast<size_t>(stream.gcount());
    assert(actual_length <= read_length);

    *out_buffer = buffer.release();
    *out_size = actual_length * sizeof(T);

    return status_ok;
}
// Loads the remaining contents of stream into memory and parses them into doc.
//   stream     - input stream; a stream already in a failed state yields status_io_error
//   options    - parse options forwarded to load_buffer_impl
//   encoding   - requested encoding; the actual one is determined by get_buffer_encoding
//   out_buffer - forwarded to load_buffer_impl
template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
{
    void* buffer = 0;
    size_t size = 0;
    xml_parse_status status = status_ok;

    // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
    if (stream.fail()) return make_parse_result(status_io_error);

    // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
    if (stream.tellg() < 0)
    {
        stream.clear(); // clear error flags that could be set by a failing tellg
        status = load_stream_data_noseek(stream, &buffer, &size);
    }
    else
        status = load_stream_data_seek(stream, &buffer, &size);

    if (status != status_ok) return make_parse_result(status);

    xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);

    // the buffer is passed as mutable and owned
    return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
}
#if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
arseny.kapoulkine@gmail.com
committed
// Opens a file given a wide-character path and mode by delegating to _wfopen.
PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
{
    FILE* handle = _wfopen(path, mode);

    return handle;
}
#else
arseny.kapoulkine@gmail.com
committed
// Converts a wide-character path to a zero-terminated utf8 string allocated
// with xml_memory::allocate. Returns null if the allocation fails.
PUGI__FN char* convert_path_heap(const wchar_t* str)
{
    assert(str);

    // first pass: measure the input and the utf8 output
    size_t wide_length = strlength_wide(str);
    size_t utf8_size = as_utf8_begin(str, wide_length);

    // one extra byte for the terminator
    char* utf8 = static_cast<char*>(xml_memory::allocate(utf8_size + 1));

    if (utf8)
    {
        // second pass: convert, then terminate
        as_utf8_end(utf8, utf8_size, str, wide_length);

        utf8[utf8_size] = 0;
    }

    return utf8;
}
arseny.kapoulkine@gmail.com
committed
// Opens a file given a wide-character path and mode on platforms without a
// wide-path open function: the path is converted to utf8 and passed to fopen.
//   path - wide-character path
//   mode - wide-character mode string; each character is narrowed to char, and
//          at most three characters are used
// Returns the opened file, or null if the path conversion or fopen fails.
PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
{
    // there is no standard function to open wide paths, so our best bet is to try utf8 path
    char* path_utf8 = convert_path_heap(path);
    if (!path_utf8) return 0;

    // convert mode to ASCII (we mirror _wfopen interface)
    // the copy is bounded so the zero-initialized array always stays terminated
    char mode_ascii[4] = {0};
    for (size_t i = 0; i + 1 < sizeof(mode_ascii) && mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);

    // try to open the utf8 path
    FILE* result = fopen(path_utf8, mode_ascii);

    // free dummy buffer
    xml_memory::deallocate(path_utf8);

    return result;
}
#endif