Commit 873ecbba authored by Lefebvre, Jordan's avatar Lefebvre, Jordan
Browse files

Merge branch 'json_parser' into 'master'

Json parser

See merge request !92
parents 2ce6ce48 ccb9ebe0
Pipeline #96790 passed with stages
in 20 minutes and 36 seconds
......@@ -143,6 +143,14 @@
throw std::runtime_error(stream.str()); \
}
/// Throws std::runtime_error for a feature that has no implementation yet.
/// The exception text is "<__FILE__>:<__LINE__> : <msg> is not implemented. "
/// followed by a newline. `msg` is inserted with operator<<, so any
/// streamable expression may be passed. The expansion is a brace-enclosed
/// block that declares a local named `stream`.
#define radix_not_implemented(msg) \
{ \
std::ostringstream stream; \
stream << __FILE__ << ":" << __LINE__ << " : " << msg \
<< " is not implemented. " << std::endl; \
throw std::runtime_error(stream.str()); \
}
/// set default timing to off
#ifndef RADIX_TIMING
#define RADIX_TIMING 0
......@@ -206,7 +214,10 @@ class Timer
return seconds(mDuration).count();
}
}; // class Timer
/// Returns the current value of the mRunning flag.
bool running() const { return mRunning; };
};
// class Timer
} // namespace radix
#if RADIX_TIMING > 0
......
......@@ -3,13 +3,16 @@ TRIBITS_SUBPACKAGE(core)
TRIBITS_CONFIGURE_FILE(visibility.hh)
# Header files collected in the HEADERS variable.
SET(HEADERS
json.hh
system.hh
stringfunctions.i.hh
stringfunctions.hh
value.hh
)
# Implementation files collected in the SOURCES variable.
SET(SOURCES
system.cc
stringfunctions.cc
value.cc
)
TRIBITS_ADD_LIBRARY(radixcore
......
#ifndef RADIX_RADIXCORE_JSON_HH_
#define RADIX_RADIXCORE_JSON_HH_
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include "radixcore/value.hh"
#include "radixcore/visibility.hh"
//-----------------------------------------------------------------------------
// see www.json.org for parsing grammar
namespace radix
{
template <class value_type, class array_type, class object_type>
class RADIX_PUBLIC JSONParserImpl
{
public:
//-------------------------------------------------------------------------
// Fills the literal keyword table and caches the length of every keyword so
// parse_literal() can compare lengths before comparing characters.
JSONParserImpl()
{
  // keyword table: the three JSON literals
  literals[0] = "false";
  literals[1] = "null";
  literals[2] = "true";

  // cache each keyword's length once
  size_t slot = 0;
  while (slot < N_LITERALS)
  {
    literal_lens[slot] = strlen(literals[slot]);
    slot++;
  }
}
//-------------------------------------------------------------------------
// Returns a mutable reference to m_root, the value that parse() resets and
// then fills in.
value_type& root() { return m_root; }
//-------------------------------------------------------------------------
// Builds "<message> at line <line> column <column>" from the stored error
// text and the current line/column counters.
std::string last_error()
{
  std::string report = m_last_error;
  report += " at line ";
  report += std::to_string(m_line);
  report += " column ";
  report += std::to_string(m_col);
  return report;
}
//-------------------------------------------------------------------------
// there can be only one value at root node
// Parses the buffered text (m_text) into m_root.
//
// Resets the cursor, the line/column counters, the stored error and m_root,
// then parses exactly one value. Anything other than whitespace after that
// value is reported as "unexpected trailing character(s)".
//
// Returns true on success. The buffered text is cleared on every exit path,
// including the one where parse_value() fails; the original cleared it only
// when parse_value() succeeded.
bool parse()
{
  m_po         = 0;
  m_line       = 1;
  m_col        = 1;
  m_last_error = "";
  m_root       = value_type();

  bool ok = parse_value(m_root);
  if (ok)
  {
    skip_whitespace();
    if (m_po != m_text.size())
    {
      set_error("unexpected trailing character(s)");
      ok = false;
    }
  }
  // the text copy of the json input is no longer needed, whatever the outcome
  m_text.clear();
  return ok;
}
//-------------------------------------------------------------------------
// Reads the whole of in_stream into the text buffer and parses it.
//
// The stream length is found by seeking to the end, so in_stream must be
// seekable. Returns false and records an error if the length cannot be
// determined or fewer bytes than expected could be read; otherwise returns
// the result of parse().
bool parse_from_stream(std::istream& in_stream)
{
  m_text.clear();

  in_stream.seekg(0, std::ios::end);
  // tellg() reports failure as -1; keep the signed value so it can be tested
  // instead of being converted into a huge unsigned size
  const std::streamoff file_size = in_stream.tellg();
  in_stream.seekg(0);
  if (!in_stream || file_size < 0)
  {
    set_error("could not determine size of input stream");
    return false;
  }

  m_text.resize(static_cast<size_t>(file_size), '\0');
  in_stream.read(&m_text[0], static_cast<std::streamsize>(file_size));
  if (!in_stream)
  {
    // bytes actually read first, expected total second
    set_error("could only read " + std::to_string(in_stream.gcount()) +
              " of " + std::to_string(file_size) + " bytes");
    // clear text copy of json file
    m_text.clear();
    return false;
  }
  return parse();
}
//-------------------------------------------------------------------------
// Opens the file named fn in binary mode (positioned at its end) and parses
// its contents via parse_from_stream(). Returns false with the error
// "could not open file: <fn>" when the file cannot be opened.
bool parse_from_file(const std::string& fn)
{
  std::ifstream input(fn, std::ios::in | std::ios::binary | std::ios::ate);
  if (!input.is_open())
  {
    set_error("could not open file: " + fn);
    return false;
  }
  const bool parsed = parse_from_stream(input);
  input.close();
  return parsed;
}
private:
// Records `error` as the most recent error text, replacing any earlier one.
void set_error(const std::string& error)
{
// save the error message
m_last_error = error;
}
//-------------------------------------------------------------------------
// True when the character under the cursor is a space, tab, carriage return
// or line feed. The cursor position is not bounds-checked here.
bool is_whitespace()
{
  switch (m_text[m_po])
  {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
// True when the character under the cursor is one of , : [ ] { }.
// The cursor position is not bounds-checked here.
bool is_structural_character()
{
  switch (m_text[m_po])
  {
    case ',':
    case ':':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
// Advances the cursor past spaces, tabs and line breaks, keeping the line and
// column counters up to date. Stops at the first other character or at the
// end of the text.
void skip_whitespace()
{
  while (m_po < m_text.size())
  {
    switch (m_text[m_po])
    {
      case ' ':
      case '\t':
        m_col++;
        break;
      case '\r':
        m_line++;
        m_col = 1;
        break;
      case '\n':
        // a '\n' right after '\r' belongs to the same line break, which was
        // already counted when the '\r' was consumed
        if (!(m_po > 0 && m_text[m_po - 1] == '\r'))
        {
          m_line++;
          m_col = 1;
        }
        break;
      default:
        // not whitespace: leave the cursor on this character
        return;
    }
    m_po++;
  }
}
//-------------------------------------------------------------------------
// Parses a '[' ... ']' array starting at the cursor.
//
// `value` is reset to a default value_type on entry and receives the parsed
// array only on success. Returns false without recording an error when the
// cursor is at the end of the text or not on '['. On success the cursor is
// left just past the closing ']'.
//
// NOTE(review): a ']' directly after a ',' ends the array normally, so a
// trailing comma such as "[1,]" is accepted.
// NOTE(review): if parse_value() fails without recording an error and leaves
// the cursor on ']', the array is returned without that element.
bool parse_array(value_type& value)
{
value = value_type();
if (m_po >= m_text.size()) return false;
if (m_text[m_po] != '[') return false;
// step over '['
m_po++;
m_col++;
value_type parent = array_type();
// tracks whether the most recent parse_value() call produced an element
bool found_child = false;
// the loop increment is what steps over a ',' after `continue`
for (; m_po < m_text.size(); m_po++, m_col++)
{
skip_whitespace();
if (m_text[m_po] == ']') break;
value_type child;
if (!parse_value(child))
{
found_child = false;
if (m_po >= m_text.size())
{
set_error("no closing bracket ']' for array");
}
else if (m_text[m_po] == ']')
break;
}
else
{
found_child = true;
parent.as_array().push_back(child);
}
skip_whitespace();
char ch = m_text[m_po];
if (ch == ',')
{
// a separator is only valid directly after an element
if (!found_child)
{
set_error("missing value in array");
return false;
}
continue;
}
else if (ch == ']')
break;
else
{
std::string err_msg = "invalid character '";
err_msg += ch;
err_msg += "' in array";
set_error(err_msg);
return false;
}
}
// any error recorded so far is replaced by the "no closing bracket" message
if (m_po >= m_text.size() || m_text[m_po] != ']' || m_last_error != "")
{
set_error("no closing bracket ']' for array");
return false;
}
// step over ']'
m_po++;
m_col++;
value = parent;
return true;
}
//-------------------------------------------------------------------------
// parsed to match the following regular expression:
// (-)?
// (0|([1-9][0-9]*))
// (\.[0-9]+)?
// ([Ee][+-]?[0-9]+)?
// Parses a number at the cursor and stores it in `value` as a double.
//
// `value` is reset to a default value_type on entry. On success the cursor is
// left on the first character after the number and true is returned. On
// failure false is returned; every failure except "cursor already at end of
// text" records an error, including a failed conversion of the matched text,
// which the original reported by return value only.
bool parse_number(value_type& value)
{
  value = value_type();
  if (m_po >= m_text.size()) return false;

  // first character of the number; the matched text is [start, m_po)
  const size_t start = m_po;

  // (-)?
  if (m_text[m_po] == '-')
  {
    m_po++;
    m_col++;
    if (m_po >= m_text.size())
    {
      set_error("invalid number (no digits after -)");
      return false;
    }
  }

  // (0|([1-9][0-9]*))
  char ch = m_text[m_po];
  if (!(ch >= '0' && ch <= '9'))
  {
    set_error("invalid number (no digits)");
    return false;
  }
  m_po++;
  m_col++;
  // a leading '0' stands alone; any other digit may be followed by more
  if (ch != '0')
  {
    for (; m_po < m_text.size(); m_po++, m_col++)
    {
      ch = m_text[m_po];
      if (!(ch >= '0' && ch <= '9')) break;
    }
  }
  // next unconsumed character, or '\0' once the text is exhausted
  ch = (m_po < m_text.size()) ? m_text[m_po] : '\0';

  // (\.[0-9]+)?
  if (ch == '.')
  {
    m_po++;
    m_col++;
    size_t n_digits = 0;
    for (; m_po < m_text.size(); m_po++, m_col++, n_digits++)
    {
      ch = m_text[m_po];
      if (!(ch >= '0' && ch <= '9')) break;
    }
    // also covers the text ending right after the '.'
    if (n_digits == 0)
    {
      set_error("invalid number (no digits after decimal)");
      return false;
    }
    ch = (m_po < m_text.size()) ? m_text[m_po] : '\0';
  }

  // ([Ee][+-]?[0-9]+)?
  if (ch == 'E' || ch == 'e')
  {
    m_po++;
    m_col++;
    // [+-]?
    if (m_po < m_text.size() && (m_text[m_po] == '+' || m_text[m_po] == '-'))
    {
      m_po++;
      m_col++;
    }
    size_t n_digits = 0;
    for (; m_po < m_text.size(); m_po++, m_col++, n_digits++)
    {
      ch = m_text[m_po];
      if (!(ch >= '0' && ch <= '9')) break;
    }
    // also covers the text ending after 'e' or after the sign
    if (n_digits == 0)
    {
      set_error("invalid number (no digits for exponent)");
      return false;
    }
  }

  // convert the matched text; std::stod throws when the value does not fit
  // in a double, and no exception may escape the parser
  try
  {
    value = value_type(std::stod(std::string(&m_text[start], m_po - start)));
    return true;
  }
  catch (...)
  {
    set_error("invalid number (conversion to double failed)");
    return false;
  }
}
//-------------------------------------------------------------------------
// str_known must be a null-terminated string
// Reports whether the characters at sub_str_unknown start with the whole of
// str_known. Exactly strlen(str_known) characters are compared, so an empty
// str_known always matches.
inline bool sub_str_eq(char* sub_str_unknown, const char* str_known)
{
  const size_t known_len = std::strlen(str_known);
  return std::strncmp(sub_str_unknown, str_known, known_len) == 0;
}
//-------------------------------------------------------------------------
// only 3 valid literals all in lower case: false, null, true
// TODO: need to refactor code to support null return (currently it is
// treated as an error)
// Matches the run of ASCII letters at the cursor against the literal table.
//
// `value` is reset to a default value_type on entry. "true" and "false" store
// the matching boolean; "null" leaves the default value in place. All three
// return true and advance the cursor past the word. Any other word records
// "invalid literal", leaves the cursor untouched and returns false.
bool parse_literal(value_type& value)
{
  value = value_type();

  // measure the run of letters that starts at the cursor
  size_t word_len = 0;
  while (m_po + word_len < m_text.size())
  {
    const char c        = m_text[m_po + word_len];
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_lower = (c >= 'a' && c <= 'z');
    if (!is_upper && !is_lower) break;
    word_len++;
  }

  for (size_t idx = 0; idx < N_LITERALS; idx++)
  {
    // compare lengths first; the text is only read when they agree
    if (literal_lens[idx] != word_len) continue;
    if (!sub_str_eq(&m_text[m_po], literals[idx])) continue;

    m_po += word_len;
    m_col += word_len;

    const std::string matched(literals[idx]);
    if (matched == "true")
    {
      value = value_type(true);
    }
    else if (matched == "false")
    {
      value = value_type(false);
    }
    // "null": the default-constructed value assigned above already stands
    return true;
  }

  set_error("invalid literal");
  return false;
}
//-------------------------------------------------------------------------
// Parses a '{' "key" ':' value ... '}' object starting at the cursor.
//
// `value` is reset to a default value_type on entry and receives the parsed
// object only on success. Returns false without recording an error when the
// cursor is at the end of the text or not on '{'. On success the cursor is
// left just past the closing '}'.
//
// NOTE(review): a '}' directly after a ',' ends the object normally, so a
// trailing comma such as {"a":1,} is accepted.
// NOTE(review): whatever error parse_value() recorded for a member value is
// replaced by "missing value in object".
bool parse_object(value_type& value)
{
value = value_type();
if (m_po >= m_text.size()) return false;
if (m_text[m_po] != '{') return false;
// step over '{'
m_po++;
m_col++;
value_type parent = object_type();
// the loop increment is what steps over a ',' after `continue`
for (; m_po < m_text.size(); m_po++, m_col++)
{
skip_whitespace();
if (m_po >= m_text.size() || m_text[m_po] == '}') break;
// parse key
std::string key = parse_string_contents();
// parse_string_contents() signals failure through m_last_error
if (m_last_error != "") return false;
skip_whitespace();
// parse ':'
if (m_po >= m_text.size() || m_text[m_po] != ':')
{
set_error("no ':' following key in object");
return false;
}
m_po++;
m_col++;
// parse value
// no skip_whitespace() here; presumably parse_value() handles leading
// whitespace itself - TODO confirm
bool result = false;
value_type child = value_type();
result = parse_value(child);
if (!result)
{
set_error("missing value in object");
return false;
}
else
{
parent.as_object()[key] = child;
}
skip_whitespace();
char ch = m_text[m_po];
if (ch == ',')
continue;
else if (ch == '}')
break;
else
{
set_error("invalid character in object");
return false;
}
}
if (m_po >= m_text.size() || m_text[m_po] != '}')
{
set_error("no closing curly bracket '}' for object");
return false;
}
// step over '}'
m_po++;
m_col++;
value = parent;
return true;
}
//-------------------------------------------------------------------------
// Validates one backslash escape sequence at the cursor and steps over it.
//
// Accepts \" \\ \/ \b \f \n \r \t and \u followed by four hex digits. Every
// consumed character advances the cursor and the column, and adds one to
// *len. Returns false without recording an error when the cursor is at the
// end of the text or not on a backslash; any other failure records an error.
inline bool parse_escape_seq(size_t* len)
{
  if (m_po >= m_text.size() || m_text[m_po] != '\\') return false;

  // consume the backslash
  m_po++;
  m_col++;
  (*len)++;
  if (m_po >= m_text.size())
  {
    set_error("incomplete unicode character escape sequence in string");
    return false;
  }

  switch (m_text[m_po])
  {
    case '"':
    case '\\':
    case '/':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
      // single-character escape
      m_po++;
      m_col++;
      (*len)++;
      return true;
    case 'u':
      break;
    default:
      set_error("invalid escape sequence in string");
      return false;
  }

  // consume the 'u', then require exactly four hex digits
  m_po++;
  m_col++;
  (*len)++;
  size_t hex_digits = 0;
  while (m_po < m_text.size() && hex_digits < 4)
  {
    const char digit = m_text[m_po];
    const bool is_hex = (digit >= '0' && digit <= '9') ||
                        (digit >= 'A' && digit <= 'F') ||
                        (digit >= 'a' && digit <= 'f');
    if (!is_hex)
    {
      set_error("invalid unicode character escape sequence in string");
      return false;
    }
    m_po++;
    m_col++;
    (*len)++;
    hex_digits++;
  }
  if (hex_digits < 4)
  {
    set_error("incomplete unicode character escape sequence in string");
    return false;
  }
  return true;
}
//-------------------------------------------------------------------------
// parse a quoted string from m_text
// used for string values and object keys
// processes and discards leading and trailing quotes
// on success, returns string contents without quotes
// on error, returns empty string and sets m_last_error
std::string parse_string_contents()
{
if (m_po >= m_text.size() || m_text[m_po] != '"')
{
set_error("string missing opening quote");
return "";
}
m_po++;
m_col++;
for (size_t len = 0; m_po < m_text.size();)
{
char ch = m_text[m_po];
// disallow control characters <= 0x1f
if (static_cast<unsigned>(ch) <= 0x1f)
{
set_error("invalid character in string");
return "";
}
if (ch == '\\')
{
bool ok = parse_escape_seq(&len);
if (!ok) return "";
continue;
}
if (ch == '"')
{
m_po++;
m_col++;