Commit 1be8397e authored by Norby, Tom's avatar Norby, Tom
Browse files

Bring JSONParser to radix.

parent 2ce6ce48
Pipeline #95524 failed with stages
in 17 minutes and 26 seconds
......@@ -143,6 +143,14 @@
throw std::runtime_error(stream.str()); \
}
/// Throws std::runtime_error carrying "<file>:<line> : <msg> is not
/// implemented. " followed by a newline. msg is streamed into a
/// std::ostringstream, so anything with an operator<< is accepted.
#define radix_not_implemented(msg)                      \
  {                                                     \
    std::ostringstream stream;                          \
    stream << __FILE__ << ":" << __LINE__ << " : ";     \
    stream << msg << " is not implemented. ";           \
    stream << '\n';                                     \
    throw std::runtime_error(stream.str());             \
  }
/// set default timing to off
#ifndef RADIX_TIMING
#define RADIX_TIMING 0
......
......@@ -3,13 +3,16 @@ TRIBITS_SUBPACKAGE(core)
TRIBITS_CONFIGURE_FILE(visibility.hh)
# Header files listed for this subpackage.
# NOTE(review): presumably passed to the TRIBITS_ADD_LIBRARY call that
# follows -- its arguments are not visible here, confirm.
SET(HEADERS
json.hh
system.hh
stringfunctions.i.hh
stringfunctions.hh
value.hh
)
# Implementation files listed for this subpackage (json.hh has no matching
# .cc entry in this list).
SET(SOURCES
system.cc
stringfunctions.cc
value.cc
)
TRIBITS_ADD_LIBRARY(radixcore
......
#ifndef RADIX_RADIXCORE_JSON_HH_
#define RADIX_RADIXCORE_JSON_HH_
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include "value.hh"
//-----------------------------------------------------------------------------
// see www.json.org for parsing grammar
namespace radix
{
class JSONParser
{
public:
//-------------------------------------------------------------------------
/// Fills the table of recognised literal names and caches each name's length.
JSONParser()
{
  // the three literal names, kept in alphabetical order
  literals[0] = "false";
  literals[1] = "null";
  literals[2] = "true";

  // parse_literal() compares these cached lengths before comparing characters
  size_t idx = 0;
  while (idx < N_LITERALS)
  {
    literal_lens[idx] = strlen(literals[idx]);
    ++idx;
  }
}
//-------------------------------------------------------------------------
/// Mutable access to the root value stored by the most recent parse() call.
Value& root()
{
  return m_root;
}
//-------------------------------------------------------------------------
/// Returns the last recorded error message suffixed with the current
/// line and column counters.
std::string last_error()
{
  std::string report = m_last_error;
  report += " at line ";
  report += std::to_string(m_line);
  report += " column ";
  report += std::to_string(m_col);
  return report;
}
//-------------------------------------------------------------------------
// Parses the buffered text from its beginning. Exactly one value is allowed
// at the root: the call fails when that value comes back null, or when
// anything other than whitespace follows it.
bool parse()
{
  // rewind the cursor and discard diagnostics from any earlier run
  m_last_error = "";
  m_po = 0;
  m_line = 1;
  m_col = 1;

  m_root = parse_value();
  if (m_root.is_null())
  {
    return false;
  }

  // only whitespace may remain after the root value
  skip_whitespace();
  const bool consumed_all = (m_po == m_text.size());
  if (!consumed_all)
  {
    m_last_error = "unexpected trailing character(s)";
  }
  return consumed_all;
}
//-------------------------------------------------------------------------
/// Reads the whole of in_stream into the parse buffer, then parses it.
/// The stream length is measured with seekg/tellg, so the stream has to be
/// seekable. Returns false and records an error message when the length
/// cannot be determined or when fewer bytes than expected could be read;
/// otherwise returns the result of parse().
bool parse_from_stream(std::istream& in_stream)
{
  in_stream.seekg(0, std::ios::end);
  const std::streamoff file_size = in_stream.tellg();
  // tellg() yields -1 on failure; never let that reach resize()
  if (!in_stream || file_size < 0)
  {
    m_last_error = "could not determine size of stream";
    return false;
  }
  in_stream.seekg(0);
  m_text.clear();
  // size_t (not unsigned) so that inputs above 4 GiB are not truncated
  m_text.resize(static_cast<size_t>(file_size), '\0');
  if (file_size > 0)
  {
    in_stream.read(&m_text[0], static_cast<std::streamsize>(file_size));
  }
  if (!in_stream)
  {
    // report "<bytes actually read> of <bytes expected>"
    m_last_error = "could only read " + std::to_string(in_stream.gcount()) +
                   " of " + std::to_string(file_size) + " bytes";
    return false;
  }
  return parse();
}
//-------------------------------------------------------------------------
/// Opens the file named fn in binary mode and parses its contents via
/// parse_from_stream(). Returns false with "could not open file" recorded
/// when the file cannot be opened.
bool parse_from_file(std::string fn)
{
  std::ifstream input(fn, std::ios::in | std::ios::binary | std::ios::ate);
  if (input.is_open())
  {
    const bool parsed = parse_from_stream(input);
    input.close();
    return parsed;
  }
  m_last_error = "could not open file";
  return false;
}
private:
//-------------------------------------------------------------------------
/// True when the character under the cursor is a space, tab, carriage
/// return or line feed. Does not move the cursor.
bool is_whitespace()
{
  switch (m_text[m_po])
  {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
/// True when the character under the cursor is one of , : [ ] { }.
/// Does not move the cursor.
bool is_structural_character()
{
  switch (m_text[m_po])
  {
    case ',':
    case ':':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
/// Advances the cursor past spaces, tabs and line breaks, keeping the
/// line/column counters up to date. Stops at the first other character
/// or at the end of the text.
void skip_whitespace()
{
  while (m_po < m_text.size())
  {
    switch (m_text[m_po])
    {
      case ' ':
      case '\t':
        ++m_col;
        break;
      case '\r':
        ++m_line;
        m_col = 1;
        break;
      case '\n':
      {
        // the '\r' of a "\r\n" pair has already been counted as the new line
        const bool follows_cr = (m_po > 0 && m_text[m_po - 1] == '\r');
        if (!follows_cr)
        {
          ++m_line;
          m_col = 1;
        }
        break;
      }
      default:
        return;
    }
    ++m_po;
  }
}
//-------------------------------------------------------------------------
/// Parses a '['-delimited, comma-separated list of values starting at the
/// cursor and returns it as an array Value. Returns a null Value when the
/// cursor is not on '[' or when the array is malformed; in the malformed
/// case m_last_error describes the problem (for a trailing comma the
/// line/column counters are moved back to that comma).
Value parse_array()
{
  if (m_po >= m_text.size()) return nullptr;
  if (m_text[m_po] != '[') return nullptr;
  m_po++;
  m_col++;
  // location of the most recent comma, restored when it proves to be trailing
  size_t line_prev = m_line;
  size_t col_prev = m_col;
  bool trailing_comma = false;
  Value parent = Value(DataArray());
  // the loop increment steps over the ',' that precedes the next element
  for (; m_po < m_text.size(); m_po++, m_col++)
  {
    skip_whitespace();
    Value child = parse_value();
    if (child.is_null())
    {
      if (m_last_error != "")
      {
        // keep the more specific error recorded while parsing the element
      }
      else if (m_po >= m_text.size())
      {
        m_last_error = "no closing bracket ']' for array";
      }
      else if (trailing_comma)
      {
        m_last_error = "trailing comma in array";
        m_line = line_prev;
        m_col = col_prev;
      }
      else if (m_text[m_po] == ']')
        break;  // no element and no error: the array simply ends here
      return Value();
    }
    trailing_comma = false;
    parent.as_array().push_back(child);
    skip_whitespace();
    // Input ended right after an element: leave the loop so the check below
    // reports the missing ']' instead of reading past the end of the text.
    if (m_po >= m_text.size()) break;
    const char ch = m_text[m_po];
    if (ch == ',')
    {
      trailing_comma = true;
      line_prev = m_line;
      col_prev = m_col;
      continue;
    }
    if (ch == ']') break;
    m_last_error = "invalid character '";
    m_last_error += ch;
    m_last_error += "' in array";
    return Value();
  }
  if (m_po >= m_text.size() || m_text[m_po] != ']')
  {
    m_last_error = "no closing bracket ']' for array";
    return Value();
  }
  // consume the closing bracket
  m_po++;
  m_col++;
  return parent;
}
//-------------------------------------------------------------------------
// Parses a number starting at the cursor and returns it as a double Value.
// The accepted text matches the following regular expression:
// (-)?
// (0|([1-9][0-9]*))
// (\.[0-9]+)?
// ([Ee][+-]?[0-9]+)?
// On malformed input a null Value is returned and m_last_error is set; the
// cursor is left where scanning stopped. When the cursor is already at the
// end of the text a null Value is returned without an error message.
Value parse_number()
{
  if (m_po >= m_text.size()) return Value();
  const size_t start = m_po;

  // step over one character, keeping the column counter in sync
  auto advance = [&]() {
    m_po++;
    m_col++;
  };
  auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
  // consume [0-9]* and report how many digits were taken
  auto consume_digits = [&]() -> size_t {
    size_t taken = 0;
    while (m_po < m_text.size() && is_digit(m_text[m_po]))
    {
      advance();
      taken++;
    }
    return taken;
  };

  // (-)?
  if (m_text[m_po] == '-')
  {
    advance();
    if (m_po >= m_text.size())
    {
      m_last_error = "invalid number (no digits after -)";
      return Value();
    }
  }
  // (0|([1-9][0-9]*)) -- a leading '0' stands alone, other digits may repeat
  const char first = m_text[m_po];
  if (!is_digit(first))
  {
    m_last_error = "invalid number (no digits)";
    return Value();
  }
  advance();
  if (first != '0') consume_digits();
  // (\.[0-9]+)?
  if (m_po < m_text.size() && m_text[m_po] == '.')
  {
    advance();
    if (consume_digits() == 0)
    {
      m_last_error = "invalid number (no digits after decimal)";
      return Value();
    }
  }
  // ([Ee][+-]?[0-9]+)?
  if (m_po < m_text.size() && (m_text[m_po] == 'E' || m_text[m_po] == 'e'))
  {
    advance();
    // [+-]?
    if (m_po < m_text.size() && (m_text[m_po] == '+' || m_text[m_po] == '-'))
    {
      advance();
    }
    if (consume_digits() == 0)
    {
      m_last_error = "invalid number (no digits for exponent)";
      return Value();
    }
  }
  try
  {
    Value node = Value(std::stod(std::string(&m_text[start], m_po - start)));
    return node;
  }
  catch (...)
  {
    // e.g. std::stod rejects magnitudes a double cannot represent; record
    // the failure so callers do not see a null Value with no explanation
    m_last_error = "invalid number (conversion failed)";
    return Value();
  }
}
//-------------------------------------------------------------------------
// Reports whether the characters at sub_str_unknown start with str_known.
// str_known must be a null-terminated string; comparison stops at its
// terminator or at the first mismatching character.
inline bool sub_str_eq(char* sub_str_unknown, const char* str_known)
{
  const char* probe = sub_str_unknown;
  for (const char* expected = str_known; *expected != '\0';
       ++expected, ++probe)
  {
    if (*probe != *expected)
    {
      return false;
    }
  }
  return true;
}
//-------------------------------------------------------------------------
// Matches the run of ASCII letters at the cursor against the three valid
// literals, all lower case: false, null, true. On a match the cursor moves
// past the word; otherwise "invalid literal" is recorded.
// TODO: need to refactor code to support null return (currently it is
// treated as an error)
Value parse_literal()
{
  // measure the run of letters starting at the cursor
  size_t word_len = 0;
  while (m_po + word_len < m_text.size())
  {
    const char letter = m_text[m_po + word_len];
    const bool is_upper = (letter >= 'A' && letter <= 'Z');
    const bool is_lower = (letter >= 'a' && letter <= 'z');
    if (!is_upper && !is_lower) break;
    ++word_len;
  }

  for (size_t k = 0; k < N_LITERALS; ++k)
  {
    // cheap length comparison first, character comparison second
    if (literal_lens[k] != word_len) continue;
    if (!sub_str_eq(&m_text[m_po], literals[k])) continue;

    m_po += word_len;
    m_col += word_len;
    const std::string matched(literals[k]);
    if (matched == "true") return Value(true);
    if (matched == "false") return Value(false);
    // default to null
    return Value();
  }

  m_last_error = "invalid literal";
  return Value();
}
//-------------------------------------------------------------------------
/// Parses a '{'-delimited list of "key" : value members starting at the
/// cursor and returns it as an object Value. Returns a null Value when the
/// cursor is not on '{' or when the object is malformed; in the malformed
/// case m_last_error describes the problem (for a trailing comma the
/// line/column counters are moved back to that comma).
Value parse_object()
{
  if (m_po >= m_text.size()) return Value();
  if (m_text[m_po] != '{') return Value();
  m_po++;
  m_col++;
  // location of the most recent comma, restored when it proves to be trailing
  size_t line_prev = m_line;
  size_t col_prev = m_col;
  bool trailing_comma = false;
  Value parent = Value(DataObject());
  // the loop increment steps over the ',' that precedes the next member
  for (; m_po < m_text.size(); m_po++, m_col++)
  {
    skip_whitespace();
    if (m_po >= m_text.size() || m_text[m_po] == '}')
    {
      if (trailing_comma)
      {
        m_last_error = "trailing comma in object";
        m_line = line_prev;
        m_col = col_prev;
        return Value();
      }
      break;
    }
    // parse key
    std::string key = parse_string_contents();
    if (m_last_error != "")
    {
      if (trailing_comma)
      {
        m_last_error = "trailing comma on invalid key in object";
        m_line = line_prev;
        m_col = col_prev;
      }
      return Value();
    }
    skip_whitespace();
    // parse ':'
    if (m_po >= m_text.size() || m_text[m_po] != ':')
    {
      m_last_error = "no ':' following key in object";
      return Value();
    }
    m_po++;
    m_col++;
    // parse value
    Value child = parse_value();
    if (child.is_null())
    {
      if (m_last_error == "")
      {
        m_last_error = "missing value in object";
      }
      if (trailing_comma)
      {
        m_last_error = "trailing comma in object";
        m_line = line_prev;
        m_col = col_prev;
      }
      return Value();
    }
    trailing_comma = false;
    parent.as_object()[key] = child;
    skip_whitespace();
    // Input ended right after a member: leave the loop so the check below
    // reports the missing '}' instead of reading past the end of the text.
    if (m_po >= m_text.size()) break;
    const char ch = m_text[m_po];
    if (ch == ',')
    {
      trailing_comma = true;
      line_prev = m_line;
      col_prev = m_col;
      continue;
    }
    if (ch == '}') break;
    m_last_error = "invalid character in object";
    return Value();
  }
  if (m_po >= m_text.size() || m_text[m_po] != '}')
  {
    m_last_error = "no closing curly bracket '}' for object";
    return Value();
  }
  // consume the closing brace
  m_po++;
  m_col++;
  return parent;
}
//-------------------------------------------------------------------------
/// Validates and steps over one backslash escape sequence at the cursor.
/// Every consumed character also increments *len. Returns true when a
/// complete escape (\" \\ \/ \b \f \n \r \t or \u + 4 hex digits) was
/// consumed. Returns false otherwise; m_last_error is set unless the cursor
/// was simply not on a backslash.
inline bool parse_escape_seq(size_t* len)
{
  if (m_po >= m_text.size() || m_text[m_po] != '\\') return false;

  // consume one character and account for it in the caller's length
  auto take = [&]() {
    ++m_po;
    ++m_col;
    ++(*len);
  };

  take();  // the backslash
  if (m_po >= m_text.size())
  {
    m_last_error = "incomplete unicode character escape sequence in string";
    return false;
  }

  switch (m_text[m_po])
  {
    case '"':
    case '\\':
    case '/':
    case 'b':
    case 'f':
    case 'n':
    case 'r':
    case 't':
      take();
      return true;
    case 'u':
      break;  // handled below
    default:
      m_last_error = "invalid escape sequence in string";
      return false;
  }

  take();  // the 'u'
  // parse 4-digit unicode character escape sequence
  size_t hex_seen = 0;
  while (m_po < m_text.size() && hex_seen < 4)
  {
    const char digit = m_text[m_po];
    const bool is_hex = (digit >= '0' && digit <= '9') ||
                        (digit >= 'A' && digit <= 'F') ||
                        (digit >= 'a' && digit <= 'f');
    if (!is_hex)
    {
      m_last_error = "invalid unicode character escape sequence in string";
      return false;
    }
    take();
    ++hex_seen;
  }
  if (hex_seen < 4)
  {
    m_last_error = "incomplete unicode character escape sequence in string";
    return false;
  }
  return true;
}
//-------------------------------------------------------------------------
// parse a quoted string from m_text
// used for string values and object keys
// processes and discards leading and trailing quotes
// escape sequences are validated but kept verbatim in the returned text
// on success, returns string contents without quotes
// on error, returns empty string and sets m_last_error
std::string parse_string_contents()
{
  if (m_po >= m_text.size() || m_text[m_po] != '"')
  {
    m_last_error = "string missing opening quote";
    return "";
  }
  m_po++;
  m_col++;
  // len counts the raw characters seen since the opening quote
  for (size_t len = 0; m_po < m_text.size();)
  {
    const char ch = m_text[m_po];
    // disallow control characters <= 0x1f and extended ascii >= 0x80;
    // compare as unsigned so the result does not depend on whether plain
    // char is signed on the target platform
    const unsigned char code = static_cast<unsigned char>(ch);
    if (code <= 0x1f || code >= 0x80)
    {
      m_last_error = "invalid character in string";
      return "";
    }
    if (ch == '\\')
    {
      // parse_escape_seq advances the cursor and len on success
      if (!parse_escape_seq(&len)) return "";
      continue;
    }
    if (ch == '"')
    {
      m_po++;
      m_col++;
      // the cursor is now one past the closing quote
      return std::string(&m_text[m_po - 1 - len], len);
    }
    m_po++;
    m_col++;
    len++;
  }
  m_last_error = "string missing closing quote";
  return "";
}
//-------------------------------------------------------------------------
/// Parses a quoted string at the cursor and wraps its contents in a Value.
/// Yields a null Value whenever an error message is recorded.
Value parse_string()
{
  std::string contents = parse_string_contents();
  return (m_last_error == "") ? Value(contents) : Value();
}
//-------------------------------------------------------------------------
Value parse_value()
{
skip_whitespace();
char ch = m_text[m_po];
Value node;