Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
LEFEBVREJP email
radix
Commits
1be8397e
Commit
1be8397e
authored
Mar 30, 2020
by
Norby, Tom
Browse files
Bring JSONParser to radix.
parent
2ce6ce48
Pipeline
#95524
failed with stages
in 17 minutes and 26 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
radixbug/bug.hh
View file @
1be8397e
...
@@ -143,6 +143,14 @@
...
@@ -143,6 +143,14 @@
throw std::runtime_error(stream.str()); \
throw std::runtime_error(stream.str()); \
}
}
/// Throws std::runtime_error whose message is
/// "<file>:<line> : <msg> is not implemented. " followed by a newline.
/// `msg` is inserted with operator<<, so any streamable expression works.
/// The expansion is a bare brace block, not a do { } while (0) statement.
#define radix_not_implemented(msg) \
{ \
std::ostringstream stream; \
stream << __FILE__ << ":" << __LINE__ << " : " << msg \
<< " is not implemented. " << std::endl; \
throw std::runtime_error(stream.str()); \
}
/// set default timing to off
/// set default timing to off
#ifndef RADIX_TIMING
#ifndef RADIX_TIMING
#define RADIX_TIMING 0
#define RADIX_TIMING 0
...
...
radixcore/CMakeLists.txt
View file @
1be8397e
...
@@ -3,13 +3,16 @@ TRIBITS_SUBPACKAGE(core)
...
@@ -3,13 +3,16 @@ TRIBITS_SUBPACKAGE(core)
TRIBITS_CONFIGURE_FILE
(
visibility.hh
)
TRIBITS_CONFIGURE_FILE
(
visibility.hh
)
SET
(
HEADERS
SET
(
HEADERS
json.hh
system.hh
system.hh
stringfunctions.i.hh
stringfunctions.i.hh
stringfunctions.hh
stringfunctions.hh
value.hh
)
)
SET
(
SOURCES
SET
(
SOURCES
system.cc
system.cc
stringfunctions.cc
stringfunctions.cc
value.cc
)
)
TRIBITS_ADD_LIBRARY
(
radixcore
TRIBITS_ADD_LIBRARY
(
radixcore
...
...
radixcore/json.hh
0 → 100644
View file @
1be8397e
#ifndef RADIX_RADIXCORE_JSON_HH_
#define RADIX_RADIXCORE_JSON_HH_
#include <cstring>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <vector>
#include "value.hh"
//-----------------------------------------------------------------------------
// see www.json.org for parsing grammar
namespace
radix
{
class
JSONParser
{
public:
//-------------------------------------------------------------------------
// Fills the table of literal names recognised by parse_literal() and caches
// the length of every entry in literal_lens.
JSONParser()
{
  literals[2] = "true";
  literals[1] = "null";
  literals[0] = "false";

  size_t idx = 0;
  while (idx < N_LITERALS)
  {
    literal_lens[idx] = strlen(literals[idx]);
    ++idx;
  }
}
//-------------------------------------------------------------------------
// Gives mutable access to the root value held in m_root.
Value& root()
{
  Value& root_value = m_root;
  return root_value;
}
//-------------------------------------------------------------------------
// Formats the stored error message together with the current line and column
// counters as "<message> at line <line> column <column>".
std::string last_error()
{
  std::string report = m_last_error;
  report += " at line ";
  report += std::to_string(m_line);
  report += " column ";
  report += std::to_string(m_col);
  return report;
}
//-------------------------------------------------------------------------
// there can be only one value at root node
// Parses the buffered text (m_text) into m_root.
// Resets the cursor, the line/column counters and the error message first.
// Returns false when the root value comes back null or when anything other
// than whitespace follows the root value; returns true otherwise.
bool parse()
{
  // start from a clean state
  m_last_error = "";
  m_col = 1;
  m_line = 1;
  m_po = 0;

  m_root = parse_value();
  if (m_root.is_null())
  {
    return false;
  }

  // only whitespace is allowed after the single root value
  skip_whitespace();
  const bool consumed_everything = (m_po == m_text.size());
  if (!consumed_everything)
  {
    m_last_error = "unexpected trailing character(s)";
  }
  return consumed_everything;
}
//-------------------------------------------------------------------------
// Loads the complete content of in_stream into m_text and parses it.
//
// The stream size is obtained by seeking to the end, so the stream has to be
// seekable; afterwards it is rewound to offset 0 and read in one call.
//
// in_stream : source of the JSON text
// returns   : the result of parse() when the content was read completely;
//             false (with m_last_error set) when the size cannot be
//             determined or fewer bytes than expected could be read
bool parse_from_stream(std::istream& in_stream)
{
  in_stream.seekg(0, std::ios::end);
  const std::streamoff file_size = in_stream.tellg();

  // tellg() yields -1 when the stream is in a failed state or cannot seek;
  // that value must never reach resize()
  if (!in_stream || file_size < 0)
  {
    m_text.clear();
    m_last_error = "could not determine stream size";
    return false;
  }
  in_stream.seekg(0);

  m_text.clear();
  m_text.resize(static_cast<size_t>(file_size), '\0');
  in_stream.read(&m_text[0], static_cast<std::streamsize>(file_size));
  if (!in_stream)
  {
    // report "<bytes actually read> of <bytes expected>"
    m_last_error = "could only read " + std::to_string(in_stream.gcount()) +
                   " of " + std::to_string(file_size) + " bytes";
    return false;
  }
  return parse();
}
//-------------------------------------------------------------------------
// Opens the file named fn for binary reading and hands it to
// parse_from_stream(). Returns false with m_last_error set to
// "could not open file" when the file cannot be opened; otherwise returns
// whatever parse_from_stream() returns.
bool parse_from_file(std::string fn)
{
  const std::ios::openmode mode =
      std::ios::in | std::ios::binary | std::ios::ate;
  std::ifstream file(fn, mode);
  if (file.is_open())
  {
    const bool result = parse_from_stream(file);
    file.close();
    return result;
  }
  m_last_error = "could not open file";
  return false;
}
private:
//-------------------------------------------------------------------------
// True when the character at the current offset (m_po) is a space, a tab,
// a carriage return or a line feed. The offset itself is not range-checked
// here.
bool is_whitespace()
{
  switch (m_text[m_po])
  {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
// True when the character at the current offset (m_po) is one of
// , : [ ] { }. The offset itself is not range-checked here.
bool is_structural_character()
{
  switch (m_text[m_po])
  {
    case ',':
    case ':':
    case '[':
    case ']':
    case '{':
    case '}':
      return true;
    default:
      return false;
  }
}
//-------------------------------------------------------------------------
// Advances m_po past any run of spaces, tabs, carriage returns and line
// feeds, keeping m_line / m_col up to date. Stops at the first other
// character or at the end of the text.
void skip_whitespace()
{
  while (m_po < m_text.size())
  {
    const char current = m_text[m_po];
    if (current == ' ' || current == '\t')
    {
      ++m_col;
    }
    else if (current == '\r')
    {
      ++m_line;
      m_col = 1;
    }
    else if (current == '\n')
    {
      // a '\n' directly after '\r' belongs to the same line break, which
      // was already counted when the '\r' was consumed
      const bool follows_cr = (m_po > 0 && m_text[m_po - 1] == '\r');
      if (!follows_cr)
      {
        ++m_line;
        m_col = 1;
      }
    }
    else
    {
      break;
    }
    ++m_po;
  }
}
//-------------------------------------------------------------------------
// Parses an array starting at the current offset.
//
// Returns a null Value without touching m_last_error when the current
// character is not '[' (or the text is exhausted). Once '[' has been
// consumed, returns the populated array Value on success, or a null Value
// with m_last_error set on failure.
//
// NOTE: elements are recognised through parse_value(), and a null child is
// interpreted as "no element here"; see the TODO on parse_literal about
// null support.
Value parse_array()
{
  // Not an array at all: report "nothing parsed" the same way parse_object
  // does, with a default-constructed Value.
  if (m_po >= m_text.size() || m_text[m_po] != '[') return Value();
  m_po++;
  m_col++;

  // position of the most recent comma, used to point the "trailing comma"
  // diagnostic at the comma instead of at the closing bracket
  size_t line_prev = m_line;
  size_t col_prev  = m_col;
  bool trailing_comma = false;

  Value parent = Value(DataArray());
  for (; m_po < m_text.size(); m_po++, m_col++)
  {
    skip_whitespace();
    Value child = parse_value();
    if (child.is_null())
    {
      if (m_last_error != "")
      {
        // a nested parser already described the problem
        return Value();
      }
      if (m_po >= m_text.size())
      {
        m_last_error = "no closing bracket ']' for array";
        return Value();
      }
      if (trailing_comma)
      {
        m_last_error = "trailing comma in array";
        m_line       = line_prev;
        m_col        = col_prev;
        return Value();
      }
      if (m_text[m_po] == ']') break;  // end of the element list
      // never fail without telling the caller why
      m_last_error = "invalid value in array";
      return Value();
    }
    trailing_comma = false;
    parent.as_array().push_back(child);

    skip_whitespace();
    // text ended after an element: fall through to the closing-bracket check
    if (m_po >= m_text.size()) break;

    const char ch = m_text[m_po];
    if (ch == ',')
    {
      trailing_comma = true;
      line_prev      = m_line;
      col_prev       = m_col;
      continue;  // the loop increment steps over the comma
    }
    if (ch == ']') break;

    m_last_error = "invalid character '";
    m_last_error += ch;
    m_last_error += "' in array";
    return Value();
  }

  if (m_po >= m_text.size() || m_text[m_po] != ']')
  {
    m_last_error = "no closing bracket ']' for array";
    return Value();
  }
  // consume ']'
  m_po++;
  m_col++;
  return parent;
}
//-------------------------------------------------------------------------
// parsed to match the following regular expression:
// (-)?
// (0|([1-9][0-9]*))
// (\.[0-9]+)?
// ([Ee][+-]?[0-9]+)?
// Parses a number at the current offset and returns it as a double Value.
//
// Returns a null Value without an error message when the text is already
// exhausted. On a malformed number, returns a null Value and stores the
// reason in m_last_error. m_po / m_col are advanced past every character
// that was consumed, also on failure.
Value parse_number()
{
  if (m_po >= m_text.size()) return Value();

  const size_t start = m_po;  // offset of the first character of the number

  // steps over one character of the number
  auto advance = [this]()
  {
    ++m_po;
    ++m_col;
  };
  // consumes a run of decimal digits and returns how many were consumed
  auto consume_digits = [this]() -> size_t
  {
    size_t count = 0;
    while (m_po < m_text.size() && m_text[m_po] >= '0' && m_text[m_po] <= '9')
    {
      ++m_po;
      ++m_col;
      ++count;
    }
    return count;
  };

  // (-)?
  if (m_text[m_po] == '-')
  {
    advance();
    if (m_po >= m_text.size())
    {
      m_last_error = "invalid number (no digits after -)";
      return Value();
    }
  }

  // (0|([1-9][0-9]*))
  const char first_digit = m_text[m_po];
  if (!(first_digit >= '0' && first_digit <= '9'))
  {
    m_last_error = "invalid number (no digits)";
    return Value();
  }
  advance();
  // a leading zero stands alone; any other digit may be followed by more
  if (first_digit != '0') consume_digits();

  // (\.[0-9]+)?
  if (m_po < m_text.size() && m_text[m_po] == '.')
  {
    advance();
    if (consume_digits() == 0)
    {
      m_last_error = "invalid number (no digits after decimal)";
      return Value();
    }
  }

  // ([Ee][+-]?[0-9]+)?
  if (m_po < m_text.size() && (m_text[m_po] == 'E' || m_text[m_po] == 'e'))
  {
    advance();
    if (m_po < m_text.size() && (m_text[m_po] == '+' || m_text[m_po] == '-'))
    {
      advance();
    }
    if (consume_digits() == 0)
    {
      m_last_error = "invalid number (no digits for exponent)";
      return Value();
    }
  }

  try
  {
    return Value(std::stod(std::string(&m_text[start], m_po - start)));
  }
  catch (...)
  {
    // the text matched the grammar, so the conversion itself failed
    // (std::stod throws std::out_of_range for unrepresentable values)
    m_last_error = "invalid number (cannot be represented as a double)";
    return Value();
  }
}
//-------------------------------------------------------------------------
// str_known must be a null-terminated string
// Checks whether the characters at sub_str_unknown begin with the whole of
// str_known. Only strlen(str_known) characters are compared, and the
// comparison stops early at the first mismatch (a terminator inside
// sub_str_unknown counts as a mismatch).
inline bool sub_str_eq(char* sub_str_unknown, const char* str_known)
{
  const size_t known_len = std::strlen(str_known);
  return std::strncmp(sub_str_unknown, str_known, known_len) == 0;
}
//-------------------------------------------------------------------------
// only 3 valid literals all in lower case: false, null, true
// TODO: need to refactor code to support null return (currently it is
// treated as an error)
// Matches the run of ASCII letters at the current offset against the literal
// table. "true" and "false" produce boolean Values; "null" produces a
// default-constructed Value. On a match the cursor and column advance past
// the word. Any other word (including an empty one) sets m_last_error to
// "invalid literal" and yields a default-constructed Value.
Value parse_literal()
{
  // measure the word: consecutive characters in A-Z / a-z
  size_t word_len = 0;
  while (m_po + word_len < m_text.size())
  {
    const char c        = m_text[m_po + word_len];
    const bool is_upper = (c >= 'A' && c <= 'Z');
    const bool is_lower = (c >= 'a' && c <= 'z');
    if (!is_upper && !is_lower) break;
    ++word_len;
  }

  for (size_t k = 0; k < N_LITERALS; ++k)
  {
    // the length test comes first so the text is only inspected for a
    // candidate of exactly the right size
    if (!(literal_lens[k] == word_len &&
          sub_str_eq(&m_text[m_po], literals[k])))
    {
      continue;
    }

    m_po += word_len;
    m_col += word_len;

    const std::string matched(literals[k]);
    if (matched == "true") return Value(true);
    if (matched == "false") return Value(false);
    // default to null
    return Value();
  }

  m_last_error = "invalid literal";
  return Value();
}
//-------------------------------------------------------------------------
// Parses an object starting at the current offset.
//
// Returns a null Value without touching m_last_error when the current
// character is not '{' (or the text is exhausted). Once '{' has been
// consumed, returns the populated object Value on success, or a null Value
// with m_last_error set on failure.
//
// Keys are read with parse_string_contents() and members are stored with
// as_object()[key] = child (presumably a later duplicate key replaces the
// earlier entry; depends on DataObject).
Value parse_object()
{
  if (m_po >= m_text.size()) return Value();
  if (m_text[m_po] != '{') return Value();
  m_po++;
  m_col++;

  // position of the most recent comma, used to point "trailing comma"
  // diagnostics at the comma itself
  size_t line_prev = m_line;
  size_t col_prev  = m_col;
  bool trailing_comma = false;

  Value parent = Value(DataObject());
  for (; m_po < m_text.size(); m_po++, m_col++)
  {
    skip_whitespace();
    if (m_po >= m_text.size() || m_text[m_po] == '}')
    {
      if (trailing_comma)
      {
        m_last_error = "trailing comma in object";
        m_line       = line_prev;
        m_col        = col_prev;
        return Value();
      }
      break;
    }

    // parse key
    std::string key = parse_string_contents();
    if (m_last_error != "")
    {
      if (trailing_comma)
      {
        m_last_error = "trailing comma on invalid key in object";
        m_line       = line_prev;
        m_col        = col_prev;
      }
      return Value();
    }
    skip_whitespace();

    // parse ':'
    if (m_po >= m_text.size() || m_text[m_po] != ':')
    {
      m_last_error = "no ':' following key in object";
      return Value();
    }
    m_po++;
    m_col++;

    // parse value; whitespace is skipped first, as parse_array does before
    // each element
    skip_whitespace();
    Value child = parse_value();
    if (child.is_null())
    {
      // The key and ':' were already accepted, so this is not a trailing
      // comma: keep the nested diagnostic (and its position) if there is
      // one, otherwise report the missing value.
      if (m_last_error == "")
      {
        m_last_error = "missing value in object";
      }
      return Value();
    }
    trailing_comma           = false;
    parent.as_object()[key] = child;

    skip_whitespace();
    // text ended after a member: fall through to the closing-brace check
    if (m_po >= m_text.size()) break;

    const char ch = m_text[m_po];
    if (ch == ',')
    {
      trailing_comma = true;
      line_prev      = m_line;
      col_prev       = m_col;
      continue;  // the loop increment steps over the comma
    }
    if (ch == '}') break;

    m_last_error = "invalid character in object";
    return Value();
  }

  if (m_po >= m_text.size() || m_text[m_po] != '}')
  {
    m_last_error = "no closing curly bracket '}' for object";
    return Value();
  }
  // consume '}'
  m_po++;
  m_col++;
  return parent;
}
//-------------------------------------------------------------------------
// Consumes one backslash escape sequence at the current offset.
//
// Accepted forms: \" \\ \/ \b \f \n \r \t and \u followed by exactly four
// hexadecimal digits. The sequence is only validated and skipped; it is not
// decoded.
//
// len     : incremented once for every character consumed
// returns : true when a complete, valid sequence was consumed. false when the
//           current character is not a backslash (m_last_error untouched) or
//           when the sequence is malformed (m_last_error set).
inline bool parse_escape_seq(size_t* len)
{
  if (m_po >= m_text.size()) return false;
  if (m_text[m_po] != '\\') return false;

  // consume the backslash
  m_po++;
  m_col++;
  (*len)++;
  if (m_po >= m_text.size())
  {
    // nothing follows the backslash; this is not specific to \u escapes
    m_last_error = "incomplete escape sequence in string";
    return false;
  }

  const char kind = m_text[m_po];
  if (kind == '"' || kind == '\\' || kind == '/' || kind == 'b' ||
      kind == 'f' || kind == 'n' || kind == 'r' || kind == 't')
  {
    m_po++;
    m_col++;
    (*len)++;
    return true;
  }

  if (kind == 'u')
  {
    m_po++;
    m_col++;
    (*len)++;

    // parse 4-digit unicode character escape sequence
    size_t code_len = 0;
    for (; m_po < m_text.size() && code_len < 4;
         m_po++, m_col++, (*len)++, code_len++)
    {
      const char digit  = m_text[m_po];
      const bool is_hex = (digit >= '0' && digit <= '9') ||
                          (digit >= 'A' && digit <= 'F') ||
                          (digit >= 'a' && digit <= 'f');
      if (!is_hex)
      {
        m_last_error = "invalid unicode character escape sequence in string";
        return false;
      }
    }
    if (code_len < 4)
    {
      // the text ended before four digits were read
      m_last_error = "incomplete unicode character escape sequence in string";
      return false;
    }
    return true;
  }

  m_last_error = "invalid escape sequence in string";
  return false;
}
//-------------------------------------------------------------------------
// parse a quoted string from m_text
// used for string values and object keys
// processes and discards leading and trailing quotes
// on success, returns string contents without quotes
// on error, returns empty string and sets m_last_error
// Reads a double-quoted string at the current offset and returns the text
// between the quotes. Escape sequences are validated by parse_escape_seq()
// but are returned verbatim (not decoded).
//
// On failure returns "" and sets m_last_error; because "" is also a valid
// result, callers must inspect m_last_error to detect an error.
std::string parse_string_contents()
{
  if (m_po >= m_text.size() || m_text[m_po] != '"')
  {
    m_last_error = "string missing opening quote";
    return "";
  }
  // consume the opening quote
  m_po++;
  m_col++;

  size_t len = 0;  // number of content characters consumed so far
  while (m_po < m_text.size())
  {
    const char ch = m_text[m_po];

    // disallow control characters <= 0x1f and extended ascii >= 0x80;
    // compare as unsigned so the result does not depend on whether plain
    // char is signed on the target platform
    const unsigned char byte = static_cast<unsigned char>(ch);
    if (byte <= 0x1f || byte >= 0x80)
    {
      m_last_error = "invalid character in string";
      return "";
    }

    if (ch == '\\')
    {
      // parse_escape_seq advances the cursor and len itself
      const bool ok = parse_escape_seq(&len);
      if (!ok) return "";
      continue;
    }

    if (ch == '"')
    {
      // consume the closing quote; the content ends just before it
      m_po++;
      m_col++;
      return std::string(&m_text[m_po - 1 - len], len);
    }

    m_po++;
    m_col++;
    len++;
  }

  m_last_error = "string missing closing quote";
  return "";
}
//-------------------------------------------------------------------------
Value
parse_string
()
{
std
::
string
str
=
parse_string_contents
();
if
(
m_last_error
!=
""
)
return
Value
();
return
Value
(
str
);