807 lines
18 KiB
C++
807 lines
18 KiB
C++
#pragma once
|
|
|
|
#include <vector>
|
|
|
|
#include "StarUnicode.hpp"
|
|
|
|
namespace Star {
|
|
|
|
// Receiver for a stream of JSON tokens.  JsonParser reports everything it
// reads through this interface, and JsonWriter implements it to produce JSON
// text.  All text is passed as a pointer to UTF-32 characters plus a length.
struct JsonStream {
  virtual ~JsonStream() = default;

  // Object structure: a key is reported before the value that belongs to it.
  virtual void beginObject() = 0;
  virtual void objectKey(char32_t const* key, size_t length) = 0;
  virtual void endObject() = 0;

  // Array structure.
  virtual void beginArray() = 0;
  virtual void endArray() = 0;

  // Scalar values.  Numbers are passed as their textual representation.
  virtual void putString(char32_t const* text, size_t length) = 0;
  virtual void putDouble(char32_t const* text, size_t length) = 0;
  virtual void putInteger(char32_t const* text, size_t length) = 0;
  virtual void putBoolean(bool value) = 0;
  virtual void putNull() = 0;

  // Whitespace (including comments, as read by JsonParser) and punctuation
  // found between tokens.
  virtual void putWhitespace(char32_t const* text, size_t length) = 0;
  virtual void putColon() = 0;
  virtual void putComma() = 0;
};
|
|
|
|
// Selects which grammar JsonParser::parse applies to its input.
enum class JsonParseType : uint8_t {
  // The input must be a single top-level object or array.
  Top = 0,
  // The input may be any single JSON value.
  Value = 1,
  // The input is a run of values reported as the elements of one array; the
  // surrounding [] and the commas between elements are not needed.
  Sequence = 2
};
|
|
|
|
// Will parse JSON and output to a given JsonStream. Parses an *extension* to
|
|
// the JSON format that includes comments.
|
|
template <typename InputIterator>
|
|
class JsonParser {
|
|
public:
|
|
// Creates a parser that reports everything it reads to `stream`.  The stream
// is held by reference, so it must outlive the parser.
//
// Every member is given a defined value here (not only the ones parse()
// leaves untouched) so that a parser which has not parsed anything yet can be
// copied or inspected without reading indeterminate values.
JsonParser(JsonStream& stream)
  : m_char(0), m_current(), m_end(), m_line(0), m_column(0), m_error(nullptr), m_stream(stream) {}
|
|
// Virtual so that a class derived from JsonParser is destroyed correctly when
// deleted through a JsonParser pointer.
virtual ~JsonParser() = default;
|
|
|
|
// Does not throw. On error, returned iterator will not be equal to end, and
|
|
// error() will be non-null. Set fragment to true to parse any JSON type
|
|
// rather than just object or array.
|
|
InputIterator parse(InputIterator begin, InputIterator end, JsonParseType parseType = JsonParseType::Top) {
|
|
init(begin, end);
|
|
|
|
try {
|
|
white();
|
|
if (parseType == JsonParseType::Top)
|
|
top();
|
|
else if (parseType == JsonParseType::Value)
|
|
value();
|
|
else if (parseType == JsonParseType::Sequence)
|
|
sequence();
|
|
white();
|
|
} catch (ParsingException const&) {
|
|
}
|
|
|
|
return m_current;
|
|
}
|
|
|
|
// Human readable parsing error, does not include line or column info.
|
|
char const* error() const {
|
|
return m_error;
|
|
}
|
|
|
|
// One-based line number of the parser's current position (the internal
// counter is zero-based).
size_t line() const {
  size_t const zeroBasedLine = m_line;
  return zeroBasedLine + 1;
}
|
|
|
|
// One-based column number of the parser's current position (the internal
// counter is zero-based).
size_t column() const {
  size_t const zeroBasedColumn = m_column;
  return zeroBasedColumn + 1;
}
|
|
|
|
private:
|
|
// Buffer of UTF-32 characters used while assembling strings and whitespace.
using CharArray = std::basic_string<char32_t>;
|
|
|
|
// Internal signal thrown to unwind out of a parse once an error has been
// recorded.  It carries no data of its own.
struct ParsingException {};
|
|
|
|
void top() {
|
|
switch (m_char) {
|
|
case '{':
|
|
object();
|
|
break;
|
|
case '[':
|
|
array();
|
|
break;
|
|
default:
|
|
error("expected JSON object or array at top level");
|
|
return;
|
|
}
|
|
}
|
|
|
|
void value() {
|
|
switch (m_char) {
|
|
case '{':
|
|
object();
|
|
break;
|
|
case '[':
|
|
array();
|
|
break;
|
|
case '"':
|
|
string();
|
|
break;
|
|
case '-':
|
|
number();
|
|
break;
|
|
case 0:
|
|
error("unexpected end of stream parsing value");
|
|
return;
|
|
default:
|
|
m_char >= '0' && m_char <= '9' ? number() : word();
|
|
break;
|
|
}
|
|
}
|
|
|
|
void object() {
|
|
if (m_char != '{')
|
|
error("bad object, should be '{'");
|
|
|
|
next();
|
|
m_stream.beginObject();
|
|
|
|
white();
|
|
if (m_char == '}') {
|
|
next();
|
|
m_stream.endObject();
|
|
return;
|
|
}
|
|
|
|
while (true) {
|
|
CharArray s = parseString();
|
|
m_stream.objectKey(s.c_str(), s.length());
|
|
|
|
white();
|
|
if (m_char != ':')
|
|
error("bad object, should be ':'");
|
|
next();
|
|
m_stream.putColon();
|
|
white();
|
|
|
|
value();
|
|
|
|
white();
|
|
if (m_char == '}') {
|
|
next();
|
|
m_stream.endObject();
|
|
return;
|
|
} else if (m_char == ',') {
|
|
next();
|
|
m_stream.putComma();
|
|
white();
|
|
} else if (m_char == 0) {
|
|
error("unexpected end of stream parsing object.");
|
|
} else {
|
|
error("bad object, should be '}' or ','");
|
|
}
|
|
}
|
|
}
|
|
|
|
void array() {
|
|
if (m_char == '[') {
|
|
next();
|
|
m_stream.beginArray();
|
|
white();
|
|
if (m_char == ']') {
|
|
next();
|
|
m_stream.endArray();
|
|
} else {
|
|
while (true) {
|
|
value();
|
|
white();
|
|
if (m_char == ']') {
|
|
next();
|
|
m_stream.endArray();
|
|
break;
|
|
} else if (m_char == ',') {
|
|
next();
|
|
m_stream.putComma();
|
|
white();
|
|
} else if (m_char == 0) {
|
|
error("unexpected end of stream parsing array.");
|
|
} else {
|
|
error("bad array, should be ',' or ']'");
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
error("bad array");
|
|
}
|
|
}
|
|
|
|
void sequence() {
|
|
m_stream.beginArray();
|
|
while (true) {
|
|
if (isSpace(m_char)) {
|
|
next();
|
|
continue;
|
|
} else if (m_char == '{')
|
|
object();
|
|
else if (m_char == '[')
|
|
array();
|
|
else if (m_char == '"')
|
|
string();
|
|
else if (m_char == '-')
|
|
number();
|
|
else if (m_char == 0)
|
|
break;
|
|
else {
|
|
auto begin = m_current;
|
|
auto b_char = m_char;
|
|
if (m_char >= '0' && m_char <= '9') {
|
|
try {
|
|
number(true);
|
|
}
|
|
catch (ParsingException const&) {
|
|
m_current = begin;
|
|
m_char = b_char;
|
|
}
|
|
if (m_error == nullptr) {
|
|
next();
|
|
continue;
|
|
}
|
|
}
|
|
m_error = nullptr;
|
|
if (m_char == 't' || m_char == 'f' || m_char == 'n') {
|
|
try {
|
|
word(true);
|
|
}
|
|
catch (ParsingException const&) {
|
|
m_current = begin;
|
|
m_char = b_char;
|
|
}
|
|
if (m_error == nullptr) {
|
|
next();
|
|
continue;
|
|
}
|
|
}
|
|
m_error = nullptr;
|
|
// well, shit. do a simple string parse until we hit whitespace
|
|
// no fancy things like \n, do a proper string if you want that
|
|
CharArray str;
|
|
do {
|
|
str += m_char;
|
|
next();
|
|
} while (m_char != 0 && !isSpace(m_char));
|
|
m_stream.putString(str.c_str(), str.length());
|
|
}
|
|
next();
|
|
}
|
|
m_stream.endArray();
|
|
}
|
|
|
|
void string() {
|
|
CharArray s = parseString();
|
|
m_stream.putString(s.c_str(), s.length());
|
|
}
|
|
|
|
void number(bool seq = false) {
|
|
std::basic_string<char32_t> buffer;
|
|
bool isDouble = false;
|
|
|
|
if (m_char == '-') {
|
|
buffer += '-';
|
|
next();
|
|
}
|
|
|
|
if (m_char == '0') {
|
|
buffer += '0';
|
|
next();
|
|
} else if (m_char > '0' && m_char <= '9') {
|
|
while (m_char >= '0' && m_char <= '9') {
|
|
buffer += m_char;
|
|
next();
|
|
}
|
|
} else {
|
|
error("bad number, must start with digit");
|
|
}
|
|
|
|
if (m_char == '.') {
|
|
isDouble = true;
|
|
buffer += '.';
|
|
next();
|
|
while (m_char >= '0' && m_char <= '9') {
|
|
buffer += m_char;
|
|
next();
|
|
}
|
|
}
|
|
|
|
if (m_char == 'e' || m_char == 'E') {
|
|
isDouble = true;
|
|
buffer += m_char;
|
|
next();
|
|
if (m_char == '-' || m_char == '+') {
|
|
buffer += m_char;
|
|
next();
|
|
}
|
|
while (m_char >= '0' && m_char <= '9') {
|
|
buffer += m_char;
|
|
next();
|
|
}
|
|
}
|
|
|
|
if (seq && m_char != 0 && !isSpace(m_char))
|
|
error("unexpected character after number");
|
|
|
|
if (isDouble) {
|
|
try {
|
|
m_stream.putDouble(buffer.c_str(), buffer.length());
|
|
} catch (std::exception const& e) {
|
|
error("bad double");
|
|
}
|
|
} else {
|
|
try {
|
|
m_stream.putInteger(buffer.c_str(), buffer.length());
|
|
} catch (std::exception const& e) {
|
|
error("bad integer");
|
|
}
|
|
}
|
|
}
|
|
|
|
// true, false, or null
|
|
void word(bool seq = false) {
|
|
switch (m_char) {
|
|
case 't':
|
|
next();
|
|
check('r');
|
|
check('u');
|
|
check('e');
|
|
if (seq && m_char != 0 && !isSpace(m_char))
|
|
error("unexpected character after word");
|
|
m_stream.putBoolean(true);
|
|
break;
|
|
case 'f':
|
|
next();
|
|
check('a');
|
|
check('l');
|
|
check('s');
|
|
check('e');
|
|
if (seq && m_char != 0 && !isSpace(m_char))
|
|
error("unexpected character after word");
|
|
m_stream.putBoolean(false);
|
|
break;
|
|
case 'n':
|
|
next();
|
|
check('u');
|
|
check('l');
|
|
check('l');
|
|
if (seq && m_char != 0 && !isSpace(m_char))
|
|
error("unexpected character after word");
|
|
m_stream.putNull();
|
|
break;
|
|
default:
|
|
error("unexpected character parsing word");
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Parses a double-quoted string, starting at the opening quote, and returns
// its contents with all escape sequences decoded.  On return the closing
// quote has been consumed.
//
// Supported escapes are \" \\ \/ \b \f \n \r \t and \uXXXX.  When a \uXXXX
// escape is a UTF-16 lead surrogate it must be followed by a second \uXXXX
// escape, and the two are combined through hexStringToUtf32.
//
// Each of the four characters of a \uXXXX escape is checked to be an ASCII
// hexadecimal digit before it is narrowed to char.  A truncated or malformed
// escape is therefore reported as a parse error, and a non-ASCII character
// whose low byte happens to look like a hex digit is no longer accepted.
// NOTE(review): hexStringToUtf32 is declared outside this file; if it can
// throw (for example on an invalid surrogate pair) that exception is not
// converted into a parse error here - confirm.
CharArray parseString() {
  if (m_char != '"')
    error("bad string, should be '\"'");
  next();

  // Reads the four hexadecimal digits of a \uXXXX escape.
  auto readHexQuad = [this]() {
    std::string digits;
    for (int i = 0; i < 4; ++i) {
      bool const isHexDigit = (m_char >= '0' && m_char <= '9')
          || (m_char >= 'a' && m_char <= 'f')
          || (m_char >= 'A' && m_char <= 'F');
      if (m_char == 0)
        error("unexpected end of stream reading string!");
      if (!isHexDigit)
        error("bad unicode escape in string, expected four hex digits");
      digits.push_back(static_cast<char>(m_char));
      next();
    }
    return digits;
  };

  CharArray str;

  // The loop is left only by returning at the closing quote or by error().
  while (true) {
    if (m_char == '\\') {
      next();
      if (m_char == 'u') {
        next();
        std::string hexString = readHexQuad();
        char32_t codepoint = hexStringToUtf32(hexString);
        if (isUtf16LeadSurrogate(codepoint)) {
          // A lead surrogate is only half of a character; the trail surrogate
          // has to follow as another \u escape.
          check('\\');
          check('u');
          hexString = readHexQuad();
          codepoint = hexStringToUtf32(hexString, codepoint);
        }
        str += codepoint;
      } else {
        switch (m_char) {
          case '"':
            str += '"';
            break;
          case '\\':
            str += '\\';
            break;
          case '/':
            str += '/';
            break;
          case 'b':
            str += '\b';
            break;
          case 'f':
            str += '\f';
            break;
          case 'n':
            str += '\n';
            break;
          case 'r':
            str += '\r';
            break;
          case 't':
            str += '\t';
            break;
          default:
            error("bad string escape character");
            break;
        }
        next();
      }
    } else if (m_char == '\"') {
      next();
      return str;
    } else if (m_char == 0) {
      error("unexpected end of stream reading string!");
    } else {
      str += m_char;
      next();
    }
  }
}
|
|
|
|
// Checks current char then moves on to the next one
|
|
void check(char32_t c) {
|
|
if (m_char == 0)
|
|
error("unexpected end of stream parsing word");
|
|
if (m_char != c)
|
|
error("unexpected character in word");
|
|
next();
|
|
}
|
|
|
|
// Prepares the parser for a new input range: resets the position counters,
// clears the recorded error and loads the first character (0 when the range
// is empty).
void init(InputIterator begin, InputIterator end) {
  m_current = begin;
  m_end = end;
  m_line = 0;
  m_column = 0;

  // Forget the error of an earlier parse.  Without this, a parser that is
  // used again would keep reporting the old message through error() even
  // when the new input parses successfully.
  m_error = nullptr;

  if (m_current != m_end)
    m_char = *m_current;
  else
    m_char = 0;
}
|
|
|
|
// Consumes next character.
|
|
void next() {
|
|
if (m_current == m_end)
|
|
return;
|
|
|
|
if (m_char == '\n') {
|
|
++m_line;
|
|
m_column = 0;
|
|
} else {
|
|
++m_column;
|
|
}
|
|
++m_current;
|
|
|
|
if (m_current != m_end)
|
|
m_char = *m_current;
|
|
else
|
|
m_char = 0;
|
|
}
|
|
|
|
// Will skip whitespace and comments between tokens.
|
|
void white() {
|
|
CharArray buffer;
|
|
while (m_current != m_end) {
|
|
if (m_char == '/') {
|
|
// Always consume '/' found in whitespace, because that is never valid
|
|
// JSON (other than comments)
|
|
buffer += m_char;
|
|
next();
|
|
if (m_current != m_end && m_char == '/') {
|
|
// eat "/"
|
|
buffer += m_char;
|
|
next();
|
|
|
|
// Read '//' style comments up until eol/eof.
|
|
while (m_current != m_end && m_char != '\n') {
|
|
buffer += m_char;
|
|
next();
|
|
}
|
|
} else if (m_current != m_end && m_char == '*') {
|
|
// eat "*"
|
|
buffer += m_char;
|
|
next();
|
|
|
|
// Read '/*' style comments up until '*/'.
|
|
while (m_current != m_end) {
|
|
if (m_char == '*') {
|
|
buffer += m_char;
|
|
next();
|
|
if (m_char == '/') {
|
|
buffer += m_char;
|
|
next();
|
|
break;
|
|
}
|
|
} else {
|
|
buffer += m_char;
|
|
next();
|
|
if (m_current == m_end)
|
|
error("/* comment has no matching */");
|
|
}
|
|
}
|
|
} else {
|
|
// The only allowed characters following / in whitespace are / and *
|
|
error("/ character in whitespace is not followed by '/' or '*', invalid comment");
|
|
return;
|
|
}
|
|
} else if (isSpace(m_char)) {
|
|
buffer += m_char;
|
|
next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
if (buffer.size() != 0)
|
|
m_stream.putWhitespace(buffer.c_str(), buffer.length());
|
|
}
|
|
|
|
void error(const char* msg) {
|
|
m_error = msg;
|
|
throw ParsingException();
|
|
}
|
|
|
|
// True for the characters this parser treats as whitespace: the four
// whitespace characters allowed by JSON plus U+FEFF.
bool isSpace(char32_t c) {
  switch (c) {
    case 0x20:   // space
    case 0x09:   // horizontal tab
    case 0x0a:   // newline
    case 0x0d:   // carriage return
    case 0xfeff: // BOM or ZWNBSP
      return true;
    default:
      return false;
  }
}
|
|
|
|
char32_t m_char;
|
|
InputIterator m_current;
|
|
InputIterator m_end;
|
|
size_t m_line;
|
|
size_t m_column;
|
|
const char* m_error;
|
|
JsonStream& m_stream;
|
|
};
|
|
|
|
// Write JSON through JsonStream interface.
|
|
template <typename OutputIterator>
|
|
class JsonWriter : public JsonStream {
|
|
public:
|
|
JsonWriter(OutputIterator out, unsigned pretty = 0)
|
|
: m_out(out), m_pretty(pretty) {}
|
|
|
|
void beginObject() {
|
|
startValue();
|
|
pushState(Object);
|
|
write('{');
|
|
}
|
|
|
|
void objectKey(char32_t const* s, size_t len) {
|
|
if (currentState() == ObjectElement) {
|
|
if (m_pretty)
|
|
write('\n');
|
|
indent();
|
|
} else {
|
|
pushState(ObjectElement);
|
|
if (m_pretty)
|
|
write('\n');
|
|
indent();
|
|
}
|
|
|
|
write('"');
|
|
char32_t c = *s;
|
|
while (c && len) {
|
|
write(c);
|
|
c = *++s;
|
|
--len;
|
|
}
|
|
write('"');
|
|
if (m_pretty)
|
|
write(' ');
|
|
}
|
|
|
|
void endObject() {
|
|
bool isNotEmpty = currentState() == ObjectElement;
|
|
popState(Object);
|
|
if (isNotEmpty) {
|
|
if (m_pretty)
|
|
write('\n');
|
|
indent();
|
|
}
|
|
write('}');
|
|
}
|
|
|
|
void beginArray() {
|
|
startValue();
|
|
pushState(Array);
|
|
write('[');
|
|
}
|
|
|
|
void endArray() {
|
|
popState(Array);
|
|
write(']');
|
|
}
|
|
|
|
void putString(char32_t const* s, size_t len) {
|
|
startValue();
|
|
|
|
write('"');
|
|
char32_t c = *s;
|
|
while (c && (len > 0)) {
|
|
if (!isPrintable(c)) {
|
|
switch (c) {
|
|
case '"':
|
|
write('\\');
|
|
write('"');
|
|
break;
|
|
case '\\':
|
|
write('\\');
|
|
write('\\');
|
|
break;
|
|
case '\b':
|
|
write('\\');
|
|
write('b');
|
|
break;
|
|
case '\f':
|
|
write('\\');
|
|
write('f');
|
|
break;
|
|
case '\n':
|
|
write('\\');
|
|
write('n');
|
|
break;
|
|
case '\r':
|
|
write('\\');
|
|
write('r');
|
|
break;
|
|
case '\t':
|
|
write('\\');
|
|
write('t');
|
|
break;
|
|
default:
|
|
auto hex = hexStringFromUtf32(c);
|
|
if (hex.size() == 4) {
|
|
write('\\');
|
|
write('u');
|
|
for (auto c : hex) {
|
|
write(c);
|
|
}
|
|
} else if (hex.size() == 8) {
|
|
write('\\');
|
|
write('u');
|
|
for (auto c : hex.substr(0, 4)) {
|
|
write(c);
|
|
}
|
|
write('\\');
|
|
write('u');
|
|
for (auto c : hex.substr(4)) {
|
|
write(c);
|
|
}
|
|
} else {
|
|
throw UnicodeException("Internal Error: Received invalid unicode hex from hexStringFromUtf32.");
|
|
}
|
|
break;
|
|
}
|
|
} else {
|
|
write(c);
|
|
}
|
|
c = *++s;
|
|
--len;
|
|
}
|
|
write('"');
|
|
}
|
|
|
|
void putDouble(char32_t const* s, size_t len) {
|
|
startValue();
|
|
for (size_t i = 0; i < len; ++i)
|
|
write(s[i]);
|
|
}
|
|
|
|
void putInteger(char32_t const* s, size_t len) {
|
|
startValue();
|
|
for (size_t i = 0; i < len; ++i)
|
|
write(s[i]);
|
|
}
|
|
|
|
void putBoolean(bool b) {
|
|
startValue();
|
|
if (b) {
|
|
write('t');
|
|
write('r');
|
|
write('u');
|
|
write('e');
|
|
} else {
|
|
write('f');
|
|
write('a');
|
|
write('l');
|
|
write('s');
|
|
write('e');
|
|
}
|
|
}
|
|
|
|
void putNull() {
|
|
startValue();
|
|
write('n');
|
|
write('u');
|
|
write('l');
|
|
write('l');
|
|
}
|
|
|
|
void putWhitespace(char32_t const* s, size_t len) {
|
|
// If m_pretty is true, extra spurious whitespace will be inserted.
|
|
for (size_t i = 0; i < len; ++i)
|
|
write(s[i]);
|
|
}
|
|
|
|
void putColon() {
|
|
write(':');
|
|
if (m_pretty)
|
|
write(' ');
|
|
}
|
|
|
|
void putComma() {
|
|
write(',');
|
|
}
|
|
|
|
private:
|
|
enum State {
|
|
Top,
|
|
Object,
|
|
ObjectElement,
|
|
Array,
|
|
ArrayElement
|
|
};
|
|
|
|
// Handles separating array elements if currently adding to an array
|
|
void startValue() {
|
|
if (currentState() == ArrayElement) {
|
|
if (m_pretty)
|
|
write(' ');
|
|
} else if (currentState() == Array) {
|
|
pushState(ArrayElement);
|
|
}
|
|
}
|
|
|
|
void indent() {
|
|
for (unsigned i = 0; i < m_state.size() / 2; ++i) {
|
|
for (unsigned j = 0; j < m_pretty; ++j) {
|
|
write(' ');
|
|
}
|
|
}
|
|
}
|
|
|
|
// Push state onto stack.
|
|
void pushState(State state) {
|
|
m_state.push_back(state);
|
|
}
|
|
|
|
// Pop state stack down to given state.
|
|
void popState(State state) {
|
|
while (true) {
|
|
if (m_state.empty())
|
|
return;
|
|
|
|
State last = currentState();
|
|
m_state.pop_back();
|
|
if (last == state)
|
|
return;
|
|
}
|
|
}
|
|
|
|
State currentState() {
|
|
if (m_state.empty())
|
|
return Top;
|
|
else
|
|
return *prev(m_state.end());
|
|
}
|
|
|
|
void write(char32_t c) {
|
|
*m_out = c;
|
|
++m_out;
|
|
}
|
|
|
|
// Only chars that are unescaped according to JSON spec.
|
|
bool isPrintable(char32_t c) {
|
|
return (c >= 0x20 && c <= 0x21) || (c >= 0x23 && c <= 0x5b) || (c >= 0x5d && c <= 0x10ffff);
|
|
}
|
|
|
|
OutputIterator m_out;
|
|
unsigned m_pretty;
|
|
std::vector<State> m_state;
|
|
};
|
|
|
|
}
|