osb/source/core/StarShellParser.cpp
2023-06-20 14:33:09 +10:00

209 lines
3.8 KiB
C++

#include "StarShellParser.hpp"
namespace Star {
// Builds an idle parser: every cursor is value-initialized and no quote is
// open. init() points the cursors at real input before any parsing happens.
//
// m_begin is now initialized alongside m_current and m_end so that all three
// cursors start out in the same, well-defined state (previous() compares
// m_current against m_begin).
// NOTE(review): the initializer order assumes m_begin is declared before
// m_current in the header — confirm to avoid a -Wreorder warning.
ShellParser::ShellParser()
  : m_begin(), m_current(), m_end(), m_quotedType('\0') {}
// Splits `command` into a list of Word tokens.
//
// Words are separated by unquoted whitespace; quoting and backslash escapes
// are handled by word(). An explicitly quoted empty string (`""`) that ends
// the input still yields an empty token, but separator whitespace on its own
// never does.
auto ShellParser::tokenize(String const& command) -> List<Token> {
  List<Token> tokens;
  init(command);

  while (true) {
    // Swallow separators up front. word() has no way to report "no word
    // found", so calling it with only whitespace left (e.g. "foo  " or "   ")
    // used to append a spurious empty token.
    while (notDone() && isSpace(*current()))
      next();
    if (!notDone())
      break;

    tokens.append(Token{TokenType::Word, word()});
  }
  return tokens;
}
// Convenience wrapper around tokenize(): returns only the text of each Word
// token, in order of appearance.
StringList ShellParser::tokenizeToStringList(String const& command) {
  StringList res;
  // Iterate by reference. The list returned by tokenize() is a temporary that
  // lives for the whole loop, so each token's text can be moved out of it
  // directly instead of first copying the token and then moving from the copy.
  for (auto& token : tokenize(command)) {
    if (token.type == TokenType::Word) {
      res.append(move(token.token));
    }
  }
  return res;
}
// Resets the parser state for a new input string.
// The parser keeps iterators into `string`, so the string has to stay alive
// for as long as parsing is in progress.
void ShellParser::init(String const& string) {
  // Start outside of any quoted region.
  m_quotedType = '\0';

  // Cursor range: [m_begin, m_end), with the cursor placed on the first
  // character.
  m_end = string.end();
  m_begin = string.begin();
  m_current = m_begin;
}
// Reads the next word starting at the cursor.
//
// Unquoted whitespace in front of the word is skipped; the first unquoted
// whitespace after at least one collected character ends the word and is
// consumed. Quote characters toggle quoted mode and are not part of the
// result, and a backslash escape is always taken literally. Returns whatever
// has been collected (possibly nothing) when the input runs out.
String ShellParser::word() {
  String result;

  // Every path through the body that does not return advances the cursor by
  // exactly one character, which is what the loop increment does.
  for (; notDone(); next()) {
    auto ch = *current();

    if (ch == '\\') {
      // Escaped characters bypass the whitespace and quote handling below.
      ch = parseBackslash();
    } else if (isSpace(ch) && !inQuotedString()) {
      if (result.size()) {
        // Consume the separator before handing the word back.
        next();
        return result;
      }
      continue;
    } else if (isQuote(ch)) {
      if (!inQuotedString()) {
        // Opening quote.
        m_quotedType = ch;
        continue;
      }
      if (ch == m_quotedType) {
        // Matching closing quote.
        m_quotedType = '\0';
        continue;
      }
      // The other kind of quote inside a quoted string is ordinary text.
    }

    result.append(ch);
  }

  return result;
}
// Whitespace test used to detect word separators; forwards to
// String::isSpace.
bool ShellParser::isSpace(Char letter) const {
return String::isSpace(letter);
}
// True when `letter` is one of the two recognised quote characters: the
// single quote or the double quote.
bool ShellParser::isQuote(Char letter) const {
  switch (letter) {
    case '"':
    case '\'':
      return true;
    default:
      return false;
  }
}
// True while a quote is open. m_quotedType holds the quote character that
// opened the current quoted string, or '\0' when no quote is open.
bool ShellParser::inQuotedString() const {
return m_quotedType != '\0';
}
// Peeks at the character under the cursor without moving it. Yields an empty
// Maybe once the cursor has reached the end of the input.
auto ShellParser::current() const -> Maybe<Char> {
  if (m_current != m_end)
    return *m_current;
  return {};
}
// Steps the cursor one character forward (it stays put when already at the
// end) and returns the character now under it, or an empty Maybe at the end
// of the input.
auto ShellParser::next() -> Maybe<Char> {
  if (notDone())
    ++m_current;
  return current();
}
// Steps the cursor one character back (it stays put when already at the
// beginning) and returns the character now under it.
auto ShellParser::previous() -> Maybe<Char> {
  if (m_current == m_begin)
    return current();

  --m_current;
  return current();
}
// Decodes the escape sequence whose backslash is under the cursor and returns
// the character it stands for.
//
// The cursor is left on the last character that belongs to the escape, so the
// caller has to advance once more to move past it. A backslash at the very
// end of the input is returned as a literal backslash, and an unrecognised
// escape yields the escaped character itself.
auto ShellParser::parseBackslash() -> Char {
  auto escaped = next();
  if (!escaped)
    return '\\';

  switch (*escaped) {
    case ' ': return ' ';
    case 'n': return '\n';
    case 't': return '\t';
    case 'r': return '\r';
    case 'b': return '\b';
    case 'v': return '\v';
    case 'f': return '\f';
    case 'a': return '\a';
    case '\'': return '\'';
    case '"': return '"';
    case '\\': return '\\';
    case '0': return '\0';

    case 'u': {
      Char const first = parseUnicodeEscapeSequence();
      if (!isUtf16LeadSurrogate(first))
        return first;

      // A lead surrogate is only meaningful when directly followed by a
      // second \uXXXX escape, which is decoded together with it.
      auto slash = next();
      if (slash && slash == '\\') {
        auto marker = next();
        if (marker && marker == 'u')
          return parseUnicodeEscapeSequence(first);
        // Not a 'u' after the backslash: undo the step onto it.
        previous();
      }
      // Undo the step past the first escape so that whatever follows it is
      // parsed normally, and substitute the lone surrogate.
      previous();
      return STAR_UTF32_REPLACEMENT_CHAR;
    }

    default:
      return *escaped;
  }
}
// Decodes the four hex digits of a \uXXXX escape.
//
// Expects the cursor to be on the 'u' of the escape. On success the cursor is
// left on the last hex digit.
//
// previousCodepoint is forwarded unchanged to hexStringToUtf32; parseBackslash
// passes the preceding lead surrogate here when decoding the second half of a
// surrogate pair.
//
// Returns:
//  - 'u' when the input ends directly after the 'u',
//  - STAR_UTF32_REPLACEMENT_CHAR when a non-hex character is encountered (the
//    cursor is left on that character), when fewer than four digits are
//    available, or when hexStringToUtf32 throws a UnicodeException,
//  - the result of hexStringToUtf32 otherwise.
auto ShellParser::parseUnicodeEscapeSequence(Maybe<Char> previousCodepoint) -> Char {
  // ASCII-only hex test. Char values are whole code points, and passing a
  // value that does not fit into an unsigned char to the C library's
  // isxdigit() is undefined behaviour, so it must not be used here.
  auto const isHexDigit = [](Char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  };

  String codepoint;
  while (codepoint.size() < 4) {
    auto digit = next();
    if (!digit) {
      break; // ran out of input
    }
    if (!isHexDigit(*digit)) {
      return STAR_UTF32_REPLACEMENT_CHAR;
    }
    codepoint.append(*digit);
  }

  if (!codepoint.size()) {
    return 'u';
  }
  if (codepoint.size() != 4) // exactly 4 digits are required by \u
    return STAR_UTF32_REPLACEMENT_CHAR;

  try {
    return hexStringToUtf32(codepoint.utf8(), previousCodepoint);
  } catch (UnicodeException const&) {
    return STAR_UTF32_REPLACEMENT_CHAR;
  }
}
// True while the cursor has not yet reached the end of the input.
bool ShellParser::notDone() const {
return m_current != m_end;
}
}