#include "StarShellParser.hpp" namespace Star { ShellParser::ShellParser() : m_current(), m_end(), m_quotedType('\0') {} auto ShellParser::tokenize(String const& command) -> List<Token> { List<Token> res; init(command); while (notDone()) { res.append(Token{TokenType::Word, word()}); } return res; } StringList ShellParser::tokenizeToStringList(String const& command) { StringList res; for (auto token : tokenize(command)) { if (token.type == TokenType::Word) { res.append(std::move(token.token)); } } return res; } void ShellParser::init(String const& string) { m_begin = string.begin(); m_current = m_begin; m_end = string.end(); m_quotedType = '\0'; } String ShellParser::word() { String res; while (notDone()) { auto letter = *current(); bool escapedLetter = false; if (letter == '\\') { escapedLetter = true; letter = parseBackslash(); } if (!escapedLetter) { if (isSpace(letter) && !inQuotedString()) { next(); if (res.size()) { return res; } continue; } if (isQuote(letter)) { if (inQuotedString() && letter == m_quotedType) { m_quotedType = '\0'; next(); continue; } if (!inQuotedString()) { m_quotedType = letter; next(); continue; } } } res.append(letter); next(); } return res; } bool ShellParser::isSpace(Char letter) const { return String::isSpace(letter); } bool ShellParser::isQuote(Char letter) const { return letter == '\'' || letter == '"'; } bool ShellParser::inQuotedString() const { return m_quotedType != '\0'; } auto ShellParser::current() const -> Maybe<Char> { if (m_current == m_end) { return {}; } return *m_current; } auto ShellParser::next() -> Maybe<Char> { if (m_current != m_end) { ++m_current; } return current(); } auto ShellParser::previous() -> Maybe<Char> { if (m_current != m_begin) { --m_current; } return current(); } auto ShellParser::parseBackslash() -> Char { auto letter = next(); if (!letter) { return '\\'; } switch (*letter) { case ' ': return ' '; case 'n': return '\n'; case 't': return '\t'; case 'r': return '\r'; case 'b': return '\b'; case 'v': return '\v'; case 'f': return '\f'; case 'a': return '\a'; case '\'': return '\''; case '"': return '"'; case '\\': return '\\'; case '0': return '\0'; case 'u': { auto letter = parseUnicodeEscapeSequence(); if (isUtf16LeadSurrogate(letter)) { auto shouldBeSlash = next(); if (shouldBeSlash && shouldBeSlash == '\\') { auto shouldBeU = next(); if (shouldBeU && shouldBeU == 'u') { return parseUnicodeEscapeSequence(letter); } else { previous(); } } previous(); return STAR_UTF32_REPLACEMENT_CHAR; } else { return letter; } } default: return *letter; } } auto ShellParser::parseUnicodeEscapeSequence(Maybe<Char> previousCodepoint) -> Char { String codepoint; auto letter = current(); while (!isSpace(*letter) && codepoint.size() < 4) { auto letter = next(); if (!letter) { break; } if (!isxdigit(*letter)) { return STAR_UTF32_REPLACEMENT_CHAR; } codepoint.append(*letter); } if (!codepoint.size()) { return 'u'; } if (codepoint.size() != 4) // exactly 4 digits are required by \u return STAR_UTF32_REPLACEMENT_CHAR; try { return hexStringToUtf32(codepoint.utf8(), previousCodepoint); } catch (UnicodeException const&) { return STAR_UTF32_REPLACEMENT_CHAR; } } bool ShellParser::notDone() const { return m_current != m_end; } }