#include "Alias.h" #include "Execute.h" #include "Lexer.h" #include "TokenParser.h" #include #include static constexpr bool can_parse_argument_from_token_type(Token::Type token_type) { switch (token_type) { case Token::Type::Whitespace: ASSERT_NOT_REACHED(); case Token::Type::EOF_: case Token::Type::Ampersand: case Token::Type::CloseCurly: case Token::Type::CloseParen: case Token::Type::OpenCurly: case Token::Type::OpenParen: case Token::Type::Pipe: case Token::Type::Semicolon: return false; case Token::Type::Backslash: case Token::Type::Dollar: case Token::Type::DoubleQuote: case Token::Type::SingleQuote: case Token::Type::String: return true; } ASSERT_NOT_REACHED(); } static constexpr char token_type_to_single_character(Token::Type type) { switch (type) { case Token::Type::Ampersand: return '&'; case Token::Type::Backslash: return '\\'; case Token::Type::CloseCurly: return '}'; case Token::Type::CloseParen: return ')'; case Token::Type::Dollar: return '$'; case Token::Type::DoubleQuote: return '"'; case Token::Type::OpenCurly: return '{'; case Token::Type::OpenParen: return '('; case Token::Type::Pipe: return '|'; case Token::Type::Semicolon: return ';'; case Token::Type::SingleQuote: return '\''; case Token::Type::String: ASSERT_NOT_REACHED(); case Token::Type::Whitespace: ASSERT_NOT_REACHED(); case Token::Type::EOF_: ASSERT_NOT_REACHED(); } ASSERT_NOT_REACHED(); }; static constexpr BAN::Error unexpected_token_error(Token::Type type) { switch (type) { case Token::Type::EOF_: return BAN::Error::from_literal("unexpected EOF"); case Token::Type::Ampersand: return BAN::Error::from_literal("unexpected token &"); case Token::Type::Backslash: return BAN::Error::from_literal("unexpected token \\"); case Token::Type::CloseCurly: return BAN::Error::from_literal("unexpected token }"); case Token::Type::CloseParen: return BAN::Error::from_literal("unexpected token )"); case Token::Type::Dollar: return BAN::Error::from_literal("unexpected token $"); case Token::Type::DoubleQuote: return BAN::Error::from_literal("unexpected token \""); case Token::Type::OpenCurly: return BAN::Error::from_literal("unexpected token {"); case Token::Type::Pipe: return BAN::Error::from_literal("unexpected token |"); case Token::Type::OpenParen: return BAN::Error::from_literal("unexpected token ("); case Token::Type::Semicolon: return BAN::Error::from_literal("unexpected token ;"); case Token::Type::SingleQuote: return BAN::Error::from_literal("unexpected token '"); case Token::Type::String: return BAN::Error::from_literal("unexpected token "); case Token::Type::Whitespace: return BAN::Error::from_literal("unexpected token "); } ASSERT_NOT_REACHED(); } const Token& TokenParser::peek_token() const { if (m_token_stream.empty()) return m_eof_token; ASSERT(!m_token_stream.front().empty()); return m_token_stream.front().back(); } Token TokenParser::read_token() { if (m_token_stream.empty()) return Token(Token::Type::EOF_); ASSERT(!m_token_stream.front().empty()); auto token = BAN::move(m_token_stream.front().back()); m_token_stream.front().pop_back(); if (m_token_stream.front().empty()) m_token_stream.pop(); return token; } void TokenParser::consume_token() { ASSERT(!m_token_stream.empty()); ASSERT(!m_token_stream.front().empty()); m_token_stream.front().pop_back(); if (m_token_stream.front().empty()) m_token_stream.pop(); } BAN::ErrorOr TokenParser::unget_token(Token&& token) { if (m_token_stream.empty()) TRY(m_token_stream.emplace()); TRY(m_token_stream.front().push_back(BAN::move(token))); return {}; } BAN::ErrorOr 
BAN::ErrorOr<void> TokenParser::feed_tokens(BAN::Vector<Token>&& tokens)
{
	if (tokens.empty())
		return {};
	for (size_t i = 0; i < tokens.size() / 2; i++)
		BAN::swap(tokens[i], tokens[tokens.size() - i - 1]);
	TRY(m_token_stream.push(BAN::move(tokens)));
	return {};
}

// Ask the input callback for another line (used for line continuations and
// unterminated quotes) and feed its tokens to the parser.
BAN::ErrorOr<void> TokenParser::ask_input_tokens(BAN::StringView prompt, bool add_newline)
{
	if (!m_input_function)
		return unexpected_token_error(Token::Type::EOF_);
	auto opt_input = m_input_function(prompt);
	if (!opt_input.has_value())
		return unexpected_token_error(Token::Type::EOF_);
	auto tokenized = TRY(tokenize_string(opt_input.release_value()));
	TRY(feed_tokens(BAN::move(tokenized)));
	if (add_newline)
	{
		auto newline_token = Token(Token::Type::String);
		TRY(newline_token.string().push_back('\n'));
		TRY(unget_token(BAN::move(newline_token)));
	}
	return {};
}

BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_backslash(bool is_quoted)
{
	ASSERT(read_token().type() == Token::Type::Backslash);

	auto token = read_token();

	CommandArgument::FixedString fixed_string;
	switch (token.type())
	{
		case Token::Type::EOF_:
			// backslash at end of input: ask for a continuation line and retry
			TRY(ask_input_tokens("> ", false));
			TRY(unget_token(Token(Token::Type::Backslash)));
			return parse_backslash(is_quoted);
		case Token::Type::Ampersand:
		case Token::Type::Backslash:
		case Token::Type::CloseCurly:
		case Token::Type::CloseParen:
		case Token::Type::Dollar:
		case Token::Type::DoubleQuote:
		case Token::Type::OpenCurly:
		case Token::Type::OpenParen:
		case Token::Type::Pipe:
		case Token::Type::Semicolon:
		case Token::Type::SingleQuote:
			TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
			break;
		case Token::Type::Whitespace:
		case Token::Type::String:
		{
			ASSERT(!token.string().empty());
			if (is_quoted)
				TRY(fixed_string.value.push_back('\\'));
			TRY(fixed_string.value.push_back(token.string().front()));
			if (token.string().size() > 1)
			{
				token.string().remove(0);
				TRY(unget_token(BAN::move(token)));
			}
			break;
		}
	}

	return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}

BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_dollar()
{
	ASSERT(read_token().type() == Token::Type::Dollar);

	// Parse a variable reference from the start of `string`; the consumed
	// prefix is removed and any remainder is left in `string`.
	const auto parse_dollar_string = [](BAN::String& string) -> BAN::ErrorOr<CommandArgument::ArgumentPart>
	{
		if (string.empty())
			return CommandArgument::ArgumentPart(CommandArgument::EnvironmentVariable());

		if (isdigit(string.front()))
		{
			size_t number_len = 1;
			while (number_len < string.size() && isdigit(string[number_len]))
				number_len++;

			CommandArgument::BuiltinVariable builtin;
			TRY(builtin.value.append(string.sv().substring(0, number_len)));
			for (size_t i = 0; i < number_len; i++)
				string.remove(0);
			return CommandArgument::ArgumentPart(BAN::move(builtin));
		}

		switch (string.front())
		{
			case '$': case '_': case '@': case '*':
			case '#': case '-': case '?': case '!':
			{
				CommandArgument::BuiltinVariable builtin;
				TRY(builtin.value.push_back(string.front()));
				string.remove(0);
				return CommandArgument::ArgumentPart(BAN::move(builtin));
			}
		}

		if (isalpha(string.front()))
		{
			size_t env_len = 1;
			while (env_len < string.size() && (isalnum(string[env_len]) || string[env_len] == '_'))
				env_len++;

			CommandArgument::EnvironmentVariable environment;
			TRY(environment.value.append(string.sv().substring(0, env_len)));
			for (size_t i = 0; i < env_len; i++)
				string.remove(0);
			return CommandArgument::ArgumentPart(BAN::move(environment));
		}

		CommandArgument::FixedString fixed_string;
		TRY(fixed_string.value.push_back('$'));
		return CommandArgument::ArgumentPart(BAN::move(fixed_string));
	};

	switch (peek_token().type())
	{
		case Token::Type::EOF_:
		case Token::Type::Ampersand:
		case Token::Type::Backslash:
		case Token::Type::CloseCurly:
		case Token::Type::CloseParen:
		case Token::Type::DoubleQuote:
		case Token::Type::Pipe:
		case Token::Type::Semicolon:
		case Token::Type::SingleQuote:
		case Token::Type::Whitespace:
		{
			// lone '$' with nothing usable after it: keep it as a literal
			CommandArgument::FixedString fixed_string;
			TRY(fixed_string.value.push_back('$'));
			return CommandArgument::ArgumentPart(BAN::move(fixed_string));
		}
		case Token::Type::Dollar:
		{
			consume_token();
			CommandArgument::BuiltinVariable builtin_variable;
			TRY(builtin_variable.value.push_back('$'));
			return CommandArgument::ArgumentPart(BAN::move(builtin_variable));
		}
		case Token::Type::OpenCurly:
		{
			// ${NAME}
			consume_token();

			BAN::String input;
			for (auto token = read_token(); token.type() != Token::Type::CloseCurly; token = read_token())
			{
				if (token.type() == Token::Type::EOF_)
					return BAN::Error::from_literal("missing closing curly brace");
				if (token.type() == Token::Type::String)
					TRY(input.append(token.string()));
				else if (token.type() == Token::Type::Dollar)
					TRY(input.push_back('$'));
				else
					return BAN::Error::from_literal("expected closing curly brace");
			}

			auto result = TRY(parse_dollar_string(input));
			if (!input.empty())
				return BAN::Error::from_literal("bad substitution");
			return result;
		}
		case Token::Type::OpenParen:
		{
			// $(command substitution)
			consume_token();
			auto command_tree = TRY(parse_command_tree());
			if (auto token = read_token(); token.type() != Token::Type::CloseParen)
				return BAN::Error::from_literal("expected closing parenthesis");
			return CommandArgument::ArgumentPart(BAN::move(command_tree));
		}
		case Token::Type::String:
		{
			auto token = read_token();
			auto string = BAN::move(token.string());

			auto result = TRY(parse_dollar_string(string));
			if (!string.empty())
			{
				auto remaining = Token(Token::Type::String);
				remaining.string() = BAN::move(string);
				TRY(unget_token(BAN::move(remaining)));
			}
			return result;
		}
	}

	ASSERT_NOT_REACHED();
}

BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_single_quote()
{
	ASSERT(read_token().type() == Token::Type::SingleQuote);

	CommandArgument::FixedString fixed_string;
	for (auto token = read_token();; token = read_token())
	{
		switch (token.type())
		{
			case Token::Type::EOF_:
				// unterminated quote: ask for more input
				TRY(ask_input_tokens("quote> ", true));
				break;
			case Token::Type::Ampersand:
			case Token::Type::Backslash:
			case Token::Type::CloseCurly:
			case Token::Type::CloseParen:
			case Token::Type::Dollar:
			case Token::Type::DoubleQuote:
			case Token::Type::OpenCurly:
			case Token::Type::OpenParen:
			case Token::Type::Pipe:
			case Token::Type::Semicolon:
				TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
				break;
			case Token::Type::String:
			case Token::Type::Whitespace:
				TRY(fixed_string.value.append(token.string()));
				break;
			case Token::Type::SingleQuote:
				return CommandArgument::ArgumentPart(BAN::move(fixed_string));
		}
	}
}

BAN::ErrorOr<CommandArgument> TokenParser::parse_argument()
{
	using FixedString = CommandArgument::FixedString;

	const auto token_type = peek_token().type();
	if (!can_parse_argument_from_token_type(token_type))
		return unexpected_token_error(token_type);

	CommandArgument result;

	bool is_in_double_quotes = false;
	for (auto token_type = peek_token().type(); token_type != Token::Type::EOF_ || is_in_double_quotes; token_type = peek_token().type())
	{
		CommandArgument::ArgumentPart new_part;

		switch (token_type)
		{
			case Token::Type::EOF_:
				// unterminated double quote: ask for more input
				ASSERT(is_in_double_quotes);
				TRY(ask_input_tokens("dquote> ", true));
				new_part = FixedString(); // do continue
				break;
			case Token::Type::Ampersand:
			case Token::Type::CloseCurly:
			case Token::Type::CloseParen:
			case Token::Type::OpenCurly:
			case Token::Type::OpenParen:
			case Token::Type::Pipe:
			case Token::Type::Semicolon:
				if (is_in_double_quotes)
				{
					new_part = FixedString();
					TRY(new_part.get<FixedString>().value.push_back(token_type_to_single_character(token_type)));
					consume_token();
				}
				break;
			case Token::Type::Whitespace:
				if (is_in_double_quotes)
				{
					new_part = FixedString();
					TRY(new_part.get<FixedString>().value.append(peek_token().string()));
					consume_token();
				}
				break;
			case Token::Type::Backslash:
				new_part = TRY(parse_backslash(is_in_double_quotes));
				break;
			case Token::Type::DoubleQuote:
				is_in_double_quotes = !is_in_double_quotes;
				new_part = FixedString(); // do continue
				consume_token();
				break;
			case Token::Type::Dollar:
				new_part = TRY(parse_dollar());
				break;
			case Token::Type::SingleQuote:
				new_part = TRY(parse_single_quote());
				break;
			case Token::Type::String:
				new_part = CommandArgument::ArgumentPart(FixedString {});
				TRY(new_part.get<FixedString>().value.append(peek_token().string()));
				consume_token();
				break;
		}

		if (!new_part.has_value())
			break;

		if (new_part.has<FixedString>())
		{
			auto& fixed_string = new_part.get<FixedString>();

			// discard empty fixed strings
			if (fixed_string.value.empty())
				continue;

			// combine consecutive fixed strings
			if (!result.parts.empty() && result.parts.back().has<FixedString>())
			{
				TRY(result.parts.back().get<FixedString>().value.append(fixed_string.value));
				continue;
			}
		}

		TRY(result.parts.push_back(BAN::move(new_part)));
	}

	return result;
}

BAN::ErrorOr<SingleCommand> TokenParser::parse_single_command()
{
	SingleCommand result;

	while (peek_token().type() == Token::Type::Whitespace)
		consume_token();

	// leading NAME=value environment variable assignments
	while (peek_token().type() == Token::Type::String)
	{
		BAN::String env_name;

		const auto& string = peek_token().string();
		if (!isalpha(string.front()))
			break;

		const auto env_len = string.sv().find([](char ch) { return !(isalnum(ch) || ch == '_'); });
		if (!env_len.has_value() || string[*env_len] != '=')
			break;
		TRY(env_name.append(string.sv().substring(0, *env_len)));

		auto full_value = TRY(parse_argument());
		ASSERT(!full_value.parts.empty());
		ASSERT(full_value.parts.front().has<CommandArgument::FixedString>());

		// strip the "NAME=" prefix from the parsed argument
		auto& first_arg = full_value.parts.front().get<CommandArgument::FixedString>();
		ASSERT(first_arg.value.sv().starts_with(env_name));
		ASSERT(first_arg.value[*env_len] == '=');
		for (size_t i = 0; i < *env_len + 1; i++)
			first_arg.value.remove(0);
		if (first_arg.value.empty())
			full_value.parts.remove(0);

		SingleCommand::EnvironmentVariable environment_variable;
		environment_variable.name = BAN::move(env_name);
		environment_variable.value = BAN::move(full_value);
		TRY(result.environment.emplace_back(BAN::move(environment_variable)));

		while (peek_token().type() == Token::Type::Whitespace)
			consume_token();
	}

	// alias expansion; each alias name is expanded at most once to avoid recursion
	BAN::HashSet<BAN::String> used_aliases;
	while (peek_token().type() == Token::Type::String)
	{
		auto token = read_token();

		// only treat the word as an alias if it is followed by a separator
		bool can_be_alias = false;
		switch (peek_token().type())
		{
			case Token::Type::EOF_:
			case Token::Type::Ampersand:
			case Token::Type::CloseParen:
			case Token::Type::Pipe:
			case Token::Type::Semicolon:
			case Token::Type::Whitespace:
				can_be_alias = true;
				break;
			case Token::Type::Backslash:
			case Token::Type::CloseCurly:
			case Token::Type::Dollar:
			case Token::Type::DoubleQuote:
			case Token::Type::OpenCurly:
			case Token::Type::OpenParen:
			case Token::Type::SingleQuote:
			case Token::Type::String:
				can_be_alias = false;
				break;
		}

		if (!can_be_alias)
		{
			TRY(unget_token(BAN::move(token)));
			break;
		}

		if (used_aliases.contains(token.string()))
		{
			TRY(unget_token(BAN::move(token)));
			break;
		}

		auto opt_alias = Alias::get().get_alias(token.string().sv());
		if (!opt_alias.has_value())
		{
			TRY(unget_token(BAN::move(token)));
			break;
		}

		auto tokenized_alias = TRY(tokenize_string(opt_alias.value()));
		for (size_t i = tokenized_alias.size(); i > 0; i--)
			TRY(unget_token(BAN::move(tokenized_alias[i - 1])));
		TRY(used_aliases.insert(TRY(BAN::String::formatted("{}", token.string()))));

		while (peek_token().type() == Token::Type::Whitespace)
			consume_token();
	}

	// remaining command arguments
	while (peek_token().type() != Token::Type::EOF_)
	{
		while (peek_token().type() == Token::Type::Whitespace)
			consume_token();
		auto argument = TRY(parse_argument());
		TRY(result.arguments.push_back(BAN::move(argument)));
		while (peek_token().type() == Token::Type::Whitespace)
			consume_token();
		if (!can_parse_argument_from_token_type(peek_token().type()))
			break;
	}

	return result;
}

BAN::ErrorOr<PipedCommand> TokenParser::parse_piped_command()
{
	PipedCommand result;
	result.background = false;

	while (peek_token().type() != Token::Type::EOF_)
	{
		auto single_command = TRY(parse_single_command());
		TRY(result.commands.push_back(BAN::move(single_command)));

		const auto token_type = peek_token().type();
		if (token_type != Token::Type::Pipe && token_type != Token::Type::Ampersand)
			break;

		auto temp_token = read_token();
		if (peek_token().type() == temp_token.type())
		{
			// '&&' and '||' are handled by parse_command_tree()
			TRY(unget_token(BAN::move(temp_token)));
			break;
		}

		if (temp_token.type() == Token::Type::Ampersand)
		{
			result.background = true;
			break;
		}
	}

	return result;
}

BAN::ErrorOr<CommandTree> TokenParser::parse_command_tree()
{
	CommandTree result;

	auto next_condition = ConditionalCommand::Condition::Always;
	while (peek_token().type() != Token::Type::EOF_)
	{
		ConditionalCommand conditional_command;
		conditional_command.command = TRY(parse_piped_command());
		conditional_command.condition = next_condition;
		TRY(result.commands.push_back(BAN::move(conditional_command)));

		while (peek_token().type() == Token::Type::Whitespace)
			consume_token();
		if (peek_token().type() == Token::Type::EOF_)
			break;

		bool should_break = false;

		const auto token_type = peek_token().type();
		switch (token_type)
		{
			case Token::Type::Semicolon:
				consume_token();
				next_condition = ConditionalCommand::Condition::Always;
				break;
			case Token::Type::Ampersand:
			case Token::Type::Pipe:
				// '&&' runs the next command on success, '||' on failure
				consume_token();
				if (read_token().type() != token_type)
					return BAN::Error::from_literal("expected double '&' or '|'");
				next_condition = (token_type == Token::Type::Ampersand)
					? ConditionalCommand::Condition::OnSuccess
					: ConditionalCommand::Condition::OnFailure;
				break;
			default:
				should_break = true;
				break;
		}

		if (should_break)
			break;
	}

	return result;
}

BAN::ErrorOr<void> TokenParser::run(BAN::Vector<Token>&& tokens)
{
	TRY(feed_tokens(BAN::move(tokens)));

	auto command_tree = TRY(parse_command_tree());

	// any tokens left over after a full command tree are an error
	const auto token_type = peek_token().type();
	while (!m_token_stream.empty())
		m_token_stream.pop();
	if (token_type != Token::Type::EOF_)
		return unexpected_token_error(token_type);

	TRY(m_execute.execute_command(command_tree));

	return {};
}

bool TokenParser::main_loop(bool break_on_error)
{
	for (;;)
	{
		auto opt_input = m_input_function({});
		if (!opt_input.has_value())
			break;

		auto tokenized_input = tokenize_string(opt_input.release_value());
		if (tokenized_input.is_error())
		{
			fprintf(stderr, "banan-sh: %s\n", tokenized_input.error().get_message());
			if (break_on_error)
				return false;
			continue;
		}

		if (auto ret = run(tokenized_input.release_value()); ret.is_error())
		{
			fprintf(stderr, "banan-sh: %s\n", ret.error().get_message());
			if (break_on_error)
				return false;
			continue;
		}
	}

	return true;
}
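// Usage sketch (illustrative only): the constructor arguments below are
// assumptions; the real TokenParser constructor is presumably declared in
// TokenParser.h and may take different parameters. main_loop() and run() are
// the entry points defined above, and `command_string` is a placeholder.
//
//   // interactive shell: prompt, tokenize and execute lines until the input
//   // callback returns no value
//   TokenParser parser(/* Execute& for m_execute, input callback for m_input_function */);
//   parser.main_loop(false);
//
//   // non-interactive, single command string (e.g. a `-c`-style invocation):
//   auto tokens = TRY(tokenize_string(command_string));
//   TRY(parser.run(BAN::move(tokens)));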