Shell: rewrite the whole shell to use tokens instead of raw strings

tab completion is still running with raw strings and that has to be
fixed in the future.
This commit is contained in:
2024-10-13 21:56:59 +03:00
parent dab6e5a60f
commit 8adc97980a
18 changed files with 2721 additions and 1732 deletions

View File

@@ -0,0 +1,660 @@
#include "Alias.h"
#include "Execute.h"
#include "Lexer.h"
#include "TokenParser.h"
#include <BAN/HashSet.h>
#include <stdio.h>
static constexpr bool can_parse_argument_from_token_type(Token::Type token_type)
{
switch (token_type)
{
case Token::Type::Whitespace:
ASSERT_NOT_REACHED();
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
return false;
case Token::Type::Backslash:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::SingleQuote:
case Token::Type::String:
return true;
}
ASSERT_NOT_REACHED();
}
static constexpr char token_type_to_single_character(Token::Type type)
{
switch (type)
{
case Token::Type::Ampersand: return '&';
case Token::Type::Backslash: return '\\';
case Token::Type::CloseCurly: return '}';
case Token::Type::CloseParen: return ')';
case Token::Type::Dollar: return '$';
case Token::Type::DoubleQuote: return '"';
case Token::Type::OpenCurly: return '{';
case Token::Type::OpenParen: return '(';
case Token::Type::Pipe: return '|';
case Token::Type::Semicolon: return ';';
case Token::Type::SingleQuote: return '\'';
case Token::Type::String: ASSERT_NOT_REACHED();
case Token::Type::Whitespace: ASSERT_NOT_REACHED();
case Token::Type::EOF_: ASSERT_NOT_REACHED();
}
ASSERT_NOT_REACHED();
};
static constexpr BAN::Error unexpected_token_error(Token::Type type)
{
switch (type)
{
case Token::Type::EOF_:
return BAN::Error::from_literal("unexpected EOF");
case Token::Type::Ampersand:
return BAN::Error::from_literal("unexpected token &");
case Token::Type::Backslash:
return BAN::Error::from_literal("unexpected token \\");
case Token::Type::CloseCurly:
return BAN::Error::from_literal("unexpected token }");
case Token::Type::CloseParen:
return BAN::Error::from_literal("unexpected token )");
case Token::Type::Dollar:
return BAN::Error::from_literal("unexpected token $");
case Token::Type::DoubleQuote:
return BAN::Error::from_literal("unexpected token \"");
case Token::Type::OpenCurly:
return BAN::Error::from_literal("unexpected token {");
case Token::Type::Pipe:
return BAN::Error::from_literal("unexpected token |");
case Token::Type::OpenParen:
return BAN::Error::from_literal("unexpected token (");
case Token::Type::Semicolon:
return BAN::Error::from_literal("unexpected token ;");
case Token::Type::SingleQuote:
return BAN::Error::from_literal("unexpected token '");
case Token::Type::String:
return BAN::Error::from_literal("unexpected token <string>");
case Token::Type::Whitespace:
return BAN::Error::from_literal("unexpected token <whitespace>");
}
ASSERT_NOT_REACHED();
}
const Token& TokenParser::peek_token() const
{
if (m_token_stream.empty())
return m_eof_token;
ASSERT(!m_token_stream.front().empty());
return m_token_stream.front().back();
}
Token TokenParser::read_token()
{
if (m_token_stream.empty())
return Token(Token::Type::EOF_);
ASSERT(!m_token_stream.front().empty());
auto token = BAN::move(m_token_stream.front().back());
m_token_stream.front().pop_back();
if (m_token_stream.front().empty())
m_token_stream.pop();
return token;
}
void TokenParser::consume_token()
{
ASSERT(!m_token_stream.empty());
ASSERT(!m_token_stream.front().empty());
m_token_stream.front().pop_back();
if (m_token_stream.front().empty())
m_token_stream.pop();
}
BAN::ErrorOr<void> TokenParser::unget_token(Token&& token)
{
if (m_token_stream.empty())
TRY(m_token_stream.emplace());
TRY(m_token_stream.front().push_back(BAN::move(token)));
return {};
}
BAN::ErrorOr<void> TokenParser::feed_tokens(BAN::Vector<Token>&& tokens)
{
if (tokens.empty())
return {};
for (size_t i = 0; i < tokens.size() / 2; i++)
BAN::swap(tokens[i], tokens[tokens.size() - i - 1]);
TRY(m_token_stream.push(BAN::move(tokens)));
return {};
}
BAN::ErrorOr<void> TokenParser::ask_input_tokens(BAN::StringView prompt, bool add_newline)
{
if (!m_input_function)
return unexpected_token_error(Token::Type::EOF_);
auto opt_input = m_input_function(prompt);
if (!opt_input.has_value())
return unexpected_token_error(Token::Type::EOF_);
auto tokenized = TRY(tokenize_string(opt_input.release_value()));
TRY(feed_tokens(BAN::move(tokenized)));
if (add_newline)
{
auto newline_token = Token(Token::Type::String);
TRY(newline_token.string().push_back('\n'));
TRY(unget_token(BAN::move(newline_token)));
}
return {};
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_backslash(bool is_quoted)
{
ASSERT(read_token().type() == Token::Type::Backslash);
auto token = read_token();
FixedString fixed_string;
switch (token.type())
{
case Token::Type::EOF_:
TRY(ask_input_tokens("> ", false));
TRY(unget_token(Token(Token::Type::Backslash)));
return parse_backslash(is_quoted);
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::SingleQuote:
TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
break;
case Token::Type::Whitespace:
case Token::Type::String:
{
ASSERT(!token.string().empty());
if (is_quoted)
TRY(fixed_string.value.push_back('\\'));
TRY(fixed_string.value.push_back(token.string().front()));
if (token.string().size() > 1)
{
token.string().remove(0);
TRY(unget_token(BAN::move(token)));
}
break;
}
}
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_dollar()
{
ASSERT(read_token().type() == Token::Type::Dollar);
const auto parse_dollar_string =
[](BAN::String& string) -> BAN::ErrorOr<CommandArgument::ArgumentPart>
{
if (string.empty())
return CommandArgument::ArgumentPart(EnvironmentVariable());
if (isdigit(string.front()))
{
size_t number_len = 1;
while (number_len < string.size() && isdigit(string[number_len]))
number_len++;
BuiltinVariable builtin;
TRY(builtin.value.append(string.sv().substring(0, number_len)));
for (size_t i = 0; i < number_len; i++)
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(builtin));
}
switch (string.front())
{
case '$':
case '_':
case '@':
case '*':
case '#':
case '-':
case '?':
case '!':
{
BuiltinVariable builtin;
TRY(builtin.value.push_back(string.front()));
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(builtin));
}
}
if (isalpha(string.front()))
{
size_t env_len = 1;
while (env_len < string.size() && (isalnum(string[env_len]) || string[env_len] == '_'))
env_len++;
EnvironmentVariable environment;
TRY(environment.value.append(string.sv().substring(0, env_len)));
for (size_t i = 0; i < env_len; i++)
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(environment));
}
FixedString fixed_string;
TRY(fixed_string.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
};
switch (peek_token().type())
{
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::DoubleQuote:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::SingleQuote:
case Token::Type::Whitespace:
{
FixedString fixed_string;
TRY(fixed_string.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
case Token::Type::Dollar:
{
consume_token();
BuiltinVariable builtin_variable;
TRY(builtin_variable.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(builtin_variable));
}
case Token::Type::OpenCurly:
{
consume_token();
BAN::String input;
for (auto token = read_token(); token.type() != Token::Type::CloseCurly; token = read_token())
{
if (token.type() == Token::Type::EOF_)
return BAN::Error::from_literal("missing closing curly brace");
if (token.type() == Token::Type::String)
TRY(input.append(token.string()));
else if (token.type() == Token::Type::Dollar)
TRY(input.push_back('$'));
else
return BAN::Error::from_literal("expected closing curly brace");
}
auto result = TRY(parse_dollar_string(input));
if (!input.empty())
return BAN::Error::from_literal("bad substitution");
return result;
}
case Token::Type::OpenParen:
{
consume_token();
auto command_tree = TRY(parse_command_tree());
if (auto token = read_token(); token.type() != Token::Type::CloseParen)
return BAN::Error::from_literal("expected closing parenthesis");
return CommandArgument::ArgumentPart(BAN::move(command_tree));
}
case Token::Type::String:
{
auto token = read_token();
auto string = BAN::move(token.string());
auto result = TRY(parse_dollar_string(string));
if (!string.empty())
{
auto remaining = Token(Token::Type::String);
remaining.string() = BAN::move(string);
TRY(unget_token(BAN::move(remaining)));
}
return result;
}
}
ASSERT_NOT_REACHED();
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_single_quote()
{
ASSERT(read_token().type() == Token::Type::SingleQuote);
FixedString fixed_string;
for (auto token = read_token();; token = read_token())
{
switch (token.type())
{
case Token::Type::EOF_:
TRY(ask_input_tokens("quote> ", true));
break;
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
break;
case Token::Type::String:
case Token::Type::Whitespace:
TRY(fixed_string.value.append(token.string()));
break;
case Token::Type::SingleQuote:
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
}
}
BAN::ErrorOr<CommandArgument> TokenParser::parse_argument()
{
const auto token_type = peek_token().type();
if (!can_parse_argument_from_token_type(token_type))
return unexpected_token_error(token_type);
CommandArgument result;
bool is_in_double_quotes = false;
for (auto token_type = peek_token().type(); token_type != Token::Type::EOF_ || is_in_double_quotes; token_type = peek_token().type())
{
CommandArgument::ArgumentPart new_part;
switch (token_type)
{
case Token::Type::EOF_:
ASSERT(is_in_double_quotes);
TRY(ask_input_tokens("dquote> ", true));
new_part = FixedString(); // do continue
break;
case Token::Type::Ampersand:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
if (is_in_double_quotes)
{
new_part = FixedString();
TRY(new_part.get<FixedString>().value.push_back(token_type_to_single_character(token_type)));
consume_token();
}
break;
case Token::Type::Whitespace:
if (is_in_double_quotes)
{
new_part = FixedString();
TRY(new_part.get<FixedString>().value.append(peek_token().string()));
consume_token();
}
break;
case Token::Type::Backslash:
new_part = TRY(parse_backslash(is_in_double_quotes));
break;
case Token::Type::DoubleQuote:
is_in_double_quotes = !is_in_double_quotes;
new_part = FixedString(); // do continue
consume_token();
break;
case Token::Type::Dollar:
new_part = TRY(parse_dollar());
break;
case Token::Type::SingleQuote:
new_part = TRY(parse_single_quote());
break;
case Token::Type::String:
new_part = CommandArgument::ArgumentPart(FixedString {});
TRY(new_part.get<FixedString>().value.append(peek_token().string()));
consume_token();
break;
}
if (!new_part.has_value())
break;
if (new_part.has<FixedString>())
{
auto& fixed_string = new_part.get<FixedString>();
// discard empty fixed strings
if (fixed_string.value.empty())
continue;
// combine consecutive fixed strings
if (!result.parts.empty() && result.parts.back().has<FixedString>())
{
TRY(result.parts.back().get<FixedString>().value.append(fixed_string.value));
continue;
}
}
TRY(result.parts.push_back(BAN::move(new_part)));
}
return result;
}
BAN::ErrorOr<SingleCommand> TokenParser::parse_single_command()
{
SingleCommand result;
BAN::HashSet<BAN::String> used_aliases;
while (peek_token().type() == Token::Type::String)
{
auto token = read_token();
bool can_be_alias = false;
switch (peek_token().type())
{
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::CloseParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::Whitespace:
can_be_alias = true;
break;
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::SingleQuote:
case Token::Type::String:
can_be_alias = false;
break;
}
if (!can_be_alias)
{
TRY(unget_token(BAN::move(token)));
break;
}
if (used_aliases.contains(token.string()))
{
TRY(unget_token(BAN::move(token)));
break;
}
auto opt_alias = Alias::get().get_alias(token.string().sv());
if (!opt_alias.has_value())
{
TRY(unget_token(BAN::move(token)));
break;
}
auto tokenized_alias = TRY(tokenize_string(opt_alias.value()));
for (size_t i = tokenized_alias.size(); i > 0; i--)
TRY(unget_token(BAN::move(tokenized_alias[i - 1])));
TRY(used_aliases.insert(TRY(BAN::String::formatted("{}", token.string()))));
}
while (peek_token().type() != Token::Type::EOF_)
{
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
auto argument = TRY(parse_argument());
TRY(result.arguments.push_back(BAN::move(argument)));
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
if (!can_parse_argument_from_token_type(peek_token().type()))
break;
}
return result;
}
BAN::ErrorOr<PipedCommand> TokenParser::parse_piped_command()
{
PipedCommand result;
result.background = false;
while (peek_token().type() != Token::Type::EOF_)
{
auto single_command = TRY(parse_single_command());
TRY(result.commands.push_back(BAN::move(single_command)));
const auto token_type = peek_token().type();
if (token_type != Token::Type::Pipe && token_type != Token::Type::Ampersand)
break;
auto temp_token = read_token();
if (peek_token().type() == temp_token.type())
{
TRY(unget_token(BAN::move(temp_token)));
break;
}
if (temp_token.type() == Token::Type::Ampersand)
{
result.background = true;
break;
}
}
return result;
}
BAN::ErrorOr<CommandTree> TokenParser::parse_command_tree()
{
CommandTree result;
auto next_condition = ConditionalCommand::Condition::Always;
while (peek_token().type() != Token::Type::EOF_)
{
ConditionalCommand conditional_command;
conditional_command.command = TRY(parse_piped_command());
conditional_command.condition = next_condition;
TRY(result.commands.push_back(BAN::move(conditional_command)));
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
if (peek_token().type() == Token::Type::EOF_)
break;
bool should_break = false;
const auto token_type = peek_token().type();
switch (token_type)
{
case Token::Type::Semicolon:
consume_token();
next_condition = ConditionalCommand::Condition::Always;
break;
case Token::Type::Ampersand:
case Token::Type::Pipe:
consume_token();
if (read_token().type() != token_type)
return BAN::Error::from_literal("expected double '&' or '|'");
next_condition = (token_type == Token::Type::Ampersand)
? ConditionalCommand::Condition::OnSuccess
: ConditionalCommand::Condition::OnFailure;
break;
default:
should_break = true;
break;
}
if (should_break)
break;
}
return result;
}
BAN::ErrorOr<void> TokenParser::run(BAN::Vector<Token>&& tokens)
{
TRY(feed_tokens(BAN::move(tokens)));
auto command_tree = TRY(parse_command_tree());
const auto token_type = peek_token().type();
while (!m_token_stream.empty())
m_token_stream.pop();
if (token_type != Token::Type::EOF_)
return unexpected_token_error(token_type);
TRY(m_execute.execute_command(command_tree));
return {};
}
bool TokenParser::main_loop(bool break_on_error)
{
for (;;)
{
auto opt_input = m_input_function({});
if (!opt_input.has_value())
break;
auto tokenized_input = tokenize_string(opt_input.release_value());
if (tokenized_input.is_error())
{
fprintf(stderr, "banan-sh: %s\n", tokenized_input.error().get_message());
if (break_on_error)
return false;
continue;
}
if (auto ret = run(tokenized_input.release_value()); ret.is_error())
{
fprintf(stderr, "banan-sh: %s\n", ret.error().get_message());
if (break_on_error)
return false;
continue;
}
}
return true;
}