Shell: rewrite the whole shell to use tokens instead of raw strings

tab completion is still running with raw strings and that has to be
fixed in the future.
This commit is contained in:
Bananymous 2024-10-13 21:56:59 +03:00
parent dab6e5a60f
commit 8adc97980a
18 changed files with 2721 additions and 1732 deletions

View File

@ -0,0 +1,34 @@
#include "Alias.h"
BAN::ErrorOr<void> Alias::set_alias(BAN::StringView name, BAN::StringView value)
{
TRY(m_aliases.insert_or_assign(
TRY(BAN::String::formatted("{}", name)),
TRY(BAN::String::formatted("{}", value))
));
return {};
}
BAN::Optional<BAN::StringView> Alias::get_alias(const BAN::String& name) const
{
auto it = m_aliases.find(name);
if (it == m_aliases.end())
return {};
return it->value.sv();
}
void Alias::for_each_alias(BAN::Function<BAN::Iteration(BAN::StringView, BAN::StringView)> callback) const
{
for (const auto& [name, value] : m_aliases)
{
switch (callback(name.sv(), value.sv()))
{
case BAN::Iteration::Break:
break;
case BAN::Iteration::Continue:
continue;;
}
break;
}
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <BAN/Function.h>
#include <BAN/HashMap.h>
#include <BAN/Iteration.h>
#include <BAN/NoCopyMove.h>
#include <BAN/String.h>
class Alias
{
BAN_NON_COPYABLE(Alias);
BAN_NON_MOVABLE(Alias);
public:
Alias() = default;
static Alias& get()
{
static Alias s_instance;
return s_instance;
}
BAN::ErrorOr<void> set_alias(BAN::StringView name, BAN::StringView value);
// NOTE: `const BAN::String&` instead of `BAN::StringView` to avoid BAN::String construction
// for hashmap accesses
BAN::Optional<BAN::StringView> get_alias(const BAN::String& name) const;
void for_each_alias(BAN::Function<BAN::Iteration(BAN::StringView, BAN::StringView)>) const;
private:
BAN::HashMap<BAN::String, BAN::String> m_aliases;
};

View File

@ -0,0 +1,241 @@
#include "Alias.h"
#include "Builtin.h"
#include "Execute.h"
#include <ctype.h>
#include <time.h>
#include <unistd.h>
#define ERROR_RETURN(__msg, __ret) do { perror(__msg); return __ret; } while (false)
extern char** environ;
void Builtin::initialize()
{
MUST(m_builtin_commands.emplace("clear"_sv,
[](Execute&, BAN::Span<const BAN::String>, FILE*, FILE* fout) -> int
{
fprintf(fout, "\e[H\e[3J\e[2J");
fflush(fout);
return 0;
}, true
));
MUST(m_builtin_commands.emplace("exit"_sv,
[](Execute&, BAN::Span<const BAN::String> arguments, FILE*, FILE*) -> int
{
int exit_code = 0;
if (arguments.size() > 1)
{
auto exit_string = arguments[1].sv();
for (size_t i = 0; i < exit_string.size() && isdigit(exit_string[i]); i++)
exit_code = (exit_code * 10) + (exit_string[i] - '0');
}
exit(exit_code);
ASSERT_NOT_REACHED();
}, true
));
MUST(m_builtin_commands.emplace("export"_sv,
[](Execute&, BAN::Span<const BAN::String> arguments, FILE*, FILE*) -> int
{
bool first = false;
for (const auto& argument : arguments)
{
if (first)
{
first = false;
continue;
}
auto split = MUST(argument.sv().split('=', true));
if (split.size() != 2)
continue;
if (setenv(BAN::String(split[0]).data(), BAN::String(split[1]).data(), true) == -1)
ERROR_RETURN("setenv", 1);
}
return 0;
}, true
));
MUST(m_builtin_commands.emplace("unset"_sv,
[](Execute&, BAN::Span<const BAN::String> arguments, FILE*, FILE*) -> int
{
for (const auto& argument : arguments)
if (unsetenv(argument.data()) == -1)
ERROR_RETURN("unsetenv", 1);
return 0;
}, true
));
MUST(m_builtin_commands.emplace("alias"_sv,
[](Execute&, BAN::Span<const BAN::String> arguments, FILE*, FILE* fout) -> int
{
if (arguments.size() == 1)
{
Alias::get().for_each_alias(
[fout](BAN::StringView name, BAN::StringView value) -> BAN::Iteration
{
fprintf(fout, "%.*s='%.*s'\n",
(int)name.size(), name.data(),
(int)value.size(), value.data()
);
return BAN::Iteration::Continue;
}
);
return 0;
}
for (size_t i = 1; i < arguments.size(); i++)
{
auto idx = arguments[i].sv().find('=');
if (idx.has_value() && idx.value() == 0)
continue;
if (!idx.has_value())
{
auto value = Alias::get().get_alias(arguments[i]);
if (value.has_value())
fprintf(fout, "%s='%.*s'\n", arguments[i].data(), (int)value->size(), value->data());
}
else
{
auto alias = arguments[i].sv().substring(0, idx.value());
auto value = arguments[i].sv().substring(idx.value() + 1);
if (auto ret = Alias::get().set_alias(alias, value); ret.is_error())
fprintf(stderr, "could not set alias: %s\n", ret.error().get_message());
}
}
return 0;
}, true
));
MUST(m_builtin_commands.emplace("source"_sv,
[](Execute& execute, BAN::Span<const BAN::String> arguments, FILE*, FILE* fout) -> int
{
if (arguments.size() != 2)
{
fprintf(fout, "usage: source FILE\n");
return 1;
}
if (execute.source_script(arguments[1]).is_error())
return 1;
return 0;
}, true
));
MUST(m_builtin_commands.emplace("env"_sv,
[](Execute&, BAN::Span<const BAN::String>, FILE*, FILE* fout) -> int
{
char** current = environ;
while (current && *current)
fprintf(fout, "%s\n", *current++);
return 0;
}, true
));
MUST(m_builtin_commands.emplace("cd"_sv,
[](Execute&, BAN::Span<const BAN::String> arguments, FILE*, FILE* fout) -> int
{
if (arguments.size() > 2)
{
fprintf(fout, "cd: too many arguments\n");
return 1;
}
BAN::StringView path;
if (arguments.size() == 1)
{
if (const char* path_env = getenv("HOME"))
path = path_env;
else
return 0;
}
else
path = arguments[1];
if (chdir(path.data()) == -1)
ERROR_RETURN("chdir", 1);
return 0;
}, true
));
MUST(m_builtin_commands.emplace("time"_sv,
[](Execute& execute, BAN::Span<const BAN::String> arguments, FILE* fin, FILE* fout) -> int
{
timespec start, end;
if (clock_gettime(CLOCK_MONOTONIC, &start) == -1)
ERROR_RETURN("clock_gettime", 1);
auto execute_ret = execute.execute_command_sync(arguments.slice(1), fileno(fin), fileno(fout));
if (clock_gettime(CLOCK_MONOTONIC, &end) == -1)
ERROR_RETURN("clock_gettime", 1);
uint64_t total_ns = 0;
total_ns += (end.tv_sec - start.tv_sec) * 1'000'000'000;
total_ns += end.tv_nsec - start.tv_nsec;
int secs = total_ns / 1'000'000'000;
int msecs = (total_ns % 1'000'000'000) / 1'000'000;
fprintf(fout, "took %d.%03d s\n", secs, msecs);
if (execute_ret.is_error())
return 256 + execute_ret.error().get_error_code();
return execute_ret.value();
}, false
));
}
void Builtin::for_each_builtin(BAN::Function<BAN::Iteration(BAN::StringView, const BuiltinCommand&)> callback) const
{
for (const auto& [name, function] : m_builtin_commands)
{
switch (callback(name.sv(), function))
{
case BAN::Iteration::Break:
break;
case BAN::Iteration::Continue:
continue;;
}
break;
}
}
const Builtin::BuiltinCommand* Builtin::find_builtin(const BAN::String& name) const
{
auto it = m_builtin_commands.find(name);
if (it == m_builtin_commands.end())
return nullptr;
return &it->value;
}
BAN::ErrorOr<int> Builtin::BuiltinCommand::execute(Execute& execute, BAN::Span<const BAN::String> arguments, int fd_in, int fd_out) const
{
const auto fd_to_file =
[](int fd, FILE* file, const char* mode) -> BAN::ErrorOr<FILE*>
{
if (fd == fileno(file))
return file;
int fd_dup = dup(fd);
if (fd_dup == -1)
return BAN::Error::from_errno(errno);
file = fdopen(fd_dup, mode);
if (file == nullptr)
return BAN::Error::from_errno(errno);
return file;
};
FILE* fin = TRY(fd_to_file(fd_in, stdin, "r"));
FILE* fout = TRY(fd_to_file(fd_out, stdout, "w"));
int ret = function(execute, arguments, fin, fout);
if (fileno(fin) != fd_in ) fclose(fin);
if (fileno(fout) != fd_out) fclose(fout);
return ret;
}

View File

@ -0,0 +1,50 @@
#pragma once
#include <BAN/Function.h>
#include <BAN/HashMap.h>
#include <BAN/Iteration.h>
#include <BAN/NoCopyMove.h>
#include <BAN/String.h>
#include <stdio.h>
class Execute;
class Builtin
{
BAN_NON_COPYABLE(Builtin);
BAN_NON_MOVABLE(Builtin);
public:
struct BuiltinCommand
{
using function_t = int (*)(Execute&, BAN::Span<const BAN::String>, FILE* fin, FILE* fout);
function_t function { nullptr };
bool immediate { false };
BuiltinCommand(function_t function, bool immediate)
: function(function)
, immediate(immediate)
{ }
BAN::ErrorOr<int> execute(Execute&, BAN::Span<const BAN::String> arguments, int fd_in, int fd_out) const;
};
public:
Builtin() = default;
static Builtin& get()
{
static Builtin s_instance;
return s_instance;
}
void initialize();
void for_each_builtin(BAN::Function<BAN::Iteration(BAN::StringView, const BuiltinCommand&)>) const;
// return nullptr if not found
const BuiltinCommand* find_builtin(const BAN::String& name) const;
private:
BAN::HashMap<BAN::String, BuiltinCommand> m_builtin_commands;
};

View File

@ -1,5 +1,13 @@
set(SOURCES set(SOURCES
main.cpp main.cpp
Alias.cpp
Builtin.cpp
CommandTypes.cpp
Execute.cpp
Input.cpp
Lexer.cpp
Token.cpp
TokenParser.cpp
) )
add_executable(Shell ${SOURCES}) add_executable(Shell ${SOURCES})

View File

@ -0,0 +1,150 @@
#include "CommandTypes.h"
#include "Execute.h"
#include <BAN/ScopeGuard.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <unistd.h>
extern int g_pid;
extern int g_argc;
extern char** g_argv;
BAN::ErrorOr<BAN::String> CommandArgument::evaluate(Execute& execute) const
{
static_assert(
BAN::is_same_v<CommandArgument::ArgumentPart,
BAN::Variant<
FixedString,
EnvironmentVariable,
BuiltinVariable,
CommandTree
>
>
);
BAN::String evaluated;
for (const auto& part : parts)
{
ASSERT(part.has_value());
if (part.has<FixedString>())
TRY(evaluated.append(part.get<FixedString>().value));
else if (part.has<EnvironmentVariable>())
{
const char* env = getenv(part.get<EnvironmentVariable>().value.data());
if (env != nullptr)
TRY(evaluated.append(env));
}
else if (part.has<BuiltinVariable>())
{
const auto& builtin = part.get<BuiltinVariable>();
ASSERT(!builtin.value.empty());
if (!isdigit(builtin.value.front()))
{
ASSERT(builtin.value.size() == 1);
switch (builtin.value.front())
{
case '_':
case '@':
case '*':
case '-':
fprintf(stderr, "TODO: $%c\n", builtin.value.front());
break;
case '$':
evaluated = TRY(BAN::String::formatted("{}", g_pid));
break;
case '#':
evaluated = TRY(BAN::String::formatted("{}", g_argc - 1));
break;
case '?':
evaluated = TRY(BAN::String::formatted("{}", execute.last_return_value()));
break;
case '!':
evaluated = TRY(BAN::String::formatted("{}", execute.last_background_pid()));
break;
default:
ASSERT_NOT_REACHED();
}
}
else
{
int argv_index = 0;
for (char c : builtin.value)
{
ASSERT(isdigit(c));
if (BAN::Math::will_multiplication_overflow<int>(argv_index, 10) ||
BAN::Math::will_addition_overflow<int>(argv_index * 10, c - '0'))
{
argv_index = INT_MAX;
fprintf(stderr, "integer overflow, capping at %d\n", argv_index);
break;
}
argv_index = (argv_index * 10) + (c - '0');
}
if (argv_index < g_argc)
TRY(evaluated.append(const_cast<const char*>(g_argv[argv_index])));
}
}
else if (part.has<CommandTree>())
{
// FIXME: this should resolve to multiple arguments if not double quoted
int execute_pipe[2];
if (pipe(execute_pipe) == -1)
return BAN::Error::from_errno(errno);
BAN::ScopeGuard pipe_rd_closer([execute_pipe] { close(execute_pipe[0]); });
BAN::ScopeGuard pipe_wr_closer([execute_pipe] { close(execute_pipe[1]); });
const pid_t child_pid = fork();
if (child_pid == -1)
return BAN::Error::from_errno(errno);
if (child_pid == 0)
{
if (dup2(execute_pipe[1], STDOUT_FILENO) == -1)
return BAN::Error::from_errno(errno);
setpgrp();
auto ret = execute.execute_command(part.get<CommandTree>());
if (ret.is_error())
exit(ret.error().get_error_code());
exit(execute.last_return_value());
}
pipe_wr_closer.disable();
close(execute_pipe[1]);
char buffer[128];
while (true)
{
const ssize_t nread = read(execute_pipe[0], buffer, sizeof(buffer));
if (nread < 0)
perror("read");
if (nread <= 0)
break;
TRY(evaluated.append(BAN::StringView(buffer, nread)));
}
while (!evaluated.empty() && isspace(evaluated.back()))
evaluated.pop_back();
}
else
{
ASSERT_NOT_REACHED();
}
}
return evaluated;
}
BAN::ErrorOr<BAN::Vector<BAN::String>> SingleCommand::evaluate_arguments(Execute& execute) const
{
BAN::Vector<BAN::String> result;
TRY(result.reserve(arguments.size()));
for (const auto& arugment : arguments)
TRY(result.push_back(TRY(arugment.evaluate(execute))));
return result;
}

View File

@ -0,0 +1,101 @@
#pragma once
#include <BAN/String.h>
#define COMMAND_GET_MACRO(_0, _1, _2, NAME, ...) NAME
#define COMMAND_MOVE_0(class) \
class(class&& o) { } \
class& operator=(class&& o) { }
#define COMMAND_MOVE_1(class, var) \
class(class&& o) { var = BAN::move(o.var); } \
class& operator=(class&& o) { var = BAN::move(o.var); return *this; }
#define COMMAND_MOVE_2(class, var1, var2) \
class(class&& o) { var1 = BAN::move(o.var1); var2 = BAN::move(o.var2); } \
class& operator=(class&& o) { var1 = BAN::move(o.var1); var2 = BAN::move(o.var2); return *this; }
#define COMMAND_MOVE(class, ...) COMMAND_GET_MACRO(_0 __VA_OPT__(,) __VA_ARGS__, COMMAND_MOVE_2, COMMAND_MOVE_1, COMMAND_MOVE_0)(class, __VA_ARGS__)
#define COMMAND_RULE5(class, ...) \
class() = default; \
class(const class&) = delete; \
class& operator=(const class&) = delete; \
COMMAND_MOVE(class, __VA_ARGS__)
struct CommandTree;
class Execute;
struct FixedString
{
COMMAND_RULE5(FixedString, value);
BAN::String value;
};
struct EnvironmentVariable
{
COMMAND_RULE5(EnvironmentVariable, value);
BAN::String value;
};
struct BuiltinVariable
{
COMMAND_RULE5(BuiltinVariable, value);
BAN::String value;
};
struct CommandArgument
{
using ArgumentPart =
BAN::Variant<
FixedString,
EnvironmentVariable,
BuiltinVariable,
CommandTree
>;
BAN::ErrorOr<BAN::String> evaluate(Execute& execute) const;
COMMAND_RULE5(CommandArgument, parts);
BAN::Vector<ArgumentPart> parts;
};
struct SingleCommand
{
BAN::ErrorOr<BAN::Vector<BAN::String>> evaluate_arguments(Execute& execute) const;
COMMAND_RULE5(SingleCommand, arguments);
BAN::Vector<CommandArgument> arguments;
};
struct PipedCommand
{
COMMAND_RULE5(PipedCommand, commands, background);
BAN::Vector<SingleCommand> commands;
bool background { false };
};
struct ConditionalCommand
{
enum class Condition
{
Always,
OnFailure,
OnSuccess,
};
COMMAND_RULE5(ConditionalCommand, command, condition);
PipedCommand command;
Condition condition { Condition::Always };
};
struct CommandTree
{
COMMAND_RULE5(CommandTree, commands);
BAN::Vector<ConditionalCommand> commands;
};
#undef COMMAND_GET_MACRO
#undef COMMAND_MOVE_0
#undef COMMAND_MOVE_1
#undef COMMAND_MOVE_2
#undef COMMAND_MOVE
#undef COMMAND_RULE5

View File

@ -0,0 +1,330 @@
#include "Builtin.h"
#include "Execute.h"
#include "TokenParser.h"
#include <BAN/ScopeGuard.h>
#include <limits.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <termios.h>
#include <unistd.h>
#define CHECK_FD_OR_PERROR_AND_EXIT(oldfd, newfd) ({ if ((oldfd) != (newfd) && dup2((oldfd), (newfd)) == -1) { perror("dup2"); exit(errno); } })
#define TRY_OR_PERROR_AND_BREAK(expr) ({ auto&& eval = (expr); if (eval.is_error()) { fprintf(stderr, "%s\n", eval.error().get_message()); continue; } eval.release_value(); })
#define TRY_OR_EXIT(expr) ({ auto&& eval = (expr); if (eval.is_error()) exit(eval.error().get_error_code()); eval.release_value(); })
static BAN::ErrorOr<BAN::String> find_absolute_path_of_executable(const BAN::String& command)
{
if (command.size() >= PATH_MAX)
return BAN::Error::from_errno(ENAMETOOLONG);
const auto check_executable_file =
[](const char* path) -> BAN::ErrorOr<void>
{
struct stat st;
if (stat(path, &st) == -1)
return BAN::Error::from_errno(errno);
if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)))
return BAN::Error::from_errno(ENOEXEC);
return {};
};
if (command.sv().contains('/'))
{
TRY(check_executable_file(command.data()));
return TRY(BAN::String::formatted("{}", command));
}
const char* path_env = getenv("PATH");
if (path_env == nullptr)
return BAN::Error::from_errno(ENOENT);
auto path_dirs = TRY(BAN::StringView(path_env).split(':'));
for (auto path_dir : path_dirs)
{
const auto absolute_path = TRY(BAN::String::formatted("{}/{}", path_dir, command));
auto check_result = check_executable_file(absolute_path.data());
if (!check_result.is_error())
return absolute_path;
if (check_result.error().get_error_code() == ENOENT)
continue;
return check_result.release_error();
}
return BAN::Error::from_errno(ENOENT);
}
BAN::ErrorOr<Execute::ExecuteResult> Execute::execute_command_no_wait(const InternalCommand& command)
{
ASSERT(!command.arguments.empty());
if (command.command.has<Builtin::BuiltinCommand>() && !command.background)
{
const auto& builtin = command.command.get<Builtin::BuiltinCommand>();
if (builtin.immediate)
{
return ExecuteResult {
.pid = -1,
.exit_code = TRY(builtin.execute(*this, command.arguments, command.fd_in, command.fd_out))
};
}
}
const pid_t child_pid = fork();
if (child_pid == -1)
return BAN::Error::from_errno(errno);
if (child_pid == 0)
{
if (command.command.has<Builtin::BuiltinCommand>())
{
auto builtin_ret = command.command.get<Builtin::BuiltinCommand>().execute(*this, command.arguments, command.fd_in, command.fd_out);
if (builtin_ret.is_error())
exit(builtin_ret.error().get_error_code());
exit(builtin_ret.value());
}
BAN::Vector<const char*> exec_args;
TRY_OR_EXIT(exec_args.reserve(command.arguments.size() + 1));
for (const auto& argument : command.arguments)
TRY_OR_EXIT(exec_args.push_back(argument.data()));
TRY_OR_EXIT(exec_args.push_back(nullptr));
CHECK_FD_OR_PERROR_AND_EXIT(command.fd_in, STDIN_FILENO);
CHECK_FD_OR_PERROR_AND_EXIT(command.fd_out, STDOUT_FILENO);
execv(command.command.get<BAN::String>().data(), const_cast<char* const*>(exec_args.data()));
exit(errno);
}
if (setpgid(child_pid, command.pgrp ? command.pgrp : child_pid))
perror("setpgid");
if (!command.background && command.pgrp == 0 && isatty(STDIN_FILENO))
if (tcsetpgrp(STDIN_FILENO, child_pid) == -1)
perror("tcsetpgrp");
return ExecuteResult {
.pid = child_pid,
.exit_code = -1,
};
}
BAN::ErrorOr<int> Execute::execute_command_sync(BAN::Span<const BAN::String> arguments, int fd_in, int fd_out)
{
if (arguments.empty())
return 0;
InternalCommand command {
.command = {},
.arguments = arguments,
.fd_in = fd_in,
.fd_out = fd_out,
.background = false,
.pgrp = getpgrp(),
};
if (const auto* builtin = Builtin::get().find_builtin(arguments[0]))
command.command = *builtin;
else
{
auto absolute_path_or_error = find_absolute_path_of_executable(arguments[0]);
if (absolute_path_or_error.is_error())
{
if (absolute_path_or_error.error().get_error_code() == ENOENT)
{
fprintf(stderr, "command not found: %s\n", arguments[0].data());
return 127;
}
fprintf(stderr, "could not execute command: %s\n", absolute_path_or_error.error().get_message());
return 126;
}
command.command = absolute_path_or_error.release_value();
}
const auto execute_result = TRY(execute_command_no_wait(command));
if (execute_result.pid == -1)
return execute_result.exit_code;
int status;
if (waitpid(execute_result.pid, &status, 0) == -1)
return BAN::Error::from_errno(errno);
if (!WIFSIGNALED(status))
return WEXITSTATUS(status);
return 128 + WTERMSIG(status);
}
BAN::ErrorOr<void> Execute::execute_command(const PipedCommand& piped_command)
{
ASSERT(!piped_command.commands.empty());
int last_pipe_rd = STDIN_FILENO;
BAN::Vector<pid_t> child_pids;
TRY(child_pids.resize(piped_command.commands.size(), 0));
BAN::Vector<int> child_codes;
TRY(child_codes.resize(piped_command.commands.size(), 126));
for (size_t i = 0; i < piped_command.commands.size(); i++)
{
int new_pipe[2] { STDIN_FILENO, STDOUT_FILENO };
if (i != piped_command.commands.size() - 1)
if (pipe(new_pipe) == -1)
return BAN::Error::from_errno(errno);
BAN::ScopeGuard pipe_closer(
[&]()
{
if (new_pipe[1] != STDOUT_FILENO)
close(new_pipe[1]);
if (last_pipe_rd != STDIN_FILENO)
close(last_pipe_rd);
last_pipe_rd = new_pipe[0];
}
);
const int fd_in = last_pipe_rd;
const int fd_out = new_pipe[1];
auto arguments = TRY_OR_PERROR_AND_BREAK(piped_command.commands[i].evaluate_arguments(*this));
InternalCommand command {
.command = {},
.arguments = arguments.span(),
.fd_in = fd_in,
.fd_out = fd_out,
.background = piped_command.background,
.pgrp = child_pids.front(),
};
if (const auto* builtin = Builtin::get().find_builtin(arguments[0]))
command.command = *builtin;
else
{
auto absolute_path_or_error = find_absolute_path_of_executable(arguments[0]);
if (absolute_path_or_error.is_error())
{
if (absolute_path_or_error.error().get_error_code() == ENOENT)
{
fprintf(stderr, "command not found: %s\n", arguments[0].data());
child_codes[i] = 127;
}
else
{
fprintf(stderr, "could not execute command: %s\n", absolute_path_or_error.error().get_message());
child_codes[i] = 126;
}
continue;
}
command.command = absolute_path_or_error.release_value();
}
auto execute_result = TRY_OR_PERROR_AND_BREAK(execute_command_no_wait(command));
if (execute_result.pid == -1)
child_codes[i] = execute_result.exit_code;
else
child_pids[i] = execute_result.pid;
}
if (last_pipe_rd != STDIN_FILENO)
close(last_pipe_rd);
if (piped_command.background)
return {};
for (size_t i = 0; i < piped_command.commands.size(); i++)
{
if (child_pids[i] == 0)
continue;
int status = 0;
if (waitpid(child_pids[i], &status, 0) == -1)
perror("waitpid");
if (WIFEXITED(status))
child_codes[i] = WEXITSTATUS(status);
else if (WIFSIGNALED(status))
child_codes[i] = 128 + WTERMSIG(status);
else
ASSERT_NOT_REACHED();
}
if (isatty(STDIN_FILENO) && tcsetpgrp(0, getpgrp()) == -1)
perror("tcsetpgrp");
m_last_return_value = child_codes.back();
return {};
}
BAN::ErrorOr<void> Execute::execute_command(const CommandTree& command_tree)
{
for (const auto& [command, condition] : command_tree.commands)
{
bool should_run = false;
switch (condition)
{
case ConditionalCommand::Condition::Always:
should_run = true;
break;
case ConditionalCommand::Condition::OnFailure:
should_run = (m_last_return_value != 0);
break;
case ConditionalCommand::Condition::OnSuccess:
should_run = (m_last_return_value == 0);
break;
}
if (!should_run)
continue;
TRY(execute_command(command));
}
return {};
}
BAN::ErrorOr<void> Execute::source_script(BAN::StringView path)
{
BAN::Vector<BAN::String> script_lines;
{
FILE* fp = fopen(path.data(), "r");
if (fp == nullptr)
return BAN::Error::from_errno(errno);
BAN::String current;
char temp_buffer[128];
while (fgets(temp_buffer, sizeof(temp_buffer), fp))
{
TRY(current.append(temp_buffer));
if (current.back() != '\n')
continue;
current.pop_back();
if (!current.empty())
TRY(script_lines.push_back(BAN::move(current)));
current.clear();
}
if (!current.empty())
TRY(script_lines.push_back(BAN::move(current)));
fclose(fp);
}
size_t index = 0;
TokenParser parser(
[&](BAN::Optional<BAN::StringView>) -> BAN::Optional<BAN::String>
{
if (index >= script_lines.size())
return {};
return script_lines[index++];
}
);
if (!parser.main_loop(true))
return BAN::Error::from_literal("oop");
return {};
}

View File

@ -0,0 +1,53 @@
#pragma once
#include "Builtin.h"
#include "CommandTypes.h"
#include <BAN/NoCopyMove.h>
class Execute
{
BAN_NON_COPYABLE(Execute);
BAN_NON_MOVABLE(Execute);
public:
Execute() = default;
BAN::ErrorOr<int> execute_command_sync(BAN::Span<const BAN::String> arguments, int fd_in, int fd_out);
BAN::ErrorOr<void> execute_command(const SingleCommand&, int fd_in, int fd_out, bool background, pid_t pgrp = 0);
BAN::ErrorOr<void> execute_command(const PipedCommand&);
BAN::ErrorOr<void> execute_command(const CommandTree&);
BAN::ErrorOr<void> source_script(BAN::StringView path);
int last_background_pid() const { return m_last_background_pid; }
int last_return_value() const { return m_last_return_value; }
private:
struct InternalCommand
{
enum class Type
{
Builtin,
External,
};
BAN::Variant<Builtin::BuiltinCommand, BAN::String> command;
BAN::Span<const BAN::String> arguments;
int fd_in;
int fd_out;
bool background;
pid_t pgrp;
};
struct ExecuteResult
{
pid_t pid;
int exit_code;
};
BAN::ErrorOr<ExecuteResult> execute_command_no_wait(const InternalCommand& command);
private:
int m_last_background_pid { 0 };
int m_last_return_value { 0 };
};

View File

@ -0,0 +1,682 @@
#include "Alias.h"
#include "Builtin.h"
#include "Input.h"
#include <BAN/ScopeGuard.h>
#include <BAN/Sort.h>
#include <ctype.h>
#include <dirent.h>
#include <pwd.h>
#include <sys/stat.h>
#include <unistd.h>
static struct termios s_original_termios;
static struct termios s_raw_termios;
static bool s_termios_initialized { false };
static BAN::Vector<BAN::String> list_matching_entries(BAN::StringView path, BAN::StringView start, bool require_executable)
{
ASSERT(path.size() < PATH_MAX);
char path_cstr[PATH_MAX];
memcpy(path_cstr, path.data(), path.size());
path_cstr[path.size()] = '\0';
DIR* dirp = opendir(path_cstr);
if (dirp == nullptr)
return {};
BAN::Vector<BAN::String> result;
dirent* entry;
while ((entry = readdir(dirp)))
{
if (entry->d_name[0] == '.' && !start.starts_with("."_sv))
continue;
if (strncmp(entry->d_name, start.data(), start.size()))
continue;
struct stat st;
if (fstatat(dirfd(dirp), entry->d_name, &st, 0))
continue;
if (require_executable)
{
if (S_ISDIR(st.st_mode))
continue;
if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXUSR)))
continue;
}
MUST(result.emplace_back(entry->d_name + start.size()));
if (S_ISDIR(st.st_mode))
MUST(result.back().push_back('/'));
}
closedir(dirp);
return BAN::move(result);
}
struct TabCompletion
{
bool should_escape_spaces { false };
BAN::StringView prefix;
BAN::Vector<BAN::String> completions;
};
static TabCompletion list_tab_completion_entries(BAN::StringView current_input)
{
enum class CompletionType
{
Command,
File,
};
BAN::StringView prefix = current_input;
BAN::String last_argument;
CompletionType completion_type = CompletionType::Command;
bool should_escape_spaces = true;
for (size_t i = 0; i < current_input.size(); i++)
{
if (current_input[i] == '\\')
{
i++;
if (i < current_input.size())
MUST(last_argument.push_back(current_input[i]));
}
else if (isspace(current_input[i]) || current_input[i] == ';' || current_input[i] == '|' || current_input.substring(i).starts_with("&&"_sv))
{
if (!isspace(current_input[i]))
completion_type = CompletionType::Command;
else if (!last_argument.empty())
completion_type = CompletionType::File;
if (auto rest = current_input.substring(i); rest.starts_with("||"_sv) || rest.starts_with("&&"_sv))
i++;
prefix = current_input.substring(i + 1);
last_argument.clear();
should_escape_spaces = true;
}
else if (current_input[i] == '\'' || current_input[i] == '"')
{
const char quote_type = current_input[i++];
while (i < current_input.size() && current_input[i] != quote_type)
MUST(last_argument.push_back(current_input[i++]));
should_escape_spaces = false;
}
else
{
MUST(last_argument.push_back(current_input[i]));
}
}
if (last_argument.sv().contains('/'))
completion_type = CompletionType::File;
BAN::Vector<BAN::String> result;
switch (completion_type)
{
case CompletionType::Command:
{
const char* path_env = getenv("PATH");
if (path_env)
{
auto splitted_path_env = MUST(BAN::StringView(path_env).split(':'));
for (auto path : splitted_path_env)
{
auto matching_entries = list_matching_entries(path, last_argument, true);
MUST(result.reserve(result.size() + matching_entries.size()));
for (auto&& entry : matching_entries)
MUST(result.push_back(BAN::move(entry)));
}
}
Builtin::get().for_each_builtin(
[&](BAN::StringView name, const Builtin::BuiltinCommand&) -> BAN::Iteration
{
if (name.starts_with(last_argument))
MUST(result.emplace_back(name.substring(last_argument.size())));
return BAN::Iteration::Continue;
}
);
Alias::get().for_each_alias(
[&](BAN::StringView name, BAN::StringView) -> BAN::Iteration
{
if (name.starts_with(last_argument))
MUST(result.emplace_back(name.substring(last_argument.size())));
return BAN::Iteration::Continue;
}
);
break;
}
case CompletionType::File:
{
BAN::String dir_path;
if (last_argument.sv().starts_with("/"_sv))
MUST(dir_path.push_back('/'));
else
{
char cwd_buffer[PATH_MAX];
if (getcwd(cwd_buffer, sizeof(cwd_buffer)) == nullptr)
return {};
MUST(dir_path.reserve(strlen(cwd_buffer) + 1));
MUST(dir_path.append(cwd_buffer));
MUST(dir_path.push_back('/'));
}
auto match_against = last_argument.sv();
if (auto idx = match_against.rfind('/'); idx.has_value())
{
MUST(dir_path.append(match_against.substring(0, idx.value())));
match_against = match_against.substring(idx.value() + 1);
}
result = list_matching_entries(dir_path, match_against, false);
break;
}
}
if (auto idx = prefix.rfind('/'); idx.has_value())
prefix = prefix.substring(idx.value() + 1);
return { should_escape_spaces, prefix, BAN::move(result) };
}
static int character_length(BAN::StringView prompt)
{
int length { 0 };
bool in_escape { false };
for (char c : prompt)
{
if (in_escape)
{
if (isalpha(c))
in_escape = false;
}
else
{
if (c == '\e')
in_escape = true;
else if (((uint8_t)c & 0xC0) != 0x80)
length++;
}
}
return length;
}
BAN::String Input::parse_ps1_prompt()
{
const char* raw_prompt = getenv("PS1");
if (raw_prompt == nullptr)
return "$ "_sv;
BAN::String prompt;
for (int i = 0; raw_prompt[i]; i++)
{
char ch = raw_prompt[i];
if (ch == '\\')
{
switch (raw_prompt[++i])
{
case 'e':
MUST(prompt.push_back('\e'));
break;
case 'n':
MUST(prompt.push_back('\n'));
break;
case '\\':
MUST(prompt.push_back('\\'));
break;
case '~':
{
char buffer[256];
if (getcwd(buffer, sizeof(buffer)) == nullptr)
strcpy(buffer, strerrorname_np(errno));
const char* home = getenv("HOME");
size_t home_len = home ? strlen(home) : 0;
if (home && strncmp(buffer, home, home_len) == 0)
{
MUST(prompt.push_back('~'));
MUST(prompt.append(buffer + home_len));
}
else
{
MUST(prompt.append(buffer));
}
break;
}
case 'u':
{
static char* username = nullptr;
if (username == nullptr)
{
auto* passwd = getpwuid(geteuid());
if (passwd == nullptr)
break;
username = new char[strlen(passwd->pw_name) + 1];
strcpy(username, passwd->pw_name);
endpwent();
}
MUST(prompt.append(username));
break;
}
case 'h':
{
MUST(prompt.append(m_hostname));
break;
}
case '\0':
MUST(prompt.push_back('\\'));
break;
default:
MUST(prompt.push_back('\\'));
MUST(prompt.push_back(*raw_prompt));
break;
}
}
else
{
MUST(prompt.push_back(ch));
}
}
return prompt;
}
BAN::Optional<BAN::String> Input::get_input(BAN::Optional<BAN::StringView> custom_prompt)
{
tcsetattr(0, TCSANOW, &s_raw_termios);
BAN::ScopeGuard _([] { tcsetattr(0, TCSANOW, &s_original_termios); });
BAN::String ps1_prompt;
if (!custom_prompt.has_value())
ps1_prompt = parse_ps1_prompt();
const auto print_prompt =
[&]()
{
if (custom_prompt.has_value())
printf("%.*s", (int)custom_prompt->size(), custom_prompt->data());
else
printf("%.*s", (int)ps1_prompt.size(), ps1_prompt.data());
};
const auto prompt_length =
[&]() -> int
{
if (custom_prompt.has_value())
return custom_prompt->size();
return character_length(ps1_prompt);
};
print_prompt();
fflush(stdout);
while (true)
{
int chi = getchar();
if (chi == EOF)
{
if (errno != EINTR)
{
perror("getchar");
exit(1);
}
clearerr(stdin);
m_buffers = m_history;
MUST(m_buffers.emplace_back(""_sv));
m_buffer_index = m_buffers.size() - 1;
m_buffer_col = 0;
putchar('\n');
print_prompt();
fflush(stdout);
continue;
}
uint8_t ch = chi;
if (ch != '\t')
{
m_tab_completions.clear();
m_tab_index.clear();
}
if (m_waiting_utf8 > 0)
{
m_waiting_utf8--;
ASSERT((ch & 0xC0) == 0x80);
putchar(ch);
MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++));
if (m_waiting_utf8 == 0)
{
printf("\e[s%s\e[u", m_buffers[m_buffer_index].data() + m_buffer_col);
fflush(stdout);
}
continue;
}
else if (ch & 0x80)
{
if ((ch & 0xE0) == 0xC0)
m_waiting_utf8 = 1;
else if ((ch & 0xF0) == 0xE0)
m_waiting_utf8 = 2;
else if ((ch & 0xF8) == 0xF0)
m_waiting_utf8 = 3;
else
ASSERT_NOT_REACHED();
putchar(ch);
MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++));
continue;
}
switch (ch)
{
case '\e':
{
ch = getchar();
if (ch != '[')
break;
ch = getchar();
int value = 0;
while (isdigit(ch))
{
value = (value * 10) + (ch - '0');
ch = getchar();
}
switch (ch)
{
case 'A':
if (m_buffer_index > 0)
{
m_buffer_index--;
m_buffer_col = m_buffers[m_buffer_index].size();
printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data());
fflush(stdout);
}
break;
case 'B':
if (m_buffer_index < m_buffers.size() - 1)
{
m_buffer_index++;
m_buffer_col = m_buffers[m_buffer_index].size();
printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data());
fflush(stdout);
}
break;
case 'C':
if (m_buffer_col < m_buffers[m_buffer_index].size())
{
m_buffer_col++;
while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80)
m_buffer_col++;
printf("\e[C");
fflush(stdout);
}
break;
case 'D':
if (m_buffer_col > 0)
{
while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80)
m_buffer_col--;
m_buffer_col--;
printf("\e[D");
fflush(stdout);
}
break;
case '~':
switch (value)
{
case 3: // delete
if (m_buffer_col >= m_buffers[m_buffer_index].size())
break;
m_buffers[m_buffer_index].remove(m_buffer_col);
while (m_buffer_col < m_buffers[m_buffer_index].size() && (m_buffers[m_buffer_index][m_buffer_col] & 0xC0) == 0x80)
m_buffers[m_buffer_index].remove(m_buffer_col);
printf("\e[s%s \e[u", m_buffers[m_buffer_index].data() + m_buffer_col);
fflush(stdout);
break;
}
break;
}
break;
}
case '\x0C': // ^L
{
int x = prompt_length() + character_length(m_buffers[m_buffer_index].sv().substring(m_buffer_col)) + 1;
printf("\e[H\e[J");
print_prompt();
printf("%s\e[u\e[1;%dH", m_buffers[m_buffer_index].data(), x);
fflush(stdout);
break;
}
case '\b':
if (m_buffer_col <= 0)
break;
while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80)
m_buffer_col--;
m_buffer_col--;
printf("\e[D");
fflush(stdout);
break;
case '\x01': // ^A
m_buffer_col = 0;
printf("\e[%dG", prompt_length() + 1);
fflush(stdout);
break;
case '\x03': // ^C
putchar('\n');
print_prompt();
fflush(stdout);
m_buffers[m_buffer_index].clear();
m_buffer_col = 0;
break;
case '\x04': // ^D
if (!m_buffers[m_buffer_index].empty())
break;
putchar('\n');
return {};
case '\x7F': // backspace
if (m_buffer_col <= 0)
break;
while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80)
m_buffers[m_buffer_index].remove(--m_buffer_col);
m_buffers[m_buffer_index].remove(--m_buffer_col);
printf("\b\e[s%s \e[u", m_buffers[m_buffer_index].data() + m_buffer_col);
fflush(stdout);
break;
case '\n':
{
BAN::String input;
MUST(input.append(m_buffers[m_buffer_index]));
if (!m_buffers[m_buffer_index].empty())
{
MUST(m_history.push_back(m_buffers[m_buffer_index]));
m_buffers = m_history;
MUST(m_buffers.emplace_back(""_sv));
}
m_buffer_index = m_buffers.size() - 1;
m_buffer_col = 0;
putchar('\n');
return input;
}
case '\t':
{
// FIXME: tab completion is really hacked together currently.
// this should ask token parser about the current parse state
// and do completions based on that, not raw strings
if (m_buffer_col != m_buffers[m_buffer_index].size())
continue;
if (m_tab_completions.has_value())
{
ASSERT(m_tab_completions->size() >= 2);
if (!m_tab_index.has_value())
m_tab_index = 0;
else
{
MUST(m_buffers[m_buffer_index].resize(m_tab_completion_keep));
m_buffer_col = m_tab_completion_keep;
*m_tab_index = (*m_tab_index + 1) % m_tab_completions->size();
}
MUST(m_buffers[m_buffer_index].append(m_tab_completions.value()[*m_tab_index]));
m_buffer_col += m_tab_completions.value()[*m_tab_index].size();
printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data());
fflush(stdout);
break;
}
m_tab_completion_keep = m_buffer_col;
auto [should_escape_spaces, prefix, completions] = list_tab_completion_entries(m_buffers[m_buffer_index].sv().substring(0, m_tab_completion_keep));
BAN::sort::sort(completions.begin(), completions.end(),
[](const BAN::String& a, const BAN::String& b) {
if (auto cmp = strcmp(a.data(), b.data()))
return cmp < 0;
return a.size() < b.size();
}
);
for (size_t i = 1; i < completions.size();)
{
if (completions[i - 1] == completions[i])
completions.remove(i);
else
i++;
}
if (completions.empty())
break;
size_t all_match_len = 0;
for (;;)
{
if (completions.front().size() <= all_match_len)
break;
const char target = completions.front()[all_match_len];
bool all_matched = true;
for (const auto& completion : completions)
{
if (completion.size() > all_match_len && completion[all_match_len] == target)
continue;
all_matched = false;
break;
}
if (!all_matched)
break;
all_match_len++;
}
if (all_match_len)
{
auto completion = completions.front().sv().substring(0, all_match_len);
BAN::String temp_escaped;
if (should_escape_spaces)
{
MUST(temp_escaped.append(completion));
for (size_t i = 0; i < temp_escaped.size(); i++)
{
if (!isspace(temp_escaped[i]))
continue;
MUST(temp_escaped.insert('\\', i));
i++;
}
completion = temp_escaped.sv();
if (!m_buffers[m_buffer_index].empty() && m_buffers[m_buffer_index].back() == '\\' && completion.front() == '\\')
completion = completion.substring(1);
}
m_buffer_col += completion.size();
MUST(m_buffers[m_buffer_index].append(completion));
printf("%.*s", (int)completion.size(), completion.data());
fflush(stdout);
break;
}
if (completions.size() == 1)
{
ASSERT(all_match_len == completions.front().size());
break;
}
printf("\n");
for (size_t i = 0; i < completions.size(); i++)
{
if (i != 0)
printf(" ");
const char* format = completions[i].sv().contains(' ') ? "'%.*s%s'" : "%.*s%s";
printf(format, (int)prefix.size(), prefix.data(), completions[i].data());
}
printf("\n");
print_prompt();
printf("%s", m_buffers[m_buffer_index].data());
fflush(stdout);
if (should_escape_spaces)
{
for (auto& completion : completions)
{
for (size_t i = 0; i < completion.size(); i++)
{
if (!isspace(completion[i]))
continue;
MUST(completion.insert('\\', i));
i++;
}
}
}
m_tab_completion_keep = m_buffer_col;
m_tab_completions = BAN::move(completions);
break;
}
default:
MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++));
if (m_buffer_col == m_buffers[m_buffer_index].size())
putchar(ch);
else
printf("%c\e[s%s\e[u", ch, m_buffers[m_buffer_index].data() + m_buffer_col);
fflush(stdout);
break;
}
}
}
Input::Input()
{
if (!s_termios_initialized)
{
tcgetattr(0, &s_original_termios);
s_raw_termios = s_original_termios;
s_raw_termios.c_lflag &= ~(ECHO | ICANON);
atexit([] { tcsetattr(0, TCSANOW, &s_original_termios); });
s_termios_initialized = true;
}
char hostname_buffer[HOST_NAME_MAX];
if (gethostname(hostname_buffer, sizeof(hostname_buffer)) == 0) {
MUST(m_hostname.append(hostname_buffer));
}
}

View File

@ -0,0 +1,36 @@
#pragma once
#include <BAN/NoCopyMove.h>
#include <BAN/String.h>
#include <BAN/Optional.h>
#include <BAN/Vector.h>
#include <sys/types.h>
#include <termios.h>
class Input
{
BAN_NON_COPYABLE(Input);
BAN_NON_MOVABLE(Input);
public:
Input();
BAN::Optional<BAN::String> get_input(BAN::Optional<BAN::StringView> custom_prompt);
private:
BAN::String parse_ps1_prompt();
private:
BAN::String m_hostname;
BAN::Vector<BAN::String> m_buffers { 1, ""_sv };
BAN::Vector<BAN::String> m_history;
size_t m_buffer_index { 0 };
size_t m_buffer_col { 0 };
BAN::Optional<ssize_t> m_tab_index;
BAN::Optional<BAN::Vector<BAN::String>> m_tab_completions;
size_t m_tab_completion_keep { 0 };
int m_waiting_utf8 { 0 };
};

View File

@ -0,0 +1,79 @@
#include "Lexer.h"
BAN::ErrorOr<BAN::Vector<Token>> tokenize_string(BAN::StringView string)
{
{
size_t i = 0;
while (i < string.size() && isspace(string[i]))
i++;
if (i >= string.size() || string[i] == '#')
return BAN::Vector<Token>();
}
constexpr auto char_to_token_type =
[](char c) -> BAN::Optional<Token::Type>
{
switch (c)
{
case '&': return Token::Type::Ampersand;
case '\\': return Token::Type::Backslash;
case '}': return Token::Type::CloseCurly;
case ')': return Token::Type::CloseParen;
case '$': return Token::Type::Dollar;
case '"': return Token::Type::DoubleQuote;
case '{': return Token::Type::OpenCurly;
case '(': return Token::Type::OpenParen;
case '|': return Token::Type::Pipe;
case ';': return Token::Type::Semicolon;
case '\'': return Token::Type::SingleQuote;
}
return {};
};
BAN::Vector<Token> result;
BAN::String current_string;
const auto append_current_if_exists =
[&]() -> BAN::ErrorOr<void>
{
if (current_string.empty())
return {};
TRY(result.emplace_back(Token::Type::String, BAN::move(current_string)));
current_string = BAN::String();
return {};
};
while (!string.empty())
{
if (isspace(string.front()))
{
TRY(append_current_if_exists());
size_t whitespace_len = 1;
while (whitespace_len < string.size() && isspace(string[whitespace_len]))
whitespace_len++;
BAN::String whitespace_str;
TRY(whitespace_str.append(string.substring(0, whitespace_len)));
TRY(result.emplace_back(Token::Type::Whitespace, BAN::move(whitespace_str)));
string = string.substring(whitespace_len);
continue;
}
if (auto token_type = char_to_token_type(string.front()); token_type.has_value())
{
TRY(append_current_if_exists());
TRY(result.emplace_back(token_type.value()));
string = string.substring(1);
continue;
}
TRY(current_string.push_back(string.front()));
string = string.substring(1);
}
TRY(append_current_if_exists());
return result;
}

View File

@ -0,0 +1,5 @@
#pragma once
#include "Token.h"
BAN::ErrorOr<BAN::Vector<Token>> tokenize_string(BAN::StringView);

View File

@ -0,0 +1,52 @@
#include "Token.h"
#include <BAN/Debug.h>
void Token::debug_dump() const
{
switch (type())
{
case Type::EOF_:
dwarnln("Token <EOF>");
break;
case Type::Ampersand:
dprintln("Token <Ampersand>");
break;
case Type::Backslash:
dprintln("Token <Backslash>");
break;
case Type::CloseCurly:
dprintln("Token <CloseCurly>");
break;
case Type::CloseParen:
dprintln("Token <CloseParen>");
break;
case Type::Dollar:
dprintln("Token <Dollar>");
break;
case Type::DoubleQuote:
dprintln("Token <DoubleQuote>");
break;
case Type::OpenCurly:
dprintln("Token <OpenCurly>");
break;
case Type::OpenParen:
dprintln("Token <OpenParen>");
break;
case Type::Pipe:
dprintln("Token <Pipe>");
break;
case Type::Semicolon:
dprintln("Token <Semicolon>");
break;
case Type::SingleQuote:
dprintln("Token <SingleQuote>");
break;
case Type::String:
dprintln("Token <String \"{}\">", string());
break;
case Type::Whitespace:
dprintln("Token <Whitespace \"{}\">", string());
break;
}
}

View File

@ -0,0 +1,84 @@
#pragma once
#include <BAN/Assert.h>
#include <BAN/String.h>
#include <BAN/Vector.h>
#include <ctype.h>
struct Token
{
public:
enum class Type
{
EOF_,
Ampersand,
Backslash,
CloseCurly,
CloseParen,
Dollar,
DoubleQuote,
OpenCurly,
OpenParen,
Pipe,
Semicolon,
SingleQuote,
String,
Whitespace,
};
Token(Type type)
: m_type(type)
{}
Token(Type type, BAN::String&& string)
: m_type(type)
{
ASSERT(type == Type::String || type == Type::Whitespace);
if (type == Type::Whitespace)
for (char c : string)
ASSERT(isspace(c));
m_value = BAN::move(string);
}
Token(Token&& other)
{
m_type = other.m_type;
m_value = other.m_value;
other.clear();
}
Token& operator=(Token&& other)
{
m_type = other.m_type;
m_value = other.m_value;
other.clear();
return *this;
}
Token(const Token&) = delete;
Token& operator=(const Token&) = delete;
~Token()
{
clear();
}
Type type() const { return m_type; }
BAN::String& string() { ASSERT(m_type == Type::String || m_type == Type::Whitespace); return m_value; }
const BAN::String& string() const { ASSERT(m_type == Type::String || m_type == Type::Whitespace); return m_value; }
void clear()
{
m_type = Type::EOF_;
m_value.clear();
}
void debug_dump() const;
private:
Type m_type { Type::EOF_ };
BAN::String m_value;
};

View File

@ -0,0 +1,660 @@
#include "Alias.h"
#include "Execute.h"
#include "Lexer.h"
#include "TokenParser.h"
#include <BAN/HashSet.h>
#include <stdio.h>
static constexpr bool can_parse_argument_from_token_type(Token::Type token_type)
{
switch (token_type)
{
case Token::Type::Whitespace:
ASSERT_NOT_REACHED();
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
return false;
case Token::Type::Backslash:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::SingleQuote:
case Token::Type::String:
return true;
}
ASSERT_NOT_REACHED();
}
static constexpr char token_type_to_single_character(Token::Type type)
{
switch (type)
{
case Token::Type::Ampersand: return '&';
case Token::Type::Backslash: return '\\';
case Token::Type::CloseCurly: return '}';
case Token::Type::CloseParen: return ')';
case Token::Type::Dollar: return '$';
case Token::Type::DoubleQuote: return '"';
case Token::Type::OpenCurly: return '{';
case Token::Type::OpenParen: return '(';
case Token::Type::Pipe: return '|';
case Token::Type::Semicolon: return ';';
case Token::Type::SingleQuote: return '\'';
case Token::Type::String: ASSERT_NOT_REACHED();
case Token::Type::Whitespace: ASSERT_NOT_REACHED();
case Token::Type::EOF_: ASSERT_NOT_REACHED();
}
ASSERT_NOT_REACHED();
};
static constexpr BAN::Error unexpected_token_error(Token::Type type)
{
switch (type)
{
case Token::Type::EOF_:
return BAN::Error::from_literal("unexpected EOF");
case Token::Type::Ampersand:
return BAN::Error::from_literal("unexpected token &");
case Token::Type::Backslash:
return BAN::Error::from_literal("unexpected token \\");
case Token::Type::CloseCurly:
return BAN::Error::from_literal("unexpected token }");
case Token::Type::CloseParen:
return BAN::Error::from_literal("unexpected token )");
case Token::Type::Dollar:
return BAN::Error::from_literal("unexpected token $");
case Token::Type::DoubleQuote:
return BAN::Error::from_literal("unexpected token \"");
case Token::Type::OpenCurly:
return BAN::Error::from_literal("unexpected token {");
case Token::Type::Pipe:
return BAN::Error::from_literal("unexpected token |");
case Token::Type::OpenParen:
return BAN::Error::from_literal("unexpected token (");
case Token::Type::Semicolon:
return BAN::Error::from_literal("unexpected token ;");
case Token::Type::SingleQuote:
return BAN::Error::from_literal("unexpected token '");
case Token::Type::String:
return BAN::Error::from_literal("unexpected token <string>");
case Token::Type::Whitespace:
return BAN::Error::from_literal("unexpected token <whitespace>");
}
ASSERT_NOT_REACHED();
}
const Token& TokenParser::peek_token() const
{
if (m_token_stream.empty())
return m_eof_token;
ASSERT(!m_token_stream.front().empty());
return m_token_stream.front().back();
}
Token TokenParser::read_token()
{
if (m_token_stream.empty())
return Token(Token::Type::EOF_);
ASSERT(!m_token_stream.front().empty());
auto token = BAN::move(m_token_stream.front().back());
m_token_stream.front().pop_back();
if (m_token_stream.front().empty())
m_token_stream.pop();
return token;
}
void TokenParser::consume_token()
{
ASSERT(!m_token_stream.empty());
ASSERT(!m_token_stream.front().empty());
m_token_stream.front().pop_back();
if (m_token_stream.front().empty())
m_token_stream.pop();
}
BAN::ErrorOr<void> TokenParser::unget_token(Token&& token)
{
if (m_token_stream.empty())
TRY(m_token_stream.emplace());
TRY(m_token_stream.front().push_back(BAN::move(token)));
return {};
}
BAN::ErrorOr<void> TokenParser::feed_tokens(BAN::Vector<Token>&& tokens)
{
if (tokens.empty())
return {};
for (size_t i = 0; i < tokens.size() / 2; i++)
BAN::swap(tokens[i], tokens[tokens.size() - i - 1]);
TRY(m_token_stream.push(BAN::move(tokens)));
return {};
}
BAN::ErrorOr<void> TokenParser::ask_input_tokens(BAN::StringView prompt, bool add_newline)
{
if (!m_input_function)
return unexpected_token_error(Token::Type::EOF_);
auto opt_input = m_input_function(prompt);
if (!opt_input.has_value())
return unexpected_token_error(Token::Type::EOF_);
auto tokenized = TRY(tokenize_string(opt_input.release_value()));
TRY(feed_tokens(BAN::move(tokenized)));
if (add_newline)
{
auto newline_token = Token(Token::Type::String);
TRY(newline_token.string().push_back('\n'));
TRY(unget_token(BAN::move(newline_token)));
}
return {};
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_backslash(bool is_quoted)
{
ASSERT(read_token().type() == Token::Type::Backslash);
auto token = read_token();
FixedString fixed_string;
switch (token.type())
{
case Token::Type::EOF_:
TRY(ask_input_tokens("> ", false));
TRY(unget_token(Token(Token::Type::Backslash)));
return parse_backslash(is_quoted);
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::SingleQuote:
TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
break;
case Token::Type::Whitespace:
case Token::Type::String:
{
ASSERT(!token.string().empty());
if (is_quoted)
TRY(fixed_string.value.push_back('\\'));
TRY(fixed_string.value.push_back(token.string().front()));
if (token.string().size() > 1)
{
token.string().remove(0);
TRY(unget_token(BAN::move(token)));
}
break;
}
}
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_dollar()
{
ASSERT(read_token().type() == Token::Type::Dollar);
const auto parse_dollar_string =
[](BAN::String& string) -> BAN::ErrorOr<CommandArgument::ArgumentPart>
{
if (string.empty())
return CommandArgument::ArgumentPart(EnvironmentVariable());
if (isdigit(string.front()))
{
size_t number_len = 1;
while (number_len < string.size() && isdigit(string[number_len]))
number_len++;
BuiltinVariable builtin;
TRY(builtin.value.append(string.sv().substring(0, number_len)));
for (size_t i = 0; i < number_len; i++)
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(builtin));
}
switch (string.front())
{
case '$':
case '_':
case '@':
case '*':
case '#':
case '-':
case '?':
case '!':
{
BuiltinVariable builtin;
TRY(builtin.value.push_back(string.front()));
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(builtin));
}
}
if (isalpha(string.front()))
{
size_t env_len = 1;
while (env_len < string.size() && (isalnum(string[env_len]) || string[env_len] == '_'))
env_len++;
EnvironmentVariable environment;
TRY(environment.value.append(string.sv().substring(0, env_len)));
for (size_t i = 0; i < env_len; i++)
string.remove(0);
return CommandArgument::ArgumentPart(BAN::move(environment));
}
FixedString fixed_string;
TRY(fixed_string.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
};
switch (peek_token().type())
{
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::DoubleQuote:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::SingleQuote:
case Token::Type::Whitespace:
{
FixedString fixed_string;
TRY(fixed_string.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
case Token::Type::Dollar:
{
consume_token();
BuiltinVariable builtin_variable;
TRY(builtin_variable.value.push_back('$'));
return CommandArgument::ArgumentPart(BAN::move(builtin_variable));
}
case Token::Type::OpenCurly:
{
consume_token();
BAN::String input;
for (auto token = read_token(); token.type() != Token::Type::CloseCurly; token = read_token())
{
if (token.type() == Token::Type::EOF_)
return BAN::Error::from_literal("missing closing curly brace");
if (token.type() == Token::Type::String)
TRY(input.append(token.string()));
else if (token.type() == Token::Type::Dollar)
TRY(input.push_back('$'));
else
return BAN::Error::from_literal("expected closing curly brace");
}
auto result = TRY(parse_dollar_string(input));
if (!input.empty())
return BAN::Error::from_literal("bad substitution");
return result;
}
case Token::Type::OpenParen:
{
consume_token();
auto command_tree = TRY(parse_command_tree());
if (auto token = read_token(); token.type() != Token::Type::CloseParen)
return BAN::Error::from_literal("expected closing parenthesis");
return CommandArgument::ArgumentPart(BAN::move(command_tree));
}
case Token::Type::String:
{
auto token = read_token();
auto string = BAN::move(token.string());
auto result = TRY(parse_dollar_string(string));
if (!string.empty())
{
auto remaining = Token(Token::Type::String);
remaining.string() = BAN::move(string);
TRY(unget_token(BAN::move(remaining)));
}
return result;
}
}
ASSERT_NOT_REACHED();
}
BAN::ErrorOr<CommandArgument::ArgumentPart> TokenParser::parse_single_quote()
{
ASSERT(read_token().type() == Token::Type::SingleQuote);
FixedString fixed_string;
for (auto token = read_token();; token = read_token())
{
switch (token.type())
{
case Token::Type::EOF_:
TRY(ask_input_tokens("quote> ", true));
break;
case Token::Type::Ampersand:
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
TRY(fixed_string.value.push_back(token_type_to_single_character(token.type())));
break;
case Token::Type::String:
case Token::Type::Whitespace:
TRY(fixed_string.value.append(token.string()));
break;
case Token::Type::SingleQuote:
return CommandArgument::ArgumentPart(BAN::move(fixed_string));
}
}
}
BAN::ErrorOr<CommandArgument> TokenParser::parse_argument()
{
const auto token_type = peek_token().type();
if (!can_parse_argument_from_token_type(token_type))
return unexpected_token_error(token_type);
CommandArgument result;
bool is_in_double_quotes = false;
for (auto token_type = peek_token().type(); token_type != Token::Type::EOF_ || is_in_double_quotes; token_type = peek_token().type())
{
CommandArgument::ArgumentPart new_part;
switch (token_type)
{
case Token::Type::EOF_:
ASSERT(is_in_double_quotes);
TRY(ask_input_tokens("dquote> ", true));
new_part = FixedString(); // do continue
break;
case Token::Type::Ampersand:
case Token::Type::CloseCurly:
case Token::Type::CloseParen:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
if (is_in_double_quotes)
{
new_part = FixedString();
TRY(new_part.get<FixedString>().value.push_back(token_type_to_single_character(token_type)));
consume_token();
}
break;
case Token::Type::Whitespace:
if (is_in_double_quotes)
{
new_part = FixedString();
TRY(new_part.get<FixedString>().value.append(peek_token().string()));
consume_token();
}
break;
case Token::Type::Backslash:
new_part = TRY(parse_backslash(is_in_double_quotes));
break;
case Token::Type::DoubleQuote:
is_in_double_quotes = !is_in_double_quotes;
new_part = FixedString(); // do continue
consume_token();
break;
case Token::Type::Dollar:
new_part = TRY(parse_dollar());
break;
case Token::Type::SingleQuote:
new_part = TRY(parse_single_quote());
break;
case Token::Type::String:
new_part = CommandArgument::ArgumentPart(FixedString {});
TRY(new_part.get<FixedString>().value.append(peek_token().string()));
consume_token();
break;
}
if (!new_part.has_value())
break;
if (new_part.has<FixedString>())
{
auto& fixed_string = new_part.get<FixedString>();
// discard empty fixed strings
if (fixed_string.value.empty())
continue;
// combine consecutive fixed strings
if (!result.parts.empty() && result.parts.back().has<FixedString>())
{
TRY(result.parts.back().get<FixedString>().value.append(fixed_string.value));
continue;
}
}
TRY(result.parts.push_back(BAN::move(new_part)));
}
return result;
}
BAN::ErrorOr<SingleCommand> TokenParser::parse_single_command()
{
SingleCommand result;
BAN::HashSet<BAN::String> used_aliases;
while (peek_token().type() == Token::Type::String)
{
auto token = read_token();
bool can_be_alias = false;
switch (peek_token().type())
{
case Token::Type::EOF_:
case Token::Type::Ampersand:
case Token::Type::CloseParen:
case Token::Type::Pipe:
case Token::Type::Semicolon:
case Token::Type::Whitespace:
can_be_alias = true;
break;
case Token::Type::Backslash:
case Token::Type::CloseCurly:
case Token::Type::Dollar:
case Token::Type::DoubleQuote:
case Token::Type::OpenCurly:
case Token::Type::OpenParen:
case Token::Type::SingleQuote:
case Token::Type::String:
can_be_alias = false;
break;
}
if (!can_be_alias)
{
TRY(unget_token(BAN::move(token)));
break;
}
if (used_aliases.contains(token.string()))
{
TRY(unget_token(BAN::move(token)));
break;
}
auto opt_alias = Alias::get().get_alias(token.string().sv());
if (!opt_alias.has_value())
{
TRY(unget_token(BAN::move(token)));
break;
}
auto tokenized_alias = TRY(tokenize_string(opt_alias.value()));
for (size_t i = tokenized_alias.size(); i > 0; i--)
TRY(unget_token(BAN::move(tokenized_alias[i - 1])));
TRY(used_aliases.insert(TRY(BAN::String::formatted("{}", token.string()))));
}
while (peek_token().type() != Token::Type::EOF_)
{
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
auto argument = TRY(parse_argument());
TRY(result.arguments.push_back(BAN::move(argument)));
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
if (!can_parse_argument_from_token_type(peek_token().type()))
break;
}
return result;
}
BAN::ErrorOr<PipedCommand> TokenParser::parse_piped_command()
{
PipedCommand result;
result.background = false;
while (peek_token().type() != Token::Type::EOF_)
{
auto single_command = TRY(parse_single_command());
TRY(result.commands.push_back(BAN::move(single_command)));
const auto token_type = peek_token().type();
if (token_type != Token::Type::Pipe && token_type != Token::Type::Ampersand)
break;
auto temp_token = read_token();
if (peek_token().type() == temp_token.type())
{
TRY(unget_token(BAN::move(temp_token)));
break;
}
if (temp_token.type() == Token::Type::Ampersand)
{
result.background = true;
break;
}
}
return result;
}
BAN::ErrorOr<CommandTree> TokenParser::parse_command_tree()
{
CommandTree result;
auto next_condition = ConditionalCommand::Condition::Always;
while (peek_token().type() != Token::Type::EOF_)
{
ConditionalCommand conditional_command;
conditional_command.command = TRY(parse_piped_command());
conditional_command.condition = next_condition;
TRY(result.commands.push_back(BAN::move(conditional_command)));
while (peek_token().type() == Token::Type::Whitespace)
consume_token();
if (peek_token().type() == Token::Type::EOF_)
break;
bool should_break = false;
const auto token_type = peek_token().type();
switch (token_type)
{
case Token::Type::Semicolon:
consume_token();
next_condition = ConditionalCommand::Condition::Always;
break;
case Token::Type::Ampersand:
case Token::Type::Pipe:
consume_token();
if (read_token().type() != token_type)
return BAN::Error::from_literal("expected double '&' or '|'");
next_condition = (token_type == Token::Type::Ampersand)
? ConditionalCommand::Condition::OnSuccess
: ConditionalCommand::Condition::OnFailure;
break;
default:
should_break = true;
break;
}
if (should_break)
break;
}
return result;
}
BAN::ErrorOr<void> TokenParser::run(BAN::Vector<Token>&& tokens)
{
TRY(feed_tokens(BAN::move(tokens)));
auto command_tree = TRY(parse_command_tree());
const auto token_type = peek_token().type();
while (!m_token_stream.empty())
m_token_stream.pop();
if (token_type != Token::Type::EOF_)
return unexpected_token_error(token_type);
TRY(m_execute.execute_command(command_tree));
return {};
}
bool TokenParser::main_loop(bool break_on_error)
{
for (;;)
{
auto opt_input = m_input_function({});
if (!opt_input.has_value())
break;
auto tokenized_input = tokenize_string(opt_input.release_value());
if (tokenized_input.is_error())
{
fprintf(stderr, "banan-sh: %s\n", tokenized_input.error().get_message());
if (break_on_error)
return false;
continue;
}
if (auto ret = run(tokenized_input.release_value()); ret.is_error())
{
fprintf(stderr, "banan-sh: %s\n", ret.error().get_message());
if (break_on_error)
return false;
continue;
}
}
return true;
}

View File

@ -0,0 +1,57 @@
#pragma once
#include "CommandTypes.h"
#include "Execute.h"
#include "Token.h"
#include <BAN/Function.h>
#include <BAN/NoCopyMove.h>
#include <BAN/Optional.h>
#include <BAN/Queue.h>
#include <BAN/Vector.h>
class TokenParser
{
BAN_NON_COPYABLE(TokenParser);
BAN_NON_MOVABLE(TokenParser);
public:
using InputFunction = BAN::Function<BAN::Optional<BAN::String>(BAN::Optional<BAN::StringView>)>;
public:
TokenParser(const InputFunction& input_function)
: m_input_function(input_function)
{ }
Execute& execute() { return m_execute; }
const Execute& execute() const { return m_execute; }
[[nodiscard]] bool main_loop(bool break_on_error);
private:
const Token& peek_token() const;
Token read_token();
void consume_token();
BAN::ErrorOr<void> feed_tokens(BAN::Vector<Token>&& tokens);
BAN::ErrorOr<void> unget_token(Token&& token);
BAN::ErrorOr<void> ask_input_tokens(BAN::StringView prompt, bool add_newline);
BAN::ErrorOr<void> run(BAN::Vector<Token>&&);
BAN::ErrorOr<CommandArgument::ArgumentPart> parse_backslash(bool is_quoted);
BAN::ErrorOr<CommandArgument::ArgumentPart> parse_dollar();
BAN::ErrorOr<CommandArgument::ArgumentPart> parse_single_quote();
BAN::ErrorOr<CommandArgument> parse_argument();
BAN::ErrorOr<SingleCommand> parse_single_command();
BAN::ErrorOr<PipedCommand> parse_piped_command();
BAN::ErrorOr<CommandTree> parse_command_tree();
private:
Execute m_execute;
Token m_eof_token { Token::Type::EOF_ };
BAN::Queue<BAN::Vector<Token>> m_token_stream;
InputFunction m_input_function;
};

File diff suppressed because it is too large Load Diff