From 8adc97980a52978cf4191d60252347ff178f935a Mon Sep 17 00:00:00 2001 From: Bananymous Date: Sun, 13 Oct 2024 21:56:59 +0300 Subject: [PATCH] Shell: rewrite the whole shell to use tokens instead of raw strings tab completion is still running with raw strings and that has to be fixed in the future. --- userspace/programs/Shell/Alias.cpp | 34 + userspace/programs/Shell/Alias.h | 31 + userspace/programs/Shell/Builtin.cpp | 241 +++ userspace/programs/Shell/Builtin.h | 50 + userspace/programs/Shell/CMakeLists.txt | 8 + userspace/programs/Shell/CommandTypes.cpp | 150 ++ userspace/programs/Shell/CommandTypes.h | 101 ++ userspace/programs/Shell/Execute.cpp | 330 ++++ userspace/programs/Shell/Execute.h | 53 + userspace/programs/Shell/Input.cpp | 682 ++++++++ userspace/programs/Shell/Input.h | 36 + userspace/programs/Shell/Lexer.cpp | 79 + userspace/programs/Shell/Lexer.h | 5 + userspace/programs/Shell/Token.cpp | 52 + userspace/programs/Shell/Token.h | 84 + userspace/programs/Shell/TokenParser.cpp | 660 ++++++++ userspace/programs/Shell/TokenParser.h | 57 + userspace/programs/Shell/main.cpp | 1800 +-------------------- 18 files changed, 2721 insertions(+), 1732 deletions(-) create mode 100644 userspace/programs/Shell/Alias.cpp create mode 100644 userspace/programs/Shell/Alias.h create mode 100644 userspace/programs/Shell/Builtin.cpp create mode 100644 userspace/programs/Shell/Builtin.h create mode 100644 userspace/programs/Shell/CommandTypes.cpp create mode 100644 userspace/programs/Shell/CommandTypes.h create mode 100644 userspace/programs/Shell/Execute.cpp create mode 100644 userspace/programs/Shell/Execute.h create mode 100644 userspace/programs/Shell/Input.cpp create mode 100644 userspace/programs/Shell/Input.h create mode 100644 userspace/programs/Shell/Lexer.cpp create mode 100644 userspace/programs/Shell/Lexer.h create mode 100644 userspace/programs/Shell/Token.cpp create mode 100644 userspace/programs/Shell/Token.h create mode 100644 userspace/programs/Shell/TokenParser.cpp create mode 100644 userspace/programs/Shell/TokenParser.h diff --git a/userspace/programs/Shell/Alias.cpp b/userspace/programs/Shell/Alias.cpp new file mode 100644 index 00000000..263c0efd --- /dev/null +++ b/userspace/programs/Shell/Alias.cpp @@ -0,0 +1,34 @@ +#include "Alias.h" + +BAN::ErrorOr Alias::set_alias(BAN::StringView name, BAN::StringView value) +{ + TRY(m_aliases.insert_or_assign( + TRY(BAN::String::formatted("{}", name)), + TRY(BAN::String::formatted("{}", value)) + )); + return {}; +} + +BAN::Optional Alias::get_alias(const BAN::String& name) const +{ + auto it = m_aliases.find(name); + if (it == m_aliases.end()) + return {}; + return it->value.sv(); +} + + +void Alias::for_each_alias(BAN::Function callback) const +{ + for (const auto& [name, value] : m_aliases) + { + switch (callback(name.sv(), value.sv())) + { + case BAN::Iteration::Break: + break; + case BAN::Iteration::Continue: + continue;; + } + break; + } +} diff --git a/userspace/programs/Shell/Alias.h b/userspace/programs/Shell/Alias.h new file mode 100644 index 00000000..44824fc3 --- /dev/null +++ b/userspace/programs/Shell/Alias.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include +#include +#include +#include + +class Alias +{ + BAN_NON_COPYABLE(Alias); + BAN_NON_MOVABLE(Alias); +public: + Alias() = default; + static Alias& get() + { + static Alias s_instance; + return s_instance; + } + + BAN::ErrorOr set_alias(BAN::StringView name, BAN::StringView value); + + // NOTE: `const BAN::String&` instead of `BAN::StringView` to avoid BAN::String construction + // for hashmap accesses + BAN::Optional get_alias(const BAN::String& name) const; + + void for_each_alias(BAN::Function) const; + +private: + BAN::HashMap m_aliases; +}; diff --git a/userspace/programs/Shell/Builtin.cpp b/userspace/programs/Shell/Builtin.cpp new file mode 100644 index 00000000..59fadc95 --- /dev/null +++ b/userspace/programs/Shell/Builtin.cpp @@ -0,0 +1,241 @@ +#include "Alias.h" +#include "Builtin.h" +#include "Execute.h" + +#include +#include +#include + +#define ERROR_RETURN(__msg, __ret) do { perror(__msg); return __ret; } while (false) + +extern char** environ; + +void Builtin::initialize() +{ + MUST(m_builtin_commands.emplace("clear"_sv, + [](Execute&, BAN::Span, FILE*, FILE* fout) -> int + { + fprintf(fout, "\e[H\e[3J\e[2J"); + fflush(fout); + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("exit"_sv, + [](Execute&, BAN::Span arguments, FILE*, FILE*) -> int + { + int exit_code = 0; + if (arguments.size() > 1) + { + auto exit_string = arguments[1].sv(); + for (size_t i = 0; i < exit_string.size() && isdigit(exit_string[i]); i++) + exit_code = (exit_code * 10) + (exit_string[i] - '0'); + } + exit(exit_code); + ASSERT_NOT_REACHED(); + }, true + )); + + MUST(m_builtin_commands.emplace("export"_sv, + [](Execute&, BAN::Span arguments, FILE*, FILE*) -> int + { + bool first = false; + for (const auto& argument : arguments) + { + if (first) + { + first = false; + continue; + } + + auto split = MUST(argument.sv().split('=', true)); + if (split.size() != 2) + continue; + + if (setenv(BAN::String(split[0]).data(), BAN::String(split[1]).data(), true) == -1) + ERROR_RETURN("setenv", 1); + } + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("unset"_sv, + [](Execute&, BAN::Span arguments, FILE*, FILE*) -> int + { + for (const auto& argument : arguments) + if (unsetenv(argument.data()) == -1) + ERROR_RETURN("unsetenv", 1); + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("alias"_sv, + [](Execute&, BAN::Span arguments, FILE*, FILE* fout) -> int + { + if (arguments.size() == 1) + { + Alias::get().for_each_alias( + [fout](BAN::StringView name, BAN::StringView value) -> BAN::Iteration + { + fprintf(fout, "%.*s='%.*s'\n", + (int)name.size(), name.data(), + (int)value.size(), value.data() + ); + return BAN::Iteration::Continue; + } + ); + return 0; + } + + for (size_t i = 1; i < arguments.size(); i++) + { + auto idx = arguments[i].sv().find('='); + if (idx.has_value() && idx.value() == 0) + continue; + if (!idx.has_value()) + { + auto value = Alias::get().get_alias(arguments[i]); + if (value.has_value()) + fprintf(fout, "%s='%.*s'\n", arguments[i].data(), (int)value->size(), value->data()); + } + else + { + auto alias = arguments[i].sv().substring(0, idx.value()); + auto value = arguments[i].sv().substring(idx.value() + 1); + if (auto ret = Alias::get().set_alias(alias, value); ret.is_error()) + fprintf(stderr, "could not set alias: %s\n", ret.error().get_message()); + } + } + + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("source"_sv, + [](Execute& execute, BAN::Span arguments, FILE*, FILE* fout) -> int + { + if (arguments.size() != 2) + { + fprintf(fout, "usage: source FILE\n"); + return 1; + } + if (execute.source_script(arguments[1]).is_error()) + return 1; + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("env"_sv, + [](Execute&, BAN::Span, FILE*, FILE* fout) -> int + { + char** current = environ; + while (current && *current) + fprintf(fout, "%s\n", *current++); + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("cd"_sv, + [](Execute&, BAN::Span arguments, FILE*, FILE* fout) -> int + { + if (arguments.size() > 2) + { + fprintf(fout, "cd: too many arguments\n"); + return 1; + } + + BAN::StringView path; + + if (arguments.size() == 1) + { + if (const char* path_env = getenv("HOME")) + path = path_env; + else + return 0; + } + else + path = arguments[1]; + + if (chdir(path.data()) == -1) + ERROR_RETURN("chdir", 1); + + return 0; + }, true + )); + + MUST(m_builtin_commands.emplace("time"_sv, + [](Execute& execute, BAN::Span arguments, FILE* fin, FILE* fout) -> int + { + timespec start, end; + + if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) + ERROR_RETURN("clock_gettime", 1); + + auto execute_ret = execute.execute_command_sync(arguments.slice(1), fileno(fin), fileno(fout)); + + if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) + ERROR_RETURN("clock_gettime", 1); + + uint64_t total_ns = 0; + total_ns += (end.tv_sec - start.tv_sec) * 1'000'000'000; + total_ns += end.tv_nsec - start.tv_nsec; + + int secs = total_ns / 1'000'000'000; + int msecs = (total_ns % 1'000'000'000) / 1'000'000; + + fprintf(fout, "took %d.%03d s\n", secs, msecs); + + if (execute_ret.is_error()) + return 256 + execute_ret.error().get_error_code(); + return execute_ret.value(); + }, false + )); +} + +void Builtin::for_each_builtin(BAN::Function callback) const +{ + for (const auto& [name, function] : m_builtin_commands) + { + switch (callback(name.sv(), function)) + { + case BAN::Iteration::Break: + break; + case BAN::Iteration::Continue: + continue;; + } + break; + } +} + +const Builtin::BuiltinCommand* Builtin::find_builtin(const BAN::String& name) const +{ + auto it = m_builtin_commands.find(name); + if (it == m_builtin_commands.end()) + return nullptr; + return &it->value; +} + +BAN::ErrorOr Builtin::BuiltinCommand::execute(Execute& execute, BAN::Span arguments, int fd_in, int fd_out) const +{ + const auto fd_to_file = + [](int fd, FILE* file, const char* mode) -> BAN::ErrorOr + { + if (fd == fileno(file)) + return file; + int fd_dup = dup(fd); + if (fd_dup == -1) + return BAN::Error::from_errno(errno); + file = fdopen(fd_dup, mode); + if (file == nullptr) + return BAN::Error::from_errno(errno); + return file; + }; + + FILE* fin = TRY(fd_to_file(fd_in, stdin, "r")); + FILE* fout = TRY(fd_to_file(fd_out, stdout, "w")); + int ret = function(execute, arguments, fin, fout); + if (fileno(fin) != fd_in ) fclose(fin); + if (fileno(fout) != fd_out) fclose(fout); + + return ret; +} diff --git a/userspace/programs/Shell/Builtin.h b/userspace/programs/Shell/Builtin.h new file mode 100644 index 00000000..abf88e1b --- /dev/null +++ b/userspace/programs/Shell/Builtin.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +class Execute; + +class Builtin +{ + BAN_NON_COPYABLE(Builtin); + BAN_NON_MOVABLE(Builtin); +public: + struct BuiltinCommand + { + using function_t = int (*)(Execute&, BAN::Span, FILE* fin, FILE* fout); + + function_t function { nullptr }; + bool immediate { false }; + + BuiltinCommand(function_t function, bool immediate) + : function(function) + , immediate(immediate) + { } + + BAN::ErrorOr execute(Execute&, BAN::Span arguments, int fd_in, int fd_out) const; + }; + +public: + Builtin() = default; + static Builtin& get() + { + static Builtin s_instance; + return s_instance; + } + + void initialize(); + + void for_each_builtin(BAN::Function) const; + + // return nullptr if not found + const BuiltinCommand* find_builtin(const BAN::String& name) const; + +private: + BAN::HashMap m_builtin_commands; +}; diff --git a/userspace/programs/Shell/CMakeLists.txt b/userspace/programs/Shell/CMakeLists.txt index 3d243d84..46bfc6d8 100644 --- a/userspace/programs/Shell/CMakeLists.txt +++ b/userspace/programs/Shell/CMakeLists.txt @@ -1,5 +1,13 @@ set(SOURCES main.cpp + Alias.cpp + Builtin.cpp + CommandTypes.cpp + Execute.cpp + Input.cpp + Lexer.cpp + Token.cpp + TokenParser.cpp ) add_executable(Shell ${SOURCES}) diff --git a/userspace/programs/Shell/CommandTypes.cpp b/userspace/programs/Shell/CommandTypes.cpp new file mode 100644 index 00000000..3344c649 --- /dev/null +++ b/userspace/programs/Shell/CommandTypes.cpp @@ -0,0 +1,150 @@ +#include "CommandTypes.h" +#include "Execute.h" + +#include + +#include +#include +#include +#include + +extern int g_pid; +extern int g_argc; +extern char** g_argv; + +BAN::ErrorOr CommandArgument::evaluate(Execute& execute) const +{ + static_assert( + BAN::is_same_v + > + ); + + BAN::String evaluated; + + for (const auto& part : parts) + { + ASSERT(part.has_value()); + if (part.has()) + TRY(evaluated.append(part.get().value)); + else if (part.has()) + { + const char* env = getenv(part.get().value.data()); + if (env != nullptr) + TRY(evaluated.append(env)); + } + else if (part.has()) + { + const auto& builtin = part.get(); + ASSERT(!builtin.value.empty()); + + if (!isdigit(builtin.value.front())) + { + ASSERT(builtin.value.size() == 1); + switch (builtin.value.front()) + { + case '_': + case '@': + case '*': + case '-': + fprintf(stderr, "TODO: $%c\n", builtin.value.front()); + break; + case '$': + evaluated = TRY(BAN::String::formatted("{}", g_pid)); + break; + case '#': + evaluated = TRY(BAN::String::formatted("{}", g_argc - 1)); + break; + case '?': + evaluated = TRY(BAN::String::formatted("{}", execute.last_return_value())); + break; + case '!': + evaluated = TRY(BAN::String::formatted("{}", execute.last_background_pid())); + break; + default: + ASSERT_NOT_REACHED(); + } + } + else + { + int argv_index = 0; + for (char c : builtin.value) + { + ASSERT(isdigit(c)); + if (BAN::Math::will_multiplication_overflow(argv_index, 10) || + BAN::Math::will_addition_overflow(argv_index * 10, c - '0')) + { + argv_index = INT_MAX; + fprintf(stderr, "integer overflow, capping at %d\n", argv_index); + break; + } + argv_index = (argv_index * 10) + (c - '0'); + } + + if (argv_index < g_argc) + TRY(evaluated.append(const_cast(g_argv[argv_index]))); + } + } + else if (part.has()) + { + // FIXME: this should resolve to multiple arguments if not double quoted + + int execute_pipe[2]; + if (pipe(execute_pipe) == -1) + return BAN::Error::from_errno(errno); + BAN::ScopeGuard pipe_rd_closer([execute_pipe] { close(execute_pipe[0]); }); + BAN::ScopeGuard pipe_wr_closer([execute_pipe] { close(execute_pipe[1]); }); + + const pid_t child_pid = fork(); + if (child_pid == -1) + return BAN::Error::from_errno(errno); + if (child_pid == 0) + { + if (dup2(execute_pipe[1], STDOUT_FILENO) == -1) + return BAN::Error::from_errno(errno); + setpgrp(); + auto ret = execute.execute_command(part.get()); + if (ret.is_error()) + exit(ret.error().get_error_code()); + exit(execute.last_return_value()); + } + + pipe_wr_closer.disable(); + close(execute_pipe[1]); + + char buffer[128]; + while (true) + { + const ssize_t nread = read(execute_pipe[0], buffer, sizeof(buffer)); + if (nread < 0) + perror("read"); + if (nread <= 0) + break; + TRY(evaluated.append(BAN::StringView(buffer, nread))); + } + + while (!evaluated.empty() && isspace(evaluated.back())) + evaluated.pop_back(); + } + else + { + ASSERT_NOT_REACHED(); + } + } + + return evaluated; +} + +BAN::ErrorOr> SingleCommand::evaluate_arguments(Execute& execute) const +{ + BAN::Vector result; + TRY(result.reserve(arguments.size())); + for (const auto& arugment : arguments) + TRY(result.push_back(TRY(arugment.evaluate(execute)))); + return result; +} diff --git a/userspace/programs/Shell/CommandTypes.h b/userspace/programs/Shell/CommandTypes.h new file mode 100644 index 00000000..63a56d50 --- /dev/null +++ b/userspace/programs/Shell/CommandTypes.h @@ -0,0 +1,101 @@ +#pragma once + +#include + +#define COMMAND_GET_MACRO(_0, _1, _2, NAME, ...) NAME + +#define COMMAND_MOVE_0(class) \ + class(class&& o) { } \ + class& operator=(class&& o) { } +#define COMMAND_MOVE_1(class, var) \ + class(class&& o) { var = BAN::move(o.var); } \ + class& operator=(class&& o) { var = BAN::move(o.var); return *this; } +#define COMMAND_MOVE_2(class, var1, var2) \ + class(class&& o) { var1 = BAN::move(o.var1); var2 = BAN::move(o.var2); } \ + class& operator=(class&& o) { var1 = BAN::move(o.var1); var2 = BAN::move(o.var2); return *this; } +#define COMMAND_MOVE(class, ...) COMMAND_GET_MACRO(_0 __VA_OPT__(,) __VA_ARGS__, COMMAND_MOVE_2, COMMAND_MOVE_1, COMMAND_MOVE_0)(class, __VA_ARGS__) + +#define COMMAND_RULE5(class, ...) \ + class() = default; \ + class(const class&) = delete; \ + class& operator=(const class&) = delete; \ + COMMAND_MOVE(class, __VA_ARGS__) + +struct CommandTree; +class Execute; + +struct FixedString +{ + COMMAND_RULE5(FixedString, value); + BAN::String value; +}; + +struct EnvironmentVariable +{ + COMMAND_RULE5(EnvironmentVariable, value); + BAN::String value; +}; + +struct BuiltinVariable +{ + COMMAND_RULE5(BuiltinVariable, value); + BAN::String value; +}; + +struct CommandArgument +{ + using ArgumentPart = + BAN::Variant< + FixedString, + EnvironmentVariable, + BuiltinVariable, + CommandTree + >; + + BAN::ErrorOr evaluate(Execute& execute) const; + + COMMAND_RULE5(CommandArgument, parts); + BAN::Vector parts; +}; + +struct SingleCommand +{ + BAN::ErrorOr> evaluate_arguments(Execute& execute) const; + + COMMAND_RULE5(SingleCommand, arguments); + BAN::Vector arguments; +}; + +struct PipedCommand +{ + COMMAND_RULE5(PipedCommand, commands, background); + BAN::Vector commands; + bool background { false }; +}; + +struct ConditionalCommand +{ + enum class Condition + { + Always, + OnFailure, + OnSuccess, + }; + + COMMAND_RULE5(ConditionalCommand, command, condition); + PipedCommand command; + Condition condition { Condition::Always }; +}; + +struct CommandTree +{ + COMMAND_RULE5(CommandTree, commands); + BAN::Vector commands; +}; + +#undef COMMAND_GET_MACRO +#undef COMMAND_MOVE_0 +#undef COMMAND_MOVE_1 +#undef COMMAND_MOVE_2 +#undef COMMAND_MOVE +#undef COMMAND_RULE5 diff --git a/userspace/programs/Shell/Execute.cpp b/userspace/programs/Shell/Execute.cpp new file mode 100644 index 00000000..4c222246 --- /dev/null +++ b/userspace/programs/Shell/Execute.cpp @@ -0,0 +1,330 @@ +#include "Builtin.h" +#include "Execute.h" +#include "TokenParser.h" + +#include + +#include +#include +#include +#include +#include + +#define CHECK_FD_OR_PERROR_AND_EXIT(oldfd, newfd) ({ if ((oldfd) != (newfd) && dup2((oldfd), (newfd)) == -1) { perror("dup2"); exit(errno); } }) +#define TRY_OR_PERROR_AND_BREAK(expr) ({ auto&& eval = (expr); if (eval.is_error()) { fprintf(stderr, "%s\n", eval.error().get_message()); continue; } eval.release_value(); }) +#define TRY_OR_EXIT(expr) ({ auto&& eval = (expr); if (eval.is_error()) exit(eval.error().get_error_code()); eval.release_value(); }) + +static BAN::ErrorOr find_absolute_path_of_executable(const BAN::String& command) +{ + if (command.size() >= PATH_MAX) + return BAN::Error::from_errno(ENAMETOOLONG); + + const auto check_executable_file = + [](const char* path) -> BAN::ErrorOr + { + struct stat st; + if (stat(path, &st) == -1) + return BAN::Error::from_errno(errno); + if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) + return BAN::Error::from_errno(ENOEXEC); + return {}; + }; + + if (command.sv().contains('/')) + { + TRY(check_executable_file(command.data())); + return TRY(BAN::String::formatted("{}", command)); + } + + const char* path_env = getenv("PATH"); + if (path_env == nullptr) + return BAN::Error::from_errno(ENOENT); + + auto path_dirs = TRY(BAN::StringView(path_env).split(':')); + for (auto path_dir : path_dirs) + { + const auto absolute_path = TRY(BAN::String::formatted("{}/{}", path_dir, command)); + + auto check_result = check_executable_file(absolute_path.data()); + if (!check_result.is_error()) + return absolute_path; + + if (check_result.error().get_error_code() == ENOENT) + continue; + return check_result.release_error(); + } + + return BAN::Error::from_errno(ENOENT); +} + +BAN::ErrorOr Execute::execute_command_no_wait(const InternalCommand& command) +{ + ASSERT(!command.arguments.empty()); + + if (command.command.has() && !command.background) + { + const auto& builtin = command.command.get(); + if (builtin.immediate) + { + return ExecuteResult { + .pid = -1, + .exit_code = TRY(builtin.execute(*this, command.arguments, command.fd_in, command.fd_out)) + }; + } + } + + const pid_t child_pid = fork(); + if (child_pid == -1) + return BAN::Error::from_errno(errno); + if (child_pid == 0) + { + if (command.command.has()) + { + auto builtin_ret = command.command.get().execute(*this, command.arguments, command.fd_in, command.fd_out); + if (builtin_ret.is_error()) + exit(builtin_ret.error().get_error_code()); + exit(builtin_ret.value()); + } + + BAN::Vector exec_args; + TRY_OR_EXIT(exec_args.reserve(command.arguments.size() + 1)); + for (const auto& argument : command.arguments) + TRY_OR_EXIT(exec_args.push_back(argument.data())); + TRY_OR_EXIT(exec_args.push_back(nullptr)); + + CHECK_FD_OR_PERROR_AND_EXIT(command.fd_in, STDIN_FILENO); + CHECK_FD_OR_PERROR_AND_EXIT(command.fd_out, STDOUT_FILENO); + + execv(command.command.get().data(), const_cast(exec_args.data())); + exit(errno); + } + + if (setpgid(child_pid, command.pgrp ? command.pgrp : child_pid)) + perror("setpgid"); + if (!command.background && command.pgrp == 0 && isatty(STDIN_FILENO)) + if (tcsetpgrp(STDIN_FILENO, child_pid) == -1) + perror("tcsetpgrp"); + + return ExecuteResult { + .pid = child_pid, + .exit_code = -1, + }; +} + +BAN::ErrorOr Execute::execute_command_sync(BAN::Span arguments, int fd_in, int fd_out) +{ + if (arguments.empty()) + return 0; + + InternalCommand command { + .command = {}, + .arguments = arguments, + .fd_in = fd_in, + .fd_out = fd_out, + .background = false, + .pgrp = getpgrp(), + }; + + if (const auto* builtin = Builtin::get().find_builtin(arguments[0])) + command.command = *builtin; + else + { + auto absolute_path_or_error = find_absolute_path_of_executable(arguments[0]); + if (absolute_path_or_error.is_error()) + { + if (absolute_path_or_error.error().get_error_code() == ENOENT) + { + fprintf(stderr, "command not found: %s\n", arguments[0].data()); + return 127; + } + fprintf(stderr, "could not execute command: %s\n", absolute_path_or_error.error().get_message()); + return 126; + } + command.command = absolute_path_or_error.release_value(); + } + + const auto execute_result = TRY(execute_command_no_wait(command)); + if (execute_result.pid == -1) + return execute_result.exit_code; + + int status; + if (waitpid(execute_result.pid, &status, 0) == -1) + return BAN::Error::from_errno(errno); + + if (!WIFSIGNALED(status)) + return WEXITSTATUS(status); + return 128 + WTERMSIG(status); +} + +BAN::ErrorOr Execute::execute_command(const PipedCommand& piped_command) +{ + ASSERT(!piped_command.commands.empty()); + + int last_pipe_rd = STDIN_FILENO; + + BAN::Vector child_pids; + TRY(child_pids.resize(piped_command.commands.size(), 0)); + + BAN::Vector child_codes; + TRY(child_codes.resize(piped_command.commands.size(), 126)); + + for (size_t i = 0; i < piped_command.commands.size(); i++) + { + int new_pipe[2] { STDIN_FILENO, STDOUT_FILENO }; + if (i != piped_command.commands.size() - 1) + if (pipe(new_pipe) == -1) + return BAN::Error::from_errno(errno); + + BAN::ScopeGuard pipe_closer( + [&]() + { + if (new_pipe[1] != STDOUT_FILENO) + close(new_pipe[1]); + if (last_pipe_rd != STDIN_FILENO) + close(last_pipe_rd); + last_pipe_rd = new_pipe[0]; + } + ); + + const int fd_in = last_pipe_rd; + const int fd_out = new_pipe[1]; + + auto arguments = TRY_OR_PERROR_AND_BREAK(piped_command.commands[i].evaluate_arguments(*this)); + + InternalCommand command { + .command = {}, + .arguments = arguments.span(), + .fd_in = fd_in, + .fd_out = fd_out, + .background = piped_command.background, + .pgrp = child_pids.front(), + }; + + if (const auto* builtin = Builtin::get().find_builtin(arguments[0])) + command.command = *builtin; + else + { + auto absolute_path_or_error = find_absolute_path_of_executable(arguments[0]); + if (absolute_path_or_error.is_error()) + { + if (absolute_path_or_error.error().get_error_code() == ENOENT) + { + fprintf(stderr, "command not found: %s\n", arguments[0].data()); + child_codes[i] = 127; + } + else + { + fprintf(stderr, "could not execute command: %s\n", absolute_path_or_error.error().get_message()); + child_codes[i] = 126; + } + continue; + } + command.command = absolute_path_or_error.release_value(); + } + + auto execute_result = TRY_OR_PERROR_AND_BREAK(execute_command_no_wait(command)); + if (execute_result.pid == -1) + child_codes[i] = execute_result.exit_code; + else + child_pids[i] = execute_result.pid; + } + + if (last_pipe_rd != STDIN_FILENO) + close(last_pipe_rd); + + if (piped_command.background) + return {}; + + for (size_t i = 0; i < piped_command.commands.size(); i++) + { + if (child_pids[i] == 0) + continue; + + int status = 0; + if (waitpid(child_pids[i], &status, 0) == -1) + perror("waitpid"); + + if (WIFEXITED(status)) + child_codes[i] = WEXITSTATUS(status); + else if (WIFSIGNALED(status)) + child_codes[i] = 128 + WTERMSIG(status); + else + ASSERT_NOT_REACHED(); + } + + if (isatty(STDIN_FILENO) && tcsetpgrp(0, getpgrp()) == -1) + perror("tcsetpgrp"); + m_last_return_value = child_codes.back(); + + return {}; +} + +BAN::ErrorOr Execute::execute_command(const CommandTree& command_tree) +{ + for (const auto& [command, condition] : command_tree.commands) + { + bool should_run = false; + switch (condition) + { + case ConditionalCommand::Condition::Always: + should_run = true; + break; + case ConditionalCommand::Condition::OnFailure: + should_run = (m_last_return_value != 0); + break; + case ConditionalCommand::Condition::OnSuccess: + should_run = (m_last_return_value == 0); + break; + } + + if (!should_run) + continue; + + TRY(execute_command(command)); + } + + return {}; +} + +BAN::ErrorOr Execute::source_script(BAN::StringView path) +{ + BAN::Vector script_lines; + + { + FILE* fp = fopen(path.data(), "r"); + if (fp == nullptr) + return BAN::Error::from_errno(errno); + + BAN::String current; + char temp_buffer[128]; + while (fgets(temp_buffer, sizeof(temp_buffer), fp)) + { + TRY(current.append(temp_buffer)); + if (current.back() != '\n') + continue; + current.pop_back(); + + if (!current.empty()) + TRY(script_lines.push_back(BAN::move(current))); + current.clear(); + } + + if (!current.empty()) + TRY(script_lines.push_back(BAN::move(current))); + + fclose(fp); + } + + size_t index = 0; + TokenParser parser( + [&](BAN::Optional) -> BAN::Optional + { + if (index >= script_lines.size()) + return {}; + return script_lines[index++]; + } + ); + if (!parser.main_loop(true)) + return BAN::Error::from_literal("oop"); + return {}; +} diff --git a/userspace/programs/Shell/Execute.h b/userspace/programs/Shell/Execute.h new file mode 100644 index 00000000..147a400e --- /dev/null +++ b/userspace/programs/Shell/Execute.h @@ -0,0 +1,53 @@ +#pragma once + +#include "Builtin.h" +#include "CommandTypes.h" + +#include + +class Execute +{ + BAN_NON_COPYABLE(Execute); + BAN_NON_MOVABLE(Execute); +public: + Execute() = default; + + BAN::ErrorOr execute_command_sync(BAN::Span arguments, int fd_in, int fd_out); + BAN::ErrorOr execute_command(const SingleCommand&, int fd_in, int fd_out, bool background, pid_t pgrp = 0); + BAN::ErrorOr execute_command(const PipedCommand&); + BAN::ErrorOr execute_command(const CommandTree&); + + BAN::ErrorOr source_script(BAN::StringView path); + + int last_background_pid() const { return m_last_background_pid; } + int last_return_value() const { return m_last_return_value; } + +private: + struct InternalCommand + { + enum class Type + { + Builtin, + External, + }; + + BAN::Variant command; + BAN::Span arguments; + int fd_in; + int fd_out; + bool background; + pid_t pgrp; + }; + + struct ExecuteResult + { + pid_t pid; + int exit_code; + }; + + BAN::ErrorOr execute_command_no_wait(const InternalCommand& command); + +private: + int m_last_background_pid { 0 }; + int m_last_return_value { 0 }; +}; diff --git a/userspace/programs/Shell/Input.cpp b/userspace/programs/Shell/Input.cpp new file mode 100644 index 00000000..51cee71a --- /dev/null +++ b/userspace/programs/Shell/Input.cpp @@ -0,0 +1,682 @@ +#include "Alias.h" +#include "Builtin.h" +#include "Input.h" + +#include +#include + +#include +#include +#include +#include +#include + +static struct termios s_original_termios; +static struct termios s_raw_termios; +static bool s_termios_initialized { false }; + +static BAN::Vector list_matching_entries(BAN::StringView path, BAN::StringView start, bool require_executable) +{ + ASSERT(path.size() < PATH_MAX); + + char path_cstr[PATH_MAX]; + memcpy(path_cstr, path.data(), path.size()); + path_cstr[path.size()] = '\0'; + + DIR* dirp = opendir(path_cstr); + if (dirp == nullptr) + return {}; + + BAN::Vector result; + + dirent* entry; + while ((entry = readdir(dirp))) + { + if (entry->d_name[0] == '.' && !start.starts_with("."_sv)) + continue; + if (strncmp(entry->d_name, start.data(), start.size())) + continue; + + struct stat st; + if (fstatat(dirfd(dirp), entry->d_name, &st, 0)) + continue; + + if (require_executable) + { + if (S_ISDIR(st.st_mode)) + continue; + if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXUSR))) + continue; + } + + MUST(result.emplace_back(entry->d_name + start.size())); + if (S_ISDIR(st.st_mode)) + MUST(result.back().push_back('/')); + } + + closedir(dirp); + + return BAN::move(result); +} + +struct TabCompletion +{ + bool should_escape_spaces { false }; + BAN::StringView prefix; + BAN::Vector completions; +}; + +static TabCompletion list_tab_completion_entries(BAN::StringView current_input) +{ + enum class CompletionType + { + Command, + File, + }; + + BAN::StringView prefix = current_input; + BAN::String last_argument; + CompletionType completion_type = CompletionType::Command; + + bool should_escape_spaces = true; + for (size_t i = 0; i < current_input.size(); i++) + { + if (current_input[i] == '\\') + { + i++; + if (i < current_input.size()) + MUST(last_argument.push_back(current_input[i])); + } + else if (isspace(current_input[i]) || current_input[i] == ';' || current_input[i] == '|' || current_input.substring(i).starts_with("&&"_sv)) + { + if (!isspace(current_input[i])) + completion_type = CompletionType::Command; + else if (!last_argument.empty()) + completion_type = CompletionType::File; + if (auto rest = current_input.substring(i); rest.starts_with("||"_sv) || rest.starts_with("&&"_sv)) + i++; + prefix = current_input.substring(i + 1); + last_argument.clear(); + should_escape_spaces = true; + } + else if (current_input[i] == '\'' || current_input[i] == '"') + { + const char quote_type = current_input[i++]; + while (i < current_input.size() && current_input[i] != quote_type) + MUST(last_argument.push_back(current_input[i++])); + should_escape_spaces = false; + } + else + { + MUST(last_argument.push_back(current_input[i])); + } + } + + if (last_argument.sv().contains('/')) + completion_type = CompletionType::File; + + BAN::Vector result; + switch (completion_type) + { + case CompletionType::Command: + { + const char* path_env = getenv("PATH"); + if (path_env) + { + auto splitted_path_env = MUST(BAN::StringView(path_env).split(':')); + for (auto path : splitted_path_env) + { + auto matching_entries = list_matching_entries(path, last_argument, true); + MUST(result.reserve(result.size() + matching_entries.size())); + for (auto&& entry : matching_entries) + MUST(result.push_back(BAN::move(entry))); + } + } + + Builtin::get().for_each_builtin( + [&](BAN::StringView name, const Builtin::BuiltinCommand&) -> BAN::Iteration + { + if (name.starts_with(last_argument)) + MUST(result.emplace_back(name.substring(last_argument.size()))); + return BAN::Iteration::Continue; + } + ); + + Alias::get().for_each_alias( + [&](BAN::StringView name, BAN::StringView) -> BAN::Iteration + { + if (name.starts_with(last_argument)) + MUST(result.emplace_back(name.substring(last_argument.size()))); + return BAN::Iteration::Continue; + } + ); + + break; + } + case CompletionType::File: + { + BAN::String dir_path; + if (last_argument.sv().starts_with("/"_sv)) + MUST(dir_path.push_back('/')); + else + { + char cwd_buffer[PATH_MAX]; + if (getcwd(cwd_buffer, sizeof(cwd_buffer)) == nullptr) + return {}; + MUST(dir_path.reserve(strlen(cwd_buffer) + 1)); + MUST(dir_path.append(cwd_buffer)); + MUST(dir_path.push_back('/')); + } + + auto match_against = last_argument.sv(); + if (auto idx = match_against.rfind('/'); idx.has_value()) + { + MUST(dir_path.append(match_against.substring(0, idx.value()))); + match_against = match_against.substring(idx.value() + 1); + } + + result = list_matching_entries(dir_path, match_against, false); + + break; + } + } + + if (auto idx = prefix.rfind('/'); idx.has_value()) + prefix = prefix.substring(idx.value() + 1); + + return { should_escape_spaces, prefix, BAN::move(result) }; +} + +static int character_length(BAN::StringView prompt) +{ + int length { 0 }; + bool in_escape { false }; + for (char c : prompt) + { + if (in_escape) + { + if (isalpha(c)) + in_escape = false; + } + else + { + if (c == '\e') + in_escape = true; + else if (((uint8_t)c & 0xC0) != 0x80) + length++; + } + } + return length; +} + +BAN::String Input::parse_ps1_prompt() +{ + const char* raw_prompt = getenv("PS1"); + if (raw_prompt == nullptr) + return "$ "_sv; + + BAN::String prompt; + for (int i = 0; raw_prompt[i]; i++) + { + char ch = raw_prompt[i]; + if (ch == '\\') + { + switch (raw_prompt[++i]) + { + case 'e': + MUST(prompt.push_back('\e')); + break; + case 'n': + MUST(prompt.push_back('\n')); + break; + case '\\': + MUST(prompt.push_back('\\')); + break; + case '~': + { + char buffer[256]; + if (getcwd(buffer, sizeof(buffer)) == nullptr) + strcpy(buffer, strerrorname_np(errno)); + + const char* home = getenv("HOME"); + size_t home_len = home ? strlen(home) : 0; + if (home && strncmp(buffer, home, home_len) == 0) + { + MUST(prompt.push_back('~')); + MUST(prompt.append(buffer + home_len)); + } + else + { + MUST(prompt.append(buffer)); + } + + break; + } + case 'u': + { + static char* username = nullptr; + if (username == nullptr) + { + auto* passwd = getpwuid(geteuid()); + if (passwd == nullptr) + break; + username = new char[strlen(passwd->pw_name) + 1]; + strcpy(username, passwd->pw_name); + endpwent(); + } + MUST(prompt.append(username)); + break; + } + case 'h': + { + MUST(prompt.append(m_hostname)); + break; + } + case '\0': + MUST(prompt.push_back('\\')); + break; + default: + MUST(prompt.push_back('\\')); + MUST(prompt.push_back(*raw_prompt)); + break; + } + } + else + { + MUST(prompt.push_back(ch)); + } + } + + return prompt; +} + +BAN::Optional Input::get_input(BAN::Optional custom_prompt) +{ + tcsetattr(0, TCSANOW, &s_raw_termios); + BAN::ScopeGuard _([] { tcsetattr(0, TCSANOW, &s_original_termios); }); + + BAN::String ps1_prompt; + if (!custom_prompt.has_value()) + ps1_prompt = parse_ps1_prompt(); + + const auto print_prompt = + [&]() + { + if (custom_prompt.has_value()) + printf("%.*s", (int)custom_prompt->size(), custom_prompt->data()); + else + printf("%.*s", (int)ps1_prompt.size(), ps1_prompt.data()); + }; + const auto prompt_length = + [&]() -> int + { + if (custom_prompt.has_value()) + return custom_prompt->size(); + return character_length(ps1_prompt); + }; + + print_prompt(); + fflush(stdout); + + while (true) + { + int chi = getchar(); + if (chi == EOF) + { + if (errno != EINTR) + { + perror("getchar"); + exit(1); + } + + clearerr(stdin); + m_buffers = m_history; + MUST(m_buffers.emplace_back(""_sv)); + m_buffer_index = m_buffers.size() - 1; + m_buffer_col = 0; + putchar('\n'); + print_prompt(); + fflush(stdout); + continue; + } + + uint8_t ch = chi; + if (ch != '\t') + { + m_tab_completions.clear(); + m_tab_index.clear(); + } + + if (m_waiting_utf8 > 0) + { + m_waiting_utf8--; + + ASSERT((ch & 0xC0) == 0x80); + + putchar(ch); + MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++)); + if (m_waiting_utf8 == 0) + { + printf("\e[s%s\e[u", m_buffers[m_buffer_index].data() + m_buffer_col); + fflush(stdout); + } + continue; + } + else if (ch & 0x80) + { + if ((ch & 0xE0) == 0xC0) + m_waiting_utf8 = 1; + else if ((ch & 0xF0) == 0xE0) + m_waiting_utf8 = 2; + else if ((ch & 0xF8) == 0xF0) + m_waiting_utf8 = 3; + else + ASSERT_NOT_REACHED(); + + putchar(ch); + MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++)); + continue; + } + + switch (ch) + { + case '\e': + { + ch = getchar(); + if (ch != '[') + break; + ch = getchar(); + + int value = 0; + while (isdigit(ch)) + { + value = (value * 10) + (ch - '0'); + ch = getchar(); + } + + switch (ch) + { + case 'A': + if (m_buffer_index > 0) + { + m_buffer_index--; + m_buffer_col = m_buffers[m_buffer_index].size(); + printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data()); + fflush(stdout); + } + break; + case 'B': + if (m_buffer_index < m_buffers.size() - 1) + { + m_buffer_index++; + m_buffer_col = m_buffers[m_buffer_index].size(); + printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data()); + fflush(stdout); + } + break; + case 'C': + if (m_buffer_col < m_buffers[m_buffer_index].size()) + { + m_buffer_col++; + while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80) + m_buffer_col++; + printf("\e[C"); + fflush(stdout); + } + break; + case 'D': + if (m_buffer_col > 0) + { + while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80) + m_buffer_col--; + m_buffer_col--; + printf("\e[D"); + fflush(stdout); + } + break; + case '~': + switch (value) + { + case 3: // delete + if (m_buffer_col >= m_buffers[m_buffer_index].size()) + break; + m_buffers[m_buffer_index].remove(m_buffer_col); + while (m_buffer_col < m_buffers[m_buffer_index].size() && (m_buffers[m_buffer_index][m_buffer_col] & 0xC0) == 0x80) + m_buffers[m_buffer_index].remove(m_buffer_col); + printf("\e[s%s \e[u", m_buffers[m_buffer_index].data() + m_buffer_col); + fflush(stdout); + break; + } + break; + } + break; + } + case '\x0C': // ^L + { + int x = prompt_length() + character_length(m_buffers[m_buffer_index].sv().substring(m_buffer_col)) + 1; + printf("\e[H\e[J"); + print_prompt(); + printf("%s\e[u\e[1;%dH", m_buffers[m_buffer_index].data(), x); + fflush(stdout); + break; + } + case '\b': + if (m_buffer_col <= 0) + break; + while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80) + m_buffer_col--; + m_buffer_col--; + printf("\e[D"); + fflush(stdout); + break; + case '\x01': // ^A + m_buffer_col = 0; + printf("\e[%dG", prompt_length() + 1); + fflush(stdout); + break; + case '\x03': // ^C + putchar('\n'); + print_prompt(); + fflush(stdout); + m_buffers[m_buffer_index].clear(); + m_buffer_col = 0; + break; + case '\x04': // ^D + if (!m_buffers[m_buffer_index].empty()) + break; + putchar('\n'); + return {}; + case '\x7F': // backspace + if (m_buffer_col <= 0) + break; + while ((m_buffers[m_buffer_index][m_buffer_col - 1] & 0xC0) == 0x80) + m_buffers[m_buffer_index].remove(--m_buffer_col); + m_buffers[m_buffer_index].remove(--m_buffer_col); + printf("\b\e[s%s \e[u", m_buffers[m_buffer_index].data() + m_buffer_col); + fflush(stdout); + break; + case '\n': + { + BAN::String input; + MUST(input.append(m_buffers[m_buffer_index])); + + if (!m_buffers[m_buffer_index].empty()) + { + MUST(m_history.push_back(m_buffers[m_buffer_index])); + m_buffers = m_history; + MUST(m_buffers.emplace_back(""_sv)); + } + m_buffer_index = m_buffers.size() - 1; + m_buffer_col = 0; + putchar('\n'); + + return input; + } + case '\t': + { + // FIXME: tab completion is really hacked together currently. + // this should ask token parser about the current parse state + // and do completions based on that, not raw strings + + if (m_buffer_col != m_buffers[m_buffer_index].size()) + continue; + + if (m_tab_completions.has_value()) + { + ASSERT(m_tab_completions->size() >= 2); + + if (!m_tab_index.has_value()) + m_tab_index = 0; + else + { + MUST(m_buffers[m_buffer_index].resize(m_tab_completion_keep)); + m_buffer_col = m_tab_completion_keep; + *m_tab_index = (*m_tab_index + 1) % m_tab_completions->size(); + } + + MUST(m_buffers[m_buffer_index].append(m_tab_completions.value()[*m_tab_index])); + m_buffer_col += m_tab_completions.value()[*m_tab_index].size(); + + printf("\e[%dG%s\e[K", prompt_length() + 1, m_buffers[m_buffer_index].data()); + fflush(stdout); + + break; + } + + m_tab_completion_keep = m_buffer_col; + auto [should_escape_spaces, prefix, completions] = list_tab_completion_entries(m_buffers[m_buffer_index].sv().substring(0, m_tab_completion_keep)); + + BAN::sort::sort(completions.begin(), completions.end(), + [](const BAN::String& a, const BAN::String& b) { + if (auto cmp = strcmp(a.data(), b.data())) + return cmp < 0; + return a.size() < b.size(); + } + ); + + for (size_t i = 1; i < completions.size();) + { + if (completions[i - 1] == completions[i]) + completions.remove(i); + else + i++; + } + + if (completions.empty()) + break; + + size_t all_match_len = 0; + for (;;) + { + if (completions.front().size() <= all_match_len) + break; + const char target = completions.front()[all_match_len]; + + bool all_matched = true; + for (const auto& completion : completions) + { + if (completion.size() > all_match_len && completion[all_match_len] == target) + continue; + all_matched = false; + break; + } + + if (!all_matched) + break; + all_match_len++; + } + + if (all_match_len) + { + auto completion = completions.front().sv().substring(0, all_match_len); + + BAN::String temp_escaped; + if (should_escape_spaces) + { + MUST(temp_escaped.append(completion)); + for (size_t i = 0; i < temp_escaped.size(); i++) + { + if (!isspace(temp_escaped[i])) + continue; + MUST(temp_escaped.insert('\\', i)); + i++; + } + completion = temp_escaped.sv(); + + if (!m_buffers[m_buffer_index].empty() && m_buffers[m_buffer_index].back() == '\\' && completion.front() == '\\') + completion = completion.substring(1); + } + + m_buffer_col += completion.size(); + MUST(m_buffers[m_buffer_index].append(completion)); + printf("%.*s", (int)completion.size(), completion.data()); + fflush(stdout); + break; + } + + if (completions.size() == 1) + { + ASSERT(all_match_len == completions.front().size()); + break; + } + + printf("\n"); + for (size_t i = 0; i < completions.size(); i++) + { + if (i != 0) + printf(" "); + const char* format = completions[i].sv().contains(' ') ? "'%.*s%s'" : "%.*s%s"; + printf(format, (int)prefix.size(), prefix.data(), completions[i].data()); + } + printf("\n"); + print_prompt(); + printf("%s", m_buffers[m_buffer_index].data()); + fflush(stdout); + + if (should_escape_spaces) + { + for (auto& completion : completions) + { + for (size_t i = 0; i < completion.size(); i++) + { + if (!isspace(completion[i])) + continue; + MUST(completion.insert('\\', i)); + i++; + } + } + } + + m_tab_completion_keep = m_buffer_col; + m_tab_completions = BAN::move(completions); + + break; + } + default: + MUST(m_buffers[m_buffer_index].insert(ch, m_buffer_col++)); + if (m_buffer_col == m_buffers[m_buffer_index].size()) + putchar(ch); + else + printf("%c\e[s%s\e[u", ch, m_buffers[m_buffer_index].data() + m_buffer_col); + fflush(stdout); + break; + } + } +} + +Input::Input() +{ + if (!s_termios_initialized) + { + tcgetattr(0, &s_original_termios); + s_raw_termios = s_original_termios; + s_raw_termios.c_lflag &= ~(ECHO | ICANON); + atexit([] { tcsetattr(0, TCSANOW, &s_original_termios); }); + s_termios_initialized = true; + } + + char hostname_buffer[HOST_NAME_MAX]; + if (gethostname(hostname_buffer, sizeof(hostname_buffer)) == 0) { + MUST(m_hostname.append(hostname_buffer)); + } +} diff --git a/userspace/programs/Shell/Input.h b/userspace/programs/Shell/Input.h new file mode 100644 index 00000000..47e84d07 --- /dev/null +++ b/userspace/programs/Shell/Input.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include +#include + +#include +#include + +class Input +{ + BAN_NON_COPYABLE(Input); + BAN_NON_MOVABLE(Input); +public: + Input(); + + BAN::Optional get_input(BAN::Optional custom_prompt); + +private: + BAN::String parse_ps1_prompt(); + +private: + BAN::String m_hostname; + + BAN::Vector m_buffers { 1, ""_sv }; + BAN::Vector m_history; + size_t m_buffer_index { 0 }; + size_t m_buffer_col { 0 }; + + BAN::Optional m_tab_index; + BAN::Optional> m_tab_completions; + size_t m_tab_completion_keep { 0 }; + + int m_waiting_utf8 { 0 }; +}; diff --git a/userspace/programs/Shell/Lexer.cpp b/userspace/programs/Shell/Lexer.cpp new file mode 100644 index 00000000..a87c5715 --- /dev/null +++ b/userspace/programs/Shell/Lexer.cpp @@ -0,0 +1,79 @@ +#include "Lexer.h" + +BAN::ErrorOr> tokenize_string(BAN::StringView string) +{ + { + size_t i = 0; + while (i < string.size() && isspace(string[i])) + i++; + if (i >= string.size() || string[i] == '#') + return BAN::Vector(); + } + + constexpr auto char_to_token_type = + [](char c) -> BAN::Optional + { + switch (c) + { + case '&': return Token::Type::Ampersand; + case '\\': return Token::Type::Backslash; + case '}': return Token::Type::CloseCurly; + case ')': return Token::Type::CloseParen; + case '$': return Token::Type::Dollar; + case '"': return Token::Type::DoubleQuote; + case '{': return Token::Type::OpenCurly; + case '(': return Token::Type::OpenParen; + case '|': return Token::Type::Pipe; + case ';': return Token::Type::Semicolon; + case '\'': return Token::Type::SingleQuote; + } + return {}; + }; + + BAN::Vector result; + + BAN::String current_string; + + const auto append_current_if_exists = + [&]() -> BAN::ErrorOr + { + if (current_string.empty()) + return {}; + TRY(result.emplace_back(Token::Type::String, BAN::move(current_string))); + current_string = BAN::String(); + return {}; + }; + + while (!string.empty()) + { + if (isspace(string.front())) + { + TRY(append_current_if_exists()); + + size_t whitespace_len = 1; + while (whitespace_len < string.size() && isspace(string[whitespace_len])) + whitespace_len++; + + BAN::String whitespace_str; + TRY(whitespace_str.append(string.substring(0, whitespace_len))); + TRY(result.emplace_back(Token::Type::Whitespace, BAN::move(whitespace_str))); + string = string.substring(whitespace_len); + continue; + } + + if (auto token_type = char_to_token_type(string.front()); token_type.has_value()) + { + TRY(append_current_if_exists()); + TRY(result.emplace_back(token_type.value())); + + string = string.substring(1); + continue; + } + + TRY(current_string.push_back(string.front())); + string = string.substring(1); + } + + TRY(append_current_if_exists()); + return result; +} diff --git a/userspace/programs/Shell/Lexer.h b/userspace/programs/Shell/Lexer.h new file mode 100644 index 00000000..2f105c58 --- /dev/null +++ b/userspace/programs/Shell/Lexer.h @@ -0,0 +1,5 @@ +#pragma once + +#include "Token.h" + +BAN::ErrorOr> tokenize_string(BAN::StringView); diff --git a/userspace/programs/Shell/Token.cpp b/userspace/programs/Shell/Token.cpp new file mode 100644 index 00000000..77c07185 --- /dev/null +++ b/userspace/programs/Shell/Token.cpp @@ -0,0 +1,52 @@ +#include "Token.h" + +#include + +void Token::debug_dump() const +{ + switch (type()) + { + case Type::EOF_: + dwarnln("Token "); + break; + case Type::Ampersand: + dprintln("Token "); + break; + case Type::Backslash: + dprintln("Token "); + break; + case Type::CloseCurly: + dprintln("Token "); + break; + case Type::CloseParen: + dprintln("Token "); + break; + case Type::Dollar: + dprintln("Token "); + break; + case Type::DoubleQuote: + dprintln("Token "); + break; + case Type::OpenCurly: + dprintln("Token "); + break; + case Type::OpenParen: + dprintln("Token "); + break; + case Type::Pipe: + dprintln("Token "); + break; + case Type::Semicolon: + dprintln("Token "); + break; + case Type::SingleQuote: + dprintln("Token "); + break; + case Type::String: + dprintln("Token ", string()); + break; + case Type::Whitespace: + dprintln("Token ", string()); + break; + } +} diff --git a/userspace/programs/Shell/Token.h b/userspace/programs/Shell/Token.h new file mode 100644 index 00000000..aa59d3d5 --- /dev/null +++ b/userspace/programs/Shell/Token.h @@ -0,0 +1,84 @@ +#pragma once + +#include +#include +#include + +#include + +struct Token +{ +public: + enum class Type + { + EOF_, + + Ampersand, + Backslash, + CloseCurly, + CloseParen, + Dollar, + DoubleQuote, + OpenCurly, + OpenParen, + Pipe, + Semicolon, + SingleQuote, + String, + Whitespace, + }; + + Token(Type type) + : m_type(type) + {} + + Token(Type type, BAN::String&& string) + : m_type(type) + { + ASSERT(type == Type::String || type == Type::Whitespace); + if (type == Type::Whitespace) + for (char c : string) + ASSERT(isspace(c)); + m_value = BAN::move(string); + } + + Token(Token&& other) + { + m_type = other.m_type; + m_value = other.m_value; + other.clear(); + } + + Token& operator=(Token&& other) + { + m_type = other.m_type; + m_value = other.m_value; + other.clear(); + return *this; + } + + Token(const Token&) = delete; + Token& operator=(const Token&) = delete; + + ~Token() + { + clear(); + } + + Type type() const { return m_type; } + + BAN::String& string() { ASSERT(m_type == Type::String || m_type == Type::Whitespace); return m_value; } + const BAN::String& string() const { ASSERT(m_type == Type::String || m_type == Type::Whitespace); return m_value; } + + void clear() + { + m_type = Type::EOF_; + m_value.clear(); + } + + void debug_dump() const; + +private: + Type m_type { Type::EOF_ }; + BAN::String m_value; +}; diff --git a/userspace/programs/Shell/TokenParser.cpp b/userspace/programs/Shell/TokenParser.cpp new file mode 100644 index 00000000..c15e6c57 --- /dev/null +++ b/userspace/programs/Shell/TokenParser.cpp @@ -0,0 +1,660 @@ +#include "Alias.h" +#include "Execute.h" +#include "Lexer.h" +#include "TokenParser.h" + +#include + +#include + +static constexpr bool can_parse_argument_from_token_type(Token::Type token_type) +{ + switch (token_type) + { + case Token::Type::Whitespace: + ASSERT_NOT_REACHED(); + case Token::Type::EOF_: + case Token::Type::Ampersand: + case Token::Type::CloseCurly: + case Token::Type::CloseParen: + case Token::Type::OpenCurly: + case Token::Type::OpenParen: + case Token::Type::Pipe: + case Token::Type::Semicolon: + return false; + case Token::Type::Backslash: + case Token::Type::Dollar: + case Token::Type::DoubleQuote: + case Token::Type::SingleQuote: + case Token::Type::String: + return true; + } + ASSERT_NOT_REACHED(); +} + +static constexpr char token_type_to_single_character(Token::Type type) +{ + switch (type) + { + case Token::Type::Ampersand: return '&'; + case Token::Type::Backslash: return '\\'; + case Token::Type::CloseCurly: return '}'; + case Token::Type::CloseParen: return ')'; + case Token::Type::Dollar: return '$'; + case Token::Type::DoubleQuote: return '"'; + case Token::Type::OpenCurly: return '{'; + case Token::Type::OpenParen: return '('; + case Token::Type::Pipe: return '|'; + case Token::Type::Semicolon: return ';'; + case Token::Type::SingleQuote: return '\''; + + case Token::Type::String: ASSERT_NOT_REACHED(); + case Token::Type::Whitespace: ASSERT_NOT_REACHED(); + case Token::Type::EOF_: ASSERT_NOT_REACHED(); + } + ASSERT_NOT_REACHED(); +}; + +static constexpr BAN::Error unexpected_token_error(Token::Type type) +{ + switch (type) + { + case Token::Type::EOF_: + return BAN::Error::from_literal("unexpected EOF"); + case Token::Type::Ampersand: + return BAN::Error::from_literal("unexpected token &"); + case Token::Type::Backslash: + return BAN::Error::from_literal("unexpected token \\"); + case Token::Type::CloseCurly: + return BAN::Error::from_literal("unexpected token }"); + case Token::Type::CloseParen: + return BAN::Error::from_literal("unexpected token )"); + case Token::Type::Dollar: + return BAN::Error::from_literal("unexpected token $"); + case Token::Type::DoubleQuote: + return BAN::Error::from_literal("unexpected token \""); + case Token::Type::OpenCurly: + return BAN::Error::from_literal("unexpected token {"); + case Token::Type::Pipe: + return BAN::Error::from_literal("unexpected token |"); + case Token::Type::OpenParen: + return BAN::Error::from_literal("unexpected token ("); + case Token::Type::Semicolon: + return BAN::Error::from_literal("unexpected token ;"); + case Token::Type::SingleQuote: + return BAN::Error::from_literal("unexpected token '"); + case Token::Type::String: + return BAN::Error::from_literal("unexpected token "); + case Token::Type::Whitespace: + return BAN::Error::from_literal("unexpected token "); + } + ASSERT_NOT_REACHED(); +} + +const Token& TokenParser::peek_token() const +{ + if (m_token_stream.empty()) + return m_eof_token; + + ASSERT(!m_token_stream.front().empty()); + return m_token_stream.front().back(); +} + +Token TokenParser::read_token() +{ + if (m_token_stream.empty()) + return Token(Token::Type::EOF_); + + ASSERT(!m_token_stream.front().empty()); + + auto token = BAN::move(m_token_stream.front().back()); + m_token_stream.front().pop_back(); + if (m_token_stream.front().empty()) + m_token_stream.pop(); + + return token; +} + +void TokenParser::consume_token() +{ + ASSERT(!m_token_stream.empty()); + ASSERT(!m_token_stream.front().empty()); + + m_token_stream.front().pop_back(); + if (m_token_stream.front().empty()) + m_token_stream.pop(); +} + +BAN::ErrorOr TokenParser::unget_token(Token&& token) +{ + if (m_token_stream.empty()) + TRY(m_token_stream.emplace()); + TRY(m_token_stream.front().push_back(BAN::move(token))); + return {}; +} + +BAN::ErrorOr TokenParser::feed_tokens(BAN::Vector&& tokens) +{ + if (tokens.empty()) + return {}; + for (size_t i = 0; i < tokens.size() / 2; i++) + BAN::swap(tokens[i], tokens[tokens.size() - i - 1]); + TRY(m_token_stream.push(BAN::move(tokens))); + return {}; +} + +BAN::ErrorOr TokenParser::ask_input_tokens(BAN::StringView prompt, bool add_newline) +{ + if (!m_input_function) + return unexpected_token_error(Token::Type::EOF_); + + auto opt_input = m_input_function(prompt); + if (!opt_input.has_value()) + return unexpected_token_error(Token::Type::EOF_); + + auto tokenized = TRY(tokenize_string(opt_input.release_value())); + TRY(feed_tokens(BAN::move(tokenized))); + + if (add_newline) + { + auto newline_token = Token(Token::Type::String); + TRY(newline_token.string().push_back('\n')); + TRY(unget_token(BAN::move(newline_token))); + } + + return {}; +} + +BAN::ErrorOr TokenParser::parse_backslash(bool is_quoted) +{ + ASSERT(read_token().type() == Token::Type::Backslash); + + auto token = read_token(); + + FixedString fixed_string; + switch (token.type()) + { + case Token::Type::EOF_: + TRY(ask_input_tokens("> ", false)); + TRY(unget_token(Token(Token::Type::Backslash))); + return parse_backslash(is_quoted); + case Token::Type::Ampersand: + case Token::Type::Backslash: + case Token::Type::CloseCurly: + case Token::Type::CloseParen: + case Token::Type::Dollar: + case Token::Type::DoubleQuote: + case Token::Type::OpenCurly: + case Token::Type::OpenParen: + case Token::Type::Pipe: + case Token::Type::Semicolon: + case Token::Type::SingleQuote: + TRY(fixed_string.value.push_back(token_type_to_single_character(token.type()))); + break; + case Token::Type::Whitespace: + case Token::Type::String: + { + ASSERT(!token.string().empty()); + if (is_quoted) + TRY(fixed_string.value.push_back('\\')); + TRY(fixed_string.value.push_back(token.string().front())); + if (token.string().size() > 1) + { + token.string().remove(0); + TRY(unget_token(BAN::move(token))); + } + break; + } + } + + return CommandArgument::ArgumentPart(BAN::move(fixed_string)); +} + +BAN::ErrorOr TokenParser::parse_dollar() +{ + ASSERT(read_token().type() == Token::Type::Dollar); + + const auto parse_dollar_string = + [](BAN::String& string) -> BAN::ErrorOr + { + if (string.empty()) + return CommandArgument::ArgumentPart(EnvironmentVariable()); + if (isdigit(string.front())) + { + size_t number_len = 1; + while (number_len < string.size() && isdigit(string[number_len])) + number_len++; + + BuiltinVariable builtin; + TRY(builtin.value.append(string.sv().substring(0, number_len))); + for (size_t i = 0; i < number_len; i++) + string.remove(0); + + return CommandArgument::ArgumentPart(BAN::move(builtin)); + } + switch (string.front()) + { + case '$': + case '_': + case '@': + case '*': + case '#': + case '-': + case '?': + case '!': + { + BuiltinVariable builtin; + TRY(builtin.value.push_back(string.front())); + string.remove(0); + return CommandArgument::ArgumentPart(BAN::move(builtin)); + } + } + if (isalpha(string.front())) + { + size_t env_len = 1; + while (env_len < string.size() && (isalnum(string[env_len]) || string[env_len] == '_')) + env_len++; + + EnvironmentVariable environment; + TRY(environment.value.append(string.sv().substring(0, env_len))); + for (size_t i = 0; i < env_len; i++) + string.remove(0); + + return CommandArgument::ArgumentPart(BAN::move(environment)); + } + + FixedString fixed_string; + TRY(fixed_string.value.push_back('$')); + return CommandArgument::ArgumentPart(BAN::move(fixed_string)); + }; + + switch (peek_token().type()) + { + case Token::Type::EOF_: + case Token::Type::Ampersand: + case Token::Type::Backslash: + case Token::Type::CloseCurly: + case Token::Type::CloseParen: + case Token::Type::DoubleQuote: + case Token::Type::Pipe: + case Token::Type::Semicolon: + case Token::Type::SingleQuote: + case Token::Type::Whitespace: + { + FixedString fixed_string; + TRY(fixed_string.value.push_back('$')); + return CommandArgument::ArgumentPart(BAN::move(fixed_string)); + } + case Token::Type::Dollar: + { + consume_token(); + + BuiltinVariable builtin_variable; + TRY(builtin_variable.value.push_back('$')); + return CommandArgument::ArgumentPart(BAN::move(builtin_variable)); + } + case Token::Type::OpenCurly: + { + consume_token(); + + BAN::String input; + for (auto token = read_token(); token.type() != Token::Type::CloseCurly; token = read_token()) + { + if (token.type() == Token::Type::EOF_) + return BAN::Error::from_literal("missing closing curly brace"); + + if (token.type() == Token::Type::String) + TRY(input.append(token.string())); + else if (token.type() == Token::Type::Dollar) + TRY(input.push_back('$')); + else + return BAN::Error::from_literal("expected closing curly brace"); + } + + auto result = TRY(parse_dollar_string(input)); + if (!input.empty()) + return BAN::Error::from_literal("bad substitution"); + return result; + } + case Token::Type::OpenParen: + { + consume_token(); + + auto command_tree = TRY(parse_command_tree()); + if (auto token = read_token(); token.type() != Token::Type::CloseParen) + return BAN::Error::from_literal("expected closing parenthesis"); + return CommandArgument::ArgumentPart(BAN::move(command_tree)); + } + case Token::Type::String: + { + auto token = read_token(); + + auto string = BAN::move(token.string()); + auto result = TRY(parse_dollar_string(string)); + if (!string.empty()) + { + auto remaining = Token(Token::Type::String); + remaining.string() = BAN::move(string); + TRY(unget_token(BAN::move(remaining))); + } + return result; + } + } + + ASSERT_NOT_REACHED(); +} + +BAN::ErrorOr TokenParser::parse_single_quote() +{ + ASSERT(read_token().type() == Token::Type::SingleQuote); + + FixedString fixed_string; + for (auto token = read_token();; token = read_token()) + { + switch (token.type()) + { + case Token::Type::EOF_: + TRY(ask_input_tokens("quote> ", true)); + break; + case Token::Type::Ampersand: + case Token::Type::Backslash: + case Token::Type::CloseCurly: + case Token::Type::CloseParen: + case Token::Type::Dollar: + case Token::Type::DoubleQuote: + case Token::Type::OpenCurly: + case Token::Type::OpenParen: + case Token::Type::Pipe: + case Token::Type::Semicolon: + TRY(fixed_string.value.push_back(token_type_to_single_character(token.type()))); + break; + case Token::Type::String: + case Token::Type::Whitespace: + TRY(fixed_string.value.append(token.string())); + break; + case Token::Type::SingleQuote: + return CommandArgument::ArgumentPart(BAN::move(fixed_string)); + } + } +} + +BAN::ErrorOr TokenParser::parse_argument() +{ + const auto token_type = peek_token().type(); + if (!can_parse_argument_from_token_type(token_type)) + return unexpected_token_error(token_type); + + CommandArgument result; + + bool is_in_double_quotes = false; + for (auto token_type = peek_token().type(); token_type != Token::Type::EOF_ || is_in_double_quotes; token_type = peek_token().type()) + { + CommandArgument::ArgumentPart new_part; + switch (token_type) + { + case Token::Type::EOF_: + ASSERT(is_in_double_quotes); + TRY(ask_input_tokens("dquote> ", true)); + new_part = FixedString(); // do continue + break; + case Token::Type::Ampersand: + case Token::Type::CloseCurly: + case Token::Type::CloseParen: + case Token::Type::OpenCurly: + case Token::Type::OpenParen: + case Token::Type::Pipe: + case Token::Type::Semicolon: + if (is_in_double_quotes) + { + new_part = FixedString(); + TRY(new_part.get().value.push_back(token_type_to_single_character(token_type))); + consume_token(); + } + break; + case Token::Type::Whitespace: + if (is_in_double_quotes) + { + new_part = FixedString(); + TRY(new_part.get().value.append(peek_token().string())); + consume_token(); + } + break; + case Token::Type::Backslash: + new_part = TRY(parse_backslash(is_in_double_quotes)); + break; + case Token::Type::DoubleQuote: + is_in_double_quotes = !is_in_double_quotes; + new_part = FixedString(); // do continue + consume_token(); + break; + case Token::Type::Dollar: + new_part = TRY(parse_dollar()); + break; + case Token::Type::SingleQuote: + new_part = TRY(parse_single_quote()); + break; + case Token::Type::String: + new_part = CommandArgument::ArgumentPart(FixedString {}); + TRY(new_part.get().value.append(peek_token().string())); + consume_token(); + break; + } + + if (!new_part.has_value()) + break; + + if (new_part.has()) + { + auto& fixed_string = new_part.get(); + // discard empty fixed strings + if (fixed_string.value.empty()) + continue; + // combine consecutive fixed strings + if (!result.parts.empty() && result.parts.back().has()) + { + TRY(result.parts.back().get().value.append(fixed_string.value)); + continue; + } + } + + TRY(result.parts.push_back(BAN::move(new_part))); + } + + return result; +} + +BAN::ErrorOr TokenParser::parse_single_command() +{ + SingleCommand result; + + BAN::HashSet used_aliases; + while (peek_token().type() == Token::Type::String) + { + auto token = read_token(); + + bool can_be_alias = false; + switch (peek_token().type()) + { + case Token::Type::EOF_: + case Token::Type::Ampersand: + case Token::Type::CloseParen: + case Token::Type::Pipe: + case Token::Type::Semicolon: + case Token::Type::Whitespace: + can_be_alias = true; + break; + case Token::Type::Backslash: + case Token::Type::CloseCurly: + case Token::Type::Dollar: + case Token::Type::DoubleQuote: + case Token::Type::OpenCurly: + case Token::Type::OpenParen: + case Token::Type::SingleQuote: + case Token::Type::String: + can_be_alias = false; + break; + } + if (!can_be_alias) + { + TRY(unget_token(BAN::move(token))); + break; + } + + if (used_aliases.contains(token.string())) + { + TRY(unget_token(BAN::move(token))); + break; + } + + auto opt_alias = Alias::get().get_alias(token.string().sv()); + if (!opt_alias.has_value()) + { + TRY(unget_token(BAN::move(token))); + break; + } + + auto tokenized_alias = TRY(tokenize_string(opt_alias.value())); + for (size_t i = tokenized_alias.size(); i > 0; i--) + TRY(unget_token(BAN::move(tokenized_alias[i - 1]))); + TRY(used_aliases.insert(TRY(BAN::String::formatted("{}", token.string())))); + } + + while (peek_token().type() != Token::Type::EOF_) + { + while (peek_token().type() == Token::Type::Whitespace) + consume_token(); + + auto argument = TRY(parse_argument()); + TRY(result.arguments.push_back(BAN::move(argument))); + + while (peek_token().type() == Token::Type::Whitespace) + consume_token(); + if (!can_parse_argument_from_token_type(peek_token().type())) + break; + } + + return result; +} + +BAN::ErrorOr TokenParser::parse_piped_command() +{ + PipedCommand result; + result.background = false; + + while (peek_token().type() != Token::Type::EOF_) + { + auto single_command = TRY(parse_single_command()); + TRY(result.commands.push_back(BAN::move(single_command))); + + const auto token_type = peek_token().type(); + if (token_type != Token::Type::Pipe && token_type != Token::Type::Ampersand) + break; + + auto temp_token = read_token(); + if (peek_token().type() == temp_token.type()) + { + TRY(unget_token(BAN::move(temp_token))); + break; + } + + if (temp_token.type() == Token::Type::Ampersand) + { + result.background = true; + break; + } + } + + return result; +} + +BAN::ErrorOr TokenParser::parse_command_tree() +{ + CommandTree result; + + auto next_condition = ConditionalCommand::Condition::Always; + while (peek_token().type() != Token::Type::EOF_) + { + ConditionalCommand conditional_command; + conditional_command.command = TRY(parse_piped_command()); + conditional_command.condition = next_condition; + TRY(result.commands.push_back(BAN::move(conditional_command))); + + while (peek_token().type() == Token::Type::Whitespace) + consume_token(); + if (peek_token().type() == Token::Type::EOF_) + break; + + bool should_break = false; + const auto token_type = peek_token().type(); + switch (token_type) + { + case Token::Type::Semicolon: + consume_token(); + next_condition = ConditionalCommand::Condition::Always; + break; + case Token::Type::Ampersand: + case Token::Type::Pipe: + consume_token(); + if (read_token().type() != token_type) + return BAN::Error::from_literal("expected double '&' or '|'"); + next_condition = (token_type == Token::Type::Ampersand) + ? ConditionalCommand::Condition::OnSuccess + : ConditionalCommand::Condition::OnFailure; + break; + default: + should_break = true; + break; + } + + if (should_break) + break; + } + + return result; +} + +BAN::ErrorOr TokenParser::run(BAN::Vector&& tokens) +{ + TRY(feed_tokens(BAN::move(tokens))); + + auto command_tree = TRY(parse_command_tree()); + + const auto token_type = peek_token().type(); + while (!m_token_stream.empty()) + m_token_stream.pop(); + + if (token_type != Token::Type::EOF_) + return unexpected_token_error(token_type); + + TRY(m_execute.execute_command(command_tree)); + return {}; +} + +bool TokenParser::main_loop(bool break_on_error) +{ + for (;;) + { + auto opt_input = m_input_function({}); + if (!opt_input.has_value()) + break; + + auto tokenized_input = tokenize_string(opt_input.release_value()); + if (tokenized_input.is_error()) + { + fprintf(stderr, "banan-sh: %s\n", tokenized_input.error().get_message()); + if (break_on_error) + return false; + continue; + } + + if (auto ret = run(tokenized_input.release_value()); ret.is_error()) + { + fprintf(stderr, "banan-sh: %s\n", ret.error().get_message()); + if (break_on_error) + return false; + continue; + } + } + + return true; +} diff --git a/userspace/programs/Shell/TokenParser.h b/userspace/programs/Shell/TokenParser.h new file mode 100644 index 00000000..e48ee2a0 --- /dev/null +++ b/userspace/programs/Shell/TokenParser.h @@ -0,0 +1,57 @@ +#pragma once + +#include "CommandTypes.h" +#include "Execute.h" +#include "Token.h" + +#include +#include +#include +#include +#include + +class TokenParser +{ + BAN_NON_COPYABLE(TokenParser); + BAN_NON_MOVABLE(TokenParser); +public: + using InputFunction = BAN::Function(BAN::Optional)>; + +public: + TokenParser(const InputFunction& input_function) + : m_input_function(input_function) + { } + + Execute& execute() { return m_execute; } + const Execute& execute() const { return m_execute; } + + [[nodiscard]] bool main_loop(bool break_on_error); + +private: + const Token& peek_token() const; + Token read_token(); + void consume_token(); + + BAN::ErrorOr feed_tokens(BAN::Vector&& tokens); + BAN::ErrorOr unget_token(Token&& token); + + BAN::ErrorOr ask_input_tokens(BAN::StringView prompt, bool add_newline); + + BAN::ErrorOr run(BAN::Vector&&); + + BAN::ErrorOr parse_backslash(bool is_quoted); + BAN::ErrorOr parse_dollar(); + BAN::ErrorOr parse_single_quote(); + BAN::ErrorOr parse_argument(); + + BAN::ErrorOr parse_single_command(); + BAN::ErrorOr parse_piped_command(); + BAN::ErrorOr parse_command_tree(); + +private: + Execute m_execute; + + Token m_eof_token { Token::Type::EOF_ }; + BAN::Queue> m_token_stream; + InputFunction m_input_function; +}; diff --git a/userspace/programs/Shell/main.cpp b/userspace/programs/Shell/main.cpp index 1f324ef6..a863aee1 100644 --- a/userspace/programs/Shell/main.cpp +++ b/userspace/programs/Shell/main.cpp @@ -1,1439 +1,46 @@ -#include -#include -#include -#include -#include -#include +#include "Builtin.h" +#include "Execute.h" +#include "Input.h" +#include "TokenParser.h" -#include -#include -#include -#include -#include -#include -#include +#include #include -#include -#include -#include #include -#define ERROR_RETURN(__msg, __ret) do { perror(__msg); return __ret; } while (false) - -extern char** environ; - -static struct termios old_termios, new_termios; - -static char s_shell_path[PATH_MAX]; -static int last_return = 0; - -static BAN::String hostname; - -struct SingleCommand -{ - BAN::Vector arguments; -}; - -struct PipedCommand -{ - bool background; - BAN::Vector commands; -}; - -struct CommandList -{ - enum class Condition - { - Always, - OnSuccess, - OnFailure, - }; - - struct Command - { - BAN::String expression; - Condition condition; - }; - BAN::Vector commands; -}; - -struct BuiltinCommand -{ - int (*function)(const SingleCommand&, FILE* fout, int fd_in, int fd_out); -}; -static BAN::HashMap s_builtin_commands; - -static BAN::HashMap s_aliases; - -static BAN::StringView strip_whitespace(BAN::StringView sv) -{ - size_t leading = 0; - while (leading < sv.size() && isspace(sv[leading])) - leading++; - sv = sv.substring(leading); - - size_t trailing = 0; - while (trailing < sv.size() && isspace(sv[sv.size() - trailing - 1])) - trailing++; - sv = sv.substring(0, sv.size() - trailing); - - return sv; -} - -static BAN::Optional parse_dollar(BAN::StringView command, size_t& i) -{ - ASSERT(command[i] == '$'); - - if (++i >= command.size()) - return BAN::String("$"_sv); - - if (command[i] == '?') - { - i++; - return MUST(BAN::String::formatted("{}", last_return)); - } - if (isalnum(command[i])) - { - size_t len = 1; - for (; i + len < command.size(); len++) - if (!isalnum(command[i + len])) - break; - BAN::String name = command.substring(i, len); - i += len - 1; - - if (const char* value = getenv(name.data())) - return BAN::String(value); - return BAN::String(); - } - else if (command[i] == '{') - { - size_t len = 1; - for (; i + len < command.size(); len++) - { - if (command[i + len] == '}') - break; - if (!isalnum(command[i + len])) - return {}; - } - - if (i + len >= command.size()) - return {}; - - BAN::String name = command.substring(i + 1, len - 1); - i += len; - - if (const char* value = getenv(name.data())) - return BAN::String(value); - return BAN::String(); - } - else if (command[i] == '[') - { - return {}; - } - else if (command[i] == '(') - { - size_t len = 1; - int count = 1; - for (; i + len < command.size(); len++) - { - if (command[i + len] == '(') - count++; - if (command[i + len] == ')') - count--; - if (count == 0) - break; - } - - if (count != 0) - return {}; - - BAN::String subcommand = command.substring(i + 1, len - 1); - - char temp[3] { '-', 'c', '\0' }; - BAN::Vector argv; - MUST(argv.push_back(s_shell_path)); - MUST(argv.push_back(temp)); - MUST(argv.push_back((char*)subcommand.data())); - MUST(argv.push_back(nullptr)); - - int fds[2]; - if (pipe(fds) == -1) - ERROR_RETURN("pipe", {}); - - pid_t pid = fork(); - if (pid == 0) - { - if (dup2(fds[1], STDOUT_FILENO) == -1) - { - perror("dup2"); - exit(1); - } - close(fds[0]); - close(fds[1]); - - execv(argv.front(), argv.data()); - perror("execv"); - exit(1); - } - if (pid == -1) - ERROR_RETURN("fork", {}); - - close(fds[1]); - - char buffer[100]; - BAN::String output; - while (ssize_t ret = read(fds[0], buffer, sizeof(buffer))) - { - if (ret == -1) - { - perror("read"); - break; - } - MUST(output.append(BAN::StringView(buffer, ret))); - } - - close(fds[0]); - - int status; - if (waitpid(pid, &status, 0) == -1) - ERROR_RETURN("waitpid", {}); - - while (!output.empty() && output.back() == '\n') - output.pop_back(); - - i += len; - return output; - } - - BAN::String temp = "$"_sv; - MUST(temp.push_back(command[i])); - return temp; -} - -static SingleCommand parse_single_command(BAN::StringView command_view) -{ - constexpr auto can_escape = - [](char c) - { - switch (c) - { - case 'e': - case 'n': - case 't': - case 'r': - case '"': - case '\'': - case ' ': - return true; - } - return false; - }; - - constexpr auto parse_escaped = - [](char c) -> char - { - switch (c) - { - case 'e': return '\e'; - case 'n': return '\n'; - case 't': return '\t'; - case 'r': return '\r'; - case '"': return '"'; - case '\'': return '\''; - case ' ': return ' '; - } - ASSERT_NOT_REACHED(); - }; - - while (!command_view.empty() && isspace(command_view.front())) - command_view = command_view.substring(1); - while (!command_view.empty() && isspace(command_view.back())) - command_view = command_view.substring(0, command_view.size() - 1); - - SingleCommand result; - BAN::String current_argument; - for (size_t i = 0; i < command_view.size(); i++) - { - const char current = command_view[i]; - - if (isspace(current)) - { - MUST(result.arguments.push_back(BAN::move(current_argument))); - current_argument.clear(); - while (i + 1 < command_view.size() && isspace(command_view[i + 1])) - i++; - continue; - } - - switch (current) - { - case '\\': - if (i + 1 < command_view.size() && can_escape(command_view[i + 1])) - MUST(current_argument.push_back(parse_escaped(command_view[++i]))); - else - MUST(current_argument.push_back('\\')); - break; - case '$': - if (auto expansion = parse_dollar(command_view, i); expansion.has_value()) - MUST(current_argument.append(expansion.release_value())); - else - { - fprintf(stderr, "bad substitution\n"); - return {}; - } - break; - case '~': - if (i == 0 || (isspace(command_view[i - 1]) && (i == 1 || command_view[i - 2] != '\\'))) - { - const char* home_env = getenv("HOME"); - if (home_env) - { - MUST(current_argument.append(home_env)); - break; - } - } - MUST(current_argument.push_back('~')); - break; - case '\'': - while (++i < command_view.size()) - { - if (command_view[i] == current) - break; - if (command_view[i] == '\\' && i + 1 < command_view.size() && can_escape(command_view[i + 1])) - MUST(current_argument.push_back(parse_escaped(command_view[++i]))); - else - MUST(current_argument.push_back(command_view[i])); - } - break; - case '"': - while (++i < command_view.size()) - { - if (command_view[i] == current) - break; - if (command_view[i] == '\\' && i + 1 < command_view.size() && can_escape(command_view[i + 1])) - MUST(current_argument.push_back(parse_escaped(command_view[++i]))); - else if (!(current == '"' && command_view[i] == '$')) - MUST(current_argument.push_back(command_view[i])); - else - { - if (auto expansion = parse_dollar(command_view, i); expansion.has_value()) - MUST(current_argument.append(expansion.release_value())); - else - { - fprintf(stderr, "bad substitution\n"); - return {}; - } - } - } - break; - default: - MUST(current_argument.push_back(command_view[i])); - break; - } - } - - MUST(result.arguments.push_back(BAN::move(current_argument))); - - return BAN::move(result); -} - -static PipedCommand parse_piped_command(BAN::StringView command_view) -{ - while (!command_view.empty() && isspace(command_view.back())) - command_view = command_view.substring(0, command_view.size() - 1); - const bool background = !command_view.empty() && command_view.back() == '&'; - if (background) - command_view = command_view.substring(0, command_view.size() - 1); - - PipedCommand result; - result.background = background; - - for (size_t i = 0; i < command_view.size(); i++) - { - const char current = command_view[i]; - switch (current) - { - case '\\': - i++; - break; - case '\'': - case '"': - while (++i < command_view.size()) - { - if (command_view[i] == current) - break; - if (command_view[i] == '\\') - i++; - } - break; - case '|': - MUST(result.commands.emplace_back(parse_single_command(command_view.substring(0, i)))); - command_view = command_view.substring(i + 1); - i = -1; - break; - } - } - - MUST(result.commands.emplace_back(parse_single_command(command_view))); - - return BAN::move(result); -} - -static BAN::String parse_aliases(BAN::StringView command_view) -{ - while (!command_view.empty() && isspace(command_view.front())) - command_view = command_view.substring(1); - - BAN::String result; - MUST(result.append(command_view)); - - BAN::HashSet matched_aliases; - - for (size_t i = 0; i < result.size();) - { - size_t command_len = 0; - for (; command_len < result.size() - i; command_len++) - if (isspace(result[i + command_len])) - break; - auto command = result.sv().substring(i, command_len); - - if (!matched_aliases.contains(command)) - { - auto it = s_aliases.find(command); - if (it != s_aliases.end()) - { - MUST(matched_aliases.insert(command)); - for (size_t j = 0; j < command_len; j++) - result.remove(i); - MUST(result.insert(it->value, i)); - continue; - } - } - - matched_aliases.clear(); - - for (; i < result.size(); i++) - { - bool should_break = false; - - const char current = result[i]; - switch (current) - { - case '\\': - i++; - break; - case '\'': - case '"': - while (++i < result.size()) - { - if (result[i] == current) - break; - if (result[i] == '\\') - i++; - } - break; - case '|': - case '&': - if (i + 1 < result.size() && result[i + 1] == current) - i++; - else if (current == '&') - break; - // fall through - case ';': - i++; - should_break = true; - break; - } - - if (should_break) - break; - } - - while (i < result.size() && isspace(result[i])) - i++; - } - - return BAN::move(result); -} - -static CommandList parse_command_list(BAN::StringView command_view) -{ - const auto command_with_aliases_parsed = parse_aliases(command_view); - command_view = command_with_aliases_parsed.sv(); - - CommandList result; - CommandList::Condition next_condition = CommandList::Condition::Always; - for (size_t i = 0; i < command_view.size(); i++) - { - const char current = command_view[i]; - switch (current) - { - case '\\': - i++; - break; - case '\'': - case '"': - while (++i < command_view.size()) - { - if (command_view[i] == current) - break; - if (command_view[i] == '\\') - i++; - } - break; - case ';': - MUST(result.commands.emplace_back( - strip_whitespace(command_view.substring(0, i)), - next_condition - )); - command_view = strip_whitespace(command_view.substring(i + 1)); - next_condition = CommandList::Condition::Always; - i = -1; - break; - case '|': - case '&': - if (i + 1 >= command_view.size() || command_view[i + 1] != current) - break; - MUST(result.commands.emplace_back( - strip_whitespace(command_view.substring(0, i)), - next_condition - )); - command_view = strip_whitespace(command_view.substring(i + 2)); - next_condition = (current == '|') ? CommandList::Condition::OnFailure : CommandList::Condition::OnSuccess; - i = -1; - break; - } - } - - MUST(result.commands.emplace_back( - strip_whitespace(command_view), - next_condition - )); - - for (const auto& [expression, _] : result.commands) - { - if (!expression.empty()) - continue; - fprintf(stderr, "expected an expression\n"); - return {}; - } - - return BAN::move(result); -} - -static int execute_command(const SingleCommand& command, int fd_in, int fd_out, bool background); - -static int source_script(const BAN::String& path); - -static BAN::Optional execute_builtin(const SingleCommand& command, int fd_in, int fd_out) -{ - if (command.arguments.empty()) - return 0; - - auto it = s_builtin_commands.find(command.arguments.front()); - if (it == s_builtin_commands.end()) - return {}; - - FILE* fout = stdout; - bool should_close = false; - if (fd_out != STDOUT_FILENO) - { - int fd_dup = dup(fd_out); - if (fd_dup == -1) - ERROR_RETURN("dup", 1); - fout = fdopen(fd_dup, "w"); - if (fout == nullptr) - ERROR_RETURN("fdopen", 1); - should_close = true; - } - - int ret = it->value.function(command, fout, fd_in, fd_out); - - if (should_close) - fclose(fout); - - return ret; -} - -static void install_builtin_commands() -{ - MUST(s_builtin_commands.emplace("clear"_sv, - [](const SingleCommand&, FILE* fout, int, int) -> int - { - fprintf(fout, "\e[H\e[3J\e[2J"); - fflush(fout); - return 0; - } - )); - - MUST(s_builtin_commands.emplace("exit"_sv, - [](const SingleCommand& command, FILE*, int, int) -> int - { - int exit_code = 0; - if (command.arguments.size() > 1) - { - auto exit_string = command.arguments[1].sv(); - for (size_t i = 0; i < exit_string.size() && isdigit(exit_string[i]); i++) - exit_code = (exit_code * 10) + (exit_string[i] - '0'); - } - exit(exit_code); - ASSERT_NOT_REACHED(); - } - )); - - MUST(s_builtin_commands.emplace("export"_sv, - [](const SingleCommand& command, FILE*, int, int) -> int - { - bool first = false; - for (const auto& argument : command.arguments) - { - if (first) - { - first = false; - continue; - } - - auto split = MUST(argument.sv().split('=', true)); - if (split.size() != 2) - continue; - - if (setenv(BAN::String(split[0]).data(), BAN::String(split[1]).data(), true) == -1) - ERROR_RETURN("setenv", 1); - } - return 0; - } - )); - - MUST(s_builtin_commands.emplace("alias"_sv, - [](const SingleCommand& command, FILE* fout, int, int) -> int - { - if (command.arguments.size() == 1) - { - for (const auto& [alias, value] : s_aliases) - fprintf(fout, "%s='%s'\n", alias.data(), value.data()); - return 0; - } - - for (size_t i = 1; i < command.arguments.size(); i++) - { - auto idx = command.arguments[i].sv().find('='); - if (idx.has_value() && idx.value() == 0) - continue; - if (!idx.has_value()) - { - auto it = s_aliases.find(command.arguments[i]); - if (it != s_aliases.end()) - fprintf(fout, "%s='%s'\n", command.arguments[i].data(), it->value.data()); - } - else - { - auto alias = command.arguments[i].sv().substring(0, idx.value()); - auto value = command.arguments[i].sv().substring(idx.value() + 1); - - if (s_aliases.contains(alias)) - s_aliases.remove(alias); - MUST(s_aliases.insert(alias, value)); - } - } - - return 0; - } - )); - - MUST(s_builtin_commands.emplace("source"_sv, - [](const SingleCommand& command, FILE* fout, int, int) -> int - { - if (command.arguments.size() != 2) - { - fprintf(fout, "usage: source FILE\n"); - return 1; - } - return source_script(command.arguments[1]); - } - )); - - MUST(s_builtin_commands.emplace("env"_sv, - [](const SingleCommand&, FILE* fout, int, int) -> int - { - char** current = environ; - while (current && *current) - fprintf(fout, "%s\n", *current++); - return 0; - } - )); - - MUST(s_builtin_commands.emplace("cd"_sv, - [](const SingleCommand& command, FILE* fout, int, int) -> int - { - if (command.arguments.size() > 2) - { - fprintf(fout, "cd: too many arguments\n"); - return 1; - } - - BAN::StringView path; - - if (command.arguments.size() == 1) - { - if (const char* path_env = getenv("HOME")) - path = path_env; - else - return 0; - } - else - path = command.arguments[1]; - - if (chdir(path.data()) == -1) - ERROR_RETURN("chdir", 1); - - return 0; - } - )); - - MUST(s_builtin_commands.emplace("time"_sv, - [](const SingleCommand& command, FILE* fout, int fd_in, int fd_out) -> int - { - SingleCommand timed_command; - MUST(timed_command.arguments.reserve(command.arguments.size() - 1)); - for (size_t i = 1; i < command.arguments.size(); i++) - MUST(timed_command.arguments.emplace_back(command.arguments[i])); - - timespec start, end; - - if (clock_gettime(CLOCK_MONOTONIC, &start) == -1) - ERROR_RETURN("clock_gettime", 1); - - int ret = execute_command(timed_command, fd_in, fd_out, false); - - if (clock_gettime(CLOCK_MONOTONIC, &end) == -1) - ERROR_RETURN("clock_gettime", 1); - - uint64_t total_ns = 0; - total_ns += (end.tv_sec - start.tv_sec) * 1'000'000'000; - total_ns += end.tv_nsec - start.tv_nsec; - - int secs = total_ns / 1'000'000'000; - int msecs = (total_ns % 1'000'000'000) / 1'000'000; - - fprintf(fout, "took %d.%03d s\n", secs, msecs); - - return ret; - } - )); -} - -static pid_t execute_command_no_wait(const SingleCommand& command, int fd_in, int fd_out, pid_t pgrp, bool background) -{ - ASSERT(!command.arguments.empty()); - - BAN::Vector cmd_args; - MUST(cmd_args.reserve(command.arguments.size() + 1)); - for (const auto& arg : command.arguments) - MUST(cmd_args.push_back((char*)arg.data())); - MUST(cmd_args.push_back(nullptr)); - - // do PATH resolution - BAN::String executable_file; - if (!command.arguments.front().sv().contains('/')) - { - const char* path_env_cstr = getenv("PATH"); - if (path_env_cstr == nullptr) - path_env_cstr = ""; - - auto path_env_list = MUST(BAN::StringView(path_env_cstr).split(':')); - for (auto path_env : path_env_list) - { - BAN::String test_file = path_env; - MUST(test_file.push_back('/')); - MUST(test_file.append(command.arguments.front())); - - struct stat st; - if (stat(test_file.data(), &st) == 0) - { - executable_file = BAN::move(test_file); - break; - } - } - } - else - { - executable_file = command.arguments.front(); - } - - // Verify that the file exists is executable - { - struct stat st; - if (executable_file.empty() || stat(executable_file.data(), &st) == -1) - { - fprintf(stderr, "command not found: %s\n", command.arguments.front().data()); - return -1; - } - if ((st.st_mode & 0111) == 0) - { - fprintf(stderr, "permission denied: %s\n", executable_file.data()); - return -1; - } - } - - const pid_t pid = fork(); - if (pid == 0) - { - if (fd_in != STDIN_FILENO) - { - if (dup2(fd_in, STDIN_FILENO) == -1) - { - perror("dup2"); - exit(1); - } - close(fd_in); - } - if (fd_out != STDOUT_FILENO) - { - if (dup2(fd_out, STDOUT_FILENO) == -1) - { - perror("dup2"); - exit(1); - } - close(fd_out); - } - - execv(executable_file.data(), cmd_args.data()); - perror("execv"); - exit(1); - } - - if (pid == -1) - ERROR_RETURN("fork", -1); - - if (background) - ; - else if (pgrp == 0 && isatty(0)) - { - if(setpgid(pid, pid) == -1) - perror("setpgid"); - if (tcsetpgrp(0, pid) == -1) - perror("tcsetpgrp"); - } - else - { - setpgid(pid, pgrp); - } - - return pid; -} - -static int execute_command(const SingleCommand& command, int fd_in, int fd_out, bool background) -{ - const pid_t pid = execute_command_no_wait(command, fd_in, fd_out, 0, background); - if (pid == -1) - return 1; - if (background) - return 0; - - int status; - if (waitpid(pid, &status, 0) == -1) - ERROR_RETURN("waitpid", 1); - - if (isatty(0) && tcsetpgrp(0, getpgrp()) == -1) - ERROR_RETURN("tcsetpgrp", 1); - - if (WIFSIGNALED(status)) - fprintf(stderr, "Terminated by signal %d\n", WTERMSIG(status)); - - return WEXITSTATUS(status); -} - -static int execute_piped_commands(const PipedCommand& piped_command) -{ - if (piped_command.commands.empty()) - return 0; - - if (piped_command.commands.size() == 1) - { - auto& command = piped_command.commands.front(); - if (auto ret = execute_builtin(command, STDIN_FILENO, STDOUT_FILENO); ret.has_value()) - return ret.value(); - return execute_command(command, STDIN_FILENO, STDOUT_FILENO, piped_command.background); - } - - BAN::Vector exit_codes(piped_command.commands.size(), 0); - BAN::Vector processes(piped_command.commands.size(), -1); - pid_t pgrp = 0; - - int next_stdin = STDIN_FILENO; - for (size_t i = 0; i < piped_command.commands.size(); i++) - { - const bool last = (i == piped_command.commands.size() - 1); - - int pipefd[2] { -1, STDOUT_FILENO }; - if (!last && pipe(pipefd) == -1) - { - if (i > 0) - close(next_stdin); - perror("pipe"); - break; - } - - auto builtin_ret = execute_builtin(piped_command.commands[i], next_stdin, pipefd[1]); - if (builtin_ret.has_value()) - exit_codes[i] = builtin_ret.value(); - else - { - const pid_t pid = execute_command_no_wait(piped_command.commands[i], next_stdin, pipefd[1], pgrp, piped_command.background); - processes[i] = pid; - if (pgrp == 0) - pgrp = pid; - } - - if (next_stdin != STDIN_FILENO) - close(next_stdin); - if (pipefd[1] != STDOUT_FILENO) - close(pipefd[1]); - next_stdin = pipefd[0]; - } - - if (piped_command.background) - return 0; - - for (size_t i = 0; i < piped_command.commands.size(); i++) - { - if (processes[i] == -1) - continue; - - int status; - if (waitpid(processes[i], &status, 0) == -1) - { - perror("waitpid"); - exit_codes[i] = 69420; - continue; - } - - if (WIFSIGNALED(status)) - fprintf(stderr, "Terminated by signal %d\n", WTERMSIG(status)); - - if (WEXITSTATUS(status)) - exit_codes[i] = WEXITSTATUS(status); - } - - if (isatty(0) && tcsetpgrp(0, getpgrp()) == -1) - ERROR_RETURN("tcsetpgrp", 1); - - return exit_codes.back(); -} - -static int parse_and_execute_command(BAN::StringView command) -{ - command = strip_whitespace(command); - if (command.empty()) - return 0; - - auto command_list = parse_command_list(command); - if (command_list.commands.empty()) - return 0; - - tcsetattr(0, TCSANOW, &old_termios); - - last_return = 0; - - for (size_t i = 0; i < command_list.commands.size(); i++) - { - const auto& [expression, condition] = command_list.commands[i]; - - const auto parsed_command = parse_piped_command(expression); - if (parsed_command.background && i + 1 < command_list.commands.size() && command_list.commands[i + 1].condition != CommandList::Condition::Always) - { - printf("invalid background command with conditional execution\n"); - break; - } - - bool should_run = false; - switch (condition) - { - case CommandList::Condition::Always: - should_run = true; - break; - case CommandList::Condition::OnSuccess: - should_run = (last_return == 0); - break; - case CommandList::Condition::OnFailure: - should_run = (last_return != 0); - break; - } - - if (!should_run) - continue; - - int return_value = execute_piped_commands(parsed_command); - if (!parsed_command.background) - last_return = return_value; - } - - tcsetattr(0, TCSANOW, &new_termios); - - return last_return; -} - -static int source_script(const BAN::String& path) -{ - FILE* fp = fopen(path.data(), "r"); - if (fp == nullptr) - ERROR_RETURN("fopen", 1); - - int ret = 0; - - BAN::String command; - char temp_buffer[128]; - while (fgets(temp_buffer, sizeof(temp_buffer), fp)) - { - MUST(command.append(temp_buffer)); - if (command.back() != '\n') - continue; - - command.pop_back(); - - if (!command.empty()) - if (int temp = parse_and_execute_command(command)) - ret = temp; - command.clear(); - } - - if (!command.empty()) - if (int temp = parse_and_execute_command(command)) - ret = temp; - - fclose(fp); - - return ret; -} - -static bool exists(const BAN::String& path) -{ - struct stat st; - return stat(path.data(), &st) == 0; -} - -static int source_shellrc() -{ - if (char* home = getenv("HOME")) - { - BAN::String path(home); - MUST(path.append("/.shellrc"_sv)); - if (exists(path)) - return source_script(path); - } - return 0; -} - -static BAN::Vector list_matching_entries(BAN::StringView path, BAN::StringView start, bool require_executable) -{ - ASSERT(path.size() < PATH_MAX); - - char path_cstr[PATH_MAX]; - memcpy(path_cstr, path.data(), path.size()); - path_cstr[path.size()] = '\0'; - - DIR* dirp = opendir(path_cstr); - if (dirp == nullptr) - return {}; - - BAN::Vector result; - - dirent* entry; - while ((entry = readdir(dirp))) - { - if (entry->d_name[0] == '.' && !start.starts_with("."_sv)) - continue; - if (strncmp(entry->d_name, start.data(), start.size())) - continue; - - struct stat st; - if (fstatat(dirfd(dirp), entry->d_name, &st, 0)) - continue; - - if (require_executable) - { - if (S_ISDIR(st.st_mode)) - continue; - if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXUSR))) - continue; - } - - MUST(result.emplace_back(entry->d_name + start.size())); - if (S_ISDIR(st.st_mode)) - MUST(result.back().push_back('/')); - } - - closedir(dirp); - - return BAN::move(result); -} - -struct TabCompletion -{ - bool should_escape_spaces; - BAN::StringView prefix; - BAN::Vector completions; -}; - -static TabCompletion list_tab_completion_entries(BAN::StringView command) -{ - enum class CompletionType - { - Command, - File, - }; - - BAN::StringView prefix = command; - BAN::String last_argument; - CompletionType completion_type = CompletionType::Command; - - bool should_escape_spaces = true; - for (size_t i = 0; i < command.size(); i++) - { - if (command[i] == '\\') - { - i++; - if (i < command.size()) - MUST(last_argument.push_back(command[i])); - } - else if (isspace(command[i]) || command[i] == ';' || command[i] == '|' || command.substring(i).starts_with("&&"_sv)) - { - if (!isspace(command[i])) - completion_type = CompletionType::Command; - else if (!last_argument.empty()) - completion_type = CompletionType::File; - if (auto rest = command.substring(i); rest.starts_with("||"_sv) || rest.starts_with("&&"_sv)) - i++; - prefix = command.substring(i + 1); - last_argument.clear(); - should_escape_spaces = true; - } - else if (command[i] == '\'' || command[i] == '"') - { - const char quote_type = command[i++]; - while (i < command.size() && command[i] != quote_type) - MUST(last_argument.push_back(command[i++])); - should_escape_spaces = false; - } - else - { - MUST(last_argument.push_back(command[i])); - } - } - - if (last_argument.sv().contains('/')) - completion_type = CompletionType::File; - - BAN::Vector result; - switch (completion_type) - { - case CompletionType::Command: - { - const char* path_env = getenv("PATH"); - if (path_env) - { - auto splitted_path_env = MUST(BAN::StringView(path_env).split(':')); - for (auto path : splitted_path_env) - { - auto matching_entries = list_matching_entries(path, last_argument, true); - MUST(result.reserve(result.size() + matching_entries.size())); - for (auto&& entry : matching_entries) - MUST(result.push_back(BAN::move(entry))); - } - } - - for (const auto& [builtin_name, _] : s_builtin_commands) - { - if (!builtin_name.sv().starts_with(last_argument)) - continue; - MUST(result.emplace_back(builtin_name.sv().substring(last_argument.size()))); - } - - for (const auto& [alias_name, _] : s_aliases) - { - if (!alias_name.sv().starts_with(last_argument)) - continue; - MUST(result.emplace_back(alias_name.sv().substring(last_argument.size()))); - } - - break; - } - case CompletionType::File: - { - BAN::String dir_path; - if (last_argument.sv().starts_with("/"_sv)) - MUST(dir_path.push_back('/')); - else - { - char cwd_buffer[PATH_MAX]; - if (getcwd(cwd_buffer, sizeof(cwd_buffer)) == nullptr) - return {}; - MUST(dir_path.reserve(strlen(cwd_buffer) + 1)); - MUST(dir_path.append(cwd_buffer)); - MUST(dir_path.push_back('/')); - } - - auto match_against = last_argument.sv(); - if (auto idx = match_against.rfind('/'); idx.has_value()) - { - MUST(dir_path.append(match_against.substring(0, idx.value()))); - match_against = match_against.substring(idx.value() + 1); - } - - result = list_matching_entries(dir_path, match_against, false); - - break; - } - } - - if (auto idx = prefix.rfind('/'); idx.has_value()) - prefix = prefix.substring(idx.value() + 1); - - return { should_escape_spaces, prefix, BAN::move(result) }; -} - -static int character_length(BAN::StringView prompt) -{ - int length { 0 }; - bool in_escape { false }; - for (char c : prompt) - { - if (in_escape) - { - if (isalpha(c)) - in_escape = false; - } - else - { - if (c == '\e') - in_escape = true; - else if (((uint8_t)c & 0xC0) != 0x80) - length++; - } - } - return length; -} - -static BAN::String get_prompt() -{ - const char* raw_prompt = getenv("PS1"); - if (raw_prompt == nullptr) - return "$ "_sv; - - BAN::String prompt; - for (int i = 0; raw_prompt[i]; i++) - { - char ch = raw_prompt[i]; - if (ch == '\\') - { - switch (raw_prompt[++i]) - { - case 'e': - MUST(prompt.push_back('\e')); - break; - case 'n': - MUST(prompt.push_back('\n')); - break; - case '\\': - MUST(prompt.push_back('\\')); - break; - case '~': - { - char buffer[256]; - if (getcwd(buffer, sizeof(buffer)) == nullptr) - strcpy(buffer, strerrorname_np(errno)); - - const char* home = getenv("HOME"); - size_t home_len = home ? strlen(home) : 0; - if (home && strncmp(buffer, home, home_len) == 0) - { - MUST(prompt.push_back('~')); - MUST(prompt.append(buffer + home_len)); - } - else - { - MUST(prompt.append(buffer)); - } - - break; - } - case 'u': - { - static char* username = nullptr; - if (username == nullptr) - { - auto* passwd = getpwuid(geteuid()); - if (passwd == nullptr) - break; - username = new char[strlen(passwd->pw_name) + 1]; - strcpy(username, passwd->pw_name); - endpwent(); - } - MUST(prompt.append(username)); - break; - } - case 'h': - { - MUST(prompt.append(hostname)); - break; - } - case '\0': - MUST(prompt.push_back('\\')); - break; - default: - MUST(prompt.push_back('\\')); - MUST(prompt.push_back(*raw_prompt)); - break; - } - } - else - { - MUST(prompt.push_back(ch)); - } - } - - return prompt; -} - -static int prompt_length() -{ - return character_length(get_prompt()); -} - -static void print_prompt() -{ - auto prompt = get_prompt(); - printf("%.*s", (int)prompt.size(), prompt.data()); - fflush(stdout); -} - -static bool detect_cursor_position_support() -{ - constexpr auto getchar_nonblock = - []() -> char - { - fd_set fds; - FD_ZERO(&fds); - FD_SET(STDIN_FILENO, &fds); - - timeval timeout; - timeout.tv_sec = 0; - timeout.tv_usec = 100'000; - - int nselect = select(STDIN_FILENO + 1, &fds, nullptr, nullptr, &timeout); - if (nselect != 1) - return '\0'; - - char ch; - if (read(STDIN_FILENO, &ch, 1) != 1) - return '\0'; - return ch; - }; - - if (write(STDOUT_FILENO, "\e[6n", 4) != 4) - return false; - - char ch = getchar_nonblock(); - if (ch != '\e') - { - if (ch != '\0') - ungetc(ch, stdin); - return false; - } - if (getchar_nonblock() != '[') - return false; - - int cur; - while (isdigit(cur = getchar_nonblock())) - ; - if (cur != ';') - return false; - while (isdigit(cur = getchar_nonblock())) - ; - if (cur != 'R') - return false; - - return true; -} - -struct CursorPosition -{ - int x; - int y; -}; - -static BAN::Optional try_read_cursor_position() -{ -#if __banan_os__ - return {}; -#endif - - static BAN::Optional s_supports_cursor_position; - if (!s_supports_cursor_position.has_value()) - s_supports_cursor_position = detect_cursor_position_support(); - - if (!s_supports_cursor_position.value()) - return {}; - - if (write(STDOUT_FILENO, "\e[6n", 4) != 4) - return {}; - - char ch = getchar(); - if (ch != '\e') - { - ungetc(ch, stdin); - return {}; - } - if (getchar() != '[') - return {}; - - int cur, x = 0, y = 0; - while (isdigit(cur = getchar())) - y = (y * 10) + (cur - '0'); - if (cur != ';') - return {}; - while (isdigit(cur = getchar())) - x = (x * 10) + (cur - '0'); - if (cur != 'R') - return {}; - - if (x > 0) x--; - if (y > 0) y--; - return CursorPosition { x, y }; -} +int g_pid; +int g_argc; +char** g_argv; int main(int argc, char** argv) { - realpath(argv[0], s_shell_path); + g_pid = getpid(); + g_argc = argc; + g_argv = argv; - struct sigaction sa; - sa.sa_flags = 0; + { + struct sigaction sa; + sa.sa_flags = 0; - sa.sa_handler = [](int) {}; - sigaction(SIGINT, &sa, nullptr); + sa.sa_handler = [](int) {}; + sigaction(SIGINT, &sa, nullptr); - sa.sa_handler = SIG_IGN; - sigaction(SIGTTOU, &sa, nullptr); - - tcgetattr(0, &old_termios); - - char hostname_buffer[HOST_NAME_MAX]; - if (gethostname(hostname_buffer, sizeof(hostname_buffer)) == 0) { - MUST(hostname.append(hostname_buffer)); + sa.sa_handler = SIG_IGN; + sigaction(SIGTTOU, &sa, nullptr); } - new_termios = old_termios; - new_termios.c_lflag &= ~(ECHO | ICANON); - tcsetattr(0, TCSANOW, &new_termios); - - atexit([]() { tcsetattr(0, TCSANOW, &old_termios); }); - - install_builtin_commands(); + Builtin::get().initialize(); for (int i = 1; i < argc; i++) { if (argv[i][0] != '-') - return source_script(BAN::String(argv[i])); + { + g_argc = g_argc - i; + g_argv = g_argv + i; + + Execute execute; + (void)execute.source_script(argv[i]); + return execute.last_return_value(); + } if (strcmp(argv[i], "-c") == 0) { @@ -1442,7 +49,27 @@ int main(int argc, char** argv) printf("-c requires an argument\n"); return 1; } - return parse_and_execute_command(BAN::String(argv[i + 1])); + + g_argc = g_argc - (i + 2); + g_argv = g_argv + (i + 2); + + bool got_input = false; + + TokenParser parser( + [&](BAN::Optional) -> BAN::Optional + { + if (got_input) + return {}; + got_input = true; + + BAN::String input; + MUST(input.append(argv[i + 1])); + return input; + } + ); + if (!parser.main_loop(true)) + return 126; + return parser.execute().last_return_value(); } else if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--version") == 0) { @@ -1464,320 +91,29 @@ int main(int argc, char** argv) } } - source_shellrc(); + Input input; + TokenParser parser( + [&](BAN::Optional prompt) -> BAN::Optional + { + return input.get_input(prompt); + } + ); - BAN::Vector buffers, history; - MUST(buffers.emplace_back(""_sv)); - size_t index = 0; - size_t col = 0; - - BAN::Optional tab_index; - BAN::Optional> tab_completions; - size_t tab_completion_keep = 0; - - int waiting_utf8 = 0; - - print_prompt(); - - while (true) + if (const char* home_env = getenv("HOME")) { - int chi = getchar(); - if (chi == EOF) + BAN::String config_file_path; + MUST(config_file_path.append(home_env)); + MUST(config_file_path.append("/.shellrc"_sv)); + + struct stat st; + if (stat(config_file_path.data(), &st) == 0) { - if (errno != EINTR) - { - perror("getchar"); - return 1; - } - - clearerr(stdin); - buffers = history; - MUST(buffers.emplace_back(""_sv)); - index = buffers.size() - 1; - col = 0; - putchar('\n'); - print_prompt(); - continue; - } - - uint8_t ch = chi; - if (ch != '\t') - { - tab_completions.clear(); - tab_index.clear(); - } - - if (waiting_utf8 > 0) - { - waiting_utf8--; - - ASSERT((ch & 0xC0) == 0x80); - - putchar(ch); - MUST(buffers[index].insert(ch, col++)); - if (waiting_utf8 == 0) - { - printf("\e[s%s\e[u", buffers[index].data() + col); - fflush(stdout); - } - continue; - } - else if (ch & 0x80) - { - if ((ch & 0xE0) == 0xC0) - waiting_utf8 = 1; - else if ((ch & 0xF0) == 0xE0) - waiting_utf8 = 2; - else if ((ch & 0xF8) == 0xF0) - waiting_utf8 = 3; - else - ASSERT_NOT_REACHED(); - - putchar(ch); - MUST(buffers[index].insert(ch, col++)); - continue; - } - - switch (ch) - { - case '\e': - { - ch = getchar(); - if (ch != '[') - break; - ch = getchar(); - - int value = 0; - while (isdigit(ch)) - { - value = (value * 10) + (ch - '0'); - ch = getchar(); - } - - switch (ch) - { - case 'A': if (index > 0) { index--; col = buffers[index].size(); printf("\e[%dG%s\e[K", prompt_length() + 1, buffers[index].data()); fflush(stdout); } break; - case 'B': if (index < buffers.size() - 1) { index++; col = buffers[index].size(); printf("\e[%dG%s\e[K", prompt_length() + 1, buffers[index].data()); fflush(stdout); } break; - case 'C': if (col < buffers[index].size()) { col++; while ((buffers[index][col - 1] & 0xC0) == 0x80) col++; printf("\e[C"); fflush(stdout); } break; - case 'D': if (col > 0) { while ((buffers[index][col - 1] & 0xC0) == 0x80) col--; col--; printf("\e[D"); fflush(stdout); } break; - case '~': - switch (value) - { - case 3: // delete - if (col >= buffers[index].size()) - break; - buffers[index].remove(col); - while (col < buffers[index].size() && (buffers[index][col] & 0xC0) == 0x80) - buffers[index].remove(col); - printf("\e[s%s \e[u", buffers[index].data() + col); - fflush(stdout); - break; - } - } - break; - } - case '\x0C': // ^L - { - int x = prompt_length() + character_length(buffers[index].sv().substring(col)) + 1; - printf("\e[H\e[J"); - print_prompt(); - printf("%s\e[u\e[1;%dH", buffers[index].data(), x); - fflush(stdout); - break; - } - case '\b': - if (col <= 0) - break; - while ((buffers[index][col - 1] & 0xC0) == 0x80) - col--; - col--; - printf("\e[D"); - fflush(stdout); - break; - case '\x01': // ^A - col = 0; - printf("\e[%dG", prompt_length() + 1); - fflush(stdout); - break; - case '\x03': // ^C - putchar('\n'); - print_prompt(); - buffers[index].clear(); - col = 0; - break; - case '\x04': // ^D - putchar('\n'); - return 0; - case '\x7F': // backspace - if (col <= 0) - break; - while ((buffers[index][col - 1] & 0xC0) == 0x80) - buffers[index].remove(--col); - buffers[index].remove(--col); - printf("\b\e[s%s \e[u", buffers[index].data() + col); - fflush(stdout); - break; - case '\n': - putchar('\n'); - if (!buffers[index].empty()) - { - parse_and_execute_command(buffers[index]); - MUST(history.push_back(buffers[index])); - buffers = history; - MUST(buffers.emplace_back(""_sv)); - - auto cursor_pos = try_read_cursor_position(); - if (cursor_pos.has_value() && cursor_pos->x > 0) - printf("\e[7m%%\e[m\n"); - } - print_prompt(); - index = buffers.size() - 1; - col = 0; - break; - case '\t': - { - if (col != buffers[index].size()) - continue; - - if (tab_completions.has_value()) - { - ASSERT(tab_completions->size() >= 2); - - if (!tab_index.has_value()) - tab_index = 0; - else - { - MUST(buffers[index].resize(tab_completion_keep)); - col = tab_completion_keep; - *tab_index = (*tab_index + 1) % tab_completions->size(); - } - - MUST(buffers[index].append(tab_completions.value()[*tab_index])); - col += tab_completions.value()[*tab_index].size(); - - printf("\e[%dG%s\e[K", prompt_length() + 1, buffers[index].data()); - fflush(stdout); - - break; - } - - tab_completion_keep = col; - auto [should_escape_spaces, prefix, completions] = list_tab_completion_entries(buffers[index].sv().substring(0, tab_completion_keep)); - - BAN::sort::sort(completions.begin(), completions.end(), - [](const BAN::String& a, const BAN::String& b) { - if (auto cmp = strcmp(a.data(), b.data())) - return cmp < 0; - return a.size() < b.size(); - } - ); - - for (size_t i = 1; i < completions.size();) - { - if (completions[i - 1] == completions[i]) - completions.remove(i); - else - i++; - } - - if (completions.empty()) - break; - - size_t all_match_len = 0; - for (;;) - { - if (completions.front().size() <= all_match_len) - break; - const char target = completions.front()[all_match_len]; - - bool all_matched = true; - for (const auto& completion : completions) - { - if (completion.size() > all_match_len && completion[all_match_len] == target) - continue; - all_matched = false; - break; - } - - if (!all_matched) - break; - all_match_len++; - } - - if (all_match_len) - { - auto completion = completions.front().sv().substring(0, all_match_len); - - BAN::String temp_escaped; - if (should_escape_spaces) - { - MUST(temp_escaped.append(completion)); - for (size_t i = 0; i < temp_escaped.size(); i++) - { - if (!isspace(temp_escaped[i])) - continue; - MUST(temp_escaped.insert('\\', i)); - i++; - } - completion = temp_escaped.sv(); - - if (!buffers[index].empty() && buffers[index].back() == '\\' && completion.front() == '\\') - completion = completion.substring(1); - } - - col += completion.size(); - MUST(buffers[index].append(completion)); - printf("%.*s", (int)completion.size(), completion.data()); - fflush(stdout); - break; - } - - if (completions.size() == 1) - { - ASSERT(all_match_len == completions.front().size()); - break; - } - - printf("\n"); - for (size_t i = 0; i < completions.size(); i++) - { - if (i != 0) - printf(" "); - const char* format = completions[i].sv().contains(' ') ? "'%.*s%s'" : "%.*s%s"; - printf(format, (int)prefix.size(), prefix.data(), completions[i].data()); - } - printf("\n"); - print_prompt(); - printf("%s", buffers[index].data()); - fflush(stdout); - - if (should_escape_spaces) - { - for (auto& completion : completions) - { - for (size_t i = 0; i < completion.size(); i++) - { - if (!isspace(completion[i])) - continue; - MUST(completion.insert('\\', i)); - i++; - } - } - } - - tab_completion_keep = col; - tab_completions = BAN::move(completions); - - break; - } - default: - MUST(buffers[index].insert(ch, col++)); - if (col == buffers[index].size()) - putchar(ch); - else - printf("%c\e[s%s\e[u", ch, buffers[index].data() + col); - fflush(stdout); - break; + if (auto ret = parser.execute().source_script(config_file_path.sv()); ret.is_error()) + fprintf(stderr, "could not source config file at '%s': %s\n", config_file_path.data(), ret.error().get_message()); } } + + if (!parser.main_loop(false)) + return 126; + return 0; }