diff --git a/userspace/programs/CMakeLists.txt b/userspace/programs/CMakeLists.txt
index 6870e8ca..cc67e545 100644
--- a/userspace/programs/CMakeLists.txt
+++ b/userspace/programs/CMakeLists.txt
@@ -47,6 +47,7 @@ set(USERSPACE_PROGRAMS
 	sync
 	tee
 	test
+	tr
 	true
 	TaskBar
 	Terminal
diff --git a/userspace/programs/tr/main.cpp b/userspace/programs/tr/main.cpp
new file mode 100644
index 00000000..5de374ee
--- /dev/null
+++ b/userspace/programs/tr/main.cpp
@@ -0,0 +1,342 @@
+#include <BAN/Optional.h>
+#include <BAN/String.h>
+
+#include <assert.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const char* s_argv0 { nullptr };
+
+using char_class_test_t = int (*)(int);
+
+// Decode the character at the start of `array`, handling backslash escapes,
+// and advance `array` past everything that was consumed.
+static char parse_char(const char*& array)
+{
+	assert(array[0]);
+
+	if (array[0] != '\\')
+		return *array++;
+
+	switch (array[1])
+	{
+		case '\\': array += 2; return '\\';
+		case 'a':  array += 2; return '\a';
+		case 'b':  array += 2; return '\b';
+		case 'f':  array += 2; return '\f';
+		case 'n':  array += 2; return '\n';
+		case 'r':  array += 2; return '\r';
+		case 't':  array += 2; return '\t';
+		case 'v':  array += 2; return '\v';
+	}
+
+	// Accept up to three octal digits, but stop early when another digit would
+	// overflow a byte: "\400" means "\40" followed by a literal '0'.
+	int value = 0;
+	size_t octal_count = 0;
+	while (octal_count < 3)
+	{
+		const char digit = array[1 + octal_count];
+		if (digit < '0' || digit > '7')
+			break;
+		const int next = (value * 8) + (digit - '0');
+		if (next > 0xFF)
+			break;
+		value = next;
+		octal_count++;
+	}
+
+	// Unknown escape or trailing backslash: the backslash stands for itself.
+	if (octal_count == 0)
+		return *array++;
+
+	array += 1 + octal_count;
+	return static_cast<char>(value);
+}
+
+// Map a character class name (not NUL-terminated) to its <ctype.h> predicate;
+// nullptr when the name is unknown.
+static char_class_test_t find_char_class(const char* name, size_t name_len)
+{
+	struct CharClass
+	{
+		const char* name;
+		char_class_test_t test;
+	};
+
+	static const CharClass classes[] {
+		{ "alnum",  isalnum  },
+		{ "alpha",  isalpha  },
+		{ "blank",  isblank  },
+		{ "cntrl",  iscntrl  },
+		{ "digit",  isdigit  },
+		{ "graph",  isgraph  },
+		{ "lower",  islower  },
+		{ "print",  isprint  },
+		{ "punct",  ispunct  },
+		{ "space",  isspace  },
+		{ "upper",  isupper  },
+		{ "xdigit", isxdigit },
+	};
+
+	for (const auto& char_class : classes)
+		if (strlen(char_class.name) == name_len && memcmp(char_class.name, name, name_len) == 0)
+			return char_class.test;
+
+	return nullptr;
+}
+
+// Try to expand the bracket construct starting at `array` (which points at '[')
+// ([:class:], [=c=] or [c*n]) into `result`. Returns false, leaving `array`
+// untouched, when the text is not such a construct and must be treated as
+// ordinary characters.
+static bool expand_bracket(const char*& array, size_t expand_len, BAN::String& result)
+{
+	const char* end = strchr(array + 1, ']');
+	if (end == nullptr)
+		return false;
+
+	const size_t len = end - array - 1;
+	if (len < 2)
+		return false;
+
+	if (array[1] == ':' && end[-1] == ':')
+	{
+		const char* name = array + 2;
+		const size_t name_len = len - 2;
+
+		const char_class_test_t class_test = find_char_class(name, name_len);
+		if (class_test == nullptr)
+		{
+			fprintf(stderr, "%s: invalid character class '%.*s'\n", s_argv0, static_cast<int>(name_len), name);
+			exit(1);
+		}
+
+		for (int ch = 0; ch < 0x100; ch++)
+			if (class_test(ch))
+				MUST(result.push_back(static_cast<char>(ch)));
+	}
+	else if (array[1] == '=' && end[-1] == '=')
+	{
+		if (len - 2 != 1)
+		{
+			fprintf(stderr, "%s: %.*s: equivalence class must be a single character\n", s_argv0, static_cast<int>(len - 2), array + 2);
+			exit(1);
+		}
+
+		// TODO: actually support collating elements
+		MUST(result.push_back(array[2]));
+	}
+	else
+	{
+		const char* temp = array + 1;
+		const char ch = parse_char(temp);
+
+		if (temp[0] != '*')
+			return false;
+		temp++;
+
+		// A leading zero selects octal, as for C integer literals.
+		const int base = (temp[0] == '0') ? 8 : 10;
+
+		size_t count = 0;
+		for (const char* digit = temp; digit < end; digit++)
+		{
+			if (*digit < '0' || *digit > '0' + base - 1)
+			{
+				fprintf(stderr, "%s: invalid repeat count '%.*s'\n", s_argv0, static_cast<int>(end - temp), temp);
+				exit(1);
+			}
+			count = (count * base) + (*digit - '0');
+		}
+
+		// A missing or zero count pads the set up to `expand_len` characters.
+		if (count == 0 && result.size() < expand_len)
+			count = expand_len - result.size();
+
+		for (size_t i = 0; i < count; i++)
+			MUST(result.push_back(ch));
+	}
+
+	array = end + 1;
+	return true;
+}
+
+// Expand a tr set description (escapes, "a-z" ranges, bracketed character
+// classes and repeats) into the literal sequence of characters it denotes.
+// `expand_len` is the length a "[c*]" repeat should pad the result to.
+static BAN::String expand_array(const char* array, size_t expand_len)
+{
+	BAN::String result;
+	while (*array)
+	{
+		if (array[0] == '[' && expand_bracket(array, expand_len, result))
+			continue;
+
+		const char ch1 = parse_char(array);
+
+		if (array[0] == '-' && array[1])
+		{
+			array++;
+			const char ch2 = parse_char(array);
+
+			// Compare as unsigned bytes so ranges ending above 0x7F also
+			// work where plain char is signed.
+			const int first = static_cast<unsigned char>(ch1);
+			const int last = static_cast<unsigned char>(ch2);
+			for (int ch = first; ch <= last; ch++)
+				MUST(result.push_back(static_cast<char>(ch)));
+			continue;
+		}
+
+		MUST(result.push_back(ch1));
+	}
+
+	return result;
+}
+
+int main(int argc, char* argv[])
+{
+	s_argv0 = argv[0];
+
+	bool complement { false };
+	bool delete_ { false };
+	bool squeeze { false };
+	bool truncate { false };
+
+	for (;;)
+	{
+		static option long_options[] {
+			{ "complement",      no_argument, nullptr, 'c' },
+			{ "delete",          no_argument, nullptr, 'd' },
+			{ "squeeze-repeats", no_argument, nullptr, 's' },
+			{ "truncate-set1",   no_argument, nullptr, 't' },
+			{ "help",            no_argument, nullptr, 0   },
+			{}
+		};
+
+		const int ch = getopt_long(argc, argv, "cCdst", long_options, nullptr);
+		if (ch == -1)
+			break;
+
+		switch (ch)
+		{
+			case 'c': case 'C':
+				complement = true;
+				break;
+			case 'd':
+				delete_ = true;
+				break;
+			case 's':
+				squeeze = true;
+				break;
+			case 't':
+				truncate = true;
+				break;
+			case 0:
+				fprintf(stderr, "usage: %s [OPTION]... STRING1 [STRING2]\n", argv[0]);
+				fprintf(stderr, "  translate and/or delete characters from standard input\n");
+				fprintf(stderr, "OPTIONS:\n");
+				fprintf(stderr, "    -c, -C, --complement      use the complement of STRING1\n");
+				fprintf(stderr, "    -d, --delete              delete characters in STRING1 instead of translating\n");
+				fprintf(stderr, "    -s, --squeeze-repeats     squeeze repeated characters of the last STRING into one\n");
+				fprintf(stderr, "    -t, --truncate-set1       truncate STRING1 to the length of STRING2\n");
+				fprintf(stderr, "        --help                show this message and exit\n");
+				return 0;
+			case ':': case '?':
+				fprintf(stderr, "see '%s --help' for usage\n", argv[0]);
+				return 1;
+		}
+	}
+
+	// Plain translation and -ds need both strings; -d or -s alone need one.
+	const int needed_args = (delete_ == squeeze) ? 2 : 1;
+	if (optind + needed_args > argc)
+	{
+		fprintf(stderr, "%s: missing operand\n", argv[0]);
+		fprintf(stderr, "see '%s --help' for usage\n", argv[0]);
+		return 1;
+	}
+
+	if (optind + 2 < argc)
+	{
+		fprintf(stderr, "%s: extra operand '%s'\n", argv[0], argv[optind + 2]);
+		fprintf(stderr, "see '%s --help' for usage\n", argv[0]);
+		return 1;
+	}
+
+	BAN::String array1 = expand_array(argv[optind], 0);
+	if (complement)
+	{
+		bool contains[0x100] {};
+		for (const char ch : array1)
+			contains[static_cast<unsigned char>(ch)] = true;
+		array1.clear();
+		for (int ch = 0; ch < 0x100; ch++)
+			if (!contains[ch])
+				MUST(array1.push_back(static_cast<char>(ch)));
+	}
+
+	BAN::Optional<BAN::String> array2;
+	if (optind + 1 < argc)
+		array2 = expand_array(argv[optind + 1], array1.size());
+
+	// STRING2 is a translation target unless -d is given (then it is only the
+	// squeeze set), so "-s STRING1 STRING2" translates before squeezing.
+	const bool translate = !delete_ && array2.has_value();
+	if (translate)
+	{
+		if (truncate && array1.size() > array2->size())
+			MUST(array1.resize(array2->size()));
+		if (!array1.empty() && array2->empty())
+		{
+			fprintf(stderr, "%s: STRING2 must not be empty\n", argv[0]);
+			return 1;
+		}
+		// Pad STRING2 with its last character up to the length of STRING1.
+		while (array2->size() < array1.size())
+			MUST(array2->push_back(array2->back()));
+	}
+
+	// All tables are indexed by unsigned byte value so bytes >= 0x80 stay in bounds.
+	unsigned char translate_map[0x100];
+	for (int ch = 0; ch < 0x100; ch++)
+		translate_map[ch] = static_cast<unsigned char>(ch);
+	if (translate)
+		for (size_t i = 0; i < array1.size(); i++)
+			translate_map[static_cast<unsigned char>(array1[i])] = static_cast<unsigned char>(array2.value()[i]);
+
+	bool delete_set[0x100] {};
+	if (delete_)
+		for (const char ch : array1)
+			delete_set[static_cast<unsigned char>(ch)] = true;
+
+	bool squeeze_set[0x100] {};
+	if (squeeze)
+	{
+		const auto& array = array2.has_value() ? array2.value() : array1;
+		for (const char ch : array)
+			squeeze_set[static_cast<unsigned char>(ch)] = true;
+	}
+
+	int prev_char = -1;
+	for (;;)
+	{
+		const int input = getchar();
+		if (input == EOF)
+			break;
+
+		const int ch = translate_map[input];
+		if (delete_set[ch])
+			continue;
+		if (squeeze_set[ch] && prev_char == ch)
+			continue;
+		prev_char = ch;
+		putchar(ch);
+	}
+
+	return 0;
+}