#!/usr/bin/env python3
"""Lexer (tokenizer) built with PLY.

Run with ``-f FILE`` to print every token found in FILE, or with ``--who``
to print author information.
"""

import argparse
import datetime
import sys

import ply.lex as lex

# Token names exposed to PLY; each has a matching ``t_<NAME>`` rule below.
tokens = (
    'LPAREN',
    'RPAREN',
    'LSQUARE',
    'RSQUARE',
    'LCURLY',
    'RCURLY',
    'APOSTROPHE',
    'AMPERSAND',
    'COMMA',
    'DOT',
    'EQ',
    'LT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'STRING',
    'DATE_LITERAL',
    'INT_LITERAL',
    'IDENT',
    'FUNC_IDENT',
    'PROC_IDENT',
)

# NOTE: PLY takes the docstring of every ``t_*`` function as that rule's
# regular expression, so the rule functions below are documented with
# comments only. Their definition order is kept as-is on purpose, because
# PLY tries function rules in the order they are defined.


# Whitespace produces no token (nothing is returned); only the line counter
# is advanced so that error messages report the right line.
def t_whitespace(t):
    r'[ \t\n]+'
    t.lexer.lineno += t.value.count('\n')


# Comments look like ``(% ... %)`` and may span several lines. The body is
# matched non-greedily so the comment ends at the FIRST ``%)``; a greedy
# match would run to the last ``%)`` in the input and swallow all code
# between two separate comments.
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    t.lexer.lineno += t.value.count('\n')


# Single-character punctuation and operator tokens.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'
t_EQ = r'='
t_LT = r'<'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'


# Double-quoted, single-line string; the token value is the text without
# the surrounding quotes.
def t_STRING(t):
    r'".*?"'
    t.value = t.value[1:-1]
    return t


# ISO date (YYYY-MM-DD), converted to ``datetime.date``. Defined before
# INT_LITERAL so that a date is not split into integers. Text with the
# right shape but an impossible date (e.g. month 13) is reported as a
# lexing error instead of surfacing as an uncaught ValueError traceback.
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        print(f'Invalid date literal \'{t.value}\' at line {t.lexer.lineno}')
        sys.exit(1)
    return t


# Integer with an optional leading minus and optional apostrophe-separated
# groups of three digits (e.g. 1'000'000); converted to ``int`` after the
# separators are removed.
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    t.value = int(t.value.replace('\'', ''))
    return t


# Lowercase letter followed by at least one letter, digit or underscore.
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    return t


# Uppercase letter followed by at least one lowercase letter, digit or
# underscore.
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    return t


# Two uppercase letters followed by any number of uppercase letters, digits
# or underscores.
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    return t


# Called by PLY when no rule matches at the current position: report the
# offending character and stop with a non-zero exit status.
def t_error(t):
    print(f'Illegal character \'{t.value[0]}\' at line {t.lexer.lineno}')
    # sys.exit instead of the ``exit`` builtin, which is only installed by
    # the ``site`` module and is not guaranteed to exist.
    sys.exit(1)


def tokenize_file(file_path: str) -> None:
    """Tokenize the file at *file_path* and print each token to stdout.

    The file is read as UTF-8. Lexing errors are reported by the rule
    functions above, which terminate the program with exit status 1.
    """
    lexer = lex.lex()
    with open(file_path, 'r', encoding='utf-8') as file:
        lexer.input(file.read())

    # Iterating the lexer yields tokens until the input is exhausted.
    for tok in lexer:
        print(tok)


def main() -> None:
    """Parse the command line and either print author info or tokenize a file."""
    parser = argparse.ArgumentParser()
    # Exactly one of --who / --file must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if args.who:
        print('Author')
        print('  Student ID: 150189237')
        print('  Name: Oskari Alaranta')
    else:
        tokenize_file(args.file)


if __name__ == '__main__':
    main()