From edadabc7ba407da13d0d2171a99e1059e85e6abe Mon Sep 17 00:00:00 2001
From: Bananymous
Date: Mon, 4 Mar 2024 01:54:42 +0200
Subject: [PATCH] Implement phase 02 syntax analysis

---
 02_syntax/lexer.py | 128 +++++++++++++++++++++++++++++++
 02_syntax/main.py  | 186 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 314 insertions(+)
 create mode 100644 02_syntax/lexer.py
 create mode 100644 02_syntax/main.py

diff --git a/02_syntax/lexer.py b/02_syntax/lexer.py
new file mode 100644
index 0000000..da17551
--- /dev/null
+++ b/02_syntax/lexer.py
@@ -0,0 +1,128 @@
+#!/usr/bin/env python3
+
+import argparse
+import datetime
+import ply.lex as lex
+
+reserved = {  # keyword text -> token type; looked up by t_IDENT
+    'var': 'VAR',
+    'is': 'IS',
+    'unless': 'UNLESS',
+    'otherwise': 'OTHERWISE',
+    'until': 'UNTIL',
+    'do': 'DO',
+    'done': 'DONE',
+    'procedure': 'PROCEDURE',
+    'function': 'FUNCTION',
+    'return': 'RETURN',
+    'print': 'PRINT',
+    'end': 'END',
+}
+
+tokens = [  # every token type the lexer can emit; keyword types are appended below
+    'LPAREN',
+    'RPAREN',
+    'LSQUARE',
+    'RSQUARE',
+    'LCURLY',
+    'RCURLY',
+    'APOSTROPHE',
+    'AMPERSAND',
+    'COMMA',
+    'DOT',
+    'EQ',
+    'LT',
+    'PLUS',
+    'MINUS',
+    'MULT',
+    'DIV',
+    'STRING',
+    'DATE_LITERAL',
+    'INT_LITERAL',
+    'IDENT',
+    'FUNC_IDENT',
+    'PROC_IDENT',
+] + list(reserved.values())
+
+def t_whitespace(t):  # returns nothing, so whitespace is discarded; only newlines are counted
+    r'[ \t\n]+'
+    t.lexer.lineno += t.value.count('\n')
+
+def t_comment(t):  # "(% ... %)" comment, may span lines; non-greedy so it stops at the first "%)"
+    r'\(%(.|\n)*?%\)'
+    t.lexer.lineno += t.value.count('\n')
+
+t_LPAREN = r'\('
+t_RPAREN = r'\)'
+t_LSQUARE = r'\['
+t_RSQUARE = r'\]'
+t_LCURLY = r'\{'
+t_RCURLY = r'\}'
+t_APOSTROPHE = r'\''
+t_AMPERSAND = r'&'
+t_COMMA = r','
+t_DOT = r'\.'
+t_EQ = r'='
+t_LT = r'<'
+t_PLUS = r'\+'
+t_MINUS = r'-'
+t_MULT = r'\*'
+t_DIV = r'/'
+
+def t_STRING(t):  # token value is the text between the double quotes
+    r'".*?"'
+    t.value = t.value[1:-1]
+    return t
+
+def t_DATE_LITERAL(t):  # PLY tries function rules in definition order, so this runs before t_INT_LITERAL
+    r'\d{4}-\d{2}-\d{2}'
+    t.value = datetime.date.fromisoformat(t.value)
+    return t
+
+def t_INT_LITERAL(t):  # apostrophes group digits in threes and are stripped before int()
+    r'-?\d{1,3}(\'\d{3})*'
+    t.value = int(t.value.replace('\'', ''))
+    return t
+
+def t_IDENT(t):  # keywords match this pattern too and are re-typed through `reserved`
+    r'[a-z][a-zA-Z0-9_]+'
+    t.type = reserved.get(t.value, 'IDENT')
+    return t
+
+def t_FUNC_IDENT(t):
+    r'[A-Z][a-z0-9_]+'
+    return t
+
+def t_PROC_IDENT(t):
+    r'[A-Z]{2}[A-Z0-9_]*'
+    return t
+
+def t_error(t):  # called by PLY when no rule matches at the current position
+    print(f'Illegal character \'{t.value[0]}\' at line {t.lexer.lineno}')
+    raise SystemExit(1)  # non-zero status so a lexical error is not reported as success
+
+lexer = lex.lex()
+
+def tokenize_file(file_path: str):  # prints every token of the file, one per line
+    with open(file_path, 'r', encoding='utf-8') as file:
+        lexer.input(file.read())
+
+    tok = lexer.token()
+    while tok:
+        print(tok)
+        tok = lexer.token()
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
+    group.add_argument('-f', '--file', help='filename to process')
+
+    args = parser.parse_args()
+
+    if args.who:
+        print('Author')
+        print(' Student ID: 150189237')
+        print(' Name: Oskari Alaranta')
+    else:
+        tokenize_file(args.file)
diff --git a/02_syntax/main.py b/02_syntax/main.py
new file mode 100644
index 0000000..30afd95
--- /dev/null
+++ b/02_syntax/main.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python3
+
+import argparse
+import ply.lex as lex
+import ply.yacc as yacc
+import lexer
+
+# Simple debugging function to call from syntax rules
+symbolnum = 0
+def debug_print(p):
+    global symbolnum
+    symbolnum += 1
+    p[0] = symbolnum  # the production's value is just its running sequence number
+    msg = ''
+    for i, s in enumerate(p.slice):
+        if s is not None:
+            if isinstance(s, lex.LexToken):
+                msg += str(s.type) + '<' + str(s.value) + '> '
+            else:
+                msg += str(s) + '(' + str(p[i]) + ') '
+        else:
+            msg += '?? '
+        if i == 0:
+            msg += ':: '  # slot 0 is the symbol being reduced to; the rest follow the separator
+    print(msg)
+
+tokens = lexer.tokens
+
+def p_program(p):
+    '''program : statement_list
+               | definitions program'''
+    debug_print(p)
+
+def p_statement_list(p):
+    '''statement_list : statement
+                      | statement COMMA statement_list'''
+    debug_print(p)
+
+def p_definitions(p):
+    '''definitions : function_definition
+                   | procedure_definition
+                   | variable_definition'''
+    debug_print(p)
+
+def p_variable_definition(p):
+    '''variable_definition : VAR IDENT EQ expression'''
+    debug_print(p)
+
+def p_empty(p):
+    '''empty :'''
+    debug_print(p)
+
+def p_opt_formals(p):
+    '''opt_formals : empty
+                   | formals'''
+    debug_print(p)
+
+def p_opt_variable_defitions(p):
+    '''opt_variable_definitions : empty
+                                | variable_definition opt_variable_definitions'''
+    debug_print(p)
+
+def p_function_definition(p):
+    '''function_definition : FUNCTION FUNC_IDENT LCURLY opt_formals RCURLY RETURN IDENT opt_variable_definitions IS rvalue END FUNCTION'''
+    debug_print(p)
+
+def p_procedure_definition(p):
+    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY opt_formals RCURLY opt_variable_definitions IS statement_list END PROCEDURE
+                            | PROCEDURE PROC_IDENT LCURLY opt_formals RCURLY RETURN IDENT opt_variable_definitions IS statement_list END PROCEDURE'''
+    debug_print(p)
+
+def p_formals(p):
+    '''formals : formal_arg
+               | formal_arg COMMA formals'''
+    debug_print(p)
+
+def p_formal_arg(p):
+    '''formal_arg : IDENT LSQUARE IDENT RSQUARE'''
+    debug_print(p)
+
+def p_procedure_call(p):
+    '''procedure_call : PROC_IDENT LPAREN RPAREN
+                      | PROC_IDENT LPAREN arguments RPAREN'''
+    debug_print(p)
+
+def p_arguments(p):
+    '''arguments : expression
+                 | expression COMMA arguments'''
+    debug_print(p)
+
+def p_assignment(p):
+    '''assignment : lvalue EQ rvalue'''
+    debug_print(p)
+
+def p_lvalue(p):
+    '''lvalue : IDENT
+              | IDENT DOT IDENT'''
+    debug_print(p)
+
+def p_rvalue(p):
+    '''rvalue : expression
+              | unless_expression'''
+    debug_print(p)
+
+def p_print_statement(p):
+    '''print_statement : PRINT print_item
+                       | print_statement AMPERSAND print_item'''
+    debug_print(p)
+
+def p_print_item(p):
+    '''print_item : STRING
+                  | expression'''
+    debug_print(p)
+
+def p_statement(p):
+    '''statement : procedure_call
+                 | assignment
+                 | print_statement
+                 | DO statement_list UNTIL expression
+                 | DO statement_list UNLESS expression DONE
+                 | DO statement_list UNLESS expression OTHERWISE statement_list DONE
+                 | RETURN expression'''
+    debug_print(p)
+
+def p_expression(p):
+    '''expression : simple_expr
+                  | expression EQ simple_expr
+                  | expression LT simple_expr'''
+    debug_print(p)
+
+def p_simple_expr(p):  # one docstring defines four nonterminals: simple_expr, term, factor, atom
+    '''simple_expr : term
+                   | simple_expr PLUS term
+                   | simple_expr MINUS term
+       term : factor
+            | term MULT factor
+            | term DIV factor
+       factor : atom
+              | MINUS atom
+              | PLUS atom
+       atom : IDENT
+            | IDENT APOSTROPHE IDENT
+            | INT_LITERAL
+            | DATE_LITERAL
+            | function_call
+            | procedure_call
+            | LPAREN expression RPAREN'''
+    debug_print(p)
+
+def p_function_call(p):
+    '''function_call : FUNC_IDENT LPAREN RPAREN
+                     | FUNC_IDENT LPAREN arguments RPAREN'''
+    debug_print(p)
+
+def p_unless_expression(p):
+    '''unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'''
+    debug_print(p)
+
+def p_error(p):  # p is the offending token, or None when input ended unexpectedly
+    if p is not None:
+        print(f"{{{p.lineno}}}:Syntax Error (token:'{p.value}')")
+    else:
+        print('Syntax Error at the end of file')
+    raise SystemExit(1)  # non-zero status so a syntax error is not reported as success
+
+def syntax_check_file(file_path: str, debug: bool):  # parses the file; prints 'syntax OK' only if parsing returns
+    parser = yacc.yacc()
+    with open(file_path, 'r', encoding='utf-8') as file:
+        parser.parse(file.read(), lexer=lexer.lexer, debug=debug)
+    print('syntax OK')
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-d', '--debug', action='store_true', help='debug?')
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
+    group.add_argument('-f', '--file', help='filename to process')
+
+    args = parser.parse_args()
+
+    if args.who:
+        print('Author')
+        print(' Student ID: 150189237')
+        print(' Name: Oskari Alaranta')
+    else:
+        syntax_check_file(args.file, args.debug)