#!/bin/env python3 import argparse import ply.lex as lex import ply.yacc as yacc import lexer import tree_print tokens = lexer.tokens class ASTnode: def __init__(self, typestr, lineno, value = None): self.nodetype = typestr self.lineno = lineno if value is not None: self.value = value class SemData: def __init__(self): self.in_procedure_def = False self.in_function_def = False self.return_type = None self.parent = None def p_program1(p): 'program : statement_list' p[0] = ASTnode('program', p.lineno(1)) p[0].children_definitions = [] p[0].children_statements = p[1].children_statements def p_program2(p): 'program : definition_list statement_list' p[0] = ASTnode('program', p.lineno(1)) p[0].children_definitions = p[1].children_definitions p[0].children_statements = p[2].children_statements def p_statement_list1(p): 'statement_list : statement' p[0] = ASTnode('statement_list', p.lineno(1)) p[0].children_statements = [ p[1] ] def p_statement_list2(p): 'statement_list : statement_list COMMA statement' p[0] = p[1] p[0].children_statements += [ p[3] ] def p_definition_list1(p): 'definition_list : definition' p[0] = ASTnode('definition_list', p.lineno(1)) p[0].children_definitions = [ p[1] ] def p_definition_list2(p): 'definition_list : definition_list definition' p[0] = p[1] p[0].children_definitions += [ p[2] ] def p_definition(p): '''definition : function_definition | procedure_definition | variable_definition''' p[0] = p[1] def p_variable_definition(p): 'variable_definition : VAR IDENT EQ expression' p[0] = ASTnode('variable_definition', p.lineno(1), p[2]) p[0].child_expression = p[4] def p_empty(p): 'empty :' pass def p_variable_definition_list1(p): 'variable_definition_list : empty' p[0] = ASTnode('variable_definition_list', p.lineno(1)) p[0].children_definitions = [] def p_variable_definition_list2(p): 'variable_definition_list : variable_definition_list variable_definition' p[0] = p[1] p[0].children_definitions += [ p[2] ] def p_function_definition(p): '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION''' p[0] = ASTnode('function_definition', p.lineno(2), p[2]) p[0].children_formals = p[4].children_formals p[0].child_return_type = ASTnode('identifier', p.lineno(7), p[7]) p[0].children_variable_definitions = p[8].children_definitions p[0].child_value = p[10] def p_procedure_definition1(p): 'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE' p[0] = ASTnode('procedure_definition', p.lineno(2), p[2]) p[0].children_formals = p[4].children_formals p[0].children_variable_definitions = p[6].children_definitions p[0].children_statements = p[8].children_statements p[0].child_return_type = None def p_procedure_definition2(p): '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE''' p[0] = ASTnode('procedure_definition', p.lineno(2), p[2]) p[0].children_formals = p[4].children_formals p[0].children_variable_definitions = p[8].children_definitions p[0].children_statements = p[10].children_statements p[0].child_return_type = ASTnode('identifier', p.lineno(7), p[7]) def p_formal_list1(p): 'formal_list : empty' p[0] = ASTnode('formal_list', p.lineno(1)) p[0].children_formals = [] def p_formal_list2(p): 'formal_list : formal_arg' p[0] = ASTnode('formal_list', p.lineno(1)) p[0].children_formals = [ p[1] ] def p_formal_list3(p): 'formal_list : formal_list COMMA formal_arg' p[0] = p[1] p[0].children_formals += [ p[3] ] def p_formal_arg(p): 'formal_arg : IDENT LSQUARE IDENT RSQUARE' p[0] = ASTnode('formal_argument', p.lineno(1)) p[0].child_variable = ASTnode('identifier', p.lineno(1), p[1]) p[0].child_type = ASTnode('identifier', p.lineno(3), p[3]) def p_procedure_call1(p): 'procedure_call : PROC_IDENT LPAREN RPAREN' p[0] = ASTnode('procedure_call', p.lineno(1), p[1]) p[0].children_arguments = [] def p_procedure_call(p): '''procedure_call : PROC_IDENT LPAREN arguments RPAREN''' p[0] = ASTnode('procedure_call', p.lineno(1), p[1]) p[0].children_arguments = p[3].children_arguments def p_arguments1(p): 'arguments : expression' p[0] = ASTnode('arguments', p.lineno(1)) p[0].children_arguments = [ p[1] ] def p_arguments2(p): 'arguments : arguments COMMA expression' p[0] = p[1] p[0].children_arguments += [ p[3] ] def p_assignment(p): 'assignment : lvalue EQ rvalue' p[0] = ASTnode('assignment', p.lineno(1)) p[0].child_lhs = p[1] p[0].child_rhs = p[3] def p_lvalue1(p): 'lvalue : IDENT' p[0] = ASTnode('identifier', p.lineno(1), p[1]) def p_lvalue2(p): 'lvalue : IDENT DOT IDENT' p[0] = ASTnode('attribute_write', p.lineno(1)) p[0].child_identifier = ASTnode('identifier', p.lineno(1), p[1]) p[0].child_attribute = ASTnode('identifier', p.lineno(3), p[3]) def p_rvalue(p): '''rvalue : expression | unless_expression''' p[0] = p[1] def p_print_statement1(p): 'print_statement : PRINT print_item' p[0] = ASTnode('print', p.lineno(1)) p[0].children_items = [ p[2] ] def p_print_statement2(p): 'print_statement : print_statement AMPERSAND print_item' p[0] = p[1] p[0].children_items += [ p[3] ] def p_print_item1(p): 'print_item : STRING' p[0] = ASTnode('string_literal', p.lineno(1), p[1]) def p_print_item2(p): 'print_item : expression' p[0] = p[1] def p_statement1(p): '''statement : procedure_call | assignment | print_statement''' p[0] = p[1] def p_statement2(p): 'statement : DO statement_list UNTIL expression' p[0] = ASTnode('do_until', p.lineno(1)) p[0].children_statements = p[2].children_statements p[0].child_condition = p[4] def p_statement3(p): 'statement : DO statement_list UNLESS expression DONE' p[0] = ASTnode('do_unless', p.lineno(1)) p[0].children_statements = p[2].children_statements p[0].child_condition = p[4] p[0].children_otherwise = [] def p_statement4(p): 'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE' p[0] = ASTnode('do_unless', p.lineno(1)) p[0].children_statements = p[2].children_statements p[0].child_condition = p[4] p[0].children_otherwise = p[6].children_statements def p_statement5(p): 'statement : RETURN expression' p[0] = ASTnode('return', p.lineno(1)) p[0].child_expression = p[2] def p_expression1(p): 'expression : simple_expr' p[0] = p[1] def p_expression2(p): '''expression : expression EQ simple_expr | expression LT simple_expr''' p[0] = ASTnode('binary_op', p.lineno(2), p[2]) p[0].child_lhs = p[1] p[0].child_rhs = p[3] def p_simple_expr1(p): 'simple_expr : term' p[0] = p[1] def p_simple_expr2(p): '''simple_expr : simple_expr PLUS term | simple_expr MINUS term''' p[0] = ASTnode('binary_op', p.lineno(2), p[2]) p[0].child_lhs = p[1] p[0].child_rhs = p[3] def p_term1(p): 'term : factor' p[0] = p[1] def p_term2(p): '''term : term MULT factor | term DIV factor''' p[0] = ASTnode('binary_op', p.lineno(2), p[2]) p[0].child_lhs = p[1] p[0].child_rhs = p[3] def p_factor1(p): 'factor : atom' p[0] = p[1] def p_factor2(p): '''factor : MINUS atom | PLUS atom''' p[0] = ASTnode('unary_op', p.lineno(1), p[1]) p[0].child_atom = p[2] def p_atom1(p): 'atom : IDENT' p[0] = ASTnode('identifier', p.lineno(1), p[1]) def p_atom2(p): 'atom : INT_LITERAL' p[0] = ASTnode('int_literal', p.lineno(1), p[1]) def p_atom3(p): 'atom : DATE_LITERAL' p[0] = ASTnode('date_literal', p.lineno(1), p[1]) def p_atom4(p): 'atom : IDENT APOSTROPHE IDENT' p[0] = ASTnode('attribute_read', p.lineno(1)) p[0].child_identifier = ASTnode('identifier', p.lineno(1), p[1]) p[0].child_attribute = ASTnode('identifier', p.lineno(3), p[3]) def p_atom5(p): 'atom : LPAREN expression RPAREN' p[0] = p[2] def p_atom6(p): '''atom : function_call | procedure_call''' p[0] = p[1] def p_function_call1(p): 'function_call : FUNC_IDENT LPAREN RPAREN' p[0] = ASTnode('function_call', p.lineno(1), p[1]) p[0].children_arguments = [] def p_function_call2(p): 'function_call : FUNC_IDENT LPAREN arguments RPAREN' p[0] = ASTnode('function_call', p.lineno(1), p[1]) p[0].children_arguments = p[3].children_arguments def p_unless_expression(p): 'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE' p[0] = ASTnode('unless_expression', p.lineno(1)) p[0].child_condition = p[4] p[0].child_true_expr = p[2] p[0].child_false_expr = p[6] def p_error(p): if p is not None: print(f"{{{p.lexer.lineno}}}:Syntax Error (token:'{p.value}')") else: print('Syntax Error at the end of file') raise SystemExit def syntax_check_file(file_path: str, debug: bool) -> ASTnode: parser = yacc.yacc() with open(file_path, 'r', encoding='utf-8') as file: result = parser.parse(file.read(), lexer=lexer.lexer, debug=debug) return result def semantic_check(node: ASTnode, sem_data: SemData): is_procedure = False is_function = False return_type = None match node.nodetype: case 'attribute_read': if node.child_attribute.value not in ['day', 'month', 'year', 'weekday', 'weeknum']: print(f'Semantic Error: invalid read attribute \'{node.child_attribute.value}\' at line {node.lineno}') raise SystemExit case 'attribute_write': if node.child_attribute.value not in ['day', 'month', 'year']: print(f'Semantic Error: invalid write attribute \'{node.child_attribute.value}\' at line {node.lineno}') raise SystemExit case 'procedure_definition': if node.child_return_type is not None: if node.child_return_type.value not in ['int', 'date']: print(f'Semantic Error: procedure definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}') raise SystemExit return_type = node.child_return_type.value for formal in node.children_formals: if formal.child_type.value not in ['int', 'date']: print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}') raise SystemExit is_procedure = True case 'function_definition': if node.child_return_type is not None: if node.child_return_type.value not in ['int', 'date']: print(f'Semantic Error: function definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}') raise SystemExit return_type = node.child_return_type.value for formal in node.children_formals: if formal.child_type.value not in ['int', 'date']: print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}') raise SystemExit is_function = True case 'procedure_call': if sem_data.in_function_def: print(f'Semantic Error: procedure call inside function at line {node.lineno}') raise SystemExit case 'return': if not sem_data.in_procedure_def: print(f'Semantic Error: return statement outside of procedure definition at line {node.lineno}') raise SystemExit if sem_data.return_type is None: print(f'Semantic Error: return statement in returnless procedure definition at line {node.lineno}') raise SystemExit case 'date_literal': # date literal can be in variable definition if sem_data.parent.nodetype == 'variable_definition': pass # right side of assignment elif sem_data.parent.nodetype == 'assignment': if sem_data.parent.child_lhs.nodetype == 'date_literal': print(f'Semantic Error: invalid date literal at line {node.lineno}') raise SystemExit # either side of subtraction or left side of addition elif sem_data.parent.nodetype == 'binary_op': if sem_data.parent.value == '-': pass elif sem_data.parent.value == '+': if sem_data.parent.child_rhs.nodetype == 'date_literal': print(f'Semantic Error: invalid date literal at line {node.lineno}') raise SystemExit else: print(f'Semantic Error: invalid date literal at line {node.lineno}') raise SystemExit else: print(f'Semantic Error: invalid date literal at line {node.lineno}') raise SystemExit if is_procedure or is_function: sem_data.in_procedure_def = is_procedure sem_data.in_function_def = is_function sem_data.return_type = return_type temp_parent = sem_data.parent sem_data.parent = node for name, child in tree_print.get_childvars(node): if child is not None: semantic_check(child, sem_data) sem_data.parent = temp_parent if is_procedure or is_function: sem_data.in_procedure_def = False sem_data.in_function_def = False sem_data.return_type = None if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-d', '--debug', action='store_true', help='debug?') group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors') group.add_argument('-f', '--file', help='filename to process') args = parser.parse_args() if args.who: print('Author') print(' Student ID: 150189237') print(' Name: Oskari Alaranta') else: ast = syntax_check_file(args.file, args.debug) tree_print.treeprint(ast, 'unicode') semantic_check(ast, SemData())