Implement phase 3 semantic checking
This commit is contained in:
		
							parent
							
								
									004ee25273
								
							
						
					
					
						commit
						91a3a0ba2e
					
				|  | @ -0,0 +1,132 @@ | ||||||
|  | #!/bin/env python3 | ||||||
|  | 
 | ||||||
|  | import argparse | ||||||
|  | import datetime | ||||||
|  | import ply.lex as lex | ||||||
|  | 
 | ||||||
# Reserved words of the language, mapped to their token type. Every token
# type is simply the keyword in upper case.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'var',
        'is',
        'unless',
        'otherwise',
        'until',
        'do',
        'done',
        'procedure',
        'function',
        'return',
        'print',
        'end',
    )
}
|  | 
 | ||||||
# Complete list of token types: punctuation, operators, literals and
# identifier classes, followed by one token type per reserved word.
tokens = [
    # brackets
    'LPAREN', 'RPAREN',
    'LSQUARE', 'RSQUARE',
    'LCURLY', 'RCURLY',
    # punctuation
    'APOSTROPHE', 'AMPERSAND', 'COMMA', 'DOT',
    # comparison and arithmetic operators
    'EQ', 'LT',
    'PLUS', 'MINUS', 'MULT', 'DIV',
    # literals
    'STRING', 'DATE_LITERAL', 'INT_LITERAL',
    # identifier classes
    'IDENT', 'FUNC_IDENT', 'PROC_IDENT',
    # reserved words
    *reserved.values(),
]
|  | 
 | ||||||
def t_whitespace(t):
    r'[ \t\n]+'
    # Whitespace produces no token (nothing is returned); only the line
    # counter is advanced by the number of newlines that were consumed.
    newline_count = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newline_count
|  | 
 | ||||||
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    # Comments are delimited by "(%" and "%)" and may span several lines.
    # They produce no token; the line counter is kept in sync.
    newline_count = t.value.count('\n')
    t.lexer.lineno = t.lexer.lineno + newline_count
|  | 
 | ||||||
# Single-character punctuation and operator tokens. PLY treats every
# module-level string named t_<TOKEN> as the regular expression for <TOKEN>.
t_LPAREN        = r'\('
t_RPAREN        = r'\)'
t_LSQUARE       = r'\['
t_RSQUARE       = r'\]'
t_LCURLY        = r'\{'
t_RCURLY        = r'\}'
t_APOSTROPHE    = r'\''
t_AMPERSAND     = r'&'
t_COMMA         = r','
t_DOT           = r'\.'
t_EQ            = r'='
t_LT            = r'<'
t_PLUS          = r'\+'
t_MINUS         = r'-'
t_MULT          = r'\*'
t_DIV           = r'/'
|  | 
 | ||||||
def t_STRING(t):
    r'".*?"'
    # The token value is the text between the double quotes; the quotes
    # themselves are dropped.
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t
|  | 
 | ||||||
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    # The regular expression only checks the YYYY-MM-DD shape; the calendar
    # validity (month 1-12, day within the month) is checked by converting the
    # text to a datetime.date, which also becomes the token value.
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        # Only ValueError signals an impossible date. A bare "except" would
        # also swallow KeyboardInterrupt and genuine programming errors.
        print(f'Invalid date \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit
    return t
|  | 
 | ||||||
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    # Integers use an apostrophe as thousands separator (e.g. 1'000'000).
    # The separators are removed before converting the text to an int.
    digit_groups = t.value.split("'")
    t.value = int("".join(digit_groups))
    return t
|  | 
 | ||||||
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    # Reserved words match the same pattern as identifiers, so the token type
    # is looked up from the reserved-word table and defaults to IDENT.
    lexeme = t.value
    t.type = reserved[lexeme] if lexeme in reserved else 'IDENT'
    return t
|  | 
 | ||||||
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    # Function names: one upper-case letter followed by at least one
    # lower-case letter, digit or underscore. The token is passed on unchanged.
    return t
|  | 
 | ||||||
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    # Procedure names: at least two upper-case letters, optionally followed by
    # upper-case letters, digits or underscores. The token is passed on unchanged.
    return t
|  | 
 | ||||||
def t_error(t):
    # Called by the lexer when no rule matches. Reports the first offending
    # character together with the current line and stops the program.
    offending_char = t.value[0]
    line_number = t.lexer.lineno
    print(f"Illegal character '{offending_char}' at line {line_number}")
    raise SystemExit
|  | 
 | ||||||
# Build the lexer from the token rules (t_* names) defined in this module.
lexer = lex.lex()
|  | 
 | ||||||
def tokenize_file(file_path: str):
    """Tokenize the UTF-8 text file at *file_path* and print every token.

    The whole file is read and handed to the module-level lexer; tokens are
    then printed one per line until the lexer reports the end of input.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        lexer.input(source.read())

    # lexer.token() returns None once the input is exhausted.
    for tok in iter(lexer.token, None):
        print(tok)
|  | 
 | ||||||
if __name__ == '__main__':
    # Command-line entry point: exactly one of --who / --file must be given.
    cli = argparse.ArgumentParser()
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    options = cli.parse_args()

    if not options.who:
        tokenize_file(options.file)
    else:
        for author_line in (
            'Author',
            '  Student ID: 150189237',
            '  Name:       Oskari Alaranta',
        ):
            print(author_line)
|  | @ -0,0 +1,428 @@ | ||||||
|  | #!/bin/env python3 | ||||||
|  | 
 | ||||||
|  | import argparse | ||||||
|  | import ply.lex as lex | ||||||
|  | import ply.yacc as yacc | ||||||
|  | import lexer | ||||||
|  | import tree_print | ||||||
|  | 
 | ||||||
# ply.yacc looks up a module-level "tokens" list; reuse the lexer's list so
# both phases agree on the token names.
tokens = lexer.tokens
|  | 
 | ||||||
class ASTnode:
    """A node of the abstract syntax tree.

    Every node stores its type name in ``nodetype`` and a source line in
    ``lineno``. The ``value`` attribute exists only when a value other than
    None was given. Child nodes are attached later by the grammar actions as
    ``child_*`` / ``children_*`` attributes.
    """

    def __init__(self, typestr, lineno, value=None):
        self.nodetype = typestr
        self.lineno = lineno
        # Leave the attribute out entirely when there is no value, so that
        # hasattr(node, 'value') distinguishes the two cases.
        if value is None:
            return
        self.value = value
|  | 
 | ||||||
class SemData:
    """Mutable state carried through the semantic check of the syntax tree."""

    def __init__(self):
        # Flags telling which kind of definition is currently being checked.
        self.in_procedure_def = self.in_function_def = False
        # Declared return type of the enclosing definition, if any.
        self.return_type = None
        # Node whose children are currently being visited.
        self.parent = None
|  | 
 | ||||||
def p_program1(p):
    'program : statement_list'
    # Program without definitions: the definition list is stored as an empty
    # list and the statements are taken over from the statement list node.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = []
    root.children_statements = p[1].children_statements
    p[0] = root
|  | 
 | ||||||
def p_program2(p):
    'program : definition_list statement_list'
    # Program with definitions: both lists are taken over from the
    # intermediate list nodes.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = p[1].children_definitions
    root.children_statements = p[2].children_statements
    p[0] = root
|  | 
 | ||||||
def p_statement_list1(p):
    'statement_list : statement'
    # Start a new statement list holding the single statement.
    collected = ASTnode('statement_list', p.lineno(1))
    collected.children_statements = [p[1]]
    p[0] = collected
|  | 
 | ||||||
def p_statement_list2(p):
    'statement_list : statement_list COMMA statement'
    # Extend the existing list node in place with the new statement.
    collected = p[1]
    collected.children_statements.append(p[3])
    p[0] = collected
|  | 
 | ||||||
def p_definition_list1(p):
    'definition_list : definition'
    # Start a new definition list holding the single definition.
    collected = ASTnode('definition_list', p.lineno(1))
    collected.children_definitions = [p[1]]
    p[0] = collected
|  | 
 | ||||||
def p_definition_list2(p):
    'definition_list : definition_list definition'
    # Extend the existing list node in place with the new definition.
    collected = p[1]
    collected.children_definitions.append(p[2])
    p[0] = collected
|  | 
 | ||||||
def p_definition(p):
    '''definition : function_definition
                   | procedure_definition
                   | variable_definition'''
    # Pass the node of the matched alternative through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_variable_definition(p):
    'variable_definition : VAR IDENT EQ expression'
    # The node value is the variable name; the initial value expression is
    # attached as the only child.
    definition = ASTnode('variable_definition', p.lineno(1), p[2])
    definition.child_expression = p[4]
    p[0] = definition
|  | 
 | ||||||
def p_empty(p):
    'empty :'
    # Epsilon production: matches nothing and assigns no value to p[0].
    return None
|  | 
 | ||||||
def p_variable_definition_list1(p):
    'variable_definition_list : empty'
    # No local variable definitions: start with an empty list.
    collected = ASTnode('variable_definition_list', p.lineno(1))
    collected.children_definitions = []
    p[0] = collected
|  | 
 | ||||||
def p_variable_definition_list2(p):
    'variable_definition_list : variable_definition_list variable_definition'
    # Extend the existing list node in place with the new variable definition.
    collected = p[1]
    collected.children_definitions.append(p[2])
    p[0] = collected
|  | 
 | ||||||
def p_function_definition(p):
    '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION'''
    # The node value is the function name. Children, in order: formal
    # arguments, declared return type, local variable definitions and the
    # value expression of the function.
    definition = ASTnode('function_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    definition.children_variable_definitions = p[8].children_definitions
    definition.child_value = p[10]
    p[0] = definition
|  | 
 | ||||||
def p_procedure_definition1(p):
    'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE'
    # Procedure without a declared return type: child_return_type is stored
    # as None. The node value is the procedure name.
    definition = ASTnode('procedure_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.children_variable_definitions = p[6].children_definitions
    definition.children_statements = p[8].children_statements
    definition.child_return_type = None
    p[0] = definition
|  | 
 | ||||||
def p_procedure_definition2(p):
    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE'''
    # Procedure with a declared return type, stored as an identifier node.
    # The node value is the procedure name.
    definition = ASTnode('procedure_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.children_variable_definitions = p[8].children_definitions
    definition.children_statements = p[10].children_statements
    definition.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    p[0] = definition
|  | 
 | ||||||
def p_formal_list1(p):
    'formal_list : empty'
    # No formal arguments: start with an empty list.
    collected = ASTnode('formal_list', p.lineno(1))
    collected.children_formals = []
    p[0] = collected
|  | 
 | ||||||
def p_formal_list2(p):
    'formal_list : formal_arg'
    # Start a new formal list holding the single argument.
    collected = ASTnode('formal_list', p.lineno(1))
    collected.children_formals = [p[1]]
    p[0] = collected
|  | 
 | ||||||
def p_formal_list3(p):
    'formal_list : formal_list COMMA formal_arg'
    # Extend the existing list node in place with the new formal argument.
    collected = p[1]
    collected.children_formals.append(p[3])
    p[0] = collected
|  | 
 | ||||||
def p_formal_arg(p):
    'formal_arg : IDENT LSQUARE IDENT RSQUARE'
    # A formal argument is written name[type]; both parts become identifier
    # nodes attached to the formal_argument node.
    formal = ASTnode('formal_argument', p.lineno(1))
    formal.child_variable = ASTnode('identifier', p.lineno(1), p[1])
    formal.child_type = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = formal
|  | 
 | ||||||
def p_procedure_call1(p):
    'procedure_call : PROC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the procedure name.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|  | 
 | ||||||
def p_procedure_call(p):
    '''procedure_call : PROC_IDENT LPAREN arguments RPAREN'''
    # Call with arguments; the argument list is taken over from the
    # intermediate arguments node. The node value is the procedure name.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|  | 
 | ||||||
def p_arguments1(p):
    'arguments : expression'
    # Start a new argument list holding the single expression.
    collected = ASTnode('arguments', p.lineno(1))
    collected.children_arguments = [p[1]]
    p[0] = collected
|  | 
 | ||||||
def p_arguments2(p):
    'arguments : arguments COMMA expression'
    # Extend the existing list node in place with the new argument.
    collected = p[1]
    collected.children_arguments.append(p[3])
    p[0] = collected
|  | 
 | ||||||
def p_assignment(p):
    'assignment : lvalue EQ rvalue'
    # Target on the left-hand side, assigned value on the right-hand side.
    assignment = ASTnode('assignment', p.lineno(1))
    assignment.child_lhs = p[1]
    assignment.child_rhs = p[3]
    p[0] = assignment
|  | 
 | ||||||
def p_lvalue1(p):
    'lvalue : IDENT'
    # A plain variable as assignment target.
    p[0] = ASTnode('identifier', lineno=p.lineno(1), value=p[1])
|  | 
 | ||||||
def p_lvalue2(p):
    'lvalue : IDENT DOT IDENT'
    # Attribute write, written variable.attribute; both names become
    # identifier nodes.
    target = ASTnode('attribute_write', p.lineno(1))
    target.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    target.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = target
|  | 
 | ||||||
def p_rvalue(p):
    '''rvalue : expression
              | unless_expression'''
    # Pass the node of the matched alternative through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_print_statement1(p):
    'print_statement : PRINT print_item'
    # Start a print node holding the first printed item.
    statement = ASTnode('print', p.lineno(1))
    statement.children_items = [p[2]]
    p[0] = statement
|  | 
 | ||||||
def p_print_statement2(p):
    'print_statement : print_statement AMPERSAND print_item'
    # Further items joined with "&" are appended to the same print node.
    statement = p[1]
    statement.children_items.append(p[3])
    p[0] = statement
|  | 
 | ||||||
def p_print_item1(p):
    'print_item : STRING'
    # String literals are wrapped into their own node type.
    p[0] = ASTnode('string_literal', lineno=p.lineno(1), value=p[1])
|  | 
 | ||||||
def p_print_item2(p):
    'print_item : expression'
    # An expression is printed as is; pass its node through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_statement1(p):
    '''statement : procedure_call
                 | assignment
                 | print_statement'''
    # Pass the node of the matched alternative through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_statement2(p):
    'statement : DO statement_list UNTIL expression'
    # do ... until loop: body statements plus the loop condition.
    loop = ASTnode('do_until', p.lineno(1))
    loop.children_statements = p[2].children_statements
    loop.child_condition = p[4]
    p[0] = loop
|  | 
 | ||||||
def p_statement3(p):
    'statement : DO statement_list UNLESS expression DONE'
    # do ... unless without an otherwise branch: the otherwise list is
    # stored as an empty list.
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = []
    p[0] = conditional
|  | 
 | ||||||
def p_statement4(p):
    'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE'
    # do ... unless ... otherwise: both statement lists are taken over from
    # the intermediate list nodes.
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = p[6].children_statements
    p[0] = conditional
|  | 
 | ||||||
def p_statement5(p):
    'statement : RETURN expression'
    # Return statement with the returned expression as its only child.
    statement = ASTnode('return', p.lineno(1))
    statement.child_expression = p[2]
    p[0] = statement
|  | 
 | ||||||
def p_expression1(p):
    'expression : simple_expr'
    # No comparison operator: pass the simple expression through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_expression2(p):
    '''expression : expression EQ simple_expr
                  | expression LT simple_expr'''
    # Comparison: the node value is the operator text, the operands are the
    # left and right children.
    comparison = ASTnode('binary_op', p.lineno(2), p[2])
    comparison.child_lhs = p[1]
    comparison.child_rhs = p[3]
    p[0] = comparison
|  | 
 | ||||||
def p_simple_expr1(p):
    'simple_expr : term'
    # No additive operator: pass the term through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_simple_expr2(p):
    '''simple_expr : simple_expr PLUS term
                   | simple_expr MINUS term'''
    # Addition / subtraction: the node value is the operator text, the
    # operands are the left and right children.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|  | 
 | ||||||
def p_term1(p):
    'term : factor'
    # No multiplicative operator: pass the factor through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_term2(p):
    '''term : term MULT factor
            | term DIV factor'''
    # Multiplication / division: the node value is the operator text, the
    # operands are the left and right children.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|  | 
 | ||||||
def p_factor1(p):
    'factor : atom'
    # No sign: pass the atom through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_factor2(p):
    '''factor : MINUS atom
              | PLUS atom'''
    # Signed atom: the node value is the sign text, the atom is the child.
    signed = ASTnode('unary_op', p.lineno(1), p[1])
    signed.child_atom = p[2]
    p[0] = signed
|  | 
 | ||||||
def p_atom1(p):
    'atom : IDENT'
    # Variable reference.
    p[0] = ASTnode('identifier', lineno=p.lineno(1), value=p[1])
|  | 
 | ||||||
def p_atom2(p):
    'atom : INT_LITERAL'
    # Integer literal; the node value is the token value.
    p[0] = ASTnode('int_literal', lineno=p.lineno(1), value=p[1])
|  | 
 | ||||||
def p_atom3(p):
    'atom : DATE_LITERAL'
    # Date literal; the node value is the token value.
    p[0] = ASTnode('date_literal', lineno=p.lineno(1), value=p[1])
|  | 
 | ||||||
def p_atom4(p):
    'atom : IDENT APOSTROPHE IDENT'
    # Attribute read, written variable'attribute; both names become
    # identifier nodes.
    access = ASTnode('attribute_read', p.lineno(1))
    access.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    access.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = access
|  | 
 | ||||||
def p_atom5(p):
    'atom : LPAREN expression RPAREN'
    # Parentheses only group; the inner expression node is used directly.
    p[0] = p[2]
|  | 
 | ||||||
def p_atom6(p):
    '''atom : function_call
            | procedure_call'''
    # Pass the call node of the matched alternative through unchanged.
    p[0] = p[1]
|  | 
 | ||||||
def p_function_call1(p):
    'function_call : FUNC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the function name.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|  | 
 | ||||||
def p_function_call2(p):
    'function_call : FUNC_IDENT LPAREN arguments RPAREN'
    # Call with arguments; the argument list is taken over from the
    # intermediate arguments node. The node value is the function name.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|  | 
 | ||||||
def p_unless_expression(p):
    'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'
    # The expression after DO is stored as child_true_expr, the one after
    # UNLESS as child_condition and the one after OTHERWISE as
    # child_false_expr. The condition is attached first.
    conditional = ASTnode('unless_expression', p.lineno(1))
    conditional.child_condition = p[4]
    conditional.child_true_expr = p[2]
    conditional.child_false_expr = p[6]
    p[0] = conditional
|  | 
 | ||||||
def p_error(p):
    # Syntax error handler. p is the offending token, or None when the input
    # ended unexpectedly. The error is reported and the program stops.
    if p is None:
        print('Syntax Error at the end of file')
    else:
        line_number = p.lexer.lineno
        print(f"{{{line_number}}}:Syntax Error (token:'{p.value}')")
    raise SystemExit
|  | 
 | ||||||
|  | 
 | ||||||
def syntax_check_file(file_path: str, debug: bool) -> ASTnode:
    """Parse the UTF-8 text file at *file_path* and return the parse result.

    A parser is built from the grammar rules of this module and run on the
    whole file contents with the lexer from the ``lexer`` module. *debug* is
    passed on to the parser's ``parse`` call.
    """
    grammar_parser = yacc.yacc()
    with open(file_path, 'r', encoding='utf-8') as source:
        program_text = source.read()
        return grammar_parser.parse(program_text, lexer=lexer.lexer, debug=debug)
|  | 
 | ||||||
|  | 
 | ||||||
|  | def semantic_check(node: ASTnode, sem_data: SemData): | ||||||
|  |     is_procedure = False | ||||||
|  |     is_function = False | ||||||
|  |     return_type = None | ||||||
|  | 
 | ||||||
|  |     match node.nodetype: | ||||||
|  |         case 'attribute_read': | ||||||
|  |             if node.child_attribute.value not in ['day', 'month', 'year', 'weekday', 'weeknum']: | ||||||
|  |                 print(f'Semantic Error: invalid read attribute \'{node.child_attribute.value}\' at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  |         case 'attribute_write': | ||||||
|  |             if node.child_attribute.value not in ['day', 'month', 'year']: | ||||||
|  |                 print(f'Semantic Error: invalid write attribute \'{node.child_attribute.value}\' at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  |         case 'procedure_definition': | ||||||
|  |             if node.child_return_type is not None: | ||||||
|  |                 if node.child_return_type.value not in ['int', 'date']: | ||||||
|  |                     print(f'Semantic Error: procedure definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |                 return_type = node.child_return_type.value | ||||||
|  |             for formal in node.children_formals: | ||||||
|  |                 if formal.child_type.value not in ['int', 'date']: | ||||||
|  |                     print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |             is_procedure = True | ||||||
|  |         case 'function_definition': | ||||||
|  |             if node.child_return_type is not None: | ||||||
|  |                 if node.child_return_type.value not in ['int', 'date']: | ||||||
|  |                     print(f'Semantic Error: function definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |                 return_type = node.child_return_type.value | ||||||
|  |             for formal in node.children_formals: | ||||||
|  |                 if formal.child_type.value not in ['int', 'date']: | ||||||
|  |                     print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |             is_function = True | ||||||
|  |         case 'procedure_call': | ||||||
|  |             if sem_data.in_function_def: | ||||||
|  |                 print(f'Semantic Error: procedure call inside function at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  |         case 'return': | ||||||
|  |             if not sem_data.in_procedure_def: | ||||||
|  |                 print(f'Semantic Error: return statement outside of procedure definition at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  |             if sem_data.return_type is None: | ||||||
|  |                 print(f'Semantic Error: return statement in returnless procedure definition at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  |         case 'date_literal': | ||||||
|  |             # date literal can be in variable definition | ||||||
|  |             if sem_data.parent.nodetype == 'variable_definition': | ||||||
|  |                 pass | ||||||
|  |             # right side of assignment | ||||||
|  |             elif sem_data.parent.nodetype == 'assignment': | ||||||
|  |                 if sem_data.parent.child_lhs.nodetype == 'date_literal': | ||||||
|  |                     print(f'Semantic Error: invalid date literal at line {node.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |             # either side of subtraction or left side of addition | ||||||
|  |             elif sem_data.parent.nodetype == 'binary_op': | ||||||
|  |                 if sem_data.parent.value == '-': | ||||||
|  |                     pass | ||||||
|  |                 elif sem_data.parent.value == '+': | ||||||
|  |                     if sem_data.parent.child_rhs.nodetype == 'date_literal': | ||||||
|  |                         print(f'Semantic Error: invalid date literal at line {node.lineno}') | ||||||
|  |                         raise SystemExit | ||||||
|  |                 else: | ||||||
|  |                     print(f'Semantic Error: invalid date literal at line {node.lineno}') | ||||||
|  |                     raise SystemExit | ||||||
|  |             else: | ||||||
|  |                 print(f'Semantic Error: invalid date literal at line {node.lineno}') | ||||||
|  |                 raise SystemExit | ||||||
|  | 
 | ||||||
|  |     if is_procedure or is_function: | ||||||
|  |         sem_data.in_procedure_def = is_procedure | ||||||
|  |         sem_data.in_function_def = is_function | ||||||
|  |         sem_data.return_type = return_type | ||||||
|  | 
 | ||||||
|  |     temp_parent = sem_data.parent | ||||||
|  |     sem_data.parent = node | ||||||
|  | 
 | ||||||
|  |     for name, child in tree_print.get_childvars(node): | ||||||
|  |         if child is not None: | ||||||
|  |             semantic_check(child, sem_data) | ||||||
|  | 
 | ||||||
|  |     sem_data.parent = temp_parent | ||||||
|  | 
 | ||||||
|  |     if is_procedure or is_function: | ||||||
|  |         sem_data.in_procedure_def = False | ||||||
|  |         sem_data.in_function_def = False | ||||||
|  |         sem_data.return_type = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
if __name__ == '__main__':
    # Command-line entry point: exactly one of --who / --file must be given;
    # --debug may be combined with either.
    cli = argparse.ArgumentParser()
    cli.add_argument('-d', '--debug', action='store_true', help='debug?')
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    options = cli.parse_args()

    if not options.who:
        # Parse the file, show the resulting tree, then run the semantic checks.
        ast = syntax_check_file(options.file, options.debug)
        tree_print.treeprint(ast, 'unicode')

        semantic_check(ast, SemData())
    else:
        for author_line in (
            'Author',
            '  Student ID: 150189237',
            '  Name:       Oskari Alaranta',
        ):
            print(author_line)
|  | 
 | ||||||
|  | @ -0,0 +1,209 @@ | ||||||
|  | #!/usr/bin/env python3 | ||||||
|  | # ---------------------------------------------------------------------- | ||||||
|  | 
 | ||||||
|  | # Values to control the module's working | ||||||
|  | 
 | ||||||
|  | # How to recognize attributes in nodes by their names | ||||||
|  | 
 | ||||||
child_prefix_default = "child_"
children_prefix_default = "children_"
value_attr = "value"
nodetype_attr = "nodetype"
lineno_attr = "lineno"
type_attr = "type"

# Collecting the children of a node. Children are recognised purely by the
# names of the node's attributes.

def get_childvars(node, child_prefix=child_prefix_default,
                  children_prefix=children_prefix_default):
    '''Return the children of a tree node as a list of (label, child) pairs.

    An attribute whose name starts with child_prefix holds a single child
    node; an attribute whose name starts with children_prefix holds a LIST of
    child nodes. The label is the attribute name without its prefix. For
    list attributes the label is followed by the index of the child in
    square brackets. A list attribute that is None, not iterable or empty
    yields a single pair with an explanatory label and None as the child.
    Objects without an attribute dictionary have no children.'''

    # Plain values (numbers, strings, None, ...) carry no attributes to scan
    if not hasattr(node, "__dict__"):
        return []

    found = []
    for attr_name, attr_value in vars(node).items():
        # Attribute holding exactly one child node
        if attr_name.startswith(child_prefix):
            found.append((attr_name[len(child_prefix):], attr_value))
            continue
        # Anything that is not a child list is not a child at all
        if not attr_name.startswith(children_prefix):
            continue
        label = attr_name[len(children_prefix):]
        if attr_value is None:
            found.append((label + "[NONE stored instead of a list!!!]", None))
        elif not hasattr(attr_value, "__iter__"):
            found.append((label + "[Not a list!!!]", None))
        elif not attr_value:
            # An empty list/iterable (no nodes)
            found.append((label + "[EMPTY]", None))
        else:
            # A non-empty list/iterable: one numbered entry per child
            for index, child in enumerate(attr_value):
                found.append((label + "[" + str(index) + "]", child))
    return found
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Printing the syntax tree (AST) | ||||||
|  | 
 | ||||||
|  | # Strings that ASCII and Unicode trees are made out of | ||||||
|  | 
 | ||||||
# Unicode box-drawing pieces and the indentation strings built from them.
# "child" variants start the line of a child node, "normal" variants prefix
# the following lines of that child's subtree; "last" variants are used for
# the last child of a node.
vertical_uni = "\N{BOX DRAWINGS LIGHT VERTICAL}"
horizontal_uni = "\N{BOX DRAWINGS LIGHT HORIZONTAL}"
vertical_right_uni = "\N{BOX DRAWINGS LIGHT VERTICAL AND RIGHT}"
up_right_uni = "\N{BOX DRAWINGS LIGHT UP AND RIGHT}"
child_indent_uni = vertical_right_uni + horizontal_uni + horizontal_uni
last_child_indent_uni = up_right_uni + horizontal_uni + horizontal_uni
normal_indent_uni = vertical_uni + "  "
last_normal_indent_uni = "   "

# The same set of strings built from plain ASCII characters.
vertical_asc = "|"
horizontal_asc = "-"
vertical_right_asc = "+"
up_right_asc = "+"
child_indent_asc = vertical_right_asc + horizontal_asc + horizontal_asc
last_child_indent_asc = up_right_asc + horizontal_asc + horizontal_asc
normal_indent_asc = vertical_asc + "  "
last_normal_indent_asc = "   "
|  | 
 | ||||||
|  | # What to put to the beginning and end of dot files | ||||||
|  | 
 | ||||||
# Text written before the graph contents of a dot file: opens the digraph
# and sets the graph, node and edge attributes.
dot_preamble='''digraph parsetree {
    ratio=fill
    node [shape="box"]
    edge [style=bold]
    ranksep=equally
    nodesep=0.5
    rankdir = TB
    clusterrank = local'''

# Text written after the graph contents: closes the digraph.
dot_postamble='}'
|  | 
 | ||||||
def dotnodeid(nodenum):
    '''Return the dot identifier of a node: the letter N followed by its number'''
    return f"N{nodenum}"
|  | 
 | ||||||
def treeprint_indent(node, outtype="unicode", label="", first_indent="", indent=""):
    '''Print a subtree of the tree as indented ASCII/Unicode text.

    node = the root of the subtree
    outtype = "unicode" for box-drawing characters, anything else for ASCII
    label = the "role" of the subtree in its parent (from the attribute name)
    first_indent = what to print at the beginning of the first line
    indent = what to print at the beginning of the rest of the lines'''

    # The label (if any) follows the indentation on the first line
    prefix = first_indent + label + ": " if label else first_indent

    # A missing node is shown as NONE and has no subtree
    if not node:
        print(prefix + "NONE")
        return

    # First line: node type (or the node itself, to help in finding the
    # error, when it has no node type), then value, type and line number
    # for as far as the node has them
    if hasattr(node, nodetype_attr):
        line = prefix + getattr(node, nodetype_attr)
    else:
        line = prefix + "??? '" + str(node) + "' ???"
    if hasattr(node, value_attr):
        line += " (" + str(getattr(node, value_attr)) + ")"
    if hasattr(node, type_attr):
        line += " :" + str(getattr(node, type_attr))
    if hasattr(node, lineno_attr):
        line += " #" + str(getattr(node, lineno_attr))
    print(line)

    # Pick the indentation strings once for the requested output type
    if outtype == "unicode":
        inner_first, inner_rest = child_indent_uni, normal_indent_uni
        final_first, final_rest = last_child_indent_uni, last_normal_indent_uni
    else:
        inner_first, inner_rest = child_indent_asc, normal_indent_asc
        final_first, final_rest = last_child_indent_asc, last_normal_indent_asc

    # Recursively print the child subtrees; the last child gets its own
    # indentation so that the vertical line ends there
    children = get_childvars(node)
    final_index = len(children) - 1
    for index, (name, value) in enumerate(children):
        if index < final_index:
            head, tail = inner_first, inner_rest
        else:
            head, tail = final_first, final_rest
        treeprint_indent(value, outtype, name, indent + head, indent + tail)
|  | 
 | ||||||
def _dot_escape(text):
  '''Escape text for use inside a double-quoted dot string.

  Backslashes are doubled first (so that the ones added for the quotes are
  not doubled again), then double quotes are backslash-escaped.'''
  return str(text).replace('\\', '\\\\').replace('"', '\\"')

def treeprint_dot(node, nodenum, nodecount):
  '''Print a subtree in dot format.

  node = the root of the subtree (a falsy node is drawn as an ellipse
         labelled NONE)
  nodenum = number of the node (for dot id generation)
  nodecount = a list containing the maximum used id; its first element is
              incremented in place for every child that gets numbered

  All label texts are escaped, so double quotes or backslashes in node
  values (or in the string form of an unexpected node) cannot terminate or
  corrupt the quoted dot labels.'''

  nodeline = dotnodeid(nodenum)
  if not node:
    # None is output as an ellipse with label NONE
    nodeline += ' [shape="ellipse", label="NONE"]'
    print(nodeline)
  else:
    # Normal nodes use the default shape
    nodeline += ' [label="'
    # If node has node type attribute, print that, otherwise try to print the
    # whole node to help in finding the error
    if hasattr(node, nodetype_attr):
      nodeline += _dot_escape(getattr(node, nodetype_attr))
    else:
      nodeline += _dot_escape("??? '" + str(node) + "' ???")
    nextnodeline = ""
    # If node has a value attribute, output the value in parenthesis
    if hasattr(node, value_attr):
      nextnodeline += " (" + str(getattr(node, value_attr)) + ")"
    if hasattr(node, type_attr):
      nextnodeline += " :" + str(getattr(node, type_attr))
    if hasattr(node, lineno_attr):
      nextnodeline += " #" + str(getattr(node, lineno_attr))
    if nextnodeline:
      # The extra attributes go on their own line inside the label
      nodeline += "\n" + _dot_escape(nextnodeline)
    nodeline += '"]'
    print(nodeline)
    # Get all children of the node and iterate through them
    childvars = get_childvars(node)
    for name, value in childvars:
      # Number the child by one more than current maximum (and update maximum)
      nodecount[0] += 1
      childnum = nodecount[0]
      # Recursively print the child subtrees
      treeprint_dot(value, childnum, nodecount)
      # Output the named connection between parent and child
      print(dotnodeid(nodenum) + "->" + dotnodeid(childnum) +
            ' [label="' + _dot_escape(name) + '"]')
|  | 
 | ||||||
def treeprint(rootnode, outtype="unicode"):
  '''Print a whole tree, starting from its root node.

     outtype selects the output format:
     "dot" prints the tree in dot format, wrapped in the dot preamble and
     postamble (can be converted to a graphical tree using the dot command
     of Graphviz).
     "unicode" (default) prints a text version of the tree using the Unicode
     indentation strings.
     Any other value (e.g. "ascii") prints the text version using the
     ASCII-only indentation strings.'''
  if outtype != "dot":
    # Both text formats are handled by the indenting printer
    treeprint_indent(rootnode, outtype)
    return
  # Dot output: the root gets id 0, which is also the largest id used so far
  print(dot_preamble)
  treeprint_dot(rootnode, 0, [0])
  print(dot_postamble)
		Loading…
	
		Reference in New Issue