PoPL/03_syntax_tree/main.py

429 lines
14 KiB
Python
Raw Normal View History

2024-04-13 22:51:45 +03:00
#!/bin/env python3
import argparse
import ply.lex as lex
import ply.yacc as yacc
import lexer
import tree_print
# Token names defined by the lexer module, re-exported at module level;
# ply.yacc looks for a module-level `tokens` list when it builds the parser.
tokens = lexer.tokens
class ASTnode:
    """One node of the syntax tree.

    Every node carries ``nodetype`` and ``lineno``.  ``value`` is stored only
    when a non-None value is supplied, so nodes without a value have no
    ``value`` attribute at all.  Grammar actions attach further ``child_*`` /
    ``children_*`` attributes after construction.
    """

    def __init__(self, typestr, lineno, value=None):
        self.nodetype, self.lineno = typestr, lineno
        if value is None:
            return
        self.value = value
class SemData:
    """Mutable state threaded through the semantic check.

    Records whether the walk is currently inside a procedure or function
    definition, the return type of that definition (if any), and the node
    whose children are being visited.
    """

    def __init__(self):
        self.in_procedure_def = self.in_function_def = False
        self.return_type = self.parent = None
def p_program1(p):
    'program : statement_list'
    # Program without any definitions: only the statements are carried over.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = []
    root.children_statements = p[1].children_statements
    p[0] = root
def p_program2(p):
    'program : definition_list statement_list'
    # Program with definitions followed by statements.
    definitions, statements = p[1], p[2]
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = definitions.children_definitions
    root.children_statements = statements.children_statements
    p[0] = root
def p_statement_list1(p):
    'statement_list : statement'
    # Start a new list node holding the first statement.
    listing = ASTnode('statement_list', p.lineno(1))
    listing.children_statements = [p[1]]
    p[0] = listing
def p_statement_list2(p):
    'statement_list : statement_list COMMA statement'
    # Reuse the existing list node and extend it in place.
    listing = p[1]
    listing.children_statements.append(p[3])
    p[0] = listing
def p_definition_list1(p):
    'definition_list : definition'
    # Start a new list node holding the first definition.
    listing = ASTnode('definition_list', p.lineno(1))
    listing.children_definitions = [p[1]]
    p[0] = listing
def p_definition_list2(p):
    'definition_list : definition_list definition'
    # Reuse the existing list node and extend it in place.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
def p_definition(p):
    '''definition : function_definition
                  | procedure_definition
                  | variable_definition'''
    # Whichever alternative matched has already built its node; forward it.
    chosen = p[1]
    p[0] = chosen
def p_variable_definition(p):
    'variable_definition : VAR IDENT EQ expression'
    # The node's value is the variable name; the initialiser is its only child.
    name, initialiser = p[2], p[4]
    node = ASTnode('variable_definition', p.lineno(1), name)
    node.child_expression = initialiser
    p[0] = node
def p_empty(p):
    'empty :'
    # Epsilon production: nothing is built and p[0] is left untouched.
    return None
def p_variable_definition_list1(p):
    'variable_definition_list : empty'
    # Base case: a list node with no definitions yet.
    listing = ASTnode('variable_definition_list', p.lineno(1))
    listing.children_definitions = []
    p[0] = listing
def p_variable_definition_list2(p):
    'variable_definition_list : variable_definition_list variable_definition'
    # Reuse the existing list node and extend it in place.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
def p_function_definition(p):
    '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION'''
    # Positions in the production: 2 = name, 4 = formals, 7 = return type,
    # 8 = local variable definitions, 10 = body expression.
    func_name, formals, type_name, local_vars, body = p[2], p[4], p[7], p[8], p[10]
    node = ASTnode('function_definition', p.lineno(2), func_name)
    node.children_formals = formals.children_formals
    node.child_return_type = ASTnode('identifier', p.lineno(7), type_name)
    node.children_variable_definitions = local_vars.children_definitions
    node.child_value = body
    p[0] = node
def p_procedure_definition1(p):
    'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE'
    # Procedure without a declared return type: child_return_type stays None.
    proc_name, formals, local_vars, body = p[2], p[4], p[6], p[8]
    node = ASTnode('procedure_definition', p.lineno(2), proc_name)
    node.children_formals = formals.children_formals
    node.children_variable_definitions = local_vars.children_definitions
    node.children_statements = body.children_statements
    node.child_return_type = None
    p[0] = node
def p_procedure_definition2(p):
    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE'''
    # Procedure with a declared return type (the IDENT at position 7).
    proc_name, formals, type_name, local_vars, body = p[2], p[4], p[7], p[8], p[10]
    node = ASTnode('procedure_definition', p.lineno(2), proc_name)
    node.children_formals = formals.children_formals
    node.children_variable_definitions = local_vars.children_definitions
    node.children_statements = body.children_statements
    node.child_return_type = ASTnode('identifier', p.lineno(7), type_name)
    p[0] = node
def p_formal_list1(p):
    'formal_list : empty'
    # No formal arguments at all.
    listing = ASTnode('formal_list', p.lineno(1))
    listing.children_formals = []
    p[0] = listing
def p_formal_list2(p):
    'formal_list : formal_arg'
    # Start a new list node holding the first formal argument.
    listing = ASTnode('formal_list', p.lineno(1))
    listing.children_formals = [p[1]]
    p[0] = listing
def p_formal_list3(p):
    'formal_list : formal_list COMMA formal_arg'
    # Reuse the existing list node and extend it in place.
    listing = p[1]
    listing.children_formals.append(p[3])
    p[0] = listing
def p_formal_arg(p):
    'formal_arg : IDENT LSQUARE IDENT RSQUARE'
    # First IDENT is the argument name, the bracketed IDENT is its type.
    arg_name, type_name = p[1], p[3]
    node = ASTnode('formal_argument', p.lineno(1))
    node.child_variable = ASTnode('identifier', p.lineno(1), arg_name)
    node.child_type = ASTnode('identifier', p.lineno(3), type_name)
    p[0] = node
def p_procedure_call1(p):
    'procedure_call : PROC_IDENT LPAREN RPAREN'
    # Call with an empty argument list.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
def p_procedure_call(p):
    '''procedure_call : PROC_IDENT LPAREN arguments RPAREN'''
    # Call with one or more arguments taken from the `arguments` node.
    callee, args = p[1], p[3]
    call = ASTnode('procedure_call', p.lineno(1), callee)
    call.children_arguments = args.children_arguments
    p[0] = call
def p_arguments1(p):
    'arguments : expression'
    # Start a new argument list holding the first expression.
    listing = ASTnode('arguments', p.lineno(1))
    listing.children_arguments = [p[1]]
    p[0] = listing
def p_arguments2(p):
    'arguments : arguments COMMA expression'
    # Reuse the existing list node and extend it in place.
    listing = p[1]
    listing.children_arguments.append(p[3])
    p[0] = listing
def p_assignment(p):
    'assignment : lvalue EQ rvalue'
    # Target on the left, assigned value on the right.
    target, source = p[1], p[3]
    node = ASTnode('assignment', p.lineno(1))
    node.child_lhs = target
    node.child_rhs = source
    p[0] = node
def p_lvalue1(p):
    'lvalue : IDENT'
    # Plain variable as assignment target.
    name = p[1]
    p[0] = ASTnode('identifier', p.lineno(1), name)
def p_lvalue2(p):
    'lvalue : IDENT DOT IDENT'
    # `variable.attribute` as assignment target.
    owner, attribute = p[1], p[3]
    node = ASTnode('attribute_write', p.lineno(1))
    node.child_identifier = ASTnode('identifier', p.lineno(1), owner)
    node.child_attribute = ASTnode('identifier', p.lineno(3), attribute)
    p[0] = node
def p_rvalue(p):
    '''rvalue : expression
              | unless_expression'''
    # Either alternative already produced its node; forward it unchanged.
    chosen = p[1]
    p[0] = chosen
def p_print_statement1(p):
    'print_statement : PRINT print_item'
    # Start a print node holding its first item.
    node = ASTnode('print', p.lineno(1))
    node.children_items = [p[2]]
    p[0] = node
def p_print_statement2(p):
    'print_statement : print_statement AMPERSAND print_item'
    # Reuse the existing print node and add one more item in place.
    node = p[1]
    node.children_items.append(p[3])
    p[0] = node
def p_print_item1(p):
    'print_item : STRING'
    # String literal item; the token value becomes the node value.
    text = p[1]
    p[0] = ASTnode('string_literal', p.lineno(1), text)
def p_print_item2(p):
    'print_item : expression'
    # Expression item: forward the expression node unchanged.
    expr = p[1]
    p[0] = expr
def p_statement1(p):
    '''statement : procedure_call
                 | assignment
                 | print_statement'''
    # Simple statements: forward the node built by the matched alternative.
    chosen = p[1]
    p[0] = chosen
def p_statement2(p):
    'statement : DO statement_list UNTIL expression'
    # do ... until <condition>
    body, condition = p[2], p[4]
    node = ASTnode('do_until', p.lineno(1))
    node.children_statements = body.children_statements
    node.child_condition = condition
    p[0] = node
def p_statement3(p):
    'statement : DO statement_list UNLESS expression DONE'
    # do ... unless <condition> done, with no otherwise-branch.
    body, condition = p[2], p[4]
    node = ASTnode('do_unless', p.lineno(1))
    node.children_statements = body.children_statements
    node.child_condition = condition
    node.children_otherwise = []
    p[0] = node
def p_statement4(p):
    'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE'
    # do ... unless <condition> otherwise ... done
    body, condition, alternative = p[2], p[4], p[6]
    node = ASTnode('do_unless', p.lineno(1))
    node.children_statements = body.children_statements
    node.child_condition = condition
    node.children_otherwise = alternative.children_statements
    p[0] = node
def p_statement5(p):
    'statement : RETURN expression'
    # return <expression>
    node = ASTnode('return', p.lineno(1))
    node.child_expression = p[2]
    p[0] = node
def p_expression1(p):
    'expression : simple_expr'
    # No comparison operator: forward the inner node unchanged.
    inner = p[1]
    p[0] = inner
def p_expression2(p):
    '''expression : expression EQ simple_expr
                  | expression LT simple_expr'''
    # Comparison: the operator token's value and line label the node.
    left, operator, right = p[1], p[2], p[3]
    node = ASTnode('binary_op', p.lineno(2), operator)
    node.child_lhs = left
    node.child_rhs = right
    p[0] = node
def p_simple_expr1(p):
    'simple_expr : term'
    # No additive operator: forward the inner node unchanged.
    inner = p[1]
    p[0] = inner
def p_simple_expr2(p):
    '''simple_expr : simple_expr PLUS term
                   | simple_expr MINUS term'''
    # Addition / subtraction: the operator token's value and line label the node.
    left, operator, right = p[1], p[2], p[3]
    node = ASTnode('binary_op', p.lineno(2), operator)
    node.child_lhs = left
    node.child_rhs = right
    p[0] = node
def p_term1(p):
    'term : factor'
    # No multiplicative operator: forward the inner node unchanged.
    inner = p[1]
    p[0] = inner
def p_term2(p):
    '''term : term MULT factor
            | term DIV factor'''
    # Multiplication / division: the operator token's value and line label the node.
    left, operator, right = p[1], p[2], p[3]
    node = ASTnode('binary_op', p.lineno(2), operator)
    node.child_lhs = left
    node.child_rhs = right
    p[0] = node
def p_factor1(p):
    'factor : atom'
    # No sign: forward the atom node unchanged.
    inner = p[1]
    p[0] = inner
def p_factor2(p):
    '''factor : MINUS atom
              | PLUS atom'''
    # Signed atom: the sign token's value labels the unary node.
    sign, operand = p[1], p[2]
    node = ASTnode('unary_op', p.lineno(1), sign)
    node.child_atom = operand
    p[0] = node
def p_atom1(p):
    'atom : IDENT'
    # Variable reference.
    name = p[1]
    p[0] = ASTnode('identifier', p.lineno(1), name)
def p_atom2(p):
    'atom : INT_LITERAL'
    # Integer literal; the token value becomes the node value.
    literal = p[1]
    p[0] = ASTnode('int_literal', p.lineno(1), literal)
def p_atom3(p):
    'atom : DATE_LITERAL'
    # Date literal; the token value becomes the node value.
    literal = p[1]
    p[0] = ASTnode('date_literal', p.lineno(1), literal)
def p_atom4(p):
    'atom : IDENT APOSTROPHE IDENT'
    # `variable'attribute` read access.
    owner, attribute = p[1], p[3]
    node = ASTnode('attribute_read', p.lineno(1))
    node.child_identifier = ASTnode('identifier', p.lineno(1), owner)
    node.child_attribute = ASTnode('identifier', p.lineno(3), attribute)
    p[0] = node
def p_atom5(p):
    'atom : LPAREN expression RPAREN'
    # Parentheses only group: the inner expression node is used directly.
    inner = p[2]
    p[0] = inner
def p_atom6(p):
    '''atom : function_call
            | procedure_call'''
    # A call used as a value: forward the call node unchanged.
    call = p[1]
    p[0] = call
def p_function_call1(p):
    'function_call : FUNC_IDENT LPAREN RPAREN'
    # Call with an empty argument list.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
def p_function_call2(p):
    'function_call : FUNC_IDENT LPAREN arguments RPAREN'
    # Call with one or more arguments taken from the `arguments` node.
    callee, args = p[1], p[3]
    call = ASTnode('function_call', p.lineno(1), callee)
    call.children_arguments = args.children_arguments
    p[0] = call
def p_unless_expression(p):
    'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'
    # Source order is: do <true_expr> unless <condition> otherwise <false_expr> done.
    # The condition is attached first, so attributes stay in the original order.
    true_expr, condition, false_expr = p[2], p[4], p[6]
    node = ASTnode('unless_expression', p.lineno(1))
    node.child_condition = condition
    node.child_true_expr = true_expr
    node.child_false_expr = false_expr
    p[0] = node
def p_error(p):
    # Syntax error hook: report the problem on stdout and stop the program.
    # `p` is the offending token, or None when the input ended unexpectedly.
    if p is None:
        print('Syntax Error at the end of file')
    else:
        print(f"{{{p.lexer.lineno}}}:Syntax Error (token:'{p.value}')")
    raise SystemExit
def syntax_check_file(file_path: str, debug: bool) -> ASTnode:
    """Parse the file at ``file_path`` and return the resulting syntax tree.

    Args:
        file_path: Path of the source file; it is read as UTF-8 text.
        debug: Forwarded to the parser's ``debug`` option.

    Returns:
        Whatever the start rule stored in ``p[0]`` (the 'program' node).

    Syntax errors are handled by ``p_error``, which raises SystemExit.
    """
    parser = yacc.yacc()
    with open(file_path, 'r', encoding='utf-8') as file:
        source_text = file.read()
    # By default PLY records line numbers only for tokens, so the grammar
    # actions that call p.lineno(n) on a non-terminal (program,
    # statement_list, assignment, arguments, ...) would store 0.
    # tracking=True makes the parser propagate positions to non-terminals.
    return parser.parse(source_text, lexer=lexer.lexer, debug=debug, tracking=True)
def semantic_check(node: ASTnode, sem_data: SemData):
is_procedure = False
is_function = False
return_type = None
match node.nodetype:
case 'attribute_read':
if node.child_attribute.value not in ['day', 'month', 'year', 'weekday', 'weeknum']:
print(f'Semantic Error: invalid read attribute \'{node.child_attribute.value}\' at line {node.lineno}')
raise SystemExit
case 'attribute_write':
if node.child_attribute.value not in ['day', 'month', 'year']:
print(f'Semantic Error: invalid write attribute \'{node.child_attribute.value}\' at line {node.lineno}')
raise SystemExit
case 'procedure_definition':
if node.child_return_type is not None:
if node.child_return_type.value not in ['int', 'date']:
print(f'Semantic Error: procedure definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
raise SystemExit
return_type = node.child_return_type.value
for formal in node.children_formals:
if formal.child_type.value not in ['int', 'date']:
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
raise SystemExit
is_procedure = True
case 'function_definition':
if node.child_return_type is not None:
if node.child_return_type.value not in ['int', 'date']:
print(f'Semantic Error: function definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
raise SystemExit
return_type = node.child_return_type.value
for formal in node.children_formals:
if formal.child_type.value not in ['int', 'date']:
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
raise SystemExit
is_function = True
case 'procedure_call':
if sem_data.in_function_def:
print(f'Semantic Error: procedure call inside function at line {node.lineno}')
raise SystemExit
case 'return':
if not sem_data.in_procedure_def:
print(f'Semantic Error: return statement outside of procedure definition at line {node.lineno}')
raise SystemExit
if sem_data.return_type is None:
print(f'Semantic Error: return statement in returnless procedure definition at line {node.lineno}')
raise SystemExit
case 'date_literal':
# date literal can be in variable definition
if sem_data.parent.nodetype == 'variable_definition':
pass
# right side of assignment
elif sem_data.parent.nodetype == 'assignment':
if sem_data.parent.child_lhs.nodetype == 'date_literal':
print(f'Semantic Error: invalid date literal at line {node.lineno}')
raise SystemExit
# either side of subtraction or left side of addition
elif sem_data.parent.nodetype == 'binary_op':
if sem_data.parent.value == '-':
pass
elif sem_data.parent.value == '+':
if sem_data.parent.child_rhs.nodetype == 'date_literal':
print(f'Semantic Error: invalid date literal at line {node.lineno}')
raise SystemExit
else:
print(f'Semantic Error: invalid date literal at line {node.lineno}')
raise SystemExit
else:
print(f'Semantic Error: invalid date literal at line {node.lineno}')
raise SystemExit
if is_procedure or is_function:
sem_data.in_procedure_def = is_procedure
sem_data.in_function_def = is_function
sem_data.return_type = return_type
temp_parent = sem_data.parent
sem_data.parent = node
for name, child in tree_print.get_childvars(node):
if child is not None:
semantic_check(child, sem_data)
sem_data.parent = temp_parent
if is_procedure or is_function:
sem_data.in_procedure_def = False
sem_data.in_function_def = False
sem_data.return_type = None
if __name__ == '__main__':
    # Command line: exactly one of --who / --file must be given; --debug is optional.
    cli = argparse.ArgumentParser()
    cli.add_argument('-d', '--debug', action='store_true', help='debug?')
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')
    options = cli.parse_args()

    if not options.who:
        # Parse the file, print the tree, then run the semantic checks on it.
        tree = syntax_check_file(options.file, options.debug)
        tree_print.treeprint(tree, 'unicode')
        semantic_check(tree, SemData())
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)