PoPL/04_semantics_and_running/build_ast.py

307 lines
9.1 KiB
Python

#!/usr/bin/env python3
import lexer
import ply.lex as lex
import ply.yacc as yacc
# Re-export the lexer's token names at module level; PLY's yacc looks for a
# module-level `tokens` list in the grammar module to learn the terminals.
tokens = lexer.tokens
class ASTnode:
    """A single node of the abstract syntax tree.

    Every node carries ``nodetype`` (the kind of construct), ``lineno`` and
    a ``type`` slot that starts out as ``None``.  A ``value`` attribute is
    created only when a non-``None`` value is supplied; further ``child_*``
    / ``children_*`` attributes are attached by the grammar actions.
    """

    def __init__(self, typestr, lineno, value = None):
        self.type = None
        self.nodetype = typestr
        self.lineno = lineno
        if value is None:
            # Leave the attribute absent rather than storing None.
            return
        self.value = value
def p_program1(p):
    'program : statement_list'
    # Program without definitions: the definition list is empty and the
    # statements are taken over from the statement_list node.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = []
    root.children_statements = p[1].children_statements
    p[0] = root
def p_program2(p):
    'program : definition_list statement_list'
    # Program with definitions: both child lists are taken over from the
    # two list nodes produced by the sub-rules.
    definitions_node, statements_node = p[1], p[2]
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = definitions_node.children_definitions
    root.children_statements = statements_node.children_statements
    p[0] = root
def p_statement_list1(p):
    'statement_list : statement'
    # Start a new statement list holding the single parsed statement.
    listing = ASTnode('statement_list', p.lineno(1))
    listing.children_statements = [p[1]]
    p[0] = listing
def p_statement_list2(p):
    'statement_list : statement_list COMMA statement'
    # Reuse the existing list node and extend its list in place with the
    # statement that follows the comma.
    listing = p[1]
    listing.children_statements.append(p[3])
    p[0] = listing
def p_definition_list1(p):
    'definition_list : definition'
    # Start a new definition list holding the single parsed definition.
    listing = ASTnode('definition_list', p.lineno(1))
    listing.children_definitions = [p[1]]
    p[0] = listing
def p_definition_list2(p):
    'definition_list : definition_list definition'
    # Reuse the existing list node and extend its list in place with the
    # newly parsed definition.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
def p_definition(p):
    '''definition : function_definition
                  | procedure_definition
                  | variable_definition'''
    # Pure pass-through: whichever alternative matched is the result.
    matched = p[1]
    p[0] = matched
def p_variable_definition(p):
    'variable_definition : VAR IDENT EQ expression'
    # The node's value is the variable name (IDENT); the initialiser
    # expression is stored as child_expression.
    var_name, initializer = p[2], p[4]
    definition = ASTnode('variable_definition', p.lineno(1), var_name)
    definition.child_expression = initializer
    p[0] = definition
def p_empty(p):
    'empty :'
    # Matches nothing; the action deliberately leaves p untouched.
    return None
def p_variable_definition_list1(p):
    'variable_definition_list : empty'
    # No local variable definitions: produce a list node with no children.
    listing = ASTnode('variable_definition_list', p.lineno(1))
    listing.children_definitions = []
    p[0] = listing
def p_variable_definition_list2(p):
    'variable_definition_list : variable_definition_list variable_definition'
    # Reuse the existing list node and extend its list in place with the
    # newly parsed variable definition.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
def p_function_definition(p):
    '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION'''
    # Node value is the function name; the line number comes from the
    # FUNC_IDENT token (position 2).
    func_name = p[2]
    formals_node, return_type = p[4], p[7]
    locals_node, body = p[8], p[10]
    definition = ASTnode('function_definition', p.lineno(2), func_name)
    definition.children_formals = formals_node.children_formals
    definition.child_return_type = return_type
    definition.children_variable_definitions = locals_node.children_definitions
    definition.child_expression = body
    p[0] = definition
def p_procedure_definition1(p):
    'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE'
    # Procedure without a RETURN clause: child_return_type is set to None.
    # Node value is the procedure name; the line comes from PROC_IDENT.
    proc_name = p[2]
    formals_node, locals_node, body_node = p[4], p[6], p[8]
    definition = ASTnode('procedure_definition', p.lineno(2), proc_name)
    definition.children_formals = formals_node.children_formals
    definition.children_variable_definitions = locals_node.children_definitions
    definition.children_statements = body_node.children_statements
    definition.child_return_type = None
    p[0] = definition
def p_procedure_definition2(p):
    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE'''
    # Procedure with a RETURN clause: the IDENT after RETURN (position 7)
    # is stored as child_return_type.
    proc_name = p[2]
    formals_node, return_type = p[4], p[7]
    locals_node, body_node = p[8], p[10]
    definition = ASTnode('procedure_definition', p.lineno(2), proc_name)
    definition.children_formals = formals_node.children_formals
    definition.children_variable_definitions = locals_node.children_definitions
    definition.children_statements = body_node.children_statements
    definition.child_return_type = return_type
    p[0] = definition
def p_formal_list1(p):
    'formal_list : empty'
    # No formal arguments: produce a list node with no children.
    listing = ASTnode('formal_list', p.lineno(1))
    listing.children_formals = []
    p[0] = listing
def p_formal_list2(p):
    'formal_list : formal_arg'
    # Start a new formal list holding the single parsed argument.
    listing = ASTnode('formal_list', p.lineno(1))
    listing.children_formals = [p[1]]
    p[0] = listing
def p_formal_list3(p):
    'formal_list : formal_list COMMA formal_arg'
    # Reuse the existing list node and extend its list in place with the
    # formal argument that follows the comma.
    listing = p[1]
    listing.children_formals.append(p[3])
    p[0] = listing
def p_formal_arg(p):
    'formal_arg : IDENT LSQUARE IDENT RSQUARE'
    # Node value is the first IDENT; the IDENT between the square brackets
    # is stored in the node's `type` slot.
    arg_name, bracketed = p[1], p[3]
    formal = ASTnode('formal_argument', p.lineno(1), arg_name)
    formal.type = bracketed
    p[0] = formal
def p_procedure_call1(p):
    'procedure_call : PROC_IDENT LPAREN RPAREN'
    # Call with empty parentheses: the argument list is empty.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
def p_procedure_call(p):
    '''procedure_call : PROC_IDENT LPAREN arguments RPAREN'''
    # Call with arguments: take over the list collected by `arguments`.
    callee, args_node = p[1], p[3]
    call = ASTnode('procedure_call', p.lineno(1), callee)
    call.children_arguments = args_node.children_arguments
    p[0] = call
def p_arguments1(p):
    'arguments : expression'
    # Start a new argument list holding the single parsed expression.
    listing = ASTnode('arguments', p.lineno(1))
    listing.children_arguments = [p[1]]
    p[0] = listing
def p_arguments2(p):
    'arguments : arguments COMMA expression'
    # Reuse the existing list node and extend its list in place with the
    # expression that follows the comma.
    listing = p[1]
    listing.children_arguments.append(p[3])
    p[0] = listing
def p_assignment(p):
    'assignment : lvalue EQ rvalue'
    # The line number is taken from the EQ token (position 2).
    target, source = p[1], p[3]
    assign = ASTnode('assignment', p.lineno(2))
    assign.child_lhs = target
    assign.child_rhs = source
    p[0] = assign
def p_lvalue1(p):
    'lvalue : IDENT'
    # A plain identifier used as assignment target.
    line, name = p.lineno(1), p[1]
    p[0] = ASTnode('identifier', line, name)
def p_lvalue2(p):
    'lvalue : IDENT DOT IDENT'
    # Attribute target `object.attribute`: both names become identifier
    # nodes, each with the line number of its own token.
    write = ASTnode('attribute_write', p.lineno(1))
    owner = ASTnode('identifier', p.lineno(1), p[1])
    attribute = ASTnode('identifier', p.lineno(3), p[3])
    write.child_identifier = owner
    write.child_attribute = attribute
    p[0] = write
def p_rvalue(p):
    '''rvalue : expression
              | unless_expression'''
    # Pure pass-through: whichever alternative matched is the result.
    matched = p[1]
    p[0] = matched
def p_print_statement1(p):
    'print_statement : PRINT print_item'
    # Start a print node whose item list holds the first item.
    printer = ASTnode('print', p.lineno(1))
    printer.children_items = [p[2]]
    p[0] = printer
def p_print_statement2(p):
    'print_statement : print_statement AMPERSAND print_item'
    # Reuse the existing print node and extend its item list in place with
    # the item that follows the ampersand.
    printer = p[1]
    printer.children_items.append(p[3])
    p[0] = printer
def p_print_item1(p):
    'print_item : STRING'
    # Wrap the STRING token's value in a string_literal node.
    line, text = p.lineno(1), p[1]
    p[0] = ASTnode('string_literal', line, text)
def p_print_item2(p):
    'print_item : expression'
    # Pure pass-through: the expression node itself is the print item.
    matched = p[1]
    p[0] = matched
def p_statement1(p):
    '''statement : procedure_call
                 | assignment
                 | print_statement'''
    # Pure pass-through: whichever alternative matched is the result.
    matched = p[1]
    p[0] = matched
def p_statement2(p):
    'statement : DO statement_list UNTIL expression'
    # DO ... UNTIL: the loop body's statements plus the UNTIL expression
    # stored as the condition.
    body_node, condition = p[2], p[4]
    loop = ASTnode('do_until', p.lineno(1))
    loop.children_statements = body_node.children_statements
    loop.child_condition = condition
    p[0] = loop
def p_statement3(p):
    'statement : DO statement_list UNLESS expression DONE'
    # DO ... UNLESS without OTHERWISE: the DO body is stored as the
    # "false" branch and the "true" branch is an empty list.
    body_node, condition = p[2], p[4]
    branch = ASTnode('do_unless', p.lineno(1))
    branch.children_statements_false = body_node.children_statements
    branch.child_condition = condition
    branch.children_statements_true = []
    p[0] = branch
def p_statement4(p):
    'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE'
    # DO ... UNLESS ... OTHERWISE: the DO body is stored as the "false"
    # branch, the OTHERWISE body as the "true" branch.
    body_node, condition, otherwise_node = p[2], p[4], p[6]
    branch = ASTnode('do_unless', p.lineno(1))
    branch.children_statements_false = body_node.children_statements
    branch.child_condition = condition
    branch.children_statements_true = otherwise_node.children_statements
    p[0] = branch
def p_statement5(p):
    'statement : RETURN expression'
    # Return statement carrying the returned expression as its child.
    returned = p[2]
    ret = ASTnode('return', p.lineno(1))
    ret.child_expression = returned
    p[0] = ret
def p_expression1(p):
    'expression : simple_expr'
    # Pure pass-through: no extra node for a lone simple_expr.
    matched = p[1]
    p[0] = matched
def p_expression2(p):
    '''expression : expression EQ simple_expr
                  | expression LT simple_expr'''
    # Comparison: node value is the operator token's value and the line
    # number comes from that token (position 2).
    left, operator, right = p[1], p[2], p[3]
    comparison = ASTnode('binary_op', p.lineno(2), operator)
    comparison.child_lhs = left
    comparison.child_rhs = right
    p[0] = comparison
def p_simple_expr1(p):
    'simple_expr : term'
    # Pure pass-through: no extra node for a lone term.
    matched = p[1]
    p[0] = matched
def p_simple_expr2(p):
    '''simple_expr : simple_expr PLUS term
                   | simple_expr MINUS term'''
    # Additive operation: node value is the operator token's value and the
    # line number comes from that token (position 2).
    left, operator, right = p[1], p[2], p[3]
    additive = ASTnode('binary_op', p.lineno(2), operator)
    additive.child_lhs = left
    additive.child_rhs = right
    p[0] = additive
def p_term1(p):
    'term : factor'
    # Pure pass-through: no extra node for a lone factor.
    matched = p[1]
    p[0] = matched
def p_term2(p):
    '''term : term MULT factor
            | term DIV factor'''
    # Multiplicative operation: node value is the operator token's value
    # and the line number comes from that token (position 2).
    left, operator, right = p[1], p[2], p[3]
    multiplicative = ASTnode('binary_op', p.lineno(2), operator)
    multiplicative.child_lhs = left
    multiplicative.child_rhs = right
    p[0] = multiplicative
def p_factor1(p):
    'factor : atom'
    # Pure pass-through: no extra node for an unsigned atom.
    matched = p[1]
    p[0] = matched
def p_factor2(p):
    '''factor : MINUS atom
              | PLUS atom'''
    # Signed atom: node value is the sign token's value, the operand is
    # stored as child_atom.
    sign, operand = p[1], p[2]
    unary = ASTnode('unary_op', p.lineno(1), sign)
    unary.child_atom = operand
    p[0] = unary
def p_atom1(p):
    'atom : IDENT'
    # A plain identifier read inside an expression.
    line, name = p.lineno(1), p[1]
    p[0] = ASTnode('identifier', line, name)
def p_atom2(p):
    'atom : INT_LITERAL'
    # Wrap the INT_LITERAL token's value in an int_literal node.
    line, literal = p.lineno(1), p[1]
    p[0] = ASTnode('int_literal', line, literal)
def p_atom3(p):
    'atom : DATE_LITERAL'
    # Wrap the DATE_LITERAL token's value in a date_literal node.
    line, literal = p.lineno(1), p[1]
    p[0] = ASTnode('date_literal', line, literal)
def p_atom4(p):
    'atom : IDENT APOSTROPHE IDENT'
    # Attribute read `object'attribute`: both names become identifier
    # nodes, each with the line number of its own token.
    read = ASTnode('attribute_read', p.lineno(1))
    owner = ASTnode('identifier', p.lineno(1), p[1])
    attribute = ASTnode('identifier', p.lineno(3), p[3])
    read.child_identifier = owner
    read.child_attribute = attribute
    p[0] = read
def p_atom5(p):
    'atom : LPAREN expression RPAREN'
    # Parentheses only group: the inner expression node is the result and
    # no separate node is created for the parentheses.
    inner = p[2]
    p[0] = inner
def p_atom6(p):
    '''atom : function_call
            | procedure_call'''
    # Pure pass-through: the call node itself is the atom.
    matched = p[1]
    p[0] = matched
def p_function_call1(p):
    'function_call : FUNC_IDENT LPAREN RPAREN'
    # Call with empty parentheses: the argument list is empty.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
def p_function_call2(p):
    'function_call : FUNC_IDENT LPAREN arguments RPAREN'
    # Call with arguments: take over the list collected by `arguments`.
    callee, args_node = p[1], p[3]
    call = ASTnode('function_call', p.lineno(1), callee)
    call.children_arguments = args_node.children_arguments
    p[0] = call
def p_unless_expression(p):
    'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'
    # The expression after DO is stored as the "false" branch and the one
    # after OTHERWISE as the "true" branch; the UNLESS expression is the
    # condition.
    do_value, condition, otherwise_value = p[2], p[4], p[6]
    choice = ASTnode('unless_expression', p.lineno(1))
    choice.child_condition = condition
    choice.child_expression_true = otherwise_value
    choice.child_expression_false = do_value
    p[0] = choice
def p_error(p):
    """Report a syntax error on standard output and stop by raising SystemExit.

    ``p`` is the offending token, or ``None`` when the input ended early.
    For a token, the message carries the lexer's current line number in
    curly braces followed by the token's value.
    """
    if p is None:
        message = 'Syntax Error at the end of file'
    else:
        message = f"{{{p.lexer.lineno}}}:Syntax Error (token:'{p.value}')"
    print(message)
    raise SystemExit
def syntax_check_file(file_path: str, debug: bool) -> ASTnode:
    """Parse the program stored at *file_path* and return the resulting AST.

    Args:
        file_path: Path of the source text to parse; it is read as UTF-8.
        debug: Forwarded unchanged as the ``debug`` argument of the PLY
            parser's ``parse`` call.

    Returns:
        The value the grammar's start rule stored in ``p[0]``.

    Raises:
        OSError: If the file cannot be opened or read.
        SystemExit: Raised by ``p_error`` when the input has a syntax error.
    """
    parser = yacc.yacc()
    # Read the whole file first so the handle is closed before parsing.
    with open(file_path, 'r', encoding='utf-8') as file:
        source_text = file.read()
    # tracking=True makes PLY propagate line numbers to non-terminals.
    # Without it, p.lineno(n) on a non-terminal position (as used by the
    # 'program', list and 'arguments' rules) always yields 0.
    return parser.parse(
        source_text, lexer=lexer.lexer, debug=debug, tracking=True
    )