Implement phase 3 semantic checking
This commit is contained in:
parent
004ee25273
commit
91a3a0ba2e
|
@ -0,0 +1,132 @@
|
|||
#!/bin/env python3
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import ply.lex as lex
|
||||
|
||||
# Reserved words of the language. Every keyword maps to a token type that is
# simply the keyword written in upper case.
reserved = {
    word: word.upper()
    for word in (
        'var',
        'is',
        'unless',
        'otherwise',
        'until',
        'do',
        'done',
        'procedure',
        'function',
        'return',
        'print',
        'end',
    )
}

# All token types the lexer can produce: punctuation, operators, literals and
# identifiers first, followed by the keyword token types.
tokens = [
    'LPAREN', 'RPAREN',
    'LSQUARE', 'RSQUARE',
    'LCURLY', 'RCURLY',
    'APOSTROPHE', 'AMPERSAND', 'COMMA', 'DOT',
    'EQ', 'LT',
    'PLUS', 'MINUS', 'MULT', 'DIV',
    'STRING', 'DATE_LITERAL', 'INT_LITERAL',
    'IDENT', 'FUNC_IDENT', 'PROC_IDENT',
    *reserved.values(),
]
|
||||
|
||||
def t_whitespace(t):
    r'[ \t\n]+'
    # Whitespace produces no token (nothing is returned); it only advances
    # the line counter by the number of newlines consumed.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||
|
||||
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    # Comments are delimited by "(%" and "%)" and may span several lines.
    # They produce no token; only the line counter is kept up to date.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||
|
||||
# Regular expressions for the single-character punctuation and operator
# tokens. Each name is "t_" followed by the matching entry in `tokens`.
# Characters with a special meaning in regular expressions are escaped.

# Brackets
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'

# Punctuation
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'

# Comparison operators
t_EQ = r'='
t_LT = r'<'

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
|
||||
|
||||
def t_STRING(t):
    r'".*?"'
    # The token value is the text between the double quotes; the quotes
    # themselves are dropped.
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t
|
||||
|
||||
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    # The pattern only checks the shape YYYY-MM-DD. Converting to a
    # datetime.date also rejects impossible dates such as 2023-02-30.
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        # Only a malformed date is reported as a lexical error; any other
        # exception is a programming error and must not be hidden here.
        print(f'Invalid date \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit
    return t
|
||||
|
||||
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    # Digits may be grouped in threes with apostrophes (e.g. 1'000'000).
    # The separators are removed before converting to int.
    digit_groups = t.value.split('\'')
    t.value = int(''.join(digit_groups))
    return t
|
||||
|
||||
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    # Keywords match the identifier pattern as well, so a word found in
    # `reserved` gets its keyword token type instead of IDENT.
    word = t.value
    t.type = reserved[word] if word in reserved else 'IDENT'
    return t
|
||||
|
||||
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    # Function names: one upper-case letter followed by at least one
    # lower-case letter, digit or underscore. The token is passed on as is.
    return t
|
||||
|
||||
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    # Procedure names: two upper-case letters followed by any number of
    # upper-case letters, digits or underscores. The token is passed on as is.
    return t
|
||||
|
||||
def t_error(t):
    # Called for input that matches no token rule: report the first
    # offending character with its line number and stop the program.
    offending_char = t.value[0]
    line_number = t.lexer.lineno
    print(f'Illegal character \'{offending_char}\' at line {line_number}')
    raise SystemExit
|
||||
|
||||
# Build the lexer from the token rules defined above in this module.
lexer = lex.lex()
|
||||
|
||||
def tokenize_file(file_path: str):
    '''Tokenize the UTF-8 text file at file_path and print every token.

    Each token is printed on its own line in the order it is read.'''
    with open(file_path, 'r', encoding='utf-8') as source_file:
        lexer.input(source_file.read())

    # lexer.token() yields None when there is no more input
    for token in iter(lexer.token, None):
        print(token)
|
||||
|
||||
if __name__ == '__main__':
    # Exactly one of --who and --file has to be given on the command line
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if not args.who:
        tokenize_file(args.file)
    else:
        for info_line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(info_line)
|
|
@ -0,0 +1,428 @@
|
|||
#!/bin/env python3
|
||||
|
||||
import argparse
|
||||
import ply.lex as lex
|
||||
import ply.yacc as yacc
|
||||
import lexer
|
||||
import tree_print
|
||||
|
||||
# The parser works on the token types declared by the lexer module.
tokens = lexer.tokens
|
||||
|
||||
class ASTnode:
    '''One node of the abstract syntax tree.

    Every node has a type name (nodetype) and a source line number (lineno).
    The value attribute exists only when a value other than None is given.'''

    def __init__(self, typestr, lineno, value = None):
        self.nodetype = typestr
        self.lineno = lineno
        if value is None:
            # No value: the attribute is deliberately left undefined
            return
        self.value = value
|
||||
|
||||
class SemData:
    '''Mutable context carried through the tree during semantic checking.'''

    def __init__(self):
        # Which kind of definition (if any) is currently being checked
        self.in_procedure_def = self.in_function_def = False
        # Return type name of the enclosing definition, and the node whose
        # children are currently being visited
        self.return_type = self.parent = None
|
||||
|
||||
def p_program1(p):
    'program : statement_list'
    # A program without definitions. statement_list is a non-terminal, for
    # which p.lineno(1) is 0 unless position tracking is enabled, so the
    # program's line is taken from its first statement instead. The grammar
    # guarantees that a statement list holds at least one statement.
    statements = p[1].children_statements
    p[0] = ASTnode('program', statements[0].lineno)
    p[0].children_definitions = []
    p[0].children_statements = statements
|
||||
|
||||
def p_program2(p):
    'program : definition_list statement_list'
    # A program with definitions. definition_list is a non-terminal, for
    # which p.lineno(1) is 0 unless position tracking is enabled, so the
    # program's line is taken from its first definition instead. The grammar
    # guarantees that a definition list holds at least one definition.
    definitions = p[1].children_definitions
    p[0] = ASTnode('program', definitions[0].lineno)
    p[0].children_definitions = definitions
    p[0].children_statements = p[2].children_statements
|
||||
|
||||
def p_statement_list1(p):
    'statement_list : statement'
    # Start a new list node that holds the first statement
    statement_list = ASTnode('statement_list', p.lineno(1))
    statement_list.children_statements = [p[1]]
    p[0] = statement_list
|
||||
|
||||
def p_statement_list2(p):
    'statement_list : statement_list COMMA statement'
    # Reuse the existing list node and add the new statement to its end
    statement_list = p[1]
    statement_list.children_statements.append(p[3])
    p[0] = statement_list
|
||||
|
||||
def p_definition_list1(p):
    'definition_list : definition'
    # Start a new list node that holds the first definition
    definition_list = ASTnode('definition_list', p.lineno(1))
    definition_list.children_definitions = [p[1]]
    p[0] = definition_list
|
||||
|
||||
def p_definition_list2(p):
    'definition_list : definition_list definition'
    # Reuse the existing list node and add the new definition to its end
    definition_list = p[1]
    definition_list.children_definitions.append(p[2])
    p[0] = definition_list
|
||||
|
||||
def p_definition(p):
    '''definition : function_definition
                  | procedure_definition
                  | variable_definition'''
    # A definition is represented directly by the node of whichever
    # alternative matched; no wrapper node is created.
    p[0] = p[1]
|
||||
|
||||
def p_variable_definition(p):
    'variable_definition : VAR IDENT EQ expression'
    # The node value is the variable name; the initial value expression is
    # stored as its child.
    definition = ASTnode('variable_definition', p.lineno(1), p[2])
    definition.child_expression = p[4]
    p[0] = definition
|
||||
|
||||
def p_empty(p):
    'empty :'
    # Matches nothing. p[0] is not assigned, so this production has no value.
    pass
|
||||
|
||||
def p_variable_definition_list1(p):
    'variable_definition_list : empty'
    # No variable definitions yet: start with an empty list
    definition_list = ASTnode('variable_definition_list', p.lineno(1))
    definition_list.children_definitions = []
    p[0] = definition_list
|
||||
|
||||
def p_variable_definition_list2(p):
    'variable_definition_list : variable_definition_list variable_definition'
    # Reuse the existing list node and add the new definition to its end
    definition_list = p[1]
    definition_list.children_definitions.append(p[2])
    p[0] = definition_list
|
||||
|
||||
def p_function_definition(p):
    '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION'''
    # The node value is the function name and its line is that of the name.
    # Attributes are set in this order because it is also the order in which
    # the children are later listed.
    function = ASTnode('function_definition', p.lineno(2), p[2])
    function.children_formals = p[4].children_formals
    function.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    function.children_variable_definitions = p[8].children_definitions
    function.child_value = p[10]
    p[0] = function
|
||||
|
||||
def p_procedure_definition1(p):
    'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE'
    # Procedure without a declared return type: child_return_type is None.
    # The node value is the procedure name.
    procedure = ASTnode('procedure_definition', p.lineno(2), p[2])
    procedure.children_formals = p[4].children_formals
    procedure.children_variable_definitions = p[6].children_definitions
    procedure.children_statements = p[8].children_statements
    procedure.child_return_type = None
    p[0] = procedure
|
||||
|
||||
def p_procedure_definition2(p):
    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE'''
    # Procedure with a declared return type, stored as an identifier node.
    # The node value is the procedure name.
    procedure = ASTnode('procedure_definition', p.lineno(2), p[2])
    procedure.children_formals = p[4].children_formals
    procedure.children_variable_definitions = p[8].children_definitions
    procedure.children_statements = p[10].children_statements
    procedure.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    p[0] = procedure
|
||||
|
||||
def p_formal_list1(p):
    'formal_list : empty'
    # No formal arguments: start with an empty list
    formal_list = ASTnode('formal_list', p.lineno(1))
    formal_list.children_formals = []
    p[0] = formal_list
|
||||
|
||||
def p_formal_list2(p):
    'formal_list : formal_arg'
    # Start a new list node that holds the first formal argument
    formal_list = ASTnode('formal_list', p.lineno(1))
    formal_list.children_formals = [p[1]]
    p[0] = formal_list
|
||||
|
||||
def p_formal_list3(p):
    'formal_list : formal_list COMMA formal_arg'
    # Reuse the existing list node and add the new formal argument to its end
    formal_list = p[1]
    formal_list.children_formals.append(p[3])
    p[0] = formal_list
|
||||
|
||||
def p_formal_arg(p):
    'formal_arg : IDENT LSQUARE IDENT RSQUARE'
    # A formal argument is written name[type]; both parts become
    # identifier nodes under the formal_argument node.
    formal = ASTnode('formal_argument', p.lineno(1))
    formal.child_variable = ASTnode('identifier', p.lineno(1), p[1])
    formal.child_type = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = formal
|
||||
|
||||
def p_procedure_call1(p):
    'procedure_call : PROC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the procedure name
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|
||||
|
||||
def p_procedure_call(p):
    '''procedure_call : PROC_IDENT LPAREN arguments RPAREN'''
    # Call with arguments; the node value is the procedure name
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|
||||
|
||||
def p_arguments1(p):
    'arguments : expression'
    # Start a new list node that holds the first argument
    argument_list = ASTnode('arguments', p.lineno(1))
    argument_list.children_arguments = [p[1]]
    p[0] = argument_list
|
||||
|
||||
def p_arguments2(p):
    'arguments : arguments COMMA expression'
    # Reuse the existing list node and add the new argument to its end
    argument_list = p[1]
    argument_list.children_arguments.append(p[3])
    p[0] = argument_list
|
||||
|
||||
def p_assignment(p):
    'assignment : lvalue EQ rvalue'
    # lvalue is a non-terminal, for which p.lineno(1) is 0 unless position
    # tracking is enabled. Both lvalue rules build their node with the line
    # of the first IDENT token, so the assignment takes its line from there.
    p[0] = ASTnode('assignment', p[1].lineno)
    p[0].child_lhs = p[1]
    p[0].child_rhs = p[3]
|
||||
|
||||
def p_lvalue1(p):
    'lvalue : IDENT'
    # A plain variable as assignment target
    variable_name = p[1]
    p[0] = ASTnode('identifier', p.lineno(1), variable_name)
|
||||
|
||||
def p_lvalue2(p):
    'lvalue : IDENT DOT IDENT'
    # Writing to an attribute is spelled variable.attribute
    target = ASTnode('attribute_write', p.lineno(1))
    target.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    target.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = target
|
||||
|
||||
def p_rvalue(p):
    '''rvalue : expression
              | unless_expression'''
    # The rvalue is represented directly by the node of the matched
    # alternative; no wrapper node is created.
    p[0] = p[1]
|
||||
|
||||
def p_print_statement1(p):
    'print_statement : PRINT print_item'
    # Start a print node that holds the first printed item
    print_node = ASTnode('print', p.lineno(1))
    print_node.children_items = [p[2]]
    p[0] = print_node
|
||||
|
||||
def p_print_statement2(p):
    'print_statement : print_statement AMPERSAND print_item'
    # Further items joined with "&" are added to the existing print node
    print_node = p[1]
    print_node.children_items.append(p[3])
    p[0] = print_node
|
||||
|
||||
def p_print_item1(p):
    'print_item : STRING'
    # The node value is the string token's value
    text = p[1]
    p[0] = ASTnode('string_literal', p.lineno(1), text)
|
||||
|
||||
def p_print_item2(p):
    'print_item : expression'
    # An expression used as a print item keeps its own node.
    p[0] = p[1]
|
||||
|
||||
def p_statement1(p):
    '''statement : procedure_call
                 | assignment
                 | print_statement'''
    # These statements are represented directly by the node of the matched
    # alternative; no wrapper node is created.
    p[0] = p[1]
|
||||
|
||||
def p_statement2(p):
    'statement : DO statement_list UNTIL expression'
    # do ... until: the body statements come first, then the condition
    loop = ASTnode('do_until', p.lineno(1))
    loop.children_statements = p[2].children_statements
    loop.child_condition = p[4]
    p[0] = loop
|
||||
|
||||
def p_statement3(p):
    'statement : DO statement_list UNLESS expression DONE'
    # do ... unless without an otherwise branch: the branch list is empty
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = []
    p[0] = conditional
|
||||
|
||||
def p_statement4(p):
    'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE'
    # do ... unless with an otherwise branch holding its own statements
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = p[6].children_statements
    p[0] = conditional
|
||||
|
||||
def p_statement5(p):
    'statement : RETURN expression'
    # The returned expression is the only child of the return node
    return_node = ASTnode('return', p.lineno(1))
    return_node.child_expression = p[2]
    p[0] = return_node
|
||||
|
||||
def p_expression1(p):
    'expression : simple_expr'
    # An expression without a comparison is just its simple expression.
    p[0] = p[1]
|
||||
|
||||
def p_expression2(p):
    '''expression : expression EQ simple_expr
                  | expression LT simple_expr'''
    # Comparison: the node value is the operator text and the node's line is
    # the operator's line. The rule is left-recursive.
    comparison = ASTnode('binary_op', p.lineno(2), p[2])
    comparison.child_lhs = p[1]
    comparison.child_rhs = p[3]
    p[0] = comparison
|
||||
|
||||
def p_simple_expr1(p):
    'simple_expr : term'
    # A simple expression without + or - is just its term.
    p[0] = p[1]
|
||||
|
||||
def p_simple_expr2(p):
    '''simple_expr : simple_expr PLUS term
                   | simple_expr MINUS term'''
    # Addition or subtraction: the node value is the operator text and the
    # node's line is the operator's line. The rule is left-recursive.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|
||||
|
||||
def p_term1(p):
    'term : factor'
    # A term without * or / is just its factor.
    p[0] = p[1]
|
||||
|
||||
def p_term2(p):
    '''term : term MULT factor
            | term DIV factor'''
    # Multiplication or division: the node value is the operator text and
    # the node's line is the operator's line. The rule is left-recursive.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|
||||
|
||||
def p_factor1(p):
    'factor : atom'
    # A factor without a sign is just its atom.
    p[0] = p[1]
|
||||
|
||||
def p_factor2(p):
    '''factor : MINUS atom
              | PLUS atom'''
    # Signed atom: the node value is the sign text and the operand is stored
    # as the only child.
    signed = ASTnode('unary_op', p.lineno(1), p[1])
    signed.child_atom = p[2]
    p[0] = signed
|
||||
|
||||
def p_atom1(p):
    'atom : IDENT'
    # A variable reference; the node value is the variable name
    variable_name = p[1]
    p[0] = ASTnode('identifier', p.lineno(1), variable_name)
|
||||
|
||||
def p_atom2(p):
    'atom : INT_LITERAL'
    # An integer literal; the node value is the token's value
    number = p[1]
    p[0] = ASTnode('int_literal', p.lineno(1), number)
|
||||
|
||||
def p_atom3(p):
    'atom : DATE_LITERAL'
    # A date literal; the node value is the token's value
    date_value = p[1]
    p[0] = ASTnode('date_literal', p.lineno(1), date_value)
|
||||
|
||||
def p_atom4(p):
    'atom : IDENT APOSTROPHE IDENT'
    # Reading an attribute is spelled variable'attribute
    access = ASTnode('attribute_read', p.lineno(1))
    access.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    access.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = access
|
||||
|
||||
def p_atom5(p):
    'atom : LPAREN expression RPAREN'
    # Parentheses only group: the inner expression's node is used as is.
    p[0] = p[2]
|
||||
|
||||
def p_atom6(p):
    '''atom : function_call
            | procedure_call'''
    # A call used as a value keeps the node built by its own rule.
    p[0] = p[1]
|
||||
|
||||
def p_function_call1(p):
    'function_call : FUNC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the function name
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|
||||
|
||||
def p_function_call2(p):
    'function_call : FUNC_IDENT LPAREN arguments RPAREN'
    # Call with arguments; the node value is the function name
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|
||||
|
||||
def p_unless_expression(p):
    'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'
    # The condition is stored first even though it is written second in the
    # source; the expression before UNLESS becomes child_true_expr and the
    # one after OTHERWISE becomes child_false_expr.
    conditional = ASTnode('unless_expression', p.lineno(1))
    conditional.child_condition = p[4]
    conditional.child_true_expr = p[2]
    conditional.child_false_expr = p[6]
    p[0] = conditional
|
||||
|
||||
def p_error(p):
    # Syntax error handler: report the error and stop the program.
    # p is the offending token, or None when the input ended unexpectedly.
    if p is None:
        message = 'Syntax Error at the end of file'
    else:
        message = f"{{{p.lexer.lineno}}}:Syntax Error (token:'{p.value}')"
    print(message)
    raise SystemExit
|
||||
|
||||
|
||||
def syntax_check_file(file_path: str, debug: bool) -> ASTnode:
    '''Parse the UTF-8 source file at file_path and return the parse result.

    The parser is built from the grammar rules of this module and reads its
    tokens from the lexer module's lexer. debug is passed on to the parser.'''
    parser = yacc.yacc()
    with open(file_path, 'r', encoding='utf-8') as source_file:
        source_text = source_file.read()
    return parser.parse(source_text, lexer=lexer.lexer, debug=debug)
|
||||
|
||||
|
||||
def semantic_check(node: ASTnode, sem_data: SemData):
|
||||
is_procedure = False
|
||||
is_function = False
|
||||
return_type = None
|
||||
|
||||
match node.nodetype:
|
||||
case 'attribute_read':
|
||||
if node.child_attribute.value not in ['day', 'month', 'year', 'weekday', 'weeknum']:
|
||||
print(f'Semantic Error: invalid read attribute \'{node.child_attribute.value}\' at line {node.lineno}')
|
||||
raise SystemExit
|
||||
case 'attribute_write':
|
||||
if node.child_attribute.value not in ['day', 'month', 'year']:
|
||||
print(f'Semantic Error: invalid write attribute \'{node.child_attribute.value}\' at line {node.lineno}')
|
||||
raise SystemExit
|
||||
case 'procedure_definition':
|
||||
if node.child_return_type is not None:
|
||||
if node.child_return_type.value not in ['int', 'date']:
|
||||
print(f'Semantic Error: procedure definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
|
||||
raise SystemExit
|
||||
return_type = node.child_return_type.value
|
||||
for formal in node.children_formals:
|
||||
if formal.child_type.value not in ['int', 'date']:
|
||||
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
|
||||
raise SystemExit
|
||||
is_procedure = True
|
||||
case 'function_definition':
|
||||
if node.child_return_type is not None:
|
||||
if node.child_return_type.value not in ['int', 'date']:
|
||||
print(f'Semantic Error: function definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
|
||||
raise SystemExit
|
||||
return_type = node.child_return_type.value
|
||||
for formal in node.children_formals:
|
||||
if formal.child_type.value not in ['int', 'date']:
|
||||
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
|
||||
raise SystemExit
|
||||
is_function = True
|
||||
case 'procedure_call':
|
||||
if sem_data.in_function_def:
|
||||
print(f'Semantic Error: procedure call inside function at line {node.lineno}')
|
||||
raise SystemExit
|
||||
case 'return':
|
||||
if not sem_data.in_procedure_def:
|
||||
print(f'Semantic Error: return statement outside of procedure definition at line {node.lineno}')
|
||||
raise SystemExit
|
||||
if sem_data.return_type is None:
|
||||
print(f'Semantic Error: return statement in returnless procedure definition at line {node.lineno}')
|
||||
raise SystemExit
|
||||
case 'date_literal':
|
||||
# date literal can be in variable definition
|
||||
if sem_data.parent.nodetype == 'variable_definition':
|
||||
pass
|
||||
# right side of assignment
|
||||
elif sem_data.parent.nodetype == 'assignment':
|
||||
if sem_data.parent.child_lhs.nodetype == 'date_literal':
|
||||
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||
raise SystemExit
|
||||
# either side of subtraction or left side of addition
|
||||
elif sem_data.parent.nodetype == 'binary_op':
|
||||
if sem_data.parent.value == '-':
|
||||
pass
|
||||
elif sem_data.parent.value == '+':
|
||||
if sem_data.parent.child_rhs.nodetype == 'date_literal':
|
||||
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||
raise SystemExit
|
||||
else:
|
||||
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||
raise SystemExit
|
||||
else:
|
||||
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||
raise SystemExit
|
||||
|
||||
if is_procedure or is_function:
|
||||
sem_data.in_procedure_def = is_procedure
|
||||
sem_data.in_function_def = is_function
|
||||
sem_data.return_type = return_type
|
||||
|
||||
temp_parent = sem_data.parent
|
||||
sem_data.parent = node
|
||||
|
||||
for name, child in tree_print.get_childvars(node):
|
||||
if child is not None:
|
||||
semantic_check(child, sem_data)
|
||||
|
||||
sem_data.parent = temp_parent
|
||||
|
||||
if is_procedure or is_function:
|
||||
sem_data.in_procedure_def = False
|
||||
sem_data.in_function_def = False
|
||||
sem_data.return_type = None
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # --debug is optional; exactly one of --who and --file has to be given
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', action='store_true', help='debug?')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if not args.who:
        # Parse the file, show the resulting tree, then check its semantics
        ast = syntax_check_file(args.file, args.debug)
        tree_print.treeprint(ast, 'unicode')

        semantic_check(ast, SemData())
    else:
        for info_line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(info_line)
|
||||
|
|
@ -0,0 +1,209 @@
|
|||
#!/usr/bin/env python3
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# Values to control the module's working
|
||||
|
||||
# How to recognize attributes in nodes by their names
|
||||
|
||||
# Attribute name prefix for a single child node
child_prefix_default = "child_"
# Attribute name prefix for a list of child nodes
children_prefix_default = "children_"
# Names of the node attributes that the printing functions show when present
value_attr = "value"
nodetype_attr = "nodetype"
lineno_attr = "lineno"
type_attr = "type"
|
||||
|
||||
# Finding and creating a list of all children nodes of a node, based on
|
||||
# attribute names of a node
|
||||
|
||||
def get_childvars(node, child_prefix=child_prefix_default,
                  children_prefix=children_prefix_default):
    '''Collect the children of a tree node as (label, child) pairs.

    An attribute whose name starts with child_prefix is taken to hold one
    child node, and an attribute whose name starts with children_prefix is
    taken to hold a LIST of child nodes. The label is the attribute name
    without the prefix. For child lists the label also carries the index of
    the child in square brackets. A list attribute that is empty, None or
    not iterable gives one pair with a marker in the label and None as the
    child. An object without attributes gives an empty list.'''

    # Only objects with an attribute dictionary can have children
    if not hasattr(node, "__dict__"):
        return []

    found = []
    for attr_name, attr_value in vars(node).items():
        if attr_name.startswith(child_prefix):
            # An attribute containing one child node
            found.append((attr_name[len(child_prefix):], attr_value))
            continue
        if not attr_name.startswith(children_prefix):
            # Not a child attribute at all
            continue

        # An attribute containing a child list
        label = attr_name[len(children_prefix):]
        if attr_value is None:
            found.append((label+"[NONE stored instead of a list!!!]", None))
        elif not hasattr(attr_value, "__iter__"):
            found.append((label+"[Not a list!!!]", None))
        elif not attr_value:
            # An empty list/iterable (no nodes)
            found.append((label+"[EMPTY]", None))
        else:
            # A non-empty list/iterable: one pair per element
            numbered = []
            for index, child in enumerate(attr_value):
                numbered.append((label+"["+str(index)+"]", child))
            found.extend(numbered)
    return found
|
||||
|
||||
|
||||
# Printing the syntax tree (AST)
|
||||
|
||||
# Strings that ASCII and Unicode trees are made out of
|
||||
|
||||
# Unicode box drawing characters and the indentation strings built from them
vertical_uni = "\N{BOX DRAWINGS LIGHT VERTICAL}"
horizontal_uni = "\N{BOX DRAWINGS LIGHT HORIZONTAL}"
vertical_right_uni = "\N{BOX DRAWINGS LIGHT VERTICAL AND RIGHT}"
up_right_uni = "\N{BOX DRAWINGS LIGHT UP AND RIGHT}"
# Markers printed in front of a child (not last / last); three characters wide
child_indent_uni = vertical_right_uni + horizontal_uni + horizontal_uni
last_child_indent_uni = up_right_uni + horizontal_uni + horizontal_uni
# Indentation for the lines below a child marker. It has the same width as
# the markers so that nested levels line up under their parent.
normal_indent_uni = vertical_uni + "  "
last_normal_indent_uni = "   "

# The same strings using only ASCII characters
vertical_asc = "|"
horizontal_asc = "-"
vertical_right_asc = "+"
up_right_asc = "+"
child_indent_asc = vertical_right_asc + horizontal_asc + horizontal_asc
last_child_indent_asc = up_right_asc + horizontal_asc + horizontal_asc
normal_indent_asc = vertical_asc + "  "
last_normal_indent_asc = "   "
|
||||
|
||||
# What to put to the beginning and end of dot files
|
||||
|
||||
# Opening of the dot output: starts a directed graph named "parsetree" and
# sets graph-wide layout, node and edge attributes.
dot_preamble='''digraph parsetree {
ratio=fill
node [shape="box"]
edge [style=bold]
ranksep=equally
nodesep=0.5
rankdir = TB
clusterrank = local'''

# Closing of the dot output: ends the graph opened by the preamble.
dot_postamble='}'
|
||||
|
||||
def dotnodeid(nodenum):
    '''Return the dot identifier of a node: "N" followed by its number.'''
    return f"N{nodenum}"
|
||||
|
||||
def treeprint_indent(node, outtype="unicode", label="", first_indent="", indent=""):
    '''Print a subtree of a tree as indented text.

    node = the root of the subtree
    outtype = "unicode" for box drawing characters, anything else for ASCII
    label = the "role" of the subtree in its parent (from the attribute name)
    first_indent = what to print at the beginning of the first line
    indent = what to print at the beginning of the rest of the lines'''

    # The label (if any) follows the indentation on the first line
    prefix = first_indent + label + ": " if label else first_indent

    if not node:
        # A missing node is shown as NONE
        print(prefix + "NONE")
        return

    # First the node type, or the node itself if it has no node type
    # attribute (to help in finding the error)
    if hasattr(node, nodetype_attr):
        line = prefix + getattr(node, nodetype_attr)
    else:
        line = prefix + "??? '" + str(node) + "' ???"
    # Then the value in parenthesis, the type after ":" and the line number
    # after "#", each only if the node has that attribute
    decorations = (
        (value_attr, " (", ")"),
        (type_attr, " :", ""),
        (lineno_attr, " #", ""),
    )
    for attr_name, opening, closing in decorations:
        if hasattr(node, attr_name):
            line += opening + str(getattr(node, attr_name)) + closing
    print(line)

    # Choose the drawing strings once for all children
    if outtype == "unicode":
        marker, filler = child_indent_uni, normal_indent_uni
        last_marker, last_filler = last_child_indent_uni, last_normal_indent_uni
    else:
        marker, filler = child_indent_asc, normal_indent_asc
        last_marker, last_filler = last_child_indent_asc, last_normal_indent_asc

    # Recursively print the child subtrees; the last child is drawn with its
    # own marker and filler
    children = get_childvars(node)
    last_position = len(children) - 1
    for position, (child_label, child) in enumerate(children):
        if position < last_position:
            head, rest = marker, filler
        else:
            head, rest = last_marker, last_filler
        treeprint_indent(child, outtype, child_label, indent+head,
                         indent+rest)
|
||||
|
||||
def treeprint_dot(node, nodenum, nodecount):
    '''Print a subtree in dot format.

    node = the root of the subtree
    nodenum = number of the node (for dot id generation)
    nodecount = a one-element list containing the maximum id used so far;
                it is updated as the children are numbered'''

    node_id = dotnodeid(nodenum)

    if not node:
        # A missing node is output as an ellipse with label NONE
        print(node_id + ' [shape="ellipse", label="NONE"]')
        return

    # First label line: the node type, or the node itself if it has no node
    # type attribute (to help in finding the error)
    if hasattr(node, nodetype_attr):
        label_text = getattr(node, nodetype_attr)
    else:
        label_text = "??? '" + str(node) + "' ???"

    # Second label line: value, type and line number, whichever are present
    details = ""
    if hasattr(node, value_attr):
        details += " (" + str(getattr(node, value_attr)) + ")"
    if hasattr(node, type_attr):
        details += " :" + str(getattr(node, type_attr))
    if hasattr(node, lineno_attr):
        details += " #" + str(getattr(node, lineno_attr))
    if details:
        label_text += "\n"+details

    # Normal nodes use the default shape
    print(node_id + ' [label="' + label_text + '"]')

    for edge_label, child in get_childvars(node):
        # Number the child by one more than current maximum (and update maximum)
        nodecount[0] += 1
        child_num = nodecount[0]
        # The child subtree is printed before the edge leading to it
        treeprint_dot(child, child_num, nodecount)
        print(node_id+"->"+dotnodeid(child_num)+ ' [label="'+edge_label+'"]')
|
||||
|
||||
def treeprint(rootnode, outtype="unicode"):
    '''Print out a tree, given its root.

    The second argument is the output type:
    "unicode" (default) prints a text version of the tree using Unicode box
              drawing characters.
    "dot" prints the tree in dot format (can be converted to a graphical
          tree using the dot command of Graphviz).
    Any other value, such as "ascii", prints a text version using only
    ASCII characters (|, -, +).'''
    if outtype != "dot":
        treeprint_indent(rootnode, outtype)
        return

    # Dot output: the nodes and edges go between the preamble and postamble
    print(dot_preamble)
    treeprint_dot(rootnode, 0, [0])
    print(dot_postamble)
|
Loading…
Reference in New Issue