Implement phase 3 semantic checking
This commit is contained in:
parent
004ee25273
commit
91a3a0ba2e
|
@ -0,0 +1,132 @@
|
||||||
|
#!/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import datetime
|
||||||
|
import ply.lex as lex
|
||||||
|
|
||||||
|
# Keywords of the language. Each one is spelled in lowercase in source code
# and its token name is the same word in uppercase.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'var',
        'is',
        'unless',
        'otherwise',
        'until',
        'do',
        'done',
        'procedure',
        'function',
        'return',
        'print',
        'end',
    )
}

# Every token name the lexer can produce: punctuation and operators first,
# then literals and identifiers, then the keyword tokens from `reserved`.
tokens = [
    'LPAREN',
    'RPAREN',
    'LSQUARE',
    'RSQUARE',
    'LCURLY',
    'RCURLY',
    'APOSTROPHE',
    'AMPERSAND',
    'COMMA',
    'DOT',
    'EQ',
    'LT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'STRING',
    'DATE_LITERAL',
    'INT_LITERAL',
    'IDENT',
    'FUNC_IDENT',
    'PROC_IDENT',
]
tokens.extend(reserved.values())
|
||||||
|
|
||||||
|
def t_whitespace(t):
    r'[ \t\n]+'
    # No token is returned for whitespace; only the lexer's line counter is
    # advanced by the number of newlines in the matched run.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||||
|
|
||||||
|
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    # Comments are delimited by "(%" and "%)" and may span several lines.
    # Nothing is returned; the newlines inside the comment are still counted
    # so later line numbers stay correct.
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||||
|
|
||||||
|
# Patterns for the fixed, single-character tokens.

# Brackets
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'

# Punctuation
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'

# Comparison operators
t_EQ = r'='
t_LT = r'<'

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
|
||||||
|
|
||||||
|
def t_STRING(t):
    r'".*?"'
    # Keep only the text between the first and last character of the match,
    # i.e. drop the surrounding double quotes.
    quoted = t.value
    t.value = quoted[1:-1]
    return t
|
||||||
|
|
||||||
|
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    # The pattern only checks the digit layout (YYYY-MM-DD). Conversion to a
    # datetime.date rejects impossible dates such as month 13 or February 30.
    # On success the token value becomes a datetime.date; on failure an error
    # with the current line number is printed and the program exits.
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        # Catch only the conversion failure, so unrelated exceptions
        # (e.g. KeyboardInterrupt) are not reported as an invalid date.
        print(f'Invalid date \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit
    return t
|
||||||
|
|
||||||
|
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    # Apostrophes act as thousands separators (e.g. 1'000'000); they are
    # removed before the text is converted to an int.
    digit_groups = t.value.split("'")
    t.value = int(''.join(digit_groups))
    return t
|
||||||
|
|
||||||
|
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    # Keywords match the same pattern as identifiers, so a match that is
    # listed in `reserved` is retyped to its keyword token.
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'IDENT'
    return t
|
||||||
|
|
||||||
|
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    # Function names: one uppercase letter followed by at least one
    # lowercase letter, digit or underscore. The token is passed on unchanged.
    return t
|
||||||
|
|
||||||
|
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    # Procedure names: two uppercase letters followed by any number of
    # uppercase letters, digits or underscores. The token is passed on unchanged.
    return t
|
||||||
|
|
||||||
|
def t_error(t):
    # Lexer error hook: report the first character of the unmatched input
    # together with the current line number, then stop the program.
    offending = t.value[0]
    line = t.lexer.lineno
    print(f'Illegal character \'{offending}\' at line {line}')
    raise SystemExit
|
||||||
|
|
||||||
|
lexer = lex.lex()
|
||||||
|
|
||||||
|
def tokenize_file(file_path: str):
    """Tokenize the file at `file_path` and print each token on its own line.

    The file is read as UTF-8 and fed to the module-level `lexer`.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        lexer.input(source.read())

        # lexer.token() yields a falsy value once the input is exhausted.
        while (current := lexer.token()):
            print(current)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: exactly one of --who or -f/--file must be given.
    arg_parser = argparse.ArgumentParser()
    mode = arg_parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    options = arg_parser.parse_args()

    if not options.who:
        tokenize_file(options.file)
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)
|
|
@ -0,0 +1,428 @@
|
||||||
|
#!/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import ply.lex as lex
|
||||||
|
import ply.yacc as yacc
|
||||||
|
import lexer
|
||||||
|
import tree_print
|
||||||
|
|
||||||
|
tokens = lexer.tokens
|
||||||
|
|
||||||
|
class ASTnode:
    """A node of the abstract syntax tree.

    Every node has a `nodetype` string and a `lineno`. A `value` attribute
    exists only when a non-None value was given. Child nodes are attached
    later by the grammar actions as `child_*` / `children_*` attributes.
    """

    def __init__(self, typestr, lineno, value = None):
        self.nodetype = typestr
        self.lineno = lineno
        if value is None:
            # Leave the attribute absent rather than storing None.
            return
        self.value = value
|
||||||
|
|
||||||
|
class SemData:
    """Mutable state carried through the semantic check of the tree."""

    def __init__(self):
        # Whether the walk is currently inside a procedure / function body.
        self.in_procedure_def = False
        self.in_function_def = False
        # Declared return type of the enclosing definition, if any.
        self.return_type = None
        # The node whose children are currently being visited.
        self.parent = None
|
||||||
|
|
||||||
|
def p_program1(p):
    'program : statement_list'
    # A program without any definitions: the definition list stays empty.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = []
    root.children_statements = p[1].children_statements
    p[0] = root
|
||||||
|
|
||||||
|
def p_program2(p):
    'program : definition_list statement_list'
    # A program with definitions followed by the main statements.
    root = ASTnode('program', p.lineno(1))
    root.children_definitions = p[1].children_definitions
    root.children_statements = p[2].children_statements
    p[0] = root
|
||||||
|
|
||||||
|
def p_statement_list1(p):
    'statement_list : statement'
    # Start a new list holding the first statement.
    listing = ASTnode('statement_list', p.lineno(1))
    listing.children_statements = [p[1]]
    p[0] = listing
|
||||||
|
|
||||||
|
def p_statement_list2(p):
    'statement_list : statement_list COMMA statement'
    # Reuse the existing list node and append the new statement to it.
    listing = p[1]
    listing.children_statements.append(p[3])
    p[0] = listing
|
||||||
|
|
||||||
|
def p_definition_list1(p):
    'definition_list : definition'
    # Start a new list holding the first definition.
    listing = ASTnode('definition_list', p.lineno(1))
    listing.children_definitions = [p[1]]
    p[0] = listing
|
||||||
|
|
||||||
|
def p_definition_list2(p):
    'definition_list : definition_list definition'
    # Reuse the existing list node and append the new definition to it.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
|
||||||
|
|
||||||
|
def p_definition(p):
    '''definition : function_definition
                  | procedure_definition
                  | variable_definition'''
    # Pure pass-through: the specific definition node is used as-is.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_variable_definition(p):
    'variable_definition : VAR IDENT EQ expression'
    # The node value is the variable name; the initializer is its only child.
    definition = ASTnode('variable_definition', p.lineno(1), p[2])
    definition.child_expression = p[4]
    p[0] = definition
|
||||||
|
|
||||||
|
def p_empty(p):
    'empty :'
    # Empty production: no node is built, so p[0] is left untouched.
|
||||||
|
|
||||||
|
def p_variable_definition_list1(p):
    'variable_definition_list : empty'
    # Base case: a list node with no local variable definitions yet.
    listing = ASTnode('variable_definition_list', p.lineno(1))
    listing.children_definitions = []
    p[0] = listing
|
||||||
|
|
||||||
|
def p_variable_definition_list2(p):
    'variable_definition_list : variable_definition_list variable_definition'
    # Reuse the existing list node and append the new variable definition.
    listing = p[1]
    listing.children_definitions.append(p[2])
    p[0] = listing
|
||||||
|
|
||||||
|
def p_function_definition(p):
    '''function_definition : FUNCTION FUNC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS rvalue END FUNCTION'''
    # The node value is the function name. The return type is wrapped in its
    # own identifier node; the body of a function is a single rvalue.
    definition = ASTnode('function_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    definition.children_variable_definitions = p[8].children_definitions
    definition.child_value = p[10]
    p[0] = definition
|
||||||
|
|
||||||
|
def p_procedure_definition1(p):
    'procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY variable_definition_list IS statement_list END PROCEDURE'
    # Procedure without a RETURN clause: the return type child is None.
    definition = ASTnode('procedure_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.children_variable_definitions = p[6].children_definitions
    definition.children_statements = p[8].children_statements
    definition.child_return_type = None
    p[0] = definition
|
||||||
|
|
||||||
|
def p_procedure_definition2(p):
    '''procedure_definition : PROCEDURE PROC_IDENT LCURLY formal_list RCURLY RETURN IDENT variable_definition_list IS statement_list END PROCEDURE'''
    # Procedure with a RETURN clause: the return type becomes an identifier node.
    definition = ASTnode('procedure_definition', p.lineno(2), p[2])
    definition.children_formals = p[4].children_formals
    definition.children_variable_definitions = p[8].children_definitions
    definition.children_statements = p[10].children_statements
    definition.child_return_type = ASTnode('identifier', p.lineno(7), p[7])
    p[0] = definition
|
||||||
|
|
||||||
|
def p_formal_list1(p):
    'formal_list : empty'
    # No formal arguments at all.
    formals = ASTnode('formal_list', p.lineno(1))
    formals.children_formals = []
    p[0] = formals
|
||||||
|
|
||||||
|
def p_formal_list2(p):
    'formal_list : formal_arg'
    # Start a new list holding the first formal argument.
    formals = ASTnode('formal_list', p.lineno(1))
    formals.children_formals = [p[1]]
    p[0] = formals
|
||||||
|
|
||||||
|
def p_formal_list3(p):
    'formal_list : formal_list COMMA formal_arg'
    # Reuse the existing list node and append the new formal argument.
    formals = p[1]
    formals.children_formals.append(p[3])
    p[0] = formals
|
||||||
|
|
||||||
|
def p_formal_arg(p):
    'formal_arg : IDENT LSQUARE IDENT RSQUARE'
    # Written as name[type]; both parts become identifier nodes.
    formal = ASTnode('formal_argument', p.lineno(1))
    formal.child_variable = ASTnode('identifier', p.lineno(1), p[1])
    formal.child_type = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = formal
|
||||||
|
|
||||||
|
def p_procedure_call1(p):
    'procedure_call : PROC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the procedure name.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|
||||||
|
|
||||||
|
def p_procedure_call(p):
    '''procedure_call : PROC_IDENT LPAREN arguments RPAREN'''
    # Call with arguments; the node value is the procedure name.
    call = ASTnode('procedure_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|
||||||
|
|
||||||
|
def p_arguments1(p):
    'arguments : expression'
    # Start a new list holding the first argument expression.
    listing = ASTnode('arguments', p.lineno(1))
    listing.children_arguments = [p[1]]
    p[0] = listing
|
||||||
|
|
||||||
|
def p_arguments2(p):
    'arguments : arguments COMMA expression'
    # Reuse the existing list node and append the new argument expression.
    listing = p[1]
    listing.children_arguments.append(p[3])
    p[0] = listing
|
||||||
|
|
||||||
|
def p_assignment(p):
    'assignment : lvalue EQ rvalue'
    # Target on the left, assigned value on the right.
    assignment = ASTnode('assignment', p.lineno(1))
    assignment.child_lhs = p[1]
    assignment.child_rhs = p[3]
    p[0] = assignment
|
||||||
|
|
||||||
|
def p_lvalue1(p):
    'lvalue : IDENT'
    # A plain variable as assignment target.
    target = ASTnode('identifier', p.lineno(1), p[1])
    p[0] = target
|
||||||
|
|
||||||
|
def p_lvalue2(p):
    'lvalue : IDENT DOT IDENT'
    # Attribute write, spelled variable.attribute.
    target = ASTnode('attribute_write', p.lineno(1))
    target.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    target.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = target
|
||||||
|
|
||||||
|
def p_rvalue(p):
    '''rvalue : expression
              | unless_expression'''
    # Pure pass-through of whichever expression node was parsed.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_print_statement1(p):
    'print_statement : PRINT print_item'
    # Start a print node holding the first item.
    statement = ASTnode('print', p.lineno(1))
    statement.children_items = [p[2]]
    p[0] = statement
|
||||||
|
|
||||||
|
def p_print_statement2(p):
    'print_statement : print_statement AMPERSAND print_item'
    # Further items are chained with '&' and appended to the same print node.
    statement = p[1]
    statement.children_items.append(p[3])
    p[0] = statement
|
||||||
|
|
||||||
|
def p_print_item1(p):
    'print_item : STRING'
    # String literals only appear in the grammar as print items.
    literal = ASTnode('string_literal', p.lineno(1), p[1])
    p[0] = literal
|
||||||
|
|
||||||
|
def p_print_item2(p):
    'print_item : expression'
    # Pure pass-through of the expression node.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_statement1(p):
    '''statement : procedure_call
                 | assignment
                 | print_statement'''
    # Pure pass-through of the specific statement node.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_statement2(p):
    'statement : DO statement_list UNTIL expression'
    # Loop statement: body statements plus the UNTIL condition.
    loop = ASTnode('do_until', p.lineno(1))
    loop.children_statements = p[2].children_statements
    loop.child_condition = p[4]
    p[0] = loop
|
||||||
|
|
||||||
|
def p_statement3(p):
    'statement : DO statement_list UNLESS expression DONE'
    # Conditional without an OTHERWISE branch: that list is left empty.
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = []
    p[0] = conditional
|
||||||
|
|
||||||
|
def p_statement4(p):
    'statement : DO statement_list UNLESS expression OTHERWISE statement_list DONE'
    # Conditional with an OTHERWISE branch.
    conditional = ASTnode('do_unless', p.lineno(1))
    conditional.children_statements = p[2].children_statements
    conditional.child_condition = p[4]
    conditional.children_otherwise = p[6].children_statements
    p[0] = conditional
|
||||||
|
|
||||||
|
def p_statement5(p):
    'statement : RETURN expression'
    # Return statement with the returned expression as its only child.
    statement = ASTnode('return', p.lineno(1))
    statement.child_expression = p[2]
    p[0] = statement
|
||||||
|
|
||||||
|
def p_expression1(p):
    'expression : simple_expr'
    # No comparison operator present: pass the operand node through.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_expression2(p):
    '''expression : expression EQ simple_expr
                  | expression LT simple_expr'''
    # Comparison; the operator text is the node value.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|
||||||
|
|
||||||
|
def p_simple_expr1(p):
    'simple_expr : term'
    # No additive operator present: pass the term node through.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_simple_expr2(p):
    '''simple_expr : simple_expr PLUS term
                   | simple_expr MINUS term'''
    # Addition or subtraction; the operator text is the node value.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|
||||||
|
|
||||||
|
def p_term1(p):
    'term : factor'
    # No multiplicative operator present: pass the factor node through.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_term2(p):
    '''term : term MULT factor
            | term DIV factor'''
    # Multiplication or division; the operator text is the node value.
    operation = ASTnode('binary_op', p.lineno(2), p[2])
    operation.child_lhs = p[1]
    operation.child_rhs = p[3]
    p[0] = operation
|
||||||
|
|
||||||
|
def p_factor1(p):
    'factor : atom'
    # No sign present: pass the atom node through.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_factor2(p):
    '''factor : MINUS atom
              | PLUS atom'''
    # Signed atom; the sign character is the node value.
    operation = ASTnode('unary_op', p.lineno(1), p[1])
    operation.child_atom = p[2]
    p[0] = operation
|
||||||
|
|
||||||
|
def p_atom1(p):
    'atom : IDENT'
    # Variable reference.
    reference = ASTnode('identifier', p.lineno(1), p[1])
    p[0] = reference
|
||||||
|
|
||||||
|
def p_atom2(p):
    'atom : INT_LITERAL'
    # Integer literal; the token value is stored as the node value.
    literal = ASTnode('int_literal', p.lineno(1), p[1])
    p[0] = literal
|
||||||
|
|
||||||
|
def p_atom3(p):
    'atom : DATE_LITERAL'
    # Date literal; the token value is stored as the node value.
    literal = ASTnode('date_literal', p.lineno(1), p[1])
    p[0] = literal
|
||||||
|
|
||||||
|
def p_atom4(p):
    'atom : IDENT APOSTROPHE IDENT'
    # Attribute read, spelled variable'attribute.
    read = ASTnode('attribute_read', p.lineno(1))
    read.child_identifier = ASTnode('identifier', p.lineno(1), p[1])
    read.child_attribute = ASTnode('identifier', p.lineno(3), p[3])
    p[0] = read
|
||||||
|
|
||||||
|
def p_atom5(p):
    'atom : LPAREN expression RPAREN'
    # Parentheses only group; the inner expression node is used directly.
    p[0] = p[2]
|
||||||
|
|
||||||
|
def p_atom6(p):
    '''atom : function_call
            | procedure_call'''
    # A call used as a value: pass the call node through.
    p[0] = p[1]
|
||||||
|
|
||||||
|
def p_function_call1(p):
    'function_call : FUNC_IDENT LPAREN RPAREN'
    # Call without arguments; the node value is the function name.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = []
    p[0] = call
|
||||||
|
|
||||||
|
def p_function_call2(p):
    'function_call : FUNC_IDENT LPAREN arguments RPAREN'
    # Call with arguments; the node value is the function name.
    call = ASTnode('function_call', p.lineno(1), p[1])
    call.children_arguments = p[3].children_arguments
    p[0] = call
|
||||||
|
|
||||||
|
def p_unless_expression(p):
    'unless_expression : DO expression UNLESS expression OTHERWISE expression DONE'
    # The condition (4th symbol) is attached first, then the expression
    # before UNLESS and the expression after OTHERWISE.
    conditional = ASTnode('unless_expression', p.lineno(1))
    conditional.child_condition = p[4]
    conditional.child_true_expr = p[2]
    conditional.child_false_expr = p[6]
    p[0] = conditional
|
||||||
|
|
||||||
|
def p_error(p):
    # Parser error hook: print where parsing failed and stop the program.
    # When no offending token is available (p is None) the end-of-file
    # message is used instead.
    if p is None:
        message = 'Syntax Error at the end of file'
    else:
        message = f"{{{p.lexer.lineno}}}:Syntax Error (token:'{p.value}')"
    print(message)
    raise SystemExit
|
||||||
|
|
||||||
|
|
||||||
|
def syntax_check_file(file_path: str, debug: bool) -> ASTnode:
    """Parse the file at `file_path` and return the resulting syntax tree.

    A parser is built from the grammar rules of this module; the file is read
    as UTF-8 and tokenized with the lexer from the `lexer` module. `debug` is
    passed on to the parser's `parse` call.
    """
    grammar_parser = yacc.yacc()
    with open(file_path, 'r', encoding='utf-8') as source:
        tree = grammar_parser.parse(source.read(), lexer=lexer.lexer, debug=debug)
    return tree
|
||||||
|
|
||||||
|
|
||||||
|
def semantic_check(node: ASTnode, sem_data: SemData):
|
||||||
|
is_procedure = False
|
||||||
|
is_function = False
|
||||||
|
return_type = None
|
||||||
|
|
||||||
|
match node.nodetype:
|
||||||
|
case 'attribute_read':
|
||||||
|
if node.child_attribute.value not in ['day', 'month', 'year', 'weekday', 'weeknum']:
|
||||||
|
print(f'Semantic Error: invalid read attribute \'{node.child_attribute.value}\' at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
case 'attribute_write':
|
||||||
|
if node.child_attribute.value not in ['day', 'month', 'year']:
|
||||||
|
print(f'Semantic Error: invalid write attribute \'{node.child_attribute.value}\' at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
case 'procedure_definition':
|
||||||
|
if node.child_return_type is not None:
|
||||||
|
if node.child_return_type.value not in ['int', 'date']:
|
||||||
|
print(f'Semantic Error: procedure definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
return_type = node.child_return_type.value
|
||||||
|
for formal in node.children_formals:
|
||||||
|
if formal.child_type.value not in ['int', 'date']:
|
||||||
|
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
is_procedure = True
|
||||||
|
case 'function_definition':
|
||||||
|
if node.child_return_type is not None:
|
||||||
|
if node.child_return_type.value not in ['int', 'date']:
|
||||||
|
print(f'Semantic Error: function definition with invalid return type \'{node.child_return_type.value}\' at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
return_type = node.child_return_type.value
|
||||||
|
for formal in node.children_formals:
|
||||||
|
if formal.child_type.value not in ['int', 'date']:
|
||||||
|
print(f'Semantic Error: procedure definition with invalid argument type \'{formal.child_type.value}\' at line {formal.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
is_function = True
|
||||||
|
case 'procedure_call':
|
||||||
|
if sem_data.in_function_def:
|
||||||
|
print(f'Semantic Error: procedure call inside function at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
case 'return':
|
||||||
|
if not sem_data.in_procedure_def:
|
||||||
|
print(f'Semantic Error: return statement outside of procedure definition at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
if sem_data.return_type is None:
|
||||||
|
print(f'Semantic Error: return statement in returnless procedure definition at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
case 'date_literal':
|
||||||
|
# date literal can be in variable definition
|
||||||
|
if sem_data.parent.nodetype == 'variable_definition':
|
||||||
|
pass
|
||||||
|
# right side of assignment
|
||||||
|
elif sem_data.parent.nodetype == 'assignment':
|
||||||
|
if sem_data.parent.child_lhs.nodetype == 'date_literal':
|
||||||
|
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
# either side of subtraction or left side of addition
|
||||||
|
elif sem_data.parent.nodetype == 'binary_op':
|
||||||
|
if sem_data.parent.value == '-':
|
||||||
|
pass
|
||||||
|
elif sem_data.parent.value == '+':
|
||||||
|
if sem_data.parent.child_rhs.nodetype == 'date_literal':
|
||||||
|
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
else:
|
||||||
|
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
else:
|
||||||
|
print(f'Semantic Error: invalid date literal at line {node.lineno}')
|
||||||
|
raise SystemExit
|
||||||
|
|
||||||
|
if is_procedure or is_function:
|
||||||
|
sem_data.in_procedure_def = is_procedure
|
||||||
|
sem_data.in_function_def = is_function
|
||||||
|
sem_data.return_type = return_type
|
||||||
|
|
||||||
|
temp_parent = sem_data.parent
|
||||||
|
sem_data.parent = node
|
||||||
|
|
||||||
|
for name, child in tree_print.get_childvars(node):
|
||||||
|
if child is not None:
|
||||||
|
semantic_check(child, sem_data)
|
||||||
|
|
||||||
|
sem_data.parent = temp_parent
|
||||||
|
|
||||||
|
if is_procedure or is_function:
|
||||||
|
sem_data.in_procedure_def = False
|
||||||
|
sem_data.in_function_def = False
|
||||||
|
sem_data.return_type = None
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command line: optional -d/--debug, plus exactly one of --who or -f/--file.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('-d', '--debug', action='store_true', help='debug?')
    mode = arg_parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    options = arg_parser.parse_args()

    if not options.who:
        # Parse, print the tree, then run the semantic checks on it.
        tree = syntax_check_file(options.file, options.debug)
        tree_print.treeprint(tree, 'unicode')

        semantic_check(tree, SemData())
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)
|
||||||
|
|
|
@ -0,0 +1,209 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Values to control the module's working
|
||||||
|
|
||||||
|
# How to recognize attributes in nodes by their names
|
||||||
|
|
||||||
|
child_prefix_default = "child_"
|
||||||
|
children_prefix_default = "children_"
|
||||||
|
value_attr = "value"
|
||||||
|
nodetype_attr = "nodetype"
|
||||||
|
lineno_attr = "lineno"
|
||||||
|
type_attr = "type"
|
||||||
|
|
||||||
|
# Finding and creating a list of all children nodes of a node, based on
|
||||||
|
# attribute names of a node
|
||||||
|
|
||||||
|
def get_childvars(node, child_prefix=child_prefix_default,
|
||||||
|
children_prefix=children_prefix_default):
|
||||||
|
'''Return all children nodes of a tree node
|
||||||
|
|
||||||
|
This function assumes that all attributes of a node beginning with
|
||||||
|
child_prefix refer to a child node, and attributes beginning with
|
||||||
|
children_prefix refer to a LIST of child nodes. The return value is a list
|
||||||
|
of pairs (tuples), where the first element of each pair is a "label"
|
||||||
|
for the node (the name of the attribute without the child/children prefix),
|
||||||
|
and the second element is the child node itself. For child lists, the label
|
||||||
|
also contains the number of the child, or EMPTY if the list is empty
|
||||||
|
(in which case None is used as the second element, as there is no child).'''
|
||||||
|
|
||||||
|
childvars = []
|
||||||
|
# Only search for attributes if we have an object
|
||||||
|
if hasattr(node, "__dict__"):
|
||||||
|
# Iterate though all attributes of the node object
|
||||||
|
for name,val in vars(node).items():
|
||||||
|
# An attribute containing one child node
|
||||||
|
if name.startswith(child_prefix):
|
||||||
|
label = name[len(child_prefix):]
|
||||||
|
childvars.append((label, val))
|
||||||
|
# An attribute containing a child list
|
||||||
|
elif name.startswith(children_prefix):
|
||||||
|
label = name[len(children_prefix):]
|
||||||
|
# Make sure contents is not None and is a list (or actually, can
|
||||||
|
# be iterated through
|
||||||
|
if val is None:
|
||||||
|
childvars.append((label+"[NONE stored instead of a list!!!]", None))
|
||||||
|
else:
|
||||||
|
if not hasattr(val, "__iter__"):
|
||||||
|
childvars.append((label+"[Not a list!!!]", None))
|
||||||
|
# An empty list/iterable (no nodes)
|
||||||
|
elif not val:
|
||||||
|
childvars.append((label+"[EMPTY]", None))
|
||||||
|
# A non-empty list/iterable
|
||||||
|
else:
|
||||||
|
childvars.extend([(label+"["+str(i)+"]", child) for (i, child) in enumerate(val)])
|
||||||
|
return childvars
|
||||||
|
|
||||||
|
|
||||||
|
# Printing the syntax tree (AST)
|
||||||
|
|
||||||
|
# Strings that ASCII and Unicode trees are made out of
|
||||||
|
|
||||||
|
vertical_uni = "\N{BOX DRAWINGS LIGHT VERTICAL}"
|
||||||
|
horizontal_uni = "\N{BOX DRAWINGS LIGHT HORIZONTAL}"
|
||||||
|
vertical_right_uni = "\N{BOX DRAWINGS LIGHT VERTICAL AND RIGHT}"
|
||||||
|
up_right_uni = "\N{BOX DRAWINGS LIGHT UP AND RIGHT}"
|
||||||
|
child_indent_uni = vertical_right_uni + horizontal_uni + horizontal_uni
|
||||||
|
last_child_indent_uni = up_right_uni + horizontal_uni + horizontal_uni
|
||||||
|
normal_indent_uni = vertical_uni + " "
|
||||||
|
last_normal_indent_uni = " "
|
||||||
|
|
||||||
|
vertical_asc = "|"
|
||||||
|
horizontal_asc = "-"
|
||||||
|
vertical_right_asc = "+"
|
||||||
|
up_right_asc = "+"
|
||||||
|
child_indent_asc = vertical_right_asc + horizontal_asc + horizontal_asc
|
||||||
|
last_child_indent_asc = up_right_asc + horizontal_asc + horizontal_asc
|
||||||
|
normal_indent_asc = vertical_asc + " "
|
||||||
|
last_normal_indent_asc = " "
|
||||||
|
|
||||||
|
# What to put to the beginning and end of dot files
|
||||||
|
|
||||||
|
dot_preamble='''digraph parsetree {
|
||||||
|
ratio=fill
|
||||||
|
node [shape="box"]
|
||||||
|
edge [style=bold]
|
||||||
|
ranksep=equally
|
||||||
|
nodesep=0.5
|
||||||
|
rankdir = TB
|
||||||
|
clusterrank = local'''
|
||||||
|
|
||||||
|
dot_postamble='}'
|
||||||
|
|
||||||
|
def dotnodeid(nodenum):
    '''Build the dot identifier of a node: "N" followed by its number'''
    return f"N{nodenum}"
|
||||||
|
|
||||||
|
def treeprint_indent(node, outtype="unicode", label="", first_indent="", indent=""):
    '''Print a subtree as indented ASCII/Unicode text.

    node = the root of the subtree
    outtype = "unicode" selects the Unicode indentation strings, anything
              else the ASCII ones
    label = the "role" of the subtree in its parent (from the attribute name)
    first_indent = what to print at the start of the node's own line
    indent = what to print at the start of the lines of its children'''

    # The label (if any) goes right after the indentation of the first line
    if label:
        first_indent += label + ": "

    # Missing nodes are shown as NONE and have nothing below them
    if not node:
        print(first_indent + "NONE")
        return

    # Node type if available, otherwise the whole node to help debugging
    if hasattr(node, nodetype_attr):
        line = first_indent + getattr(node, nodetype_attr)
    else:
        line = first_indent + "??? '" + str(node) + "' ???"
    # Optional extras: value in parentheses, then type, then line number
    if hasattr(node, value_attr):
        line += " (" + str(getattr(node, value_attr)) + ")"
    if hasattr(node, type_attr):
        line += " :" + str(getattr(node, type_attr))
    if hasattr(node, lineno_attr):
        line += " #" + str(getattr(node, lineno_attr))
    print(line)

    use_unicode = outtype == "unicode"
    children = get_childvars(node)
    last_position = len(children) - 1
    for position, (child_label, child) in enumerate(children):
        if position < last_position:
            # Not the last child, use normal indentation
            if use_unicode:
                head, rest = child_indent_uni, normal_indent_uni
            else:
                head, rest = child_indent_asc, normal_indent_asc
        else:
            # The last child, use indentation for that case
            if use_unicode:
                head, rest = last_child_indent_uni, last_normal_indent_uni
            else:
                head, rest = last_child_indent_asc, last_normal_indent_asc
        # Recursively print the child subtree, adding indentation
        treeprint_indent(child, outtype, child_label, indent+head, indent+rest)
|
||||||
|
|
||||||
|
def treeprint_dot(node, nodenum, nodecount):
    '''Print a subtree in dot format.

    node = the root of the subtree
    nodenum = number of this node (used to build its dot id)
    nodecount = a one-element list holding the largest id handed out so far;
                it is incremented for every child printed'''

    own_id = dotnodeid(nodenum)

    # A missing node is drawn as an ellipse labelled NONE
    if not node:
        print(own_id + ' [shape="ellipse", label="NONE"]')
        return

    # First label line: node type if available, otherwise the whole node
    if hasattr(node, nodetype_attr):
        title = getattr(node, nodetype_attr)
    else:
        title = "??? '" + str(node) + "' ???"

    # Second label line: value, type and line number, whichever exist
    details = ""
    if hasattr(node, value_attr):
        details += " (" + str(getattr(node, value_attr)) + ")"
    if hasattr(node, type_attr):
        details += " :" + str(getattr(node, type_attr))
    if hasattr(node, lineno_attr):
        details += " #" + str(getattr(node, lineno_attr))

    declaration = own_id + ' [label="' + title
    if details:
        declaration += "\n"+details
    declaration += '"]'
    print(declaration)

    for child_label, child in get_childvars(node):
        # Number the child by one more than the current maximum
        nodecount[0] += 1
        child_num = nodecount[0]
        # Print the child subtree first, then the labelled edge to it
        treeprint_dot(child, child_num, nodecount)
        print(own_id+"->"+dotnodeid(child_num)+ ' [label="'+child_label+'"]')
|
||||||
|
|
||||||
|
def treeprint(rootnode, outtype="unicode"):
    '''Print a whole tree, starting from its root.

    outtype selects the output format:
    "dot" prints the tree in dot format, wrapped in the dot preamble and
    postamble. Any other value prints an indented text tree: "unicode"
    (default) uses the Unicode indentation strings, everything else the
    ASCII ones.'''
    if outtype != "dot":
        treeprint_indent(rootnode, outtype)
        return

    print(dot_preamble)
    treeprint_dot(rootnode, 0, [0])
    print(dot_postamble)
|
Loading…
Reference in New Issue