301 lines
15 KiB
Python
301 lines
15 KiB
Python
|
#!/bin/env python3
|
||
|
|
||
|
import argparse
|
||
|
from copy import deepcopy
|
||
|
from datetime import date, timedelta
|
||
|
import tree_print
|
||
|
from build_ast import ASTnode, syntax_check_file
|
||
|
|
||
|
class SemData:
    """Mutable state shared by one semantic-analysis pass over an AST.

    Attributes:
        scope: The function/procedure definition node currently being
            checked, or ``None`` while checking global-scope code.
        root: The first node handed to ``semantic_check``.
        global_symbol_table: Name -> node for function and procedure
            definitions and for global variables.
        local_symbol_table: Name -> node for the formals and local
            variables of the definition currently being checked.
    """

    def __init__(self):
        # Nothing is known until the checker starts walking the tree.
        self.scope = self.root = None
        # Each instance gets its own, initially empty, pair of tables.
        self.global_symbol_table, self.local_symbol_table = {}, {}
|
||
|
|
||
|
def semantic_error(msg: str, node: ASTnode) -> None:
    """Report a semantic error for ``node`` and terminate the program.

    The message is printed to standard output in red, followed by the
    line number stored on ``node``.

    Args:
        msg: Human-readable description of the problem.
        node: The AST node the error refers to; only ``lineno`` is read.

    Raises:
        SystemExit: Always, with exit status 1.
    """
    red, reset = '\033[31m', '\033[m'
    print(f'{red}Semantic Error: {msg} at line {node.lineno}{reset}')
    raise SystemExit(1)
|
||
|
|
||
|
def print_todo(msg: str, node: ASTnode) -> None:
    """Report a not-yet-implemented feature for ``node`` and terminate.

    The message is printed to standard output in yellow, followed by the
    line number stored on ``node``.

    Args:
        msg: Description of what is still unimplemented.
        node: The AST node that triggered the notice; only ``lineno`` is read.

    Raises:
        SystemExit: Always, with exit status 2.
    """
    yellow, reset = '\033[33m', '\033[m'
    print(f'{yellow}TODO: {msg} at line {node.lineno}{reset}')
    raise SystemExit(2)
|
||
|
|
||
|
def semantic_check(node: ASTnode, sem_data: SemData) -> None | ASTnode:
|
||
|
if sem_data.root is None:
|
||
|
sem_data.root = node
|
||
|
match node.nodetype:
|
||
|
case 'program':
|
||
|
# Collect function and procedure definitions first,
|
||
|
# since they can be called before they are defined
|
||
|
for child in node.children_definitions:
|
||
|
if child.nodetype in ['function_definition', 'procedure_definition']:
|
||
|
if child.value in sem_data.global_symbol_table:
|
||
|
semantic_error(f'Redefinition of {child.nodetype.split("_")[0]} \'{child.value}\'', child)
|
||
|
sem_data.global_symbol_table[child.value] = child
|
||
|
|
||
|
# Then do the actual semantic checking
|
||
|
for child in node.children_definitions:
|
||
|
semantic_check(child, sem_data)
|
||
|
for child in node.children_statements:
|
||
|
if semantic_check(child, sem_data) is not None:
|
||
|
semantic_error(f'Expression return value is not handled', child)
|
||
|
|
||
|
return None
|
||
|
case 'variable_definition':
|
||
|
# Check if variable is already defined
|
||
|
symbol_table = sem_data.global_symbol_table
|
||
|
if sem_data.scope is not None:
|
||
|
symbol_table = sem_data.local_symbol_table
|
||
|
if node.value in symbol_table:
|
||
|
semantic_error(f'Redefinition of variable \'{node.value}\'', node)
|
||
|
|
||
|
# Check if expression is valid and store it in symbol table
|
||
|
variable = semantic_check(node.child_expression, sem_data)
|
||
|
if variable is None or variable.type not in ['int', 'string', 'date']:
|
||
|
semantic_error(f'Invalid variable type \'{variable.type if variable is not None else None}\'', node)
|
||
|
symbol_table[node.value] = variable
|
||
|
|
||
|
return None
|
||
|
case 'function_definition' | 'procedure_definition':
|
||
|
# Function and procedures are added to global symbol table
|
||
|
# as the first step, so they can be called before they are defined
|
||
|
assert node.value in sem_data.global_symbol_table
|
||
|
|
||
|
# Local symbols table should be empty while doing checking,
|
||
|
# since functions and procedures can only be defined in global scope
|
||
|
assert len(sem_data.local_symbol_table) == 0 and sem_data.scope is None
|
||
|
sem_data.scope = node
|
||
|
|
||
|
# Collect local arguments
|
||
|
for formal in node.children_formals:
|
||
|
if formal.value in sem_data.local_symbol_table:
|
||
|
semantic_error(f'Redefinition of variable \'{formal.value}\' in {node.nodetype.split("_")[0]} \'{node.value}\' arguments', node)
|
||
|
sem_data.local_symbol_table[formal.value] = formal
|
||
|
|
||
|
# Collect local variables
|
||
|
for variable_definition in node.children_variable_definitions:
|
||
|
semantic_check(variable_definition, sem_data)
|
||
|
|
||
|
# Check return type
|
||
|
if node.nodetype == 'function_definition':
|
||
|
expression = semantic_check(node.child_expression, sem_data)
|
||
|
if expression is None:
|
||
|
semantic_error(f'Function \'{node.value}\' must return a value', node)
|
||
|
if node.child_return_type == 'auto':
|
||
|
node.child_return_type = expression.type
|
||
|
if expression.type != node.child_return_type:
|
||
|
semantic_error(f'Function \'{node.value}\' return type is {node.child_return_type} but returns {expression.type}', node)
|
||
|
elif node.nodetype == 'procedure_definition':
|
||
|
returns = None
|
||
|
for statement in node.children_statements:
|
||
|
returns = None
|
||
|
value = semantic_check(statement, sem_data)
|
||
|
if value is None:
|
||
|
continue
|
||
|
if value.nodetype != 'return':
|
||
|
semantic_error(f'Expression return value is not handled', statement)
|
||
|
if node.child_return_type is None:
|
||
|
semantic_error(f'Procedure \'{node.value}\' does not have a return type', node)
|
||
|
if node.child_return_type == 'auto':
|
||
|
node.child_return_type = value.type
|
||
|
if value.type != node.child_return_type:
|
||
|
semantic_error(f'Procedure \'{node.value}\' return type is {node.child_return_type} but returns {value.type}', node)
|
||
|
returns = value.type
|
||
|
if returns is None and node.child_return_type is not None:
|
||
|
if node.child_return_type != 'void':
|
||
|
semantic_error(f'Procedure \'{node.value}\' must return a value when scope exits', node)
|
||
|
else:
|
||
|
assert False
|
||
|
|
||
|
node.type = node.child_return_type
|
||
|
|
||
|
sem_data.scope = None
|
||
|
sem_data.local_symbol_table = {}
|
||
|
|
||
|
return None
|
||
|
case 'return':
|
||
|
if sem_data.scope is None or sem_data.scope.nodetype != 'procedure_definition':
|
||
|
semantic_error(f'Keyword \'return\' can only appear in procefure_definition')
|
||
|
result = semantic_check(node.child_expression, sem_data)
|
||
|
if result is None:
|
||
|
semantic_error(f'Procedure \'{sem_data.scope.value}\' must return a value', node)
|
||
|
node.type = result.type
|
||
|
return node
|
||
|
case 'date_literal' | 'int_literal' | 'string_literal':
|
||
|
node.type = node.nodetype.split('_')[0]
|
||
|
return node
|
||
|
case 'assignment':
|
||
|
lhs = semantic_check(node.child_lhs, sem_data)
|
||
|
rhs = semantic_check(node.child_rhs, sem_data)
|
||
|
if lhs is None or rhs is None or lhs.type != rhs.type:
|
||
|
semantic_error(f'Invalid assignment of \'{rhs.type if rhs is not None else None}\' to \'{lhs.type if lhs is not None else None}\'', node)
|
||
|
return None
|
||
|
case 'binary_op':
|
||
|
lhs = semantic_check(node.child_lhs, sem_data)
|
||
|
rhs = semantic_check(node.child_rhs, sem_data)
|
||
|
if lhs is None or rhs is None:
|
||
|
semantic_error(f'Invalid operands \'{lhs.type if lhs is not None else None}\' and \'{rhs.type if rhs is not None else None}\' for binary operation {node.value}', node)
|
||
|
|
||
|
# Validate operands and result type
|
||
|
if node.value in ['*', '/']:
|
||
|
if lhs.type == 'int' and rhs.type == 'int':
|
||
|
node.type = 'int'
|
||
|
return node
|
||
|
elif node.value == '+':
|
||
|
if lhs.type == 'date' and rhs.type == 'int':
|
||
|
node.type = 'date'
|
||
|
return node
|
||
|
if lhs.type == 'int' and rhs.type == 'int':
|
||
|
node.type = 'int'
|
||
|
return node
|
||
|
elif node.value == '-':
|
||
|
if lhs.type == 'date' and rhs.type == 'int':
|
||
|
node.type = 'date'
|
||
|
return node
|
||
|
if lhs.type == 'date' and rhs.type == 'date':
|
||
|
node.type = 'int'
|
||
|
return node
|
||
|
if lhs.type == 'int' and rhs.type == 'int':
|
||
|
node.type = 'int'
|
||
|
return node
|
||
|
elif node.value in ['<', '>', '=']:
|
||
|
if lhs.type == rhs.type:
|
||
|
node.type = 'bool'
|
||
|
return node
|
||
|
|
||
|
semantic_error(f'Invalid operands \'{lhs.type}\' and \'{rhs.type}\' for operation {node.value}', node)
|
||
|
case 'identifier':
|
||
|
# Check if variable is defined
|
||
|
symbol = None
|
||
|
if node.value in sem_data.local_symbol_table:
|
||
|
symbol = sem_data.local_symbol_table[node.value]
|
||
|
if node.value in sem_data.global_symbol_table:
|
||
|
symbol = sem_data.global_symbol_table[node.value]
|
||
|
if symbol is not None:
|
||
|
node.type = symbol.type
|
||
|
return symbol
|
||
|
|
||
|
semantic_error(f'Variable \'{node.value}\' not defined', node)
|
||
|
case 'function_call' | 'procedure_call':
|
||
|
# Handle built in functions
|
||
|
if node.nodetype == 'function_call' and node.value == 'Today':
|
||
|
if len(node.children_arguments) != 0:
|
||
|
semantic_error(f'Builtin function \'Today\' takes no arguments', node)
|
||
|
node.type = 'date'
|
||
|
return node
|
||
|
|
||
|
# Check if function/procedure is defined
|
||
|
if node.value not in sem_data.global_symbol_table:
|
||
|
semantic_error(f'{node.nodetype.split("_")[0]} \'{node.value}\' not defined', node)
|
||
|
func = sem_data.global_symbol_table[node.value]
|
||
|
|
||
|
# Check if arguments match (count and types)
|
||
|
if len(node.children_arguments) != len(func.children_formals):
|
||
|
semantic_error(f'Argument count mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected {len(func.children_formals)} but got {len(node.children_arguments)}', node)
|
||
|
for formal, actual in zip(func.children_formals, node.children_arguments):
|
||
|
resolved = semantic_check(actual, sem_data)
|
||
|
if resolved is None or formal.type != resolved.type:
|
||
|
semantic_error(f'Argument type mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected \'{formal.type}\' but got \'{resolved.type if resolved is not None else None}\'', node)
|
||
|
|
||
|
# Set return type and return node if func has a return type
|
||
|
node.type = func.child_return_type
|
||
|
return node if node.type is not None else None
|
||
|
case 'do_unless':
|
||
|
# Validate condition
|
||
|
condition = semantic_check(node.child_condition, sem_data)
|
||
|
if condition is None or condition.type != 'bool':
|
||
|
semantic_error('Condition must be of type \'bool\'', node)
|
||
|
|
||
|
# Validate both branches
|
||
|
for statement in node.children_statements_true:
|
||
|
if semantic_check(statement, sem_data) is not None:
|
||
|
semantic_error(f'Expression return value is not handled', statement)
|
||
|
for statement in node.children_statements_false:
|
||
|
if semantic_check(statement, sem_data) is not None:
|
||
|
semantic_error(f'Expression return value is not handled', statement)
|
||
|
|
||
|
return None
|
||
|
case 'do_until':
|
||
|
# Validate condition
|
||
|
condition = semantic_check(node.child_condition, sem_data)
|
||
|
if condition is None or condition.type != 'bool':
|
||
|
semantic_error('Condition must be of type bool', node)
|
||
|
|
||
|
# Validate body
|
||
|
for statement in node.children_statements:
|
||
|
if semantic_check(statement, sem_data) is not None:
|
||
|
semantic_error(f'Expression return value is not handled', statement)
|
||
|
|
||
|
return None
|
||
|
case 'unless_expression':
|
||
|
# Validate condition
|
||
|
condition = semantic_check(node.child_condition, sem_data)
|
||
|
if condition is None or condition.type != 'bool':
|
||
|
semantic_error('Condition must be of type bool', node)
|
||
|
|
||
|
# Validate both branches
|
||
|
expression_true = semantic_check(node.child_expression_true, sem_data)
|
||
|
expression_false = semantic_check(node.child_expression_false, sem_data)
|
||
|
if expression_true is None or expression_false is None or expression_true.type != expression_false.type:
|
||
|
semantic_error(f'Branches must return the same type, got \'{expression_false.type}\' and \'{expression_true.type}\'', node)
|
||
|
|
||
|
node.type = expression_true.type
|
||
|
return node
|
||
|
case 'attribute_read' | 'attribute_write':
|
||
|
# Check if variable is defined
|
||
|
symbol = None
|
||
|
if node.child_identifier.value in sem_data.local_symbol_table:
|
||
|
symbol = sem_data.local_symbol_table[node.child_identifier.value]
|
||
|
elif node.child_identifier.value in sem_data.global_symbol_table:
|
||
|
symbol = sem_data.global_symbol_table[node.child_identifier.value]
|
||
|
else:
|
||
|
semantic_error(f'Variable \'{node.child_identifier.value}\' not defined', node.child_identifier)
|
||
|
|
||
|
# Validate attribute
|
||
|
assert node.child_attribute.nodetype == 'identifier'
|
||
|
if symbol.type != 'date':
|
||
|
semantic_error(f'Cannot access attribute of non-date variable', node.child_attribute)
|
||
|
valid_attributes = ['day', 'month', 'year']
|
||
|
if node.nodetype == 'attribute_read':
|
||
|
valid_attributes += ['weekday', 'weeknum']
|
||
|
if node.child_attribute.value not in valid_attributes:
|
||
|
semantic_error(f'Invalid attribute \'{node.child_attribute.value}\' for {node.nodetype.split("_")[0]}, allowed values {valid_attributes}', node.child_attribute)
|
||
|
|
||
|
node.type = 'date'
|
||
|
return node
|
||
|
case 'print':
|
||
|
for item in node.children_items:
|
||
|
value = semantic_check(item, sem_data)
|
||
|
if value is None or value.type not in ['int', 'string', 'date']:
|
||
|
semantic_error('Print argument can only be \'int\', \'date\' or \'string\'', node)
|
||
|
return None
|
||
|
case _:
|
||
|
print_todo(f'Semantic check type \'{node.nodetype}\'', node)
|
||
|
|
||
|
def execute_ast(node: ASTnode, sem_data: SemData) -> None | ASTnode:
|
||
|
match node.nodetype:
|
||
|
case _:
|
||
|
print_todo(f'Execute type \'{node.nodetype}\'', node)
|
||
|
|
||
|
if __name__ == '__main__':
    # Command line: exactly one of --who or --file must be given.
    cli = argparse.ArgumentParser()
    cli.add_argument('-d', '--debug', action='store_true', help='debug?')
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    options = cli.parse_args()

    if not options.who:
        # Parse the file, then check and run the resulting tree.
        ast = syntax_check_file(options.file, options.debug)
        # To inspect the parsed tree: tree_print.treeprint(ast, 'unicode')

        sem_data = SemData()
        semantic_check(ast, sem_data)
        execute_ast(ast, sem_data)
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)
|