#!/bin/env python3 import argparse from calendar import timegm from copy import deepcopy from datetime import date, timedelta import subprocess import tree_print from build_ast import ASTnode, syntax_check_file class SemData: def __init__(self): self.scope = None self.root = None self.callables = {} self.global_symbol_table = {} self.local_symbol_table = {} def semantic_error(msg: str, node: ASTnode) -> None: print(f'\033[31mSemantic Error: {msg} at line {node.lineno}\033[m') raise SystemExit(1) def print_todo(msg: str, node: ASTnode) -> None: print(f'\033[33mTODO: {msg} at line {node.lineno}\033[m') raise SystemExit(2) def semantic_check(node: ASTnode, sem_data: SemData) -> None | ASTnode: if sem_data.root is None: sem_data.root = node match node.nodetype: case 'program': # Collect function and procedure definitions first, # since they can be called before they are defined for child in node.children_definitions: if child.nodetype in ['function_definition', 'procedure_definition']: if child.value in sem_data.callables: semantic_error(f'Redefinition of {child.nodetype.split("_")[0]} \'{child.value}\'', child) sem_data.callables[child.value] = child # Then do the actual semantic checking for child in node.children_definitions: semantic_check(child, sem_data) for child in node.children_statements: if semantic_check(child, sem_data) is not None: semantic_error(f'Expression return value is not handled', child) return None case 'variable_definition': # Check if variable is already defined symbol_table = sem_data.global_symbol_table if sem_data.scope is not None: symbol_table = sem_data.local_symbol_table if node.value in symbol_table: semantic_error(f'Redefinition of variable \'{node.value}\'', node) # Check if expression is valid and store it in symbol table variable = semantic_check(node.child_expression, sem_data) if variable is None or variable.type not in ['int', 'string', 'date']: semantic_error(f'Invalid variable type \'{variable.type if variable is not None else None}\'', node) symbol_table[node.value] = variable return None case 'function_definition' | 'procedure_definition': # Function and procedures are added to global symbol table # as the first step, so they can be called before they are defined assert node.value in sem_data.callables # Local symbols table should be empty while doing checking, # since functions and procedures can only be defined in global scope assert len(sem_data.local_symbol_table) == 0 and sem_data.scope is None sem_data.scope = node # Collect local arguments for formal in node.children_formals: if formal.value in sem_data.local_symbol_table: semantic_error(f'Redefinition of variable \'{formal.value}\' in {node.nodetype.split("_")[0]} \'{node.value}\' arguments', node) sem_data.local_symbol_table[formal.value] = formal # Collect local variables for variable_definition in node.children_variable_definitions: semantic_check(variable_definition, sem_data) # Check return type if node.nodetype == 'function_definition': expression = semantic_check(node.child_expression, sem_data) if expression is None: semantic_error(f'Function \'{node.value}\' must return a value', node) if node.child_return_type == 'auto': node.child_return_type = expression.type if expression.type != node.child_return_type: semantic_error(f'Function \'{node.value}\' return type is {node.child_return_type} but returns {expression.type}', node) elif node.nodetype == 'procedure_definition': returns = None for statement in node.children_statements: returns = None value = semantic_check(statement, sem_data) if value is None: continue if value.nodetype != 'return': semantic_error(f'Expression return value is not handled', statement) if node.child_return_type is None: semantic_error(f'Procedure \'{node.value}\' does not have a return type', node) if node.child_return_type == 'auto': node.child_return_type = value.type if value.type != node.child_return_type: semantic_error(f'Procedure \'{node.value}\' return type is {node.child_return_type} but returns {value.type}', node) returns = value.type if returns is None and node.child_return_type is not None: if node.child_return_type != 'void': semantic_error(f'Procedure \'{node.value}\' must return a value when scope exits', node) else: assert False node.type = node.child_return_type sem_data.scope = None sem_data.local_symbol_table = {} return None case 'return': if sem_data.scope is None or sem_data.scope.nodetype != 'procedure_definition': semantic_error(f'Keyword \'return\' can only appear in procefure_definition') result = semantic_check(node.child_expression, sem_data) if result is None: semantic_error(f'Procedure \'{sem_data.scope.value}\' must return a value', node) node.type = result.type return node case 'date_literal' | 'int_literal' | 'string_literal': node.type = node.nodetype.split('_')[0] return node case 'assignment': lhs = semantic_check(node.child_lhs, sem_data) rhs = semantic_check(node.child_rhs, sem_data) if lhs is None or rhs is None or lhs.type != rhs.type: semantic_error(f'Invalid assignment of \'{rhs.type if rhs is not None else None}\' to \'{lhs.type if lhs is not None else None}\'', node) return None case 'binary_op': lhs = semantic_check(node.child_lhs, sem_data) rhs = semantic_check(node.child_rhs, sem_data) if lhs is None or rhs is None: semantic_error(f'Invalid operands \'{lhs.type if lhs is not None else None}\' and \'{rhs.type if rhs is not None else None}\' for binary operation {node.value}', node) # Validate operands and result type if node.value in ['*', '/']: if lhs.type == 'int' and rhs.type == 'int': node.type = 'int' return node elif node.value == '+': if lhs.type == 'date' and rhs.type == 'int': node.type = 'date' return node if lhs.type == 'int' and rhs.type == 'int': node.type = 'int' return node elif node.value == '-': if lhs.type == 'date' and rhs.type == 'int': node.type = 'date' return node if lhs.type == 'date' and rhs.type == 'date': node.type = 'int' return node if lhs.type == 'int' and rhs.type == 'int': node.type = 'int' return node elif node.value in ['<', '=']: if lhs.type == rhs.type: node.type = 'bool' return node semantic_error(f'Invalid operands \'{lhs.type}\' and \'{rhs.type}\' for operation {node.value}', node) case 'identifier': # Check if variable is defined symbol = None if node.value in sem_data.local_symbol_table: symbol = sem_data.local_symbol_table[node.value] if node.value in sem_data.global_symbol_table: symbol = sem_data.global_symbol_table[node.value] if symbol is not None: node.type = symbol.type return symbol semantic_error(f'Variable \'{node.value}\' not defined', node) case 'function_call' | 'procedure_call': # Handle built in functions if node.nodetype == 'function_call' and node.value == 'Today': if len(node.children_arguments) != 0: semantic_error(f'Builtin function \'Today\' takes no arguments', node) node.type = 'date' return node # Check if function/procedure is defined if node.value not in sem_data.callables: semantic_error(f'{node.nodetype.split("_")[0]} \'{node.value}\' not defined', node) func = sem_data.callables[node.value] # Check if arguments match (count and types) if len(node.children_arguments) != len(func.children_formals): semantic_error(f'Argument count mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected {len(func.children_formals)} but got {len(node.children_arguments)}', node) for formal, actual in zip(func.children_formals, node.children_arguments): resolved = semantic_check(actual, sem_data) if resolved is None or formal.type != resolved.type: semantic_error(f'Argument type mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected \'{formal.type}\' but got \'{resolved.type if resolved is not None else None}\'', node) # Set return type and return node if func has a return type node.type = func.child_return_type return node if node.type is not None else None case 'do_unless': # Validate condition condition = semantic_check(node.child_condition, sem_data) if condition is None or condition.type != 'bool': semantic_error('Condition must be of type \'bool\'', node) # Validate both branches for statement in node.children_statements_true: if semantic_check(statement, sem_data) is not None: semantic_error(f'Expression return value is not handled', statement) for statement in node.children_statements_false: if semantic_check(statement, sem_data) is not None: semantic_error(f'Expression return value is not handled', statement) return None case 'do_until': # Validate condition condition = semantic_check(node.child_condition, sem_data) if condition is None or condition.type != 'bool': semantic_error('Condition must be of type bool', node) # Validate body for statement in node.children_statements: if semantic_check(statement, sem_data) is not None: semantic_error(f'Expression return value is not handled', statement) return None case 'unless_expression': # Validate condition condition = semantic_check(node.child_condition, sem_data) if condition is None or condition.type != 'bool': semantic_error('Condition must be of type bool', node) # Validate both branches expression_true = semantic_check(node.child_expression_true, sem_data) expression_false = semantic_check(node.child_expression_false, sem_data) if expression_true is None or expression_false is None or expression_true.type != expression_false.type: semantic_error(f'Branches must return the same type, got \'{expression_false.type}\' and \'{expression_true.type}\'', node) node.type = expression_true.type return node case 'attribute_read' | 'attribute_write': # Check if variable is defined symbol = None if node.child_identifier.value in sem_data.local_symbol_table: symbol = sem_data.local_symbol_table[node.child_identifier.value] elif node.child_identifier.value in sem_data.global_symbol_table: symbol = sem_data.global_symbol_table[node.child_identifier.value] else: semantic_error(f'Variable \'{node.child_identifier.value}\' not defined', node.child_identifier) # Validate attribute assert node.child_attribute.nodetype == 'identifier' if symbol.type != 'date': semantic_error(f'Cannot access attribute of non-date variable', node.child_attribute) valid_attributes = ['day', 'month', 'year'] if node.nodetype == 'attribute_read': valid_attributes += ['weekday', 'weeknum'] if node.child_attribute.value not in valid_attributes: semantic_error(f'Invalid attribute \'{node.child_attribute.value}\' for {node.nodetype.split("_")[0]}, allowed values {valid_attributes}', node.child_attribute) node.type = 'int' return node case 'print': for item in node.children_items: value = semantic_check(item, sem_data) if value is None or value.type not in ['int', 'string', 'date']: semantic_error('Print argument can only be \'int\', \'date\' or \'string\'', node) return None case _: print_todo(f'Semantic check type \'{node.nodetype}\'', node) class Instruction: def __init__(self, opcode: str, operands: list[str] = []): self.opcode = opcode self.operands = operands def __str__(self): return f'{self.opcode} {', '.join(self.operands)}' class CompileData: def __init__(self, sem_data: SemData): self.sem_data = sem_data self.date_buffer_size = 128 self.label_counter = 0 self.string_literals: list[str] = [] self.callables: dict[str, list[Instruction]] = {} self.scope: ASTnode = None self.code: list[Instruction] = [] self.add_builtin_functions() def get_label(self) -> str: self.label_counter += 1 return f'.L{self.label_counter - 1}' def insert_label(self, label) -> None: self.code.append(Instruction('