956 lines
44 KiB
Python
956 lines
44 KiB
Python
#!/bin/env python3
|
|
|
|
import argparse
|
|
from calendar import timegm
|
|
from copy import deepcopy
|
|
from datetime import date, timedelta
|
|
import subprocess
|
|
import tree_print
|
|
from build_ast import ASTnode, syntax_check_file
|
|
|
|
class SemData:
    """Mutable state shared by every step of the semantic analysis."""

    def __init__(self):
        # scope: definition node currently being analysed (None while in
        # global scope); root: the first node handed to semantic_check().
        self.scope = self.root = None
        # callables maps function/procedure names to their definition nodes;
        # the symbol tables map variable names to typed nodes.
        self.callables, self.global_symbol_table, self.local_symbol_table = {}, {}, {}
|
|
|
|
def semantic_error(msg: str, node: ASTnode) -> None:
    """Report a semantic error located at *node* and abort.

    Prints *msg* together with ``node.lineno`` in red (ANSI colour 31) and
    then raises ``SystemExit(1)``; this function never returns normally.
    """
    red, reset = '\033[31m', '\033[m'
    print(f'{red}Semantic Error: {msg} at line {node.lineno}{reset}')
    raise SystemExit(1)
|
|
|
|
def print_todo(msg: str, node: ASTnode) -> None:
    """Report a not-yet-implemented feature hit at *node* and abort.

    Prints *msg* together with ``node.lineno`` in yellow (ANSI colour 33)
    and then raises ``SystemExit(2)``; this function never returns normally.
    """
    yellow, reset = '\033[33m', '\033[m'
    print(f'{yellow}TODO: {msg} at line {node.lineno}{reset}')
    raise SystemExit(2)
|
|
|
|
def semantic_check(node: ASTnode, sem_data: SemData) -> None | ASTnode:
|
|
if sem_data.root is None:
|
|
sem_data.root = node
|
|
match node.nodetype:
|
|
case 'program':
|
|
# Collect function and procedure definitions first,
|
|
# since they can be called before they are defined
|
|
for child in node.children_definitions:
|
|
if child.nodetype in ['function_definition', 'procedure_definition']:
|
|
if child.value in sem_data.callables:
|
|
semantic_error(f'Redefinition of {child.nodetype.split("_")[0]} \'{child.value}\'', child)
|
|
sem_data.callables[child.value] = child
|
|
|
|
# Then do the actual semantic checking
|
|
for child in node.children_definitions:
|
|
semantic_check(child, sem_data)
|
|
for child in node.children_statements:
|
|
if semantic_check(child, sem_data) is not None:
|
|
semantic_error(f'Expression return value is not handled', child)
|
|
|
|
return None
|
|
case 'variable_definition':
|
|
# Check if variable is already defined
|
|
symbol_table = sem_data.global_symbol_table
|
|
if sem_data.scope is not None:
|
|
symbol_table = sem_data.local_symbol_table
|
|
if node.value in symbol_table:
|
|
semantic_error(f'Redefinition of variable \'{node.value}\'', node)
|
|
|
|
# Check if expression is valid and store it in symbol table
|
|
variable = semantic_check(node.child_expression, sem_data)
|
|
if variable is None or variable.type not in ['int', 'string', 'date']:
|
|
semantic_error(f'Invalid variable type \'{variable.type if variable is not None else None}\'', node)
|
|
symbol_table[node.value] = variable
|
|
|
|
return None
|
|
case 'function_definition' | 'procedure_definition':
|
|
# Function and procedures are added to global symbol table
|
|
# as the first step, so they can be called before they are defined
|
|
assert node.value in sem_data.callables
|
|
|
|
# Local symbols table should be empty while doing checking,
|
|
# since functions and procedures can only be defined in global scope
|
|
assert len(sem_data.local_symbol_table) == 0 and sem_data.scope is None
|
|
sem_data.scope = node
|
|
|
|
# Collect local arguments
|
|
for formal in node.children_formals:
|
|
if formal.value in sem_data.local_symbol_table:
|
|
semantic_error(f'Redefinition of variable \'{formal.value}\' in {node.nodetype.split("_")[0]} \'{node.value}\' arguments', node)
|
|
sem_data.local_symbol_table[formal.value] = formal
|
|
|
|
# Collect local variables
|
|
for variable_definition in node.children_variable_definitions:
|
|
semantic_check(variable_definition, sem_data)
|
|
|
|
# Check return type
|
|
if node.nodetype == 'function_definition':
|
|
expression = semantic_check(node.child_expression, sem_data)
|
|
if expression is None:
|
|
semantic_error(f'Function \'{node.value}\' must return a value', node)
|
|
if node.child_return_type == 'auto':
|
|
node.child_return_type = expression.type
|
|
if expression.type != node.child_return_type:
|
|
semantic_error(f'Function \'{node.value}\' return type is {node.child_return_type} but returns {expression.type}', node)
|
|
elif node.nodetype == 'procedure_definition':
|
|
returns = None
|
|
for statement in node.children_statements:
|
|
returns = None
|
|
value = semantic_check(statement, sem_data)
|
|
if value is None:
|
|
continue
|
|
if value.nodetype != 'return':
|
|
semantic_error(f'Expression return value is not handled', statement)
|
|
if node.child_return_type is None:
|
|
semantic_error(f'Procedure \'{node.value}\' does not have a return type', node)
|
|
if node.child_return_type == 'auto':
|
|
node.child_return_type = value.type
|
|
if value.type != node.child_return_type:
|
|
semantic_error(f'Procedure \'{node.value}\' return type is {node.child_return_type} but returns {value.type}', node)
|
|
returns = value.type
|
|
if returns is None and node.child_return_type is not None:
|
|
if node.child_return_type != 'void':
|
|
semantic_error(f'Procedure \'{node.value}\' must return a value when scope exits', node)
|
|
else:
|
|
assert False
|
|
|
|
node.type = node.child_return_type
|
|
|
|
sem_data.scope = None
|
|
sem_data.local_symbol_table = {}
|
|
|
|
return None
|
|
case 'return':
|
|
if sem_data.scope is None or sem_data.scope.nodetype != 'procedure_definition':
|
|
semantic_error(f'Keyword \'return\' can only appear in procefure_definition')
|
|
result = semantic_check(node.child_expression, sem_data)
|
|
if result is None:
|
|
semantic_error(f'Procedure \'{sem_data.scope.value}\' must return a value', node)
|
|
node.type = result.type
|
|
return node
|
|
case 'date_literal' | 'int_literal' | 'string_literal':
|
|
node.type = node.nodetype.split('_')[0]
|
|
return node
|
|
case 'assignment':
|
|
lhs = semantic_check(node.child_lhs, sem_data)
|
|
rhs = semantic_check(node.child_rhs, sem_data)
|
|
if lhs is None or rhs is None or lhs.type != rhs.type:
|
|
semantic_error(f'Invalid assignment of \'{rhs.type if rhs is not None else None}\' to \'{lhs.type if lhs is not None else None}\'', node)
|
|
return None
|
|
case 'binary_op':
|
|
lhs = semantic_check(node.child_lhs, sem_data)
|
|
rhs = semantic_check(node.child_rhs, sem_data)
|
|
if lhs is None or rhs is None:
|
|
semantic_error(f'Invalid operands \'{lhs.type if lhs is not None else None}\' and \'{rhs.type if rhs is not None else None}\' for binary operation {node.value}', node)
|
|
|
|
# Validate operands and result type
|
|
if node.value in ['*', '/']:
|
|
if lhs.type == 'int' and rhs.type == 'int':
|
|
node.type = 'int'
|
|
return node
|
|
elif node.value == '+':
|
|
if lhs.type == 'date' and rhs.type == 'int':
|
|
node.type = 'date'
|
|
return node
|
|
if lhs.type == 'int' and rhs.type == 'int':
|
|
node.type = 'int'
|
|
return node
|
|
elif node.value == '-':
|
|
if lhs.type == 'date' and rhs.type == 'int':
|
|
node.type = 'date'
|
|
return node
|
|
if lhs.type == 'date' and rhs.type == 'date':
|
|
node.type = 'int'
|
|
return node
|
|
if lhs.type == 'int' and rhs.type == 'int':
|
|
node.type = 'int'
|
|
return node
|
|
elif node.value in ['<', '=']:
|
|
if lhs.type == rhs.type:
|
|
node.type = 'bool'
|
|
return node
|
|
|
|
semantic_error(f'Invalid operands \'{lhs.type}\' and \'{rhs.type}\' for operation {node.value}', node)
|
|
case 'identifier':
|
|
# Check if variable is defined
|
|
symbol = None
|
|
if node.value in sem_data.local_symbol_table:
|
|
symbol = sem_data.local_symbol_table[node.value]
|
|
if node.value in sem_data.global_symbol_table:
|
|
symbol = sem_data.global_symbol_table[node.value]
|
|
if symbol is not None:
|
|
node.type = symbol.type
|
|
return symbol
|
|
|
|
semantic_error(f'Variable \'{node.value}\' not defined', node)
|
|
case 'function_call' | 'procedure_call':
|
|
# Handle built in functions
|
|
if node.nodetype == 'function_call' and node.value == 'Today':
|
|
if len(node.children_arguments) != 0:
|
|
semantic_error(f'Builtin function \'Today\' takes no arguments', node)
|
|
node.type = 'date'
|
|
return node
|
|
|
|
# Check if function/procedure is defined
|
|
if node.value not in sem_data.callables:
|
|
semantic_error(f'{node.nodetype.split("_")[0]} \'{node.value}\' not defined', node)
|
|
func = sem_data.callables[node.value]
|
|
|
|
# Check if arguments match (count and types)
|
|
if len(node.children_arguments) != len(func.children_formals):
|
|
semantic_error(f'Argument count mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected {len(func.children_formals)} but got {len(node.children_arguments)}', node)
|
|
for formal, actual in zip(func.children_formals, node.children_arguments):
|
|
resolved = semantic_check(actual, sem_data)
|
|
if resolved is None or formal.type != resolved.type:
|
|
semantic_error(f'Argument type mismatch for {node.nodetype.split("_")[0]} \'{node.value}\', expected \'{formal.type}\' but got \'{resolved.type if resolved is not None else None}\'', node)
|
|
|
|
# Set return type and return node if func has a return type
|
|
node.type = func.child_return_type
|
|
return node if node.type is not None else None
|
|
case 'do_unless':
|
|
# Validate condition
|
|
condition = semantic_check(node.child_condition, sem_data)
|
|
if condition is None or condition.type != 'bool':
|
|
semantic_error('Condition must be of type \'bool\'', node)
|
|
|
|
# Validate both branches
|
|
for statement in node.children_statements_true:
|
|
if semantic_check(statement, sem_data) is not None:
|
|
semantic_error(f'Expression return value is not handled', statement)
|
|
for statement in node.children_statements_false:
|
|
if semantic_check(statement, sem_data) is not None:
|
|
semantic_error(f'Expression return value is not handled', statement)
|
|
|
|
return None
|
|
case 'do_until':
|
|
# Validate condition
|
|
condition = semantic_check(node.child_condition, sem_data)
|
|
if condition is None or condition.type != 'bool':
|
|
semantic_error('Condition must be of type bool', node)
|
|
|
|
# Validate body
|
|
for statement in node.children_statements:
|
|
if semantic_check(statement, sem_data) is not None:
|
|
semantic_error(f'Expression return value is not handled', statement)
|
|
|
|
return None
|
|
case 'unless_expression':
|
|
# Validate condition
|
|
condition = semantic_check(node.child_condition, sem_data)
|
|
if condition is None or condition.type != 'bool':
|
|
semantic_error('Condition must be of type bool', node)
|
|
|
|
# Validate both branches
|
|
expression_true = semantic_check(node.child_expression_true, sem_data)
|
|
expression_false = semantic_check(node.child_expression_false, sem_data)
|
|
if expression_true is None or expression_false is None or expression_true.type != expression_false.type:
|
|
semantic_error(f'Branches must return the same type, got \'{expression_false.type}\' and \'{expression_true.type}\'', node)
|
|
|
|
node.type = expression_true.type
|
|
return node
|
|
case 'attribute_read' | 'attribute_write':
|
|
# Check if variable is defined
|
|
symbol = None
|
|
if node.child_identifier.value in sem_data.local_symbol_table:
|
|
symbol = sem_data.local_symbol_table[node.child_identifier.value]
|
|
elif node.child_identifier.value in sem_data.global_symbol_table:
|
|
symbol = sem_data.global_symbol_table[node.child_identifier.value]
|
|
else:
|
|
semantic_error(f'Variable \'{node.child_identifier.value}\' not defined', node.child_identifier)
|
|
|
|
# Validate attribute
|
|
assert node.child_attribute.nodetype == 'identifier'
|
|
if symbol.type != 'date':
|
|
semantic_error(f'Cannot access attribute of non-date variable', node.child_attribute)
|
|
valid_attributes = ['day', 'month', 'year']
|
|
if node.nodetype == 'attribute_read':
|
|
valid_attributes += ['weekday', 'weeknum']
|
|
if node.child_attribute.value not in valid_attributes:
|
|
semantic_error(f'Invalid attribute \'{node.child_attribute.value}\' for {node.nodetype.split("_")[0]}, allowed values {valid_attributes}', node.child_attribute)
|
|
|
|
node.type = 'int'
|
|
return node
|
|
case 'print':
|
|
for item in node.children_items:
|
|
value = semantic_check(item, sem_data)
|
|
if value is None or value.type not in ['int', 'string', 'date']:
|
|
semantic_error('Print argument can only be \'int\', \'date\' or \'string\'', node)
|
|
return None
|
|
case _:
|
|
print_todo(f'Semantic check type \'{node.nodetype}\'', node)
|
|
|
|
class Instruction:
|
|
def __init__(self, opcode: str, operands: list[str] = []):
|
|
self.opcode = opcode
|
|
self.operands = operands
|
|
|
|
def __str__(self):
|
|
return f'{self.opcode} {', '.join(self.operands)}'
|
|
|
|
class CompileData:
    """Code-generation state and the final assembly writer.

    Holds the instruction list currently being emitted (``code``), the
    finished instruction lists per callable (``callables``), collected string
    literals and a label counter. ``sem_data`` is the result of the semantic
    analysis and supplies the global symbol table and callable definitions.
    """

    _ADD_SUB = ('addq', 'subq')

    def __init__(self, sem_data: SemData):
        self.sem_data = sem_data
        self.date_buffer_size = 128
        self.label_counter = 0
        self.string_literals: list[str] = []
        self.callables: dict[str, list[Instruction]] = {}
        # Definition node being compiled; None while emitting global code.
        self.scope: ASTnode = None
        self.code: list[Instruction] = []
        self.add_builtin_functions()

    def get_label(self) -> str:
        """Return a fresh local label (``.L0``, ``.L1``, ...)."""
        label = f'.L{self.label_counter}'
        self.label_counter += 1
        return label

    def insert_label(self, label) -> None:
        """Append a label marker for *label* to the current code."""
        self.code.append(Instruction('<label>', [label]))

    def add_string_literal(self, value: str) -> str:
        """Intern *value* and return its data label (``S<index>``)."""
        if value not in self.string_literals:
            self.string_literals.append(value)
        return f'S{self.string_literals.index(value)}'

    def symbol_address(self, symbol: str) -> str:
        """Return the assembly operand that addresses variable *symbol*.

        Inside a callable, formals are looked up first, then local variables;
        anything else must be a global.

        Raises:
            AssertionError: if *symbol* is not known anywhere.
        """
        if self.scope is not None:
            # Arguments are stored by the caller just below the return
            # address, i.e. above the saved %rbp: 16(%rbp), 24(%rbp), ...
            for index, formal in enumerate(self.scope.children_formals):
                if formal.value == symbol:
                    return f'{8 * index + 16}(%rbp)'
            # Locals live below %rbp: -8(%rbp), -16(%rbp), ...
            # NOTE(review): get_full_code() pushes the saved %r8-%r11 right
            # after setting up %rbp, which appears to occupy these same
            # slots - confirm and coordinate the two layouts.
            for index, variable in enumerate(self.scope.children_variable_definitions):
                if variable.value == symbol:
                    return f'-{8 * index + 8}(%rbp)'
        if symbol in self.sem_data.global_symbol_table:
            # Globals are 8-byte slots in definition order.
            offset = 8 * list(self.sem_data.global_symbol_table).index(symbol)
            return f'(.globals + {offset})'
        # Explicit raise so the check survives ``python -O``.
        raise AssertionError(f'Unknown symbol {symbol!r}')

    # ------------------------------------------------------------------
    # Peephole optimizer
    # ------------------------------------------------------------------

    def optimize_assembly(self) -> None:
        """Run peephole optimizations in place on every user callable.

        Built-in routines (``__builtin_*``) are left untouched. The passes
        run in a fixed order; as soon as one of them changes something the
        sequence restarts from the first pass, until nothing changes.
        """
        passes = (
            self._drop_self_moves,
            self._fuse_move_push,
            self._forward_moves_through_rax,
            self._normalize_negative_immediates,
            self._merge_add_sub,
            self._fold_move_add_sub,
            self._use_inc_dec,
            self._zero_with_xor,
        )
        for name, instructions in self.callables.items():
            if name.startswith('__builtin_'):
                continue
            changed = True
            while changed:
                # any() stops at the first pass that reports a change.
                changed = any(run(instructions) for run in passes)

    @staticmethod
    def _immediate_value(operand: str) -> int | None:
        """Return the value of a ``$<integer>`` operand, else None.

        Registers, memory operands and symbolic immediates (``$S0``,
        ``$.date_buffer``) all yield None.
        """
        if not operand.startswith('$'):
            return None
        try:
            return int(operand[1:])
        except ValueError:
            return None

    @staticmethod
    def _fits_imm32(value: int) -> bool:
        """Tell whether *value* fits a sign-extended 32-bit immediate."""
        return -0x80000000 <= value <= 0x7FFFFFFF

    def _can_move_directly(self, source: str, target: str) -> bool:
        """Tell whether ``movq source, target`` is encodable as one movq."""
        if target.startswith('%') or source.startswith('%'):
            return True
        if not source.startswith('$'):
            return False  # memory-to-memory moves do not exist
        # Immediate to memory: only sign-extended 32-bit values; symbolic
        # immediates are skipped because their value is unknown here.
        value = self._immediate_value(source)
        return value is not None and self._fits_imm32(value)

    def _drop_self_moves(self, instructions) -> bool:
        """Remove ``movq X, X``."""
        kept = [ins for ins in instructions
                if not (ins.opcode == 'movq' and ins.operands[0] == ins.operands[1])]
        changed = len(kept) != len(instructions)
        instructions[:] = kept
        return changed

    def _fuse_move_push(self, instructions) -> bool:
        """Turn ``movq X, R; pushq R`` into ``pushq X``."""
        changed = False
        index = 0
        while index < len(instructions) - 1:
            move, push = instructions[index], instructions[index + 1]
            fusable = (move.opcode == 'movq' and push.opcode == 'pushq'
                       and move.operands[1] == push.operands[0])
            if fusable and move.operands[0].startswith('$'):
                # pushq only takes sign-extended 32-bit immediates.
                value = self._immediate_value(move.operands[0])
                fusable = value is not None and self._fits_imm32(value)
            if not fusable:
                index += 1
                continue
            instructions[index:index + 2] = [Instruction('pushq', [move.operands[0]])]
            # Re-examine the previous pair, but never wrap to the list end.
            index = max(index - 1, 0)
            changed = True
        return changed

    def _forward_moves_through_rax(self, instructions) -> bool:
        """Turn ``movq X, %rax; movq %rax, Y`` into ``movq X, Y``."""
        changed = False
        index = 0
        while index < len(instructions) - 1:
            first, second = instructions[index], instructions[index + 1]
            fusable = (first.opcode == 'movq' and second.opcode == 'movq'
                       and first.operands[1] == '%rax' and second.operands[0] == '%rax'
                       and self._can_move_directly(first.operands[0], second.operands[1]))
            if not fusable:
                index += 1
                continue
            instructions[index:index + 2] = [
                Instruction('movq', [first.operands[0], second.operands[1]])]
            index = max(index - 1, 0)
            changed = True
        return changed

    def _normalize_negative_immediates(self, instructions) -> bool:
        """Turn ``subq $-N, X`` into ``addq $N, X`` (and vice versa).

        Not an optimization by itself; it gives the following passes a
        single form to work with.
        """
        changed = False
        for index, ins in enumerate(instructions):
            if ins.opcode not in self._ADD_SUB:
                continue
            value = self._immediate_value(ins.operands[0])
            if value is None or value >= 0 or not self._fits_imm32(-value):
                continue
            flipped = 'subq' if ins.opcode == 'addq' else 'addq'
            instructions[index] = Instruction(flipped, [f'${-value}', ins.operands[1]])
            changed = True
        return changed

    def _merge_add_sub(self, instructions) -> bool:
        """Merge adjacent immediate addq/subq on the same destination."""
        changed = False
        index = 0
        while index < len(instructions) - 1:
            first, second = instructions[index], instructions[index + 1]
            total = None
            if (first.opcode in self._ADD_SUB and second.opcode in self._ADD_SUB
                    and first.operands[1] == second.operands[1]):
                lhs = self._immediate_value(first.operands[0])
                rhs = self._immediate_value(second.operands[0])
                if lhs is not None and rhs is not None:
                    if first.opcode == 'subq':
                        lhs = -lhs
                    if second.opcode == 'subq':
                        rhs = -rhs
                    total = lhs + rhs
            if total is None or abs(total) > 0x7FFFFFFF:
                index += 1
                continue
            opcode = 'addq' if total >= 0 else 'subq'
            instructions[index:index + 2] = [
                Instruction(opcode, [f'${abs(total)}', first.operands[1]])]
            index = max(index - 1, 0)
            changed = True
        return changed

    def _fold_move_add_sub(self, instructions) -> bool:
        """Fold ``movq $A, X; addq $B, X`` into ``movq $(A+B), X``."""
        changed = False
        index = 0
        while index < len(instructions) - 1:
            move, arith = instructions[index], instructions[index + 1]
            folded = None
            if (move.opcode == 'movq' and arith.opcode in self._ADD_SUB
                    and move.operands[1] == arith.operands[1]):
                base = self._immediate_value(move.operands[0])
                delta = self._immediate_value(arith.operands[0])
                if base is not None and delta is not None:
                    folded = base - delta if arith.opcode == 'subq' else base + delta
                    # A memory destination only accepts 32-bit immediates.
                    if not move.operands[1].startswith('%') and not self._fits_imm32(folded):
                        folded = None
            if folded is None:
                index += 1
                continue
            instructions[index:index + 2] = [
                Instruction('movq', [f'${folded}', move.operands[1]])]
            index = max(index - 1, 0)
            changed = True
        return changed

    def _use_inc_dec(self, instructions) -> bool:
        """Turn ``addq $1, X`` / ``subq $1, X`` into ``incq X`` / ``decq X``."""
        changed = False
        for index, ins in enumerate(instructions):
            if ins.opcode in self._ADD_SUB and ins.operands[0] == '$1':
                opcode = 'incq' if ins.opcode == 'addq' else 'decq'
                instructions[index] = Instruction(opcode, [ins.operands[1]])
                changed = True
        return changed

    def _zero_with_xor(self, instructions) -> bool:
        """Turn ``movq $0, %reg`` into ``xorq %reg, %reg``."""
        changed = False
        for index, ins in enumerate(instructions):
            if (ins.opcode == 'movq' and ins.operands[0] == '$0'
                    and ins.operands[1].startswith('%')):
                instructions[index] = Instruction('xorq', [ins.operands[1], ins.operands[1]])
                changed = True
        return changed

    # ------------------------------------------------------------------
    # Built-in runtime routines
    # ------------------------------------------------------------------

    def add_builtin_functions(self) -> None:
        """Register the hand-written ``__builtin_*`` routines in callables."""
        # __builtin_today: time(NULL) rounded down to a multiple of 86400.
        self.callables['__builtin_today'] = [
            Instruction('xorq', ['%rdi', '%rdi']),
            Instruction('call', ['time']),
            Instruction('movq', ['%rax', '%rdi']),
            Instruction('movq', ['$86400', '%rcx']),
            Instruction('xorq', ['%rdx', '%rdx']),
            Instruction('divq', ['%rcx']),      # remainder ends up in %rdx
            Instruction('movq', ['%rdi', '%rax']),
            Instruction('subq', ['%rdx', '%rax']),
        ]

        # __builtin_print_date: localtime() + strftime(.date_format) into
        # .date_buffer, then printf(.str_format, .date_buffer). The stack
        # space taken here is released by the ``leave`` in the epilogue.
        self.callables['__builtin_print_date'] = [
            Instruction('subq', ['$16', '%rsp']),
            Instruction('movq', ['%rdi', '0(%rsp)']),
            Instruction('leaq', ['0(%rsp)', '%rdi']),
            Instruction('call', ['localtime']),
            Instruction('movq', ['$.date_buffer', '%rdi']),
            Instruction('movq', [f'${self.date_buffer_size}', '%rsi']),
            Instruction('movq', ['$.date_format', '%rdx']),
            Instruction('movq', ['%rax', '%rcx']),
            Instruction('call', ['strftime']),
            Instruction('movq', ['$.str_format', '%rdi']),
            Instruction('movq', ['$.date_buffer', '%rsi']),
            Instruction('call', ['printf']),
        ]

        # __builtin_get_day_attr(date in %rdi, strftime format in %rsi):
        # formats the date with the given format and converts it with atoi.
        self.callables['__builtin_get_day_attr'] = [
            Instruction('subq', ['$16', '%rsp']),
            Instruction('movq', ['%rdi', '0(%rsp)']),
            Instruction('movq', ['%rsi', '8(%rsp)']),
            Instruction('leaq', ['0(%rsp)', '%rdi']),
            Instruction('call', ['localtime']),
            Instruction('movq', ['$.date_buffer', '%rdi']),
            Instruction('movq', [f'${self.date_buffer_size}', '%rsi']),
            Instruction('movq', ['8(%rsp)', '%rdx']),
            Instruction('movq', ['%rax', '%rcx']),
            Instruction('call', ['strftime']),
            Instruction('movq', ['$.date_buffer', '%rdi']),
            Instruction('call', ['atoi']),
        ]

    # ------------------------------------------------------------------
    # Assembly output
    # ------------------------------------------------------------------

    def get_full_code(self) -> str:
        """Return the complete assembly source (data, bss and text sections)."""
        parts = [
            # Data section: format strings followed by the string literals
            '.section .data\n',
            '.int_format: .asciz "%lld"\n',
            '.str_format: .asciz "%s"\n',
            '.date_format: .asciz "%Y-%m-%d"\n',
            '.day_format: .asciz "%d"\n',
            '.month_format: .asciz "%m"\n',
            '.year_format: .asciz "%Y"\n',
            '.weekday_format: .asciz "%u"\n',
            '.weeknum_format: .asciz "%W"\n',
        ]
        parts.extend(f'S{index}: .asciz "{string}"\n'
                     for index, string in enumerate(self.string_literals))
        parts.append('\n')

        # BSS section: date scratch buffer and one 8-byte slot per global
        parts.append('.section .bss\n')
        parts.append('.date_buffer:\n')
        parts.append(f' .skip {self.date_buffer_size}\n')
        global_count = len(self.sem_data.global_symbol_table)
        if global_count != 0:
            parts.append('.globals:\n')
            parts.append(f' .skip {global_count * 8}\n')
        parts.append('\n')

        # Text section
        parts.append('.section .text\n')
        parts.append('\n')

        scratch_registers = ['%r8', '%r9', '%r10', '%r11']
        for name, code in self.callables.items():
            if name == 'main':
                parts.append('.global main\n')
                saved_registers = []
            else:
                # Preserve every scratch register the body mentions, in a
                # fixed order so the output is reproducible.
                saved_registers = [
                    reg for reg in scratch_registers
                    if any(reg in instruction.operands for instruction in code)]

            parts.append(name + ':\n')
            needs_stack_frame = self._needs_stack_frame(name, code)
            if needs_stack_frame:
                parts.append(' pushq %rbp\n')
                parts.append(' movq %rsp, %rbp\n')
            parts.extend(f' pushq {reg}\n' for reg in saved_registers)
            # An odd number of pushes is padded with 8 more bytes.
            if len(saved_registers) % 2 != 0:
                parts.append(' subq $8, %rsp\n')

            for instruction in code:
                if instruction.opcode == '<label>':
                    parts.append(f'{instruction.operands[0]}:\n')
                else:
                    parts.append(f' {instruction}\n')

            # NOTE(review): a ``leave``/``ret`` emitted inside the body (the
            # 'return' statement) skips the register restore below.
            if len(saved_registers) % 2 != 0:
                parts.append(' addq $8, %rsp\n')
            parts.extend(f' popq {reg}\n' for reg in reversed(saved_registers))
            if needs_stack_frame:
                parts.append(' leave\n')
            parts.append(' ret\n')
            parts.append('\n')

        return ''.join(parts)

    def _needs_stack_frame(self, name: str, code) -> bool:
        """Decide whether callable *name* must set up an %rbp frame.

        A frame is required for built-ins, for callables with formals or
        local variables (both are addressed relative to %rbp) and for bodies
        that call other routines or contain their own ``leave``.
        """
        if name.startswith('__builtin'):
            return True
        if name in self.sem_data.callables:
            definition = self.sem_data.callables[name]
            if len(definition.children_formals) > 0:
                return True
            if len(definition.children_variable_definitions) > 0:
                return True
        return any(instruction.opcode in ('call', 'leave') for instruction in code)
|
|
|
|
def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
|
|
match node.nodetype:
|
|
case 'program':
|
|
# Compile function and procedure definitions
|
|
for definition in node.children_definitions:
|
|
if definition.nodetype not in ['function_definition', 'procedure_definition']:
|
|
continue
|
|
assert len(compile_data.code) == 0
|
|
assert compile_data.scope is None
|
|
compile_data.scope = definition
|
|
|
|
# initialize local variables
|
|
stack_size = 8 * len(definition.children_variable_definitions)
|
|
if stack_size % 16 != 0:
|
|
stack_size += 8
|
|
if stack_size != 0:
|
|
compile_data.code.append(Instruction('subq', [f'${stack_size}', '%rsp']))
|
|
for variable in definition.children_variable_definitions:
|
|
address = compile_data.symbol_address(variable.value)
|
|
compile_ast(variable.child_expression, compile_data)
|
|
compile_data.code.append(Instruction('movq', ['%rax', address]))
|
|
|
|
# compile statements
|
|
if definition.nodetype == 'function_definition':
|
|
compile_ast(definition.child_expression, compile_data)
|
|
elif definition.nodetype == 'procedure_definition':
|
|
for statement in definition.children_statements:
|
|
compile_ast(statement, compile_data)
|
|
else: assert False
|
|
|
|
# Add function/procedure to callables
|
|
compile_data.callables[definition.value] = compile_data.code
|
|
compile_data.code = []
|
|
compile_data.scope = None
|
|
|
|
# Initialize global variables
|
|
for index, (name, variable) in enumerate(compile_data.sem_data.global_symbol_table.items()):
|
|
address = compile_data.symbol_address(name)
|
|
compile_ast(variable, compile_data)
|
|
compile_data.code.append(Instruction('movq', ['%rax', address]))
|
|
|
|
# Compile program statements
|
|
for statement in node.children_statements:
|
|
compile_ast(statement, compile_data)
|
|
compile_data.code.append(Instruction('xorq', ['%rax', '%rax']))
|
|
|
|
# Add main function
|
|
compile_data.callables['main'] = compile_data.code
|
|
compile_data.code = []
|
|
case 'variable_definition' | 'function_definition' | 'procedure_definition':
|
|
assert False
|
|
case 'identifier':
|
|
address = compile_data.symbol_address(node.value)
|
|
compile_data.code.append(Instruction('movq', [address, '%rax']))
|
|
case 'assignment':
|
|
if node.child_lhs.nodetype == 'attribute_write':
|
|
print_todo('Attribute write', node)
|
|
elif node.child_lhs.nodetype == 'identifier':
|
|
address = compile_data.symbol_address(node.child_lhs.value)
|
|
compile_ast(node.child_rhs, compile_data)
|
|
compile_data.code.append(Instruction('movq', ['%rax', address]))
|
|
else: assert False
|
|
case 'binary_op':
|
|
assert node.value in ['+', '-', '*', '/', '<', '=']
|
|
|
|
if node.value in ['*', '/']:
|
|
assert node.child_lhs.type == 'int'
|
|
else:
|
|
assert node.child_lhs.type in ['int', 'date']
|
|
|
|
if node.value == '-' and node.child_lhs.type == 'date':
|
|
assert node.child_rhs.type in ['int', 'date']
|
|
else:
|
|
assert node.child_rhs.type == 'int'
|
|
|
|
|
|
old_code = compile_data.code
|
|
|
|
# compile LHS
|
|
compile_data.code = []
|
|
compile_ast(node.child_lhs, compile_data)
|
|
lhs_code = compile_data.code
|
|
|
|
# compile RHS
|
|
compile_data.code = []
|
|
compile_ast(node.child_rhs, compile_data)
|
|
rhs_code = compile_data.code
|
|
|
|
compile_data.code = old_code
|
|
|
|
# If RHS is an 32 bit integer literal, we can use it as an immediate value
|
|
register = None
|
|
if node.child_rhs.nodetype == 'int_literal' and node.child_rhs.value <= 0x7FFFFFFF:
|
|
if node.child_lhs.type != 'date':
|
|
register = f'${node.child_rhs.value}'
|
|
elif node.child_rhs.value * 86400 <= 0x7FFFFFFF:
|
|
register = f'${node.child_rhs.value * 86400}'
|
|
|
|
# Otherwise, we need to use a register
|
|
usable_registers = ['%r8', '%r9', '%r10', '%r11']
|
|
if register is None:
|
|
register = '%rcx'
|
|
for reg in usable_registers:
|
|
valid = True
|
|
for instruction in lhs_code:
|
|
if reg in instruction.operands:
|
|
valid = False
|
|
break
|
|
if valid:
|
|
register = reg
|
|
break
|
|
|
|
# check if lhs uses call, this determines whether we need to align stack
|
|
align_stack = False
|
|
for instruction in lhs_code:
|
|
if instruction.opcode == 'call':
|
|
align_stack = True
|
|
break
|
|
|
|
# Add code for RHS calculation
|
|
if register[0] == '$':
|
|
pass
|
|
else:
|
|
compile_data.code += rhs_code
|
|
if register != '%rcx':
|
|
compile_data.code.append(Instruction('movq', ['%rax', register]))
|
|
elif not align_stack:
|
|
compile_data.code.append(Instruction('pushq', ['%rax']))
|
|
else:
|
|
compile_data.code.append(Instruction('subq', ['$16', '%rsp']))
|
|
compile_data.code.append(Instruction('movq', ['%rax', '0(%rsp)']))
|
|
|
|
# Add code for LHS calculation
|
|
compile_data.code += lhs_code
|
|
if register[0] == '$' or register != '%rcx':
|
|
pass
|
|
elif not align_stack:
|
|
compile_data.code.append(Instruction('popq', [register]))
|
|
else:
|
|
compile_data.code.append(Instruction('movq', ['0(%rsp)', register]))
|
|
compile_data.code.append(Instruction('addq', ['$16', '%rsp']))
|
|
|
|
# If we are adding or subtracting dates with integers, multiply the integer by number of seconds in a day
|
|
# If register is immediate, this has already been done
|
|
if register[0] != '$' and node.child_lhs.type == 'date' and node.child_rhs.type == 'int':
|
|
compile_data.code.append(Instruction('imulq', ['$86400', register]))
|
|
|
|
# Perform operation
|
|
if node.value == '+':
|
|
compile_data.code.append(Instruction('addq', [register, '%rax']))
|
|
elif node.value == '-':
|
|
compile_data.code.append(Instruction('subq', [register, '%rax']))
|
|
elif node.value == '*':
|
|
compile_data.code.append(Instruction('imulq', [register, '%rax']))
|
|
elif node.value == '/':
|
|
# Division by immediate is not possible
|
|
if register[0] == '$':
|
|
compile_data.code.append(Instruction('movq', [register, '%rcx']))
|
|
register = '%rcx'
|
|
compile_data.code.append(Instruction('cqo'))
|
|
compile_data.code.append(Instruction('idivq', [register]))
|
|
elif node.value == '<':
|
|
compile_data.code.append(Instruction('cmpq', [register, '%rax']))
|
|
compile_data.code.append(Instruction('setl', ['%al']))
|
|
elif node.value == '=':
|
|
compile_data.code.append(Instruction('cmpq', [register, '%rax']))
|
|
compile_data.code.append(Instruction('sete', ['%al']))
|
|
else: assert False
|
|
|
|
# If both operands are dates, divide result by number of seconds in a day
|
|
if node.child_lhs.type == 'date' and node.child_rhs.type == 'date':
|
|
assert node.value == '-'
|
|
compile_data.code.append(Instruction('movq', ['$86400', register]))
|
|
compile_data.code.append(Instruction('cqo'))
|
|
compile_data.code.append(Instruction('idivq', [register]))
|
|
case 'function_call' | 'procedure_call':
|
|
if node.value == 'Today':
|
|
compile_data.code.append(Instruction('call', ['__builtin_today']))
|
|
else:
|
|
# align stack
|
|
stack_needed = len(node.children_arguments) * 8
|
|
if stack_needed % 16 != 0:
|
|
stack_needed += 8
|
|
if stack_needed != 0:
|
|
compile_data.code.append(Instruction('subq', [f'${stack_needed}', '%rsp']))
|
|
|
|
# push arguments to the stack
|
|
offset = 0
|
|
for argument in node.children_arguments:
|
|
compile_ast(argument, compile_data)
|
|
compile_data.code.append(Instruction('movq', ['%rax', f'{offset}(%rsp)']))
|
|
offset += 8
|
|
|
|
# call function and restore stack
|
|
compile_data.code.append(Instruction('call', [node.value]))
|
|
if stack_needed != 0:
|
|
compile_data.code.append(Instruction('addq', [f'${stack_needed}', '%rsp']))
|
|
case 'return':
|
|
compile_ast(node.child_expression, compile_data)
|
|
compile_data.code.append(Instruction('leave'))
|
|
compile_data.code.append(Instruction('ret'))
|
|
case 'int_literal':
|
|
compile_data.code.append(Instruction('movq', [f'${node.value}', '%rax']))
|
|
case 'string_literal':
|
|
label = compile_data.add_string_literal(node.value)
|
|
compile_data.code.append(Instruction('movq', [f'${label}', '%rax']))
|
|
case 'date_literal':
|
|
compile_data.code.append(Instruction('movq', [f'${timegm(node.value.timetuple())}', '%rax']))
|
|
case 'attribute_read':
|
|
compile_ast(node.child_identifier, compile_data)
|
|
compile_data.code.append(Instruction('movq', ['%rax', '%rdi']))
|
|
compile_data.code.append(Instruction('movq', [f'$.{node.child_attribute.value}_format', '%rsi']))
|
|
compile_data.code.append(Instruction('call', ['__builtin_get_day_attr']))
|
|
case 'do_until':
|
|
label_loop = compile_data.get_label()
|
|
compile_data.insert_label(label_loop)
|
|
|
|
# compile statements
|
|
for statement in node.children_statements:
|
|
compile_ast(statement, compile_data)
|
|
|
|
# compile condition
|
|
compile_ast(node.child_condition, compile_data)
|
|
compile_data.code.append(Instruction('testb', ['%al', '%al']))
|
|
compile_data.code.append(Instruction('jz', [label_loop]))
|
|
case 'do_unless' | 'unless_expression':
|
|
label_true = compile_data.get_label()
|
|
label_done = compile_data.get_label()
|
|
|
|
# compile condition
|
|
compile_ast(node.child_condition, compile_data)
|
|
compile_data.code.append(Instruction('testb', ['%al', '%al']))
|
|
compile_data.code.append(Instruction('jnz', [label_true]))
|
|
|
|
# compile false statements
|
|
if node.nodetype == 'unless_expression':
|
|
compile_ast(node.child_expression_false, compile_data)
|
|
elif node.nodetype == 'do_unless':
|
|
for statement in node.children_statements_false:
|
|
compile_ast(statement, compile_data)
|
|
else: assert False
|
|
compile_data.code.append(Instruction('jmp', [label_done]))
|
|
|
|
# compile true statements
|
|
compile_data.insert_label(label_true)
|
|
if node.nodetype == 'unless_expression':
|
|
compile_ast(node.child_expression_true, compile_data)
|
|
elif node.nodetype == 'do_unless':
|
|
for statement in node.children_statements_true:
|
|
compile_ast(statement, compile_data)
|
|
else: assert False
|
|
|
|
# add label for done
|
|
compile_data.insert_label(label_done)
|
|
case 'print':
|
|
for i, item in enumerate(node.children_items):
|
|
assert item.type in ['int', 'string', 'date']
|
|
compile_ast(item, compile_data)
|
|
|
|
match item.type:
|
|
case 'int':
|
|
compile_data.code.append(Instruction('movq', ['$.int_format', '%rdi']))
|
|
compile_data.code.append(Instruction('movq', ['%rax', '%rsi']))
|
|
compile_data.code.append(Instruction('call', ['printf']))
|
|
case 'string':
|
|
compile_data.code.append(Instruction('movq', ['$.str_format', '%rdi']))
|
|
compile_data.code.append(Instruction('movq', ['%rax', '%rsi']))
|
|
compile_data.code.append(Instruction('call', ['printf']))
|
|
case 'date':
|
|
compile_data.code.append(Instruction('movq', ['%rax', '%rdi']))
|
|
compile_data.code.append(Instruction('call', ['__builtin_print_date']))
|
|
case _:
|
|
assert False
|
|
|
|
# Print space if there are more items
|
|
if i < len(node.children_items) - 1:
|
|
compile_data.code.append(Instruction('movl', ["$' '", '%edi']))
|
|
compile_data.code.append(Instruction('call', ['putchar']))
|
|
|
|
# Print newline
|
|
compile_data.code.append(Instruction('movl', ["$'\\n'", '%edi']))
|
|
compile_data.code.append(Instruction('call', ['putchar']))
|
|
case _:
|
|
print_todo(f'Compile type \'{node.nodetype}\'', node)
|
|
|
|
if __name__ == '__main__':
    # Command-line driver: syntax check -> semantic check -> assembly
    # generation -> assemble/link with gcc -> optionally run the result.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', action='store_true', help='debug?')

    # Exactly one of --who / --file has to be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')

    parser.add_argument('-o', '--output', help='output filename for compiled code. default (a.out)', default='a.out')
    parser.add_argument('-a', '--assembly', help='output filename for generated assembly code')
    parser.add_argument('-O', '--optimize', action='store_true', help='run simple optimization steps on the generated assembly code')
    parser.add_argument('-r', '--run', action='store_true', help='run the compiled code after compilation')

    args = parser.parse_args()

    if args.who:
        print('Author')
        print('  Student ID: 150189237')
        print('  Name: Oskari Alaranta')
    else:
        # Front end: build the AST, then run the semantic checks on it.
        # semantic_error() terminates the process via SystemExit on failure.
        ast = syntax_check_file(args.file, args.debug)

        sem_data = SemData()
        semantic_check(ast, sem_data)

        if args.debug:
            tree_print.treeprint(ast, 'unicode')

        # Back end: generate assembly from the checked AST.
        compile_data = CompileData(sem_data)
        compile_ast(ast, compile_data)

        if args.optimize:
            compile_data.optimize_assembly()

        assembly = compile_data.get_full_code()

        # Optionally keep a copy of the generated assembly text.
        if args.assembly is not None:
            with open(args.assembly, 'w', encoding='utf-8') as file:
                file.write(assembly)

        # Feed the assembly to gcc on stdin ('-' as the input file).
        gcc_result = subprocess.run(
            ['gcc', '-x', 'assembler', '-o', args.output, '-static', '-'],
            input=assembly,
            encoding='utf-8',
        )

        # Stop here if assembling/linking failed. Otherwise the driver would
        # report success and --run could execute a stale binary left over
        # from an earlier build (or crash because no binary exists).
        if gcc_result.returncode != 0:
            print(f'\033[31mCompile Error: gcc exited with status {gcc_result.returncode}\033[m')
            raise SystemExit(1)

        if args.run:
            # A path starting with '/' is executed as given; anything else
            # gets a './' prefix so it is not looked up on PATH.
            if args.output.startswith('/'):
                subprocess.run([args.output])
            else:
                subprocess.run([f'./{args.output}'])
|