2024-04-26 13:36:20 +03:00
#!/bin/env python3
import argparse
2024-04-28 01:37:24 +03:00
from calendar import timegm
2024-04-26 13:36:20 +03:00
from copy import deepcopy
from datetime import date , timedelta
2024-04-28 03:02:25 +03:00
import subprocess
2024-04-26 13:36:20 +03:00
import tree_print
from build_ast import ASTnode , syntax_check_file
class SemData:
    """Mutable state shared by every step of the semantic analysis.

    Attributes (all start out empty):
        root: first node handed to ``semantic_check``.
        scope: function/procedure definition currently being checked, or
            ``None`` while checking global code.
        callables: function and procedure definitions keyed by name.
        global_symbol_table: global variables keyed by name.
        local_symbol_table: formals and locals of the current ``scope``.
    """

    def __init__(self):
        # Position in the tree.
        self.root = None
        self.scope = None
        # Name -> node lookup tables; each instance owns fresh dicts.
        self.callables = dict()
        self.local_symbol_table = dict()
        self.global_symbol_table = dict()
def semantic_error(msg: str, node: 'ASTnode | None' = None) -> None:
    """Print a semantic error in red on stdout and terminate the compiler.

    Args:
        msg: Human readable description of the problem.
        node: AST node the error refers to; its ``lineno`` is appended to the
            message. May be omitted when no node is at hand, in which case
            the message is printed without a location.

    Raises:
        SystemExit: always, with exit status 1.
    """
    location = f' at line {node.lineno}' if node is not None else ''
    # ESC[31m switches to red, ESC[m resets. The ESC byte must be followed
    # immediately by '[' for terminals to recognise the sequence.
    print(f'\033[31mSemantic Error: {msg}{location}\033[m')
    raise SystemExit(1)
def print_todo(msg: str, node: 'ASTnode') -> None:
    """Report a not-yet-implemented feature in yellow and terminate.

    Args:
        msg: Description of the missing feature.
        node: AST node that triggered it; its ``lineno`` is reported.

    Raises:
        SystemExit: always, with exit status 2 (distinct from the status 1
            used for semantic errors).
    """
    # ESC[33m switches to yellow, ESC[m resets. The ESC byte must be followed
    # immediately by '[' for terminals to recognise the sequence.
    print(f'\033[33mTODO: {msg} at line {node.lineno}\033[m')
    raise SystemExit(2)
def semantic_check ( node : ASTnode , sem_data : SemData ) - > None | ASTnode :
if sem_data . root is None :
sem_data . root = node
match node . nodetype :
case ' program ' :
# Collect function and procedure definitions first,
# since they can be called before they are defined
for child in node . children_definitions :
if child . nodetype in [ ' function_definition ' , ' procedure_definition ' ] :
2024-04-28 01:37:24 +03:00
if child . value in sem_data . callables :
2024-04-26 13:36:20 +03:00
semantic_error ( f ' Redefinition of { child . nodetype . split ( " _ " ) [ 0 ] } \' { child . value } \' ' , child )
2024-04-28 01:37:24 +03:00
sem_data . callables [ child . value ] = child
2024-04-26 13:36:20 +03:00
# Then do the actual semantic checking
for child in node . children_definitions :
semantic_check ( child , sem_data )
for child in node . children_statements :
if semantic_check ( child , sem_data ) is not None :
semantic_error ( f ' Expression return value is not handled ' , child )
return None
case ' variable_definition ' :
# Check if variable is already defined
symbol_table = sem_data . global_symbol_table
if sem_data . scope is not None :
symbol_table = sem_data . local_symbol_table
if node . value in symbol_table :
semantic_error ( f ' Redefinition of variable \' { node . value } \' ' , node )
# Check if expression is valid and store it in symbol table
variable = semantic_check ( node . child_expression , sem_data )
if variable is None or variable . type not in [ ' int ' , ' string ' , ' date ' ] :
semantic_error ( f ' Invalid variable type \' { variable . type if variable is not None else None } \' ' , node )
symbol_table [ node . value ] = variable
return None
case ' function_definition ' | ' procedure_definition ' :
# Function and procedures are added to global symbol table
# as the first step, so they can be called before they are defined
2024-04-28 01:37:24 +03:00
assert node . value in sem_data . callables
2024-04-26 13:36:20 +03:00
# Local symbols table should be empty while doing checking,
# since functions and procedures can only be defined in global scope
assert len ( sem_data . local_symbol_table ) == 0 and sem_data . scope is None
sem_data . scope = node
# Collect local arguments
for formal in node . children_formals :
if formal . value in sem_data . local_symbol_table :
semantic_error ( f ' Redefinition of variable \' { formal . value } \' in { node . nodetype . split ( " _ " ) [ 0 ] } \' { node . value } \' arguments ' , node )
sem_data . local_symbol_table [ formal . value ] = formal
# Collect local variables
for variable_definition in node . children_variable_definitions :
semantic_check ( variable_definition , sem_data )
# Check return type
if node . nodetype == ' function_definition ' :
expression = semantic_check ( node . child_expression , sem_data )
if expression is None :
semantic_error ( f ' Function \' { node . value } \' must return a value ' , node )
if node . child_return_type == ' auto ' :
node . child_return_type = expression . type
if expression . type != node . child_return_type :
semantic_error ( f ' Function \' { node . value } \' return type is { node . child_return_type } but returns { expression . type } ' , node )
elif node . nodetype == ' procedure_definition ' :
returns = None
for statement in node . children_statements :
returns = None
value = semantic_check ( statement , sem_data )
if value is None :
continue
if value . nodetype != ' return ' :
semantic_error ( f ' Expression return value is not handled ' , statement )
if node . child_return_type is None :
semantic_error ( f ' Procedure \' { node . value } \' does not have a return type ' , node )
if node . child_return_type == ' auto ' :
node . child_return_type = value . type
if value . type != node . child_return_type :
semantic_error ( f ' Procedure \' { node . value } \' return type is { node . child_return_type } but returns { value . type } ' , node )
returns = value . type
if returns is None and node . child_return_type is not None :
if node . child_return_type != ' void ' :
semantic_error ( f ' Procedure \' { node . value } \' must return a value when scope exits ' , node )
else :
assert False
node . type = node . child_return_type
sem_data . scope = None
sem_data . local_symbol_table = { }
return None
case ' return ' :
if sem_data . scope is None or sem_data . scope . nodetype != ' procedure_definition ' :
semantic_error ( f ' Keyword \' return \' can only appear in procefure_definition ' )
result = semantic_check ( node . child_expression , sem_data )
if result is None :
semantic_error ( f ' Procedure \' { sem_data . scope . value } \' must return a value ' , node )
node . type = result . type
return node
case ' date_literal ' | ' int_literal ' | ' string_literal ' :
node . type = node . nodetype . split ( ' _ ' ) [ 0 ]
return node
case ' assignment ' :
lhs = semantic_check ( node . child_lhs , sem_data )
rhs = semantic_check ( node . child_rhs , sem_data )
if lhs is None or rhs is None or lhs . type != rhs . type :
semantic_error ( f ' Invalid assignment of \' { rhs . type if rhs is not None else None } \' to \' { lhs . type if lhs is not None else None } \' ' , node )
return None
case ' binary_op ' :
lhs = semantic_check ( node . child_lhs , sem_data )
rhs = semantic_check ( node . child_rhs , sem_data )
if lhs is None or rhs is None :
semantic_error ( f ' Invalid operands \' { lhs . type if lhs is not None else None } \' and \' { rhs . type if rhs is not None else None } \' for binary operation { node . value } ' , node )
# Validate operands and result type
if node . value in [ ' * ' , ' / ' ] :
if lhs . type == ' int ' and rhs . type == ' int ' :
node . type = ' int '
return node
elif node . value == ' + ' :
if lhs . type == ' date ' and rhs . type == ' int ' :
node . type = ' date '
return node
if lhs . type == ' int ' and rhs . type == ' int ' :
node . type = ' int '
return node
elif node . value == ' - ' :
if lhs . type == ' date ' and rhs . type == ' int ' :
node . type = ' date '
return node
if lhs . type == ' date ' and rhs . type == ' date ' :
node . type = ' int '
return node
if lhs . type == ' int ' and rhs . type == ' int ' :
node . type = ' int '
return node
2024-04-28 01:37:24 +03:00
elif node . value in [ ' < ' , ' = ' ] :
2024-04-26 13:36:20 +03:00
if lhs . type == rhs . type :
node . type = ' bool '
return node
semantic_error ( f ' Invalid operands \' { lhs . type } \' and \' { rhs . type } \' for operation { node . value } ' , node )
case ' identifier ' :
# Check if variable is defined
symbol = None
if node . value in sem_data . local_symbol_table :
symbol = sem_data . local_symbol_table [ node . value ]
if node . value in sem_data . global_symbol_table :
symbol = sem_data . global_symbol_table [ node . value ]
if symbol is not None :
node . type = symbol . type
return symbol
semantic_error ( f ' Variable \' { node . value } \' not defined ' , node )
case ' function_call ' | ' procedure_call ' :
# Handle built in functions
if node . nodetype == ' function_call ' and node . value == ' Today ' :
if len ( node . children_arguments ) != 0 :
semantic_error ( f ' Builtin function \' Today \' takes no arguments ' , node )
node . type = ' date '
return node
# Check if function/procedure is defined
2024-04-28 01:37:24 +03:00
if node . value not in sem_data . callables :
2024-04-26 13:36:20 +03:00
semantic_error ( f ' { node . nodetype . split ( " _ " ) [ 0 ] } \' { node . value } \' not defined ' , node )
2024-04-28 01:37:24 +03:00
func = sem_data . callables [ node . value ]
2024-04-26 13:36:20 +03:00
# Check if arguments match (count and types)
if len ( node . children_arguments ) != len ( func . children_formals ) :
semantic_error ( f ' Argument count mismatch for { node . nodetype . split ( " _ " ) [ 0 ] } \' { node . value } \' , expected { len ( func . children_formals ) } but got { len ( node . children_arguments ) } ' , node )
for formal , actual in zip ( func . children_formals , node . children_arguments ) :
resolved = semantic_check ( actual , sem_data )
if resolved is None or formal . type != resolved . type :
semantic_error ( f ' Argument type mismatch for { node . nodetype . split ( " _ " ) [ 0 ] } \' { node . value } \' , expected \' { formal . type } \' but got \' { resolved . type if resolved is not None else None } \' ' , node )
# Set return type and return node if func has a return type
node . type = func . child_return_type
return node if node . type is not None else None
case ' do_unless ' :
# Validate condition
condition = semantic_check ( node . child_condition , sem_data )
if condition is None or condition . type != ' bool ' :
semantic_error ( ' Condition must be of type \' bool \' ' , node )
# Validate both branches
for statement in node . children_statements_true :
if semantic_check ( statement , sem_data ) is not None :
semantic_error ( f ' Expression return value is not handled ' , statement )
for statement in node . children_statements_false :
if semantic_check ( statement , sem_data ) is not None :
semantic_error ( f ' Expression return value is not handled ' , statement )
return None
case ' do_until ' :
# Validate condition
condition = semantic_check ( node . child_condition , sem_data )
if condition is None or condition . type != ' bool ' :
semantic_error ( ' Condition must be of type bool ' , node )
# Validate body
for statement in node . children_statements :
if semantic_check ( statement , sem_data ) is not None :
semantic_error ( f ' Expression return value is not handled ' , statement )
return None
case ' unless_expression ' :
# Validate condition
condition = semantic_check ( node . child_condition , sem_data )
if condition is None or condition . type != ' bool ' :
semantic_error ( ' Condition must be of type bool ' , node )
# Validate both branches
expression_true = semantic_check ( node . child_expression_true , sem_data )
expression_false = semantic_check ( node . child_expression_false , sem_data )
if expression_true is None or expression_false is None or expression_true . type != expression_false . type :
semantic_error ( f ' Branches must return the same type, got \' { expression_false . type } \' and \' { expression_true . type } \' ' , node )
node . type = expression_true . type
return node
case ' attribute_read ' | ' attribute_write ' :
# Check if variable is defined
symbol = None
if node . child_identifier . value in sem_data . local_symbol_table :
symbol = sem_data . local_symbol_table [ node . child_identifier . value ]
elif node . child_identifier . value in sem_data . global_symbol_table :
symbol = sem_data . global_symbol_table [ node . child_identifier . value ]
else :
semantic_error ( f ' Variable \' { node . child_identifier . value } \' not defined ' , node . child_identifier )
# Validate attribute
assert node . child_attribute . nodetype == ' identifier '
if symbol . type != ' date ' :
semantic_error ( f ' Cannot access attribute of non-date variable ' , node . child_attribute )
valid_attributes = [ ' day ' , ' month ' , ' year ' ]
if node . nodetype == ' attribute_read ' :
valid_attributes + = [ ' weekday ' , ' weeknum ' ]
if node . child_attribute . value not in valid_attributes :
semantic_error ( f ' Invalid attribute \' { node . child_attribute . value } \' for { node . nodetype . split ( " _ " ) [ 0 ] } , allowed values { valid_attributes } ' , node . child_attribute )
2024-04-28 02:20:35 +03:00
node . type = ' int '
2024-04-26 13:36:20 +03:00
return node
case ' print ' :
for item in node . children_items :
value = semantic_check ( item , sem_data )
if value is None or value . type not in [ ' int ' , ' string ' , ' date ' ] :
semantic_error ( ' Print argument can only be \' int \' , \' date \' or \' string \' ' , node )
return None
case _ :
print_todo ( f ' Semantic check type \' { node . nodetype } \' ' , node )
2024-04-29 17:03:51 +03:00
class Instruction :
def __init__ ( self , opcode : str , operands : list [ str ] = [ ] ) :
self . opcode = opcode
self . operands = operands
def __str__ ( self ) :
return f ' { self . opcode } { ' , ' . join ( self . operands ) } '
2024-04-28 01:37:24 +03:00
class CompileData:
    """Code generation state plus the final assembly writer.

    Instructions are collected per callable in ``callables``; ``code`` is the
    buffer of the callable currently being generated.
    """

    def __init__(self, sem_data: 'SemData'):
        """Set up empty code buffers and register the built-in routines.

        Args:
            sem_data: Result of the semantic analysis; its global symbol
                table determines the layout of the ``.globals`` area.
        """
        self.sem_data = sem_data
        # Size in bytes of the scratch buffer strftime writes into.
        self.date_buffer_size = 128
        self.label_counter = 0
        self.string_literals: list[str] = []
        self.callables: dict[str, list[Instruction]] = {}
        # Definition whose body is being compiled; None for global code.
        self.scope: ASTnode = None
        self.code: list[Instruction] = []
        self.add_builtin_functions()

    def get_label(self) -> str:
        """Return a fresh local label (``.L0``, ``.L1``, ...)."""
        label = f'.L{self.label_counter}'
        self.label_counter += 1
        return label

    def insert_label(self, label) -> None:
        """Append a label definition to the current code buffer."""
        self.code.append(Instruction('<label>', [label]))

    def add_string_literal(self, value: str) -> str:
        """Intern ``value`` and return its data label (``S<index>``).

        Equal strings share one label.
        """
        if value not in self.string_literals:
            self.string_literals.append(value)
        return f'S{self.string_literals.index(value)}'

    def symbol_address(self, symbol: str) -> str:
        """Return the assembly operand that addresses variable ``symbol``.

        Inside a scope, formals are looked up first, then locals, then
        globals:
          * formal ``i``  -> ``16+8*i(%rbp)`` (above saved %rbp and the
            return address),
          * local ``i``   -> ``-(8+8*i)(%rbp)``,
          * global ``i``  -> ``(.globals + 8*i)`` in definition order.

        Raises:
            AssertionError: if the symbol is unknown, which means the
                semantic check let an undefined name through.
        """
        if self.scope is not None:
            for index, formal in enumerate(self.scope.children_formals):
                if formal.value == symbol:
                    return f'{8 * index + 16}(%rbp)'
            for index, variable in enumerate(self.scope.children_variable_definitions):
                if variable.value == symbol:
                    return f'-{8 * index + 8}(%rbp)'
        if symbol in self.sem_data.global_symbol_table:
            offset = 8 * list(self.sem_data.global_symbol_table).index(symbol)
            return f'(.globals + {offset})'
        # Raised explicitly so the check also fires under `python -O`.
        raise AssertionError(f'unknown symbol {symbol!r}')

    def add_builtin_functions(self) -> None:
        """Register the hand-written runtime helpers in ``callables``.

        ``get_full_code`` wraps every body in the usual prologue/epilogue, so
        the stack space reserved here is released by the final ``leave``.
        """
        buffer_size = f'${self.date_buffer_size}'

        def assemble(*lines: tuple) -> list:
            # Each line is (opcode, operand, ...).
            return [Instruction(opcode, list(operands)) for opcode, *operands in lines]

        # Today(): time(NULL) rounded down to a whole day (t - t % 86400).
        self.callables['__builtin_today'] = assemble(
            ('xorq', '%rdi', '%rdi'),
            ('call', 'time'),
            ('movq', '%rax', '%rdi'),
            ('movq', '$86400', '%rcx'),
            ('xorq', '%rdx', '%rdx'),
            ('divq', '%rcx'),
            ('movq', '%rdi', '%rax'),
            ('subq', '%rdx', '%rax'),
        )

        # print_date(%rdi = timestamp): strftime into .date_buffer, then printf.
        self.callables['__builtin_print_date'] = assemble(
            ('subq', '$16', '%rsp'),
            ('movq', '%rdi', '0(%rsp)'),
            ('leaq', '0(%rsp)', '%rdi'),
            ('call', 'localtime'),
            ('movq', '$.date_buffer', '%rdi'),
            ('movq', buffer_size, '%rsi'),
            ('movq', '$.date_format', '%rdx'),
            ('movq', '%rax', '%rcx'),
            ('call', 'strftime'),
            ('movq', '$.str_format', '%rdi'),
            ('movq', '$.date_buffer', '%rsi'),
            ('call', 'printf'),
        )

        # get_day_attr(%rdi = timestamp, %rsi = strftime format): format the
        # requested field and convert the text back to an integer with atoi.
        self.callables['__builtin_get_day_attr'] = assemble(
            ('subq', '$16', '%rsp'),
            ('movq', '%rdi', '0(%rsp)'),
            ('movq', '%rsi', '8(%rsp)'),
            ('leaq', '0(%rsp)', '%rdi'),
            ('call', 'localtime'),
            ('movq', '$.date_buffer', '%rdi'),
            ('movq', buffer_size, '%rsi'),
            ('movq', '8(%rsp)', '%rdx'),
            ('movq', '%rax', '%rcx'),
            ('call', 'strftime'),
            ('movq', '$.date_buffer', '%rdi'),
            ('call', 'atoi'),
        )

    def get_full_code(self) -> str:
        """Return the complete assembly source for everything compiled so far.

        Layout: ``.data`` (format strings and interned string literals),
        ``.bss`` (date scratch buffer and 8 bytes per global variable), then
        ``.text`` with one prologue/epilogue-wrapped routine per callable.
        """
        lines = [
            # Data section with format strings and string literals
            '.section .data',
            '.int_format: .asciz "%lld"',
            '.str_format: .asciz "%s"',
            '.date_format: .asciz "%Y-%m-%d"',
            '.day_format: .asciz "%d"',
            '.month_format: .asciz "%m"',
            '.year_format: .asciz "%Y"',
            '.weekday_format: .asciz "%u"',
            '.weeknum_format: .asciz "%W"',
        ]
        lines += [f'S{index}: .asciz "{string}"'
                  for index, string in enumerate(self.string_literals)]
        lines.append('')

        # BSS section for uninitialized data
        lines += ['.section .bss', '.date_buffer:', f'    .skip {self.date_buffer_size}']
        # Use the table this instance was built with, not a module global.
        global_count = len(self.sem_data.global_symbol_table)
        if global_count != 0:
            lines += ['.globals:', f'    .skip {global_count * 8}']
        lines.append('')

        # Text section with code
        lines += ['.section .text', '']
        for name, code in self.callables.items():
            if name == 'main':
                lines.append('.global main')
            lines += [f'{name}:', '    pushq %rbp', '    movq %rsp, %rbp']
            for instruction in code:
                if instruction.opcode == '<label>':
                    lines.append(f'{instruction.operands[0]}:')
                else:
                    lines.append(f'    {instruction}')
            lines += ['    leave', '    ret', '']
        return '\n'.join(lines) + '\n'
2024-04-28 01:37:24 +03:00
def compile_ast ( node : ASTnode , compile_data : CompileData ) - > None :
2024-04-26 13:36:20 +03:00
match node . nodetype :
2024-04-28 01:37:24 +03:00
case ' program ' :
# Compile function and procedure definitions
for definition in node . children_definitions :
if definition . nodetype not in [ ' function_definition ' , ' procedure_definition ' ] :
continue
assert len ( compile_data . code ) == 0
assert compile_data . scope is None
compile_data . scope = definition
2024-04-29 17:03:51 +03:00
2024-04-28 01:37:24 +03:00
# initialize local variables
stack_size = 8 * len ( definition . children_variable_definitions )
if stack_size % 16 != 0 :
stack_size + = 8
if stack_size != 0 :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' subq ' , [ f ' $ { stack_size } ' , ' %r sp ' ] ) )
2024-04-28 01:37:24 +03:00
for variable in definition . children_variable_definitions :
address = compile_data . symbol_address ( variable . value )
compile_ast ( variable . child_expression , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , address ] ) )
2024-04-28 01:37:24 +03:00
# compile statements
if definition . nodetype == ' function_definition ' :
compile_ast ( definition . child_expression , compile_data )
elif definition . nodetype == ' procedure_definition ' :
for statement in definition . children_statements :
compile_ast ( statement , compile_data )
else : assert False
2024-04-29 17:03:51 +03:00
# Add function/procedure to callables
2024-04-28 01:37:24 +03:00
compile_data . callables [ definition . value ] = compile_data . code
2024-04-29 17:03:51 +03:00
compile_data . code = [ ]
2024-04-28 01:37:24 +03:00
compile_data . scope = None
2024-04-29 17:03:51 +03:00
2024-04-28 01:37:24 +03:00
# Initialize global variables
for index , ( name , variable ) in enumerate ( compile_data . sem_data . global_symbol_table . items ( ) ) :
2024-04-28 03:02:25 +03:00
address = compile_data . symbol_address ( name )
2024-04-28 01:37:24 +03:00
compile_ast ( variable , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , address ] ) )
2024-04-28 01:37:24 +03:00
# Compile program statements
for statement in node . children_statements :
compile_ast ( statement , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' xorq ' , [ ' %r ax ' , ' %r ax ' ] ) )
# Add main function
compile_data . callables [ ' main ' ] = compile_data . code
compile_data . code = [ ]
2024-04-28 01:37:24 +03:00
case ' variable_definition ' | ' function_definition ' | ' procedure_definition ' :
assert False
case ' identifier ' :
address = compile_data . symbol_address ( node . value )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ address , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
case ' assignment ' :
2024-04-28 03:02:25 +03:00
if node . child_lhs . nodetype == ' attribute_write ' :
print_todo ( ' Attribute write ' , node )
elif node . child_lhs . nodetype == ' identifier ' :
address = compile_data . symbol_address ( node . child_lhs . value )
compile_ast ( node . child_rhs , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , address ] ) )
2024-04-28 03:02:25 +03:00
else : assert False
2024-04-28 01:37:24 +03:00
case ' binary_op ' :
assert node . value in [ ' + ' , ' - ' , ' * ' , ' / ' , ' < ' , ' = ' ]
if node . value in [ ' * ' , ' / ' ] :
assert node . child_lhs . type == ' int '
else :
assert node . child_lhs . type in [ ' int ' , ' date ' ]
if node . value == ' - ' and node . child_lhs . type == ' date ' :
assert node . child_rhs . type in [ ' int ' , ' date ' ]
else :
assert node . child_rhs . type == ' int '
2024-04-29 18:18:24 +03:00
old_code = compile_data . code
# compile LHS
compile_data . code = [ ]
2024-04-28 01:37:24 +03:00
compile_ast ( node . child_lhs , compile_data )
2024-04-29 18:18:24 +03:00
lhs_code = compile_data . code
2024-04-28 01:37:24 +03:00
2024-04-29 18:18:24 +03:00
# compile RHS
compile_data . code = [ ]
2024-04-28 01:37:24 +03:00
compile_ast ( node . child_rhs , compile_data )
2024-04-29 18:18:24 +03:00
rhs_code = compile_data . code
compile_data . code = old_code
# check if we can use temporary registers instead of stack
temp_registers = [ ' %r cx ' , ' %r 8 ' , ' %r 9 ' , ' %r 10 ' , ' %r 11 ' ]
temp_reg = None
for reg in temp_registers :
valid = True
for instruction in lhs_code :
if reg in instruction . operands :
valid = False
break
if valid :
temp_reg = reg
break
# check if lhs uses call, this determines whether we need to align stack
lhs_call = False
for instruction in lhs_code :
if instruction . opcode == ' call ' :
lhs_call = True
break
# Add code for RHS calculation
compile_data . code + = rhs_code
if temp_reg is not None :
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , temp_reg ] ) )
elif not lhs_call :
compile_data . code . append ( Instruction ( ' pushq ' , [ ' %r ax ' ] ) )
else :
compile_data . code . append ( Instruction ( ' subq ' , [ ' $16 ' , ' %r sp ' ] ) )
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , ' 0( %r sp) ' ] ) )
# Add code for LHS calculation
compile_data . code + = lhs_code
if temp_reg is not None :
compile_data . code . append ( Instruction ( ' movq ' , [ temp_reg , ' %r cx ' ] ) )
elif not lhs_call :
compile_data . code . append ( Instruction ( ' popq ' , [ ' %r cx ' ] ) )
2024-04-28 01:37:24 +03:00
else :
2024-04-29 18:18:24 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' 0( %r sp) ' , ' %r cx ' ] ) )
compile_data . code . append ( Instruction ( ' addq ' , [ ' $16 ' , ' %r sp ' ] ) )
2024-04-28 01:37:24 +03:00
2024-04-29 18:18:24 +03:00
# If we are adding or subtracting dates with integers, multiply the integer by number of seconds in a day
if node . child_lhs . type == ' date ' and node . child_rhs . type == ' int ' :
compile_data . code . append ( Instruction ( ' imulq ' , [ ' $86400 ' , ' %r cx ' ] ) )
2024-04-28 01:37:24 +03:00
# perform operation
if node . value == ' + ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' addq ' , [ ' %r cx ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
elif node . value == ' - ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' subq ' , [ ' %r cx ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
elif node . value == ' * ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' imulq ' , [ ' %r cx ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
elif node . value == ' / ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' cqo ' ) )
compile_data . code . append ( Instruction ( ' idivq ' , [ ' %r cx ' ] ) )
2024-04-28 01:37:24 +03:00
elif node . value == ' < ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' cmpq ' , [ ' %r cx ' , ' %r ax ' ] ) )
compile_data . code . append ( Instruction ( ' setl ' , [ ' %a l ' ] ) )
compile_data . code . append ( Instruction ( ' movzbq ' , [ ' %a l ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
elif node . value == ' = ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' cmpq ' , [ ' %r cx ' , ' %r ax ' ] ) )
compile_data . code . append ( Instruction ( ' sete ' , [ ' %a l ' ] ) )
compile_data . code . append ( Instruction ( ' movzbq ' , [ ' %a l ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
else : assert False
# if both operands are dates, divide result by number of seconds in a day
if node . child_lhs . type == ' date ' and node . child_rhs . type == ' date ' :
assert node . value == ' - '
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' $86400 ' , ' %r cx ' ] ) )
compile_data . code . append ( Instruction ( ' cqo ' ) )
compile_data . code . append ( Instruction ( ' idivq ' , [ ' %r cx ' ] ) )
2024-04-28 01:37:24 +03:00
case ' function_call ' | ' procedure_call ' :
if node . value == ' Today ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' call ' , [ ' __builtin_today ' ] ) )
2024-04-28 01:37:24 +03:00
else :
# align stack
stack_needed = len ( node . children_arguments ) * 8
if stack_needed % 16 != 0 :
stack_needed + = 8
if stack_needed != 0 :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' subq ' , [ f ' $ { stack_needed } ' , ' %r sp ' ] ) )
2024-04-28 01:37:24 +03:00
# push arguments to the stack
offset = 0
for argument in node . children_arguments :
compile_ast ( argument , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , f ' { offset } (%rsp) ' ] ) )
2024-04-28 01:37:24 +03:00
offset + = 8
# call function and restore stack
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' call ' , [ node . value ] ) )
2024-04-28 01:37:24 +03:00
if stack_needed != 0 :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' addq ' , [ f ' $ { stack_needed } ' , ' %r sp ' ] ) )
2024-04-28 01:37:24 +03:00
case ' return ' :
compile_ast ( node . child_expression , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' leave ' ) )
compile_data . code . append ( Instruction ( ' ret ' ) )
2024-04-28 01:37:24 +03:00
case ' int_literal ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ f ' $ { node . value } ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
case ' string_literal ' :
label = compile_data . add_string_literal ( node . value )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ f ' $ { label } ' , ' %r ax ' ] ) )
2024-04-28 01:37:24 +03:00
case ' date_literal ' :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ f ' $ { timegm ( node . value . timetuple ( ) ) } ' , ' %r ax ' ] ) )
2024-04-28 02:22:32 +03:00
case ' attribute_read ' :
compile_ast ( node . child_identifier , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , ' %r di ' ] ) )
compile_data . code . append ( Instruction ( ' movq ' , [ f ' $. { node . child_attribute . value } _format ' , ' %r si ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' __builtin_get_day_attr ' ] ) )
2024-04-28 01:37:24 +03:00
case ' do_until ' :
label_loop = compile_data . get_label ( )
compile_data . insert_label ( label_loop )
# compile statements
for statement in node . children_statements :
compile_ast ( statement , compile_data )
# compile condition
compile_ast ( node . child_condition , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' testq ' , [ ' %r ax ' , ' %r ax ' ] ) )
compile_data . code . append ( Instruction ( ' jz ' , [ label_loop ] ) )
2024-04-28 01:37:24 +03:00
case ' do_unless ' | ' unless_expression ' :
label_true = compile_data . get_label ( )
label_done = compile_data . get_label ( )
# compile condition
compile_ast ( node . child_condition , compile_data )
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' testq ' , [ ' %r ax ' , ' %r ax ' ] ) )
compile_data . code . append ( Instruction ( ' jnz ' , [ label_true ] ) )
2024-04-28 01:37:24 +03:00
# compile false statements
if node . nodetype == ' unless_expression ' :
compile_ast ( node . child_expression_false , compile_data )
elif node . nodetype == ' do_unless ' :
for statement in node . children_statements_false :
compile_ast ( statement , compile_data )
else : assert False
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' jmp ' , [ label_done ] ) )
2024-04-28 01:37:24 +03:00
# compile true statements
compile_data . insert_label ( label_true )
if node . nodetype == ' unless_expression ' :
compile_ast ( node . child_expression_true , compile_data )
elif node . nodetype == ' do_unless ' :
for statement in node . children_statements_true :
compile_ast ( statement , compile_data )
else : assert False
# add label for done
compile_data . insert_label ( label_done )
case ' print ' :
for i , item in enumerate ( node . children_items ) :
assert item . type in [ ' int ' , ' string ' , ' date ' ]
compile_ast ( item , compile_data )
2024-04-29 17:03:51 +03:00
match item . type :
case ' int ' :
compile_data . code . append ( Instruction ( ' movq ' , [ ' $.int_format ' , ' %r di ' ] ) )
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , ' %r si ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' printf ' ] ) )
case ' string ' :
compile_data . code . append ( Instruction ( ' movq ' , [ ' $.str_format ' , ' %r di ' ] ) )
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , ' %r si ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' printf ' ] ) )
case ' date ' :
compile_data . code . append ( Instruction ( ' movq ' , [ ' %r ax ' , ' %r di ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' __builtin_print_date ' ] ) )
case _ :
assert False
2024-04-28 01:37:24 +03:00
# Print space if there are more items
if i < len ( node . children_items ) - 1 :
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movl ' , [ " $ ' ' " , ' %e di ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' putchar ' ] ) )
2024-04-28 01:37:24 +03:00
# Print newline
2024-04-29 17:03:51 +03:00
compile_data . code . append ( Instruction ( ' movl ' , [ " $ ' \\ n ' " , ' %e di ' ] ) )
compile_data . code . append ( Instruction ( ' call ' , [ ' putchar ' ] ) )
2024-04-26 13:36:20 +03:00
case _ :
2024-04-28 01:37:24 +03:00
print_todo ( f ' Compile type \' { node . nodetype } \' ' , node )
2024-04-26 13:36:20 +03:00
# Command line driver: check, compile, assemble with gcc and optionally run.
if __name__ == '__main__':
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', action='store_true', help='debug?')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')
    parser.add_argument('-o', '--output', help='output filename for compiled code. default (a.out)', default='a.out')
    parser.add_argument('-a', '--assembly', help='output filename for generated assembly code')
    parser.add_argument('-r', '--run', action='store_true', help='run the compiled code after compilation')
    args = parser.parse_args()

    if args.who:
        print('Author')
        print('Student ID: 150189237')
        print('Name: Oskari Alaranta')
    else:
        # Front end: both steps terminate the process themselves on errors.
        ast = syntax_check_file(args.file, args.debug)
        sem_data = SemData()
        semantic_check(ast, sem_data)
        if args.debug:
            tree_print.treeprint(ast, 'unicode')

        # Back end: generate assembly text.
        compile_data = CompileData(sem_data)
        compile_ast(ast, compile_data)
        assembly = compile_data.get_full_code()
        if args.assembly is not None:
            with open(args.assembly, 'w', encoding='utf-8') as file:
                file.write(assembly)

        # Assemble and link by feeding the assembly to gcc on stdin.
        gcc = subprocess.run(
            ['gcc', '-x', 'assembler', '-o', args.output, '-static', '-'],
            input=assembly, encoding='utf-8')
        if gcc.returncode != 0:
            # Do not go on to run a stale or missing binary.
            raise SystemExit(gcc.returncode)

        if args.run:
            # An absolute path works for relative, nested and absolute
            # --output values alike and never triggers a PATH lookup.
            subprocess.run([os.path.abspath(args.output)])