Generated code is now kept as a list of instructions

This commit is contained in:
Bananymous 2024-04-29 17:03:51 +03:00
parent 8c8d294527
commit 56bcad6fc1
1 changed files with 163 additions and 160 deletions

View File

@ -276,75 +276,31 @@ def semantic_check(node: ASTnode, sem_data: SemData) -> None | ASTnode:
case _:
print_todo(f'Semantic check type \'{node.nodetype}\'', node)
class Instruction:
def __init__(self, opcode: str, operands: list[str] = []):
self.opcode = opcode
self.operands = operands
def __str__(self):
return f'{self.opcode} {', '.join(self.operands)}'
class CompileData:
def __init__(self, sem_data: SemData):
    """Create empty code-generation state and register the built-in helpers.

    sem_data: result of the semantic pass; stored unchanged. Its
              `global_symbol_table` is read when globals are laid out.
    """
    self.sem_data = sem_data
    # Byte size of the `.date_buffer` scratch area; also passed to strftime
    # as the maximum output length by the built-in helpers.
    self.date_buffer_size = 128
    self.string_literals: list[str] = []
    # Next number handed out by get_label()
    self.label_counter = 0
    # Callable name -> body instructions (no prologue/epilogue)
    self.callables: dict[str, list[Instruction]] = {}
    # Definition currently being compiled; None outside any definition
    self.scope: ASTnode | None = None
    # Instructions emitted so far for the body being compiled
    self.code: list[Instruction] = []
    # The built-ins are stored as Instruction lists like any other callable,
    # so the old hand-written assembly strings are no longer assigned here.
    self.add_builtin_functions()
def get_label(self) -> str:
self.label_counter += 1
return f'.L{self.label_counter - 1}'
def insert_label(self, label: str) -> None:
    """Mark the current position in the code being generated with `label`.

    The label is recorded as an Instruction with the pseudo-opcode
    '<label>' and the label name as its only operand.
    """
    # `self.code` is a list of Instruction objects. The former
    # `self.code += f'{label}:\n'` would extend that list with one
    # single-character string per character, so only the marker is added.
    self.code.append(Instruction('<label>', [label]))
def add_string_literal(self, value: str) -> str:
for index, string in enumerate(self.string_literals):
@ -368,65 +324,93 @@ class CompileData:
return f'(.globals + {offset})'
assert False
def add_builtin_functions(self) -> None:
    """Register the bodies of the built-in helper routines in `self.callables`.

    Only the body instructions are stored here; no frame setup or return
    sequence is included in these lists.
    """
    def assemble(*rows: tuple[str, ...]) -> list[Instruction]:
        # Each row is (opcode, operand, operand, ...)
        return [Instruction(opcode, list(operands)) for opcode, *operands in rows]

    buffer_size = f'${self.date_buffer_size}'

    # Calls time() with a zero argument and rounds the result down to a
    # multiple of 86400.
    self.callables['__builtin_today'] = assemble(
        ('xorq', '%rdi', '%rdi'),
        ('call', 'time'),
        ('movq', '%rax', '%rdi'),
        ('movq', '$86400', '%rcx'),
        ('xorq', '%rdx', '%rdx'),
        ('divq', '%rcx'),
        ('movq', '%rdi', '%rax'),
        ('subq', '%rdx', '%rax'),
    )

    # Formats the timestamp in %rdi with .date_format into .date_buffer
    # and prints the buffer through .str_format.
    self.callables['__builtin_print_date'] = assemble(
        ('subq', '$16', '%rsp'),
        ('movq', '%rdi', '0(%rsp)'),
        ('leaq', '0(%rsp)', '%rdi'),
        ('call', 'localtime'),
        ('movq', '$.date_buffer', '%rdi'),
        ('movq', buffer_size, '%rsi'),
        ('movq', '$.date_format', '%rdx'),
        ('movq', '%rax', '%rcx'),
        ('call', 'strftime'),
        ('movq', '$.str_format', '%rdi'),
        ('movq', '$.date_buffer', '%rsi'),
        ('call', 'printf'),
    )

    # Formats the timestamp in %rdi with the format string passed in %rsi
    # and converts the formatted text to an integer with atoi.
    self.callables['__builtin_get_day_attr'] = assemble(
        ('subq', '$16', '%rsp'),
        ('movq', '%rdi', '0(%rsp)'),
        ('movq', '%rsi', '8(%rsp)'),
        ('leaq', '0(%rsp)', '%rdi'),
        ('call', 'localtime'),
        ('movq', '$.date_buffer', '%rdi'),
        ('movq', buffer_size, '%rsi'),
        ('movq', '8(%rsp)', '%rdx'),
        ('movq', '%rax', '%rcx'),
        ('call', 'strftime'),
        ('movq', '$.date_buffer', '%rdi'),
        ('call', 'atoi'),
    )
def get_full_code(self) -> str:
# Data section with string literals
prefix = '.section .data\n'
prefix += '.int_format: .asciz "%lld"\n'
prefix += '.str_format: .asciz "%s"\n'
prefix += '.date_format: .asciz "%Y-%m-%d"\n'
prefix += '.day_format: .asciz "%d"\n'
prefix += '.month_format: .asciz "%m"\n'
prefix += '.year_format: .asciz "%Y"\n'
prefix += '.weekday_format: .asciz "%u"\n'
prefix += '.weeknum_format: .asciz "%W"\n'
code_str = '.section .data\n'
code_str += '.int_format: .asciz "%lld"\n'
code_str += '.str_format: .asciz "%s"\n'
code_str += '.date_format: .asciz "%Y-%m-%d"\n'
code_str += '.day_format: .asciz "%d"\n'
code_str += '.month_format: .asciz "%m"\n'
code_str += '.year_format: .asciz "%Y"\n'
code_str += '.weekday_format: .asciz "%u"\n'
code_str += '.weeknum_format: .asciz "%W"\n'
for index, string in enumerate(self.string_literals):
prefix += f'S{index}: .asciz "{string}"\n'
prefix += '\n'
code_str += f'S{index}: .asciz "{string}"\n'
code_str += '\n'
# BSS section for uninitialized data
prefix += f'.section .bss\n'
prefix += f'.date_buffer:\n'
prefix += f' .skip {self.date_buffer_size}\n'
code_str += f'.section .bss\n'
code_str += f'.date_buffer:\n'
code_str += f' .skip {self.date_buffer_size}\n'
if len(self.sem_data.global_symbol_table) != 0:
prefix += '.globals:\n'
prefix += f' .skip {len(sem_data.global_symbol_table) * 8}\n'
prefix += '\n'
code_str += '.globals:\n'
code_str += f' .skip {len(sem_data.global_symbol_table) * 8}\n'
code_str += '\n'
# Text section with code
prefix += '.section .text\n'
prefix += '\n'
code_str += '.section .text\n'
code_str += '\n'
# Add function and procedure definitions
for name, code in self.callables.items():
prefix += name + ':\n'
prefix += code
prefix += '\n'
if name == 'main':
code_str += '.global main\n'
code_str += name + ':\n'
code_str += ' pushq %rbp\n'
code_str += ' movq %rsp, %rbp\n'
for instruction in code:
if instruction.opcode == '<label>':
code_str += f'{instruction.operands[0]}:\n'
else:
code_str += f' {instruction}\n'
code_str += ' leave\n'
code_str += ' ret\n'
code_str += '\n'
prefix += '.global main\n'
prefix += 'main:\n'
prefix += ' pushq %rbp\n'
prefix += ' movq %rsp, %rbp\n'
postfix = ' xorq %rax, %rax\n'
postfix += ' leave\n'
postfix += ' ret\n'
return prefix + self.code + postfix
def compile_print_literal(print_type: str, compile_data: CompileData) -> None:
    """Emit the instructions that print the value currently held in %rax.

    print_type:   'int', 'string' or 'date'; selects how the value is printed.
    compile_data: compilation state whose `code` list receives the instructions.

    Raises AssertionError for any other print_type.
    """
    if print_type in ('int', 'string'):
        format_label = '$.int_format' if print_type == 'int' else '$.str_format'
        emitted = [
            Instruction('movq', [format_label, '%rdi']),
            Instruction('movq', ['%rax', '%rsi']),
            Instruction('call', ['printf']),
        ]
    elif print_type == 'date':
        emitted = [
            Instruction('movq', ['%rax', '%rdi']),
            Instruction('call', ['__builtin_print_date']),
        ]
    else:
        # Raised explicitly so the check is not stripped under `python -O`
        raise AssertionError(f'unsupported print type {print_type!r}')
    # `compile_data.code` is a list of Instruction objects; adding raw
    # strings with `+=` would extend it one character at a time.
    compile_data.code.extend(emitted)
return code_str
def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
match node.nodetype:
@ -438,19 +422,18 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
assert len(compile_data.code) == 0
assert compile_data.scope is None
compile_data.scope = definition
# initialize stack frame
compile_data.code += ' pushq %rbp\n'
compile_data.code += ' movq %rsp, %rbp\n'
# initialize local variables
stack_size = 8 * len(definition.children_variable_definitions)
if stack_size % 16 != 0:
stack_size += 8
if stack_size != 0:
compile_data.code += f' subq ${stack_size}, %rsp\n'
compile_data.code.append(Instruction('subq', [f'${stack_size}', '%rsp']))
for variable in definition.children_variable_definitions:
address = compile_data.symbol_address(variable.value)
compile_ast(variable.child_expression, compile_data)
compile_data.code += f' movq %rax, {address}\n'
compile_data.code.append(Instruction('movq', ['%rax', address]))
# compile statements
if definition.nodetype == 'function_definition':
compile_ast(definition.child_expression, compile_data)
@ -458,32 +441,38 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
for statement in definition.children_statements:
compile_ast(statement, compile_data)
else: assert False
# return from procedure
compile_data.code += f' leave\n'
compile_data.code += f' ret\n'
# Add function/procedure to callables
compile_data.callables[definition.value] = compile_data.code
compile_data.code = ''
compile_data.code = []
compile_data.scope = None
# Initialize global variables
for index, (name, variable) in enumerate(compile_data.sem_data.global_symbol_table.items()):
address = compile_data.symbol_address(name)
compile_ast(variable, compile_data)
compile_data.code += f' movq %rax, {address}\n'
compile_data.code.append(Instruction('movq', ['%rax', address]))
# Compile program statements
for statement in node.children_statements:
compile_ast(statement, compile_data)
compile_data.code.append(Instruction('xorq', ['%rax', '%rax']))
# Add main function
compile_data.callables['main'] = compile_data.code
compile_data.code = []
case 'variable_definition' | 'function_definition' | 'procedure_definition':
assert False
case 'identifier':
address = compile_data.symbol_address(node.value)
compile_data.code += f' movq {address}, %rax\n'
compile_data.code.append(Instruction('movq', [address, '%rax']))
case 'assignment':
if node.child_lhs.nodetype == 'attribute_write':
print_todo('Attribute write', node)
elif node.child_lhs.nodetype == 'identifier':
address = compile_data.symbol_address(node.child_lhs.value)
compile_ast(node.child_rhs, compile_data)
compile_data.code += f' movq %rax, {address}\n'
compile_data.code.append(Instruction('movq', ['%rax', address]))
else: assert False
case 'binary_op':
assert node.value in ['+', '-', '*', '/', '<', '=']
@ -499,87 +488,87 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
assert node.child_rhs.type == 'int'
# calculate LHS and store it on stack
compile_data.code += f' subq $16, %rsp\n'
compile_ast(node.child_lhs, compile_data)
compile_data.code += f' movq %rax, 0(%rsp)\n'
compile_data.code.append(Instruction('subq', ['$16', '%rsp']))
compile_data.code.append(Instruction('movq', ['%rax', '0(%rsp)']))
# calculate RHS and store it in RCX
compile_ast(node.child_rhs, compile_data)
if node.child_lhs.type == 'date' and node.child_rhs.type == 'int':
# multiply RHS by number of seconds in a day so we can perform arithmetic on dates
compile_data.code += f' imulq $86400, %rax, %rcx\n'
compile_data.code.append(Instruction('imulq', ['$86400', '%rax', '%rcx']))
else:
compile_data.code += f' movq %rax, %rcx\n'
compile_data.code.append(Instruction('movq', ['%rax', '%rcx']))
# prepare registers, RAX contains LHS and RCX contains RHS
# and restore restore stack
compile_data.code += f' movq 0(%rsp), %rax\n'
compile_data.code += f' addq $16, %rsp\n'
compile_data.code.append(Instruction('movq', ['0(%rsp)', '%rax']))
compile_data.code.append(Instruction('addq', ['$16', '%rsp']))
# perform operation
if node.value == '+':
compile_data.code += f' addq %rcx, %rax\n'
compile_data.code.append(Instruction('addq', ['%rcx', '%rax']))
elif node.value == '-':
compile_data.code += f' subq %rcx, %rax\n'
compile_data.code.append(Instruction('subq', ['%rcx', '%rax']))
elif node.value == '*':
compile_data.code += f' imulq %rcx, %rax\n'
compile_data.code.append(Instruction('imulq', ['%rcx', '%rax']))
elif node.value == '/':
compile_data.code += f' cqo\n'
compile_data.code += f' idivq %rcx\n'
compile_data.code.append(Instruction('cqo'))
compile_data.code.append(Instruction('idivq', ['%rcx']))
elif node.value == '<':
compile_data.code += f' cmpq %rcx, %rax\n'
compile_data.code += f' setl %al\n'
compile_data.code += f' movzbq %al, %rax\n'
compile_data.code.append(Instruction('cmpq', ['%rcx', '%rax']))
compile_data.code.append(Instruction('setl', ['%al']))
compile_data.code.append(Instruction('movzbq', ['%al', '%rax']))
elif node.value == '=':
compile_data.code += f' cmpq %rcx, %rax\n'
compile_data.code += f' sete %al\n'
compile_data.code += f' movzbq %al, %rax\n'
compile_data.code.append(Instruction('cmpq', ['%rcx', '%rax']))
compile_data.code.append(Instruction('sete', ['%al']))
compile_data.code.append(Instruction('movzbq', ['%al', '%rax']))
else: assert False
# if both operands are dates, divide result by number of seconds in a day
if node.child_lhs.type == 'date' and node.child_rhs.type == 'date':
assert node.value == '-'
compile_data.code += f' movq $86400, %rcx\n'
compile_data.code += f' cqo\n'
compile_data.code += f' idivq %rcx\n'
compile_data.code.append(Instruction('movq', ['$86400', '%rcx']))
compile_data.code.append(Instruction('cqo'))
compile_data.code.append(Instruction('idivq', ['%rcx']))
case 'function_call' | 'procedure_call':
if node.value == 'Today':
compile_data.code += f' call __builtin_today\n'
compile_data.code.append(Instruction('call', ['__builtin_today']))
else:
# align stack
stack_needed = len(node.children_arguments) * 8
if stack_needed % 16 != 0:
stack_needed += 8
if stack_needed != 0:
compile_data.code += f' subq ${stack_needed}, %rsp\n'
compile_data.code.append(Instruction('subq', [f'${stack_needed}', '%rsp']))
# push arguments to the stack
offset = 0
for argument in node.children_arguments:
compile_ast(argument, compile_data)
compile_data.code += f' movq %rax, {offset}(%rsp)\n'
compile_data.code.append(Instruction('movq', ['%rax', f'{offset}(%rsp)']))
offset += 8
# call function and restore stack
compile_data.code += f' call {node.value}\n'
compile_data.code.append(Instruction('call', [node.value]))
if stack_needed != 0:
compile_data.code += f' addq ${stack_needed}, %rsp\n'
compile_data.code.append(Instruction('addq', [f'${stack_needed}', '%rsp']))
case 'return':
compile_ast(node.child_expression, compile_data)
compile_data.code += f' leave\n'
compile_data.code += f' ret\n'
compile_data.code.append(Instruction('leave'))
compile_data.code.append(Instruction('ret'))
case 'int_literal':
compile_data.code += f' movq ${node.value}, %rax\n'
compile_data.code.append(Instruction('movq', [f'${node.value}', '%rax']))
case 'string_literal':
label = compile_data.add_string_literal(node.value)
compile_data.code += f' movq ${label}, %rax\n'
compile_data.code.append(Instruction('movq', [f'${label}', '%rax']))
case 'date_literal':
compile_data.code += f' movq ${timegm(node.value.timetuple())}, %rax\n'
compile_data.code.append(Instruction('movq', [f'${timegm(node.value.timetuple())}', '%rax']))
case 'attribute_read':
compile_ast(node.child_identifier, compile_data)
compile_data.code += f' movq %rax, %rdi\n'
compile_data.code += f' movq $.{node.child_attribute.value}_format, %rsi\n'
compile_data.code += f' call __builtin_get_day_attr\n'
compile_data.code.append(Instruction('movq', ['%rax', '%rdi']))
compile_data.code.append(Instruction('movq', [f'$.{node.child_attribute.value}_format', '%rsi']))
compile_data.code.append(Instruction('call', ['__builtin_get_day_attr']))
case 'do_until':
label_loop = compile_data.get_label()
compile_data.insert_label(label_loop)
@ -590,16 +579,16 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
# compile condition
compile_ast(node.child_condition, compile_data)
compile_data.code += f' testq %rax, %rax\n'
compile_data.code += f' jz {label_loop}\n'
compile_data.code.append(Instruction('testq', ['%rax', '%rax']))
compile_data.code.append(Instruction('jz', [label_loop]))
case 'do_unless' | 'unless_expression':
label_true = compile_data.get_label()
label_done = compile_data.get_label()
# compile condition
compile_ast(node.child_condition, compile_data)
compile_data.code += f' testq %rax, %rax\n'
compile_data.code += f' jnz {label_true}\n'
compile_data.code.append(Instruction('testq', ['%rax', '%rax']))
compile_data.code.append(Instruction('jnz', [label_true]))
# compile false statements
if node.nodetype == 'unless_expression':
@ -608,7 +597,7 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
for statement in node.children_statements_false:
compile_ast(statement, compile_data)
else: assert False
compile_data.code += f' jmp {label_done}\n'
compile_data.code.append(Instruction('jmp', [label_done]))
# compile true statements
compile_data.insert_label(label_true)
@ -625,16 +614,30 @@ def compile_ast(node: ASTnode, compile_data: CompileData) -> None:
for i, item in enumerate(node.children_items):
assert item.type in ['int', 'string', 'date']
compile_ast(item, compile_data)
compile_print_literal(item.type, compile_data)
match item.type:
case 'int':
compile_data.code.append(Instruction('movq', ['$.int_format', '%rdi']))
compile_data.code.append(Instruction('movq', ['%rax', '%rsi']))
compile_data.code.append(Instruction('call', ['printf']))
case 'string':
compile_data.code.append(Instruction('movq', ['$.str_format', '%rdi']))
compile_data.code.append(Instruction('movq', ['%rax', '%rsi']))
compile_data.code.append(Instruction('call', ['printf']))
case 'date':
compile_data.code.append(Instruction('movq', ['%rax', '%rdi']))
compile_data.code.append(Instruction('call', ['__builtin_print_date']))
case _:
assert False
# Print space if there are more items
if i < len(node.children_items) - 1:
compile_data.code += f' movl $\' \', %edi\n'
compile_data.code += f' call putchar\n'
compile_data.code.append(Instruction('movl', ["$' '", '%edi']))
compile_data.code.append(Instruction('call', ['putchar']))
# Print newline
compile_data.code += f' movl $\'\\n\', %edi\n'
compile_data.code += f' call putchar\n'
compile_data.code.append(Instruction('movl', ["$'\\n'", '%edi']))
compile_data.code.append(Instruction('call', ['putchar']))
case _:
print_todo(f'Compile type \'{node.nodetype}\'', node)