Implement phase 02 syntax analysis
This commit is contained in:
parent
d4708ae322
commit
edadabc7ba
|
@ -0,0 +1,128 @@
|
|||
#!/bin/env python3
|
||||
|
||||
import argparse
|
||||
import datetime
|
||||
import ply.lex as lex
|
||||
|
||||
# Keywords of the language, keyed by their source spelling.  Each keyword's
# token type is simply its upper-cased spelling ('var' -> 'VAR', ...).
# t_IDENT consults this table to tell keywords apart from plain identifiers.
reserved = {
    word: word.upper()
    for word in (
        'var',
        'is',
        'unless',
        'otherwise',
        'until',
        'do',
        'done',
        'procedure',
        'function',
        'return',
        'print',
        'end',
    )
}
|
||||
|
||||
# Complete list of token type names exported to PLY: punctuation and
# operators, literals, the three identifier classes, and finally one token
# type per reserved word.
tokens = [
    # brackets
    'LPAREN', 'RPAREN',
    'LSQUARE', 'RSQUARE',
    'LCURLY', 'RCURLY',
    # punctuation
    'APOSTROPHE', 'AMPERSAND', 'COMMA', 'DOT',
    # comparison and arithmetic operators
    'EQ', 'LT',
    'PLUS', 'MINUS', 'MULT', 'DIV',
    # literals
    'STRING', 'DATE_LITERAL', 'INT_LITERAL',
    # identifiers
    'IDENT', 'FUNC_IDENT', 'PROC_IDENT',
    # keywords
    *reserved.values(),
]
|
||||
|
||||
# Whitespace rule: matches a run of spaces, tabs and newlines.  Nothing is
# returned, so no token is emitted; the lexer's line counter is advanced by
# the number of newlines in the run.
def t_whitespace(t):
    r'[ \t\n]+'
    newline_count = t.value.count('\n')
    t.lexer.lineno += newline_count
|
||||
|
||||
# Comment rule: a comment opens with "(%" and runs, possibly across several
# lines, to the FIRST following "%)".  The body is matched non-greedily; a
# greedy match would extend to the last "%)" in the input and silently
# discard all code lying between two separate comments.
# Nothing is returned, so no token is emitted; the lexer's line counter is
# advanced by the number of newlines inside the comment.
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    t.lexer.lineno += t.value.count('\n')
|
||||
|
||||
# Single-character tokens, given to PLY as plain regex strings.

# Brackets
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'

# Punctuation
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'

# Comparison operators
t_EQ = r'='
t_LT = r'<'

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
|
||||
|
||||
# String literal: a double-quoted run of characters on a single line (the
# shortest such run).  The token value is the text without the two quotes.
def t_STRING(t):
    r'".*?"'
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t
|
||||
|
||||
# Date literal in the form YYYY-MM-DD.  The token value becomes a
# datetime.date object.
# The regex only checks the shape, so text such as 2023-13-45 still matches;
# such impossible dates are reported as a lexical error and terminate the
# program instead of escaping as an unhandled ValueError traceback.
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        print(f'Illegal date literal \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit
    return t
|
||||
|
||||
# Integer literal: an optional minus sign, one to three digits, then any
# number of apostrophe-separated groups of exactly three digits
# (e.g. 1'000'000).  The token value is the corresponding int.
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    digit_groups = t.value.split('\'')
    t.value = int(''.join(digit_groups))
    return t
|
||||
|
||||
# Identifier or keyword: a lowercase letter followed by at least one letter,
# digit or underscore.  Lexemes listed in `reserved` get their keyword token
# type; everything else is an IDENT.
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    if t.value in reserved:
        t.type = reserved[t.value]
    else:
        t.type = 'IDENT'
    return t
|
||||
|
||||
# Function identifier: one uppercase letter followed by at least one
# lowercase letter, digit or underscore.  The token is passed on unchanged.
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    return t
|
||||
|
||||
# Procedure identifier: two uppercase letters followed by any number of
# uppercase letters, digits or underscores.  The token is passed on unchanged.
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    return t
|
||||
|
||||
# Lexer error hook: report the first character that no rule could match,
# together with the current line number, and terminate the program.
def t_error(t):
    offending = t.value[0]
    line = t.lexer.lineno
    print(f"Illegal character '{offending}' at line {line}")
    raise SystemExit
|
||||
|
||||
# Module-level lexer object created by PLY; used by tokenize_file below.
lexer = lex.lex()
|
||||
|
||||
def tokenize_file(file_path: str):
    """Lex the file at *file_path* and print each token on its own line.

    The file is read as UTF-8 and its whole content is handed to the
    module-level ``lexer``.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        lexer.input(source.read())

    # lexer.token() yields None once the input is exhausted.
    for tok in iter(lexer.token, None):
        print(tok)
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: exactly one of --who / --file is required.
    parser = argparse.ArgumentParser()
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if not args.who:
        tokenize_file(args.file)
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)
|
|
@ -0,0 +1,186 @@
|
|||
#!/bin/env python3
|
||||
|
||||
import argparse
|
||||
import ply.lex as lex
|
||||
import ply.yacc as yacc
|
||||
import lexer
|
||||
|
||||
# Simple debugging function to call from syntax rules
|
||||
# Number of reductions traced so far; debug_print uses it to give every
# reduced symbol a unique sequence number.
symbolnum = 0


def debug_print(p):
    """Number the production being reduced and print a one-line trace of it.

    The value of the production's left-hand side (``p[0]``) is set to a
    running sequence number.  The printed line lists every entry of
    ``p.slice``: lexer tokens as ``TYPE<value>``, other symbols as
    ``symbol(value)`` and missing entries as ``??``.  The first entry (the
    left-hand side) is followed by ``::``.

    Args:
        p: the production object handed to a grammar rule function.
    """
    global symbolnum
    symbolnum += 1
    p[0] = symbolnum

    parts = []
    for i, s in enumerate(p.slice):
        if s is None:
            parts.append('?? ')
        elif isinstance(s, lex.LexToken):
            parts.append(str(s.type) + '<' + str(s.value) + '> ')
        else:
            parts.append(str(s) + '(' + str(p[i]) + ') ')
        if i == 0:
            # Separate the left-hand side from the right-hand side symbols.
            parts.append(':: ')
    print(''.join(parts))
|
||||
|
||||
# Token names for the grammar below, taken from the lexer module so the
# lexer and the parser share one list.
tokens = lexer.tokens
|
||||
|
||||
# A program is any number of definitions followed by a statement list.
def p_program(p):
    """program : statement_list
               | definitions program"""
    debug_print(p)
|
||||
|
||||
# One or more statements separated by commas.
def p_statement_list(p):
    """statement_list : statement
                      | statement COMMA statement_list"""
    debug_print(p)
|
||||
|
||||
# A single top-level definition: a function, a procedure or a variable.
def p_definitions(p):
    """definitions : function_definition
                   | procedure_definition
                   | variable_definition"""
    debug_print(p)
|
||||
|
||||
# Variable definition: "var" name "=" initial-value expression.
def p_variable_definition(p):
    """variable_definition : VAR IDENT EQ expression"""
    debug_print(p)
|
||||
|
||||
# The empty production, used by the optional parts of the grammar.
def p_empty(p):
    """empty :"""
    debug_print(p)
|
||||
|
||||
# Optional formal parameter list: either nothing or `formals`.
def p_opt_formals(p):
    """opt_formals : empty
                   | formals"""
    debug_print(p)
|
||||
|
||||
# Zero or more variable definitions in a row.
def p_opt_variable_defitions(p):
    """opt_variable_definitions : empty
                                | variable_definition opt_variable_definitions"""
    debug_print(p)
|
||||
|
||||
# Function definition: header with name and formals in curly brackets, a
# "return" IDENT clause, optional local variable definitions, then
# "is" <rvalue> "end function".
# The rule must stay on one line: PLY treats a docstring line that does not
# start with "|" as the beginning of a new rule.
def p_function_definition(p):
    """function_definition : FUNCTION FUNC_IDENT LCURLY opt_formals RCURLY RETURN IDENT opt_variable_definitions IS rvalue END FUNCTION"""
    debug_print(p)
|
||||
|
||||
# Procedure definition, without or with a "return" IDENT clause.  The body
# is a statement list enclosed by "is" ... "end procedure".
def p_procedure_definition(p):
    """procedure_definition : PROCEDURE PROC_IDENT LCURLY opt_formals RCURLY opt_variable_definitions IS statement_list END PROCEDURE
                            | PROCEDURE PROC_IDENT LCURLY opt_formals RCURLY RETURN IDENT opt_variable_definitions IS statement_list END PROCEDURE"""
    debug_print(p)
|
||||
|
||||
# One or more formal arguments separated by commas.
def p_formals(p):
    """formals : formal_arg
               | formal_arg COMMA formals"""
    debug_print(p)
|
||||
|
||||
# A formal argument: an identifier followed by a second identifier in
# square brackets.
def p_formal_arg(p):
    """formal_arg : IDENT LSQUARE IDENT RSQUARE"""
    debug_print(p)
|
||||
|
||||
# Procedure call: procedure name with a parenthesised, possibly empty,
# argument list.
def p_procedure_call(p):
    """procedure_call : PROC_IDENT LPAREN RPAREN
                      | PROC_IDENT LPAREN arguments RPAREN"""
    debug_print(p)
|
||||
|
||||
# One or more call arguments (expressions) separated by commas.
def p_arguments(p):
    """arguments : expression
                 | expression COMMA arguments"""
    debug_print(p)
|
||||
|
||||
# Assignment: target "=" value.
def p_assignment(p):
    """assignment : lvalue EQ rvalue"""
    debug_print(p)
|
||||
|
||||
# Assignment target: a plain identifier or two identifiers joined by a dot.
def p_lvalue(p):
    """lvalue : IDENT
              | IDENT DOT IDENT"""
    debug_print(p)
|
||||
|
||||
# Assigned or returned value: an ordinary expression or an unless-expression.
def p_rvalue(p):
    """rvalue : expression
              | unless_expression"""
    debug_print(p)
|
||||
|
||||
# Print statement: "print" followed by one or more items joined with "&".
def p_print_statement(p):
    """print_statement : PRINT print_item
                       | print_statement AMPERSAND print_item"""
    debug_print(p)
|
||||
|
||||
# A printable item: a string literal or an expression.
def p_print_item(p):
    """print_item : STRING
                  | expression"""
    debug_print(p)
|
||||
|
||||
# Statement forms: procedure call, assignment, print statement, the
# "do ... until" form, the "do ... unless ... done" form with or without an
# "otherwise" branch, and "return".
def p_statement(p):
    """statement : procedure_call
                 | assignment
                 | print_statement
                 | DO statement_list UNTIL expression
                 | DO statement_list UNLESS expression DONE
                 | DO statement_list UNLESS expression OTHERWISE statement_list DONE
                 | RETURN expression"""
    debug_print(p)
|
||||
|
||||
# Comparison level: simple expressions chained left-recursively with "=" or "<".
def p_expression(p):
    """expression : simple_expr
                  | expression EQ simple_expr
                  | expression LT simple_expr"""
    debug_print(p)
|
||||
|
||||
# Arithmetic part of the expression grammar; four rules share this one
# function:
#   simple_expr - terms joined by "+" or "-"
#   term        - factors joined by "*" or "/"
#   factor      - an atom with an optional leading sign
#   atom        - identifier, IDENT'IDENT, literal, call or parenthesised
#                 expression
def p_simple_expr(p):
    """simple_expr : term
                   | simple_expr PLUS term
                   | simple_expr MINUS term
       term : factor
            | term MULT factor
            | term DIV factor
       factor : atom
              | MINUS atom
              | PLUS atom
       atom : IDENT
            | IDENT APOSTROPHE IDENT
            | INT_LITERAL
            | DATE_LITERAL
            | function_call
            | procedure_call
            | LPAREN expression RPAREN"""
    debug_print(p)
|
||||
|
||||
# Function call: function name with a parenthesised, possibly empty,
# argument list.
def p_function_call(p):
    """function_call : FUNC_IDENT LPAREN RPAREN
                     | FUNC_IDENT LPAREN arguments RPAREN"""
    debug_print(p)
|
||||
|
||||
# Unless-expression: "do" expr "unless" expr "otherwise" expr "done".
def p_unless_expression(p):
    """unless_expression : DO expression UNLESS expression OTHERWISE expression DONE"""
    debug_print(p)
|
||||
|
||||
# Parser error hook: report the syntax error and terminate the program.
# `p` is the offending token, or None when the error is at the end of input.
def p_error(p):
    if p is None:
        print('Syntax Error at the end of file')
    else:
        line, text = p.lineno, p.value
        print(f"{{{line}}}:Syntax Error (token:'{text}')")
    raise SystemExit
|
||||
|
||||
def syntax_check_file(file_path: str, debug: bool):
    """Parse the file at *file_path* and print 'syntax OK' afterwards.

    A parser is built with ``yacc.yacc()``, the file is read as UTF-8 and
    parsed using the lexer object from the ``lexer`` module.

    Args:
        file_path: path of the source file to check.
        debug: forwarded to ``parser.parse`` as its ``debug`` argument.
    """
    parser = yacc.yacc()
    with open(file_path, 'r', encoding='utf-8') as source:
        program_text = source.read()
    parser.parse(program_text, lexer=lexer.lexer, debug=debug)
    print('syntax OK')
|
||||
|
||||
if __name__ == '__main__':
    # Command-line entry point: optional --debug flag plus exactly one of
    # --who / --file.
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--debug', action='store_true', help='debug?')
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if not args.who:
        syntax_check_file(args.file, args.debug)
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)
|
Loading…
Reference in New Issue