2024-03-04 01:54:42 +02:00
|
|
|
#!/bin/env python3
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import datetime
|
|
|
|
import ply.lex as lex
|
|
|
|
|
|
|
|
# Reserved words of the language, mapped to their token type names.
# Every token type is simply the keyword in upper case, so the mapping is
# derived from the keyword list (insertion order is kept).
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'var',
        'is',
        'unless',
        'otherwise',
        'until',
        'do',
        'done',
        'procedure',
        'function',
        'return',
        'print',
        'end',
    )
}
|
|
|
|
|
|
|
|
# Names of every token type the lexer can produce: the punctuation,
# operator, literal and identifier tokens listed here, followed by one
# token type per reserved word.
tokens = [
    # brackets
    'LPAREN', 'RPAREN',
    'LSQUARE', 'RSQUARE',
    'LCURLY', 'RCURLY',
    # punctuation
    'APOSTROPHE', 'AMPERSAND', 'COMMA', 'DOT',
    # comparison and arithmetic operators
    'EQ', 'LT',
    'PLUS', 'MINUS', 'MULT', 'DIV',
    # literals
    'STRING', 'DATE_LITERAL', 'INT_LITERAL',
    # identifiers
    'IDENT', 'FUNC_IDENT', 'PROC_IDENT',
    # keyword token types
    *reserved.values(),
]
|
|
|
|
|
|
|
|
# Whitespace rule. The raw string that opens the body is the token pattern
# (PLY reads it from the function's docstring slot), so it must stay first.
# Nothing is returned; the rule only keeps the line counter up to date.
def t_whitespace(t):
    r'[ \t\n]+'
    lx = t.lexer
    lx.lineno = lx.lineno + t.value.count('\n')
|
|
|
|
|
|
|
|
# Comment rule: text enclosed in "(%" ... "%)", possibly spanning lines.
# The leading raw string is the token pattern and must remain the first
# statement. Nothing is returned; only the line counter is advanced by the
# number of newlines inside the comment.
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    line_breaks = t.value.count('\n')
    t.lexer.lineno += line_breaks
|
|
|
|
|
|
|
|
# Patterns for the single-character tokens, given as plain regex strings.

# Brackets
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'

# Punctuation
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'

# Comparison operators
t_EQ = r'='
t_LT = r'<'

# Arithmetic operators
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
|
|
|
|
|
|
|
|
# String literal rule: the shortest run of characters between two double
# quotes. The leading raw string is the token pattern and must stay first.
# The token value is the text without its surrounding quote characters.
def t_STRING(t):
    r'".*?"'
    quoted = t.value
    t.value = quoted[1:len(quoted) - 1]
    return t
|
|
|
|
|
|
|
|
# Date literal rule: text shaped like YYYY-MM-DD. The leading raw string is
# the token pattern and must stay the first statement of the function.
# On success the token value becomes a datetime.date. If the digits do not
# form a real calendar date, a message is printed and SystemExit is raised.
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError:
        # Only an invalid date is reported here; a bare "except" would also
        # swallow KeyboardInterrupt and unrelated programming errors.
        # t.value is still the raw text because the assignment never ran.
        print(f'Invalid date \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit
    return t
|
|
|
|
|
|
|
|
# Integer literal rule: an optional minus sign, one to three digits, then
# any number of apostrophe-separated three-digit groups (e.g. 1'000'000).
# The leading raw string is the token pattern and must stay first.
# The token value becomes the int with the separators removed.
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    digit_groups = t.value.split('\'')
    t.value = int(''.join(digit_groups))
    return t
|
|
|
|
|
|
|
|
# Identifier rule: a lowercase letter followed by at least one letter,
# digit or underscore. The leading raw string is the token pattern and must
# stay first. Reserved words match this pattern too, so the token type is
# switched to the keyword's own type when the text is in `reserved`.
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    word = t.value
    t.type = reserved[word] if word in reserved else 'IDENT'
    return t
|
|
|
|
|
|
|
|
# Function identifier rule: one uppercase letter followed by at least one
# lowercase letter, digit or underscore. The raw string is the token
# pattern and must stay the first statement; the token is returned as is.
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    return t
|
|
|
|
|
|
|
|
# Procedure identifier rule: two uppercase letters followed by any number
# of uppercase letters, digits or underscores. The raw string is the token
# pattern and must stay the first statement; the token is returned as is.
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    return t
|
|
|
|
|
|
|
|
# Error handler for input that no rule matches: report the first offending
# character together with the current line number, then stop the program
# by raising SystemExit.
def t_error(t):
    message = f'Illegal character \'{t.value[0]}\' at line {t.lexer.lineno}'
    print(message)
    raise SystemExit()
|
|
|
|
|
|
|
|
# Module-level lexer instance; tokenize_file() feeds text into it and pulls
# tokens from it.
lexer = lex.lex()
|
|
|
|
|
|
|
|
def tokenize_file(file_path: str):
    """Tokenize the file at *file_path* and print every token.

    The file is read as UTF-8 text, handed to the module-level ``lexer``,
    and each token is printed on its own line until the lexer has no more
    tokens to give.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        source_text = file.read()

    lexer.input(source_text)

    # lexer.token() yields None once the input is exhausted.
    for tok in iter(lexer.token, None):
        print(tok)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: exactly one of --who / --file is required.
    parser = argparse.ArgumentParser()
    mode = parser.add_mutually_exclusive_group(required=True)
    mode.add_argument(
        '--who',
        action='store_true',
        help='print out student IDs and NAMEs of authors',
    )
    mode.add_argument('-f', '--file', help='filename to process')

    args = parser.parse_args()

    if not args.who:
        # --file was given: print the token stream of that file.
        tokenize_file(args.file)
    else:
        # --who was given: print the author information.
        for line in (
            'Author',
            ' Student ID: 150189237',
            ' Name: Oskari Alaranta',
        ):
            print(line)
|