Implement phase 1 lexer

This commit is contained in:
Bananymous 2024-02-14 02:32:21 +02:00
parent 91716ae12f
commit 9510c0d7a6
1 changed file with 113 additions and 0 deletions

113
01_lexer/main.py Normal file
View File

@ -0,0 +1,113 @@
#!/bin/env python3
import argparse
import datetime
import ply.lex as lex
# Token names required by ply.lex: every t_<NAME> rule defined below must
# have its name listed here.
tokens = tuple(
    'LPAREN RPAREN LSQUARE RSQUARE LCURLY RCURLY '
    'APOSTROPHE AMPERSAND COMMA DOT EQ LT '
    'PLUS MINUS MULT DIV '
    'STRING DATE_LITERAL INT_LITERAL '
    'IDENT FUNC_IDENT PROC_IDENT'.split()
)
def t_whitespace(t):
    r'[ \t\n]+'
    # The raw string above is the PLY token regex, not documentation.
    # Keep line numbering accurate across newlines; returning nothing
    # makes PLY discard the matched whitespace.
    t.lexer.lineno += t.value.count('\n')
def t_comment(t):
    # The raw string below is the PLY token regex: a comment is (% ... %),
    # possibly spanning multiple lines. The quantifier must be NON-greedy
    # ('*?') so the match stops at the FIRST closing '%)'; the original
    # greedy '*' swallowed everything between the first '(%' and the LAST
    # '%)' in the input, merging any code between two comments into one
    # comment token.
    r'\(%(.|\n)*?%\)'
    # Keep line numbering accurate; returning nothing discards the comment.
    t.lexer.lineno += t.value.count('\n')
# Punctuation and operator tokens as PLY string rules. Per the PLY docs,
# string rules are tried after all function rules, sorted by decreasing
# regex length, so their order in this list does not matter.
t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'
t_EQ = r'='
t_LT = r'<'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'
def t_STRING(t):
    r'".*?"'
    # Drop the surrounding double quotes. The non-greedy match stops at
    # the first '"', so the lexeme can never contain an interior quote and
    # strip() removes exactly one quote from each end.
    t.value = t.value.strip('"')
    return t
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    # Convert the matched YYYY-MM-DD lexeme into a datetime.date. An
    # impossible calendar date (e.g. month 13, day 32) raises ValueError,
    # exactly as date.fromisoformat would for the same lexeme.
    year, month, day = (int(part) for part in t.value.split('-'))
    t.value = datetime.date(year, month, day)
    return t
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    # Remove the apostrophe thousands separators, then parse the
    # (optionally negative) decimal integer.
    digits = t.value.replace("'", "")
    t.value = int(digits)
    return t
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    # Lower-case-initial identifier. NOTE(review): the '+' requires at
    # least two characters, so a single-letter name like 'x' is rejected
    # as an illegal character — confirm this matches the language spec.
    return t
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    # Function identifier: one upper-case letter followed by lower-case
    # letters/digits/underscores (minimum two characters). Tried before
    # t_PROC_IDENT (definition order), but the rules are disjoint because
    # the second character's case differs.
    return t
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    # Procedure identifier: at least two upper-case letters, then any mix
    # of upper-case letters/digits/underscores.
    return t
def t_error(t):
    # Report the first unrecognized character with its line number and
    # abort lexing with exit status 1.
    print(f'Illegal character \'{t.value[0]}\' at line {t.lexer.lineno}')
    # raise SystemExit directly instead of calling exit(): the exit()
    # builtin is installed by the `site` module and is not guaranteed to
    # exist (e.g. under `python -S` or in frozen applications).
    raise SystemExit(1)
def tokenize_file(file_path: str):
    """Lex the file at *file_path* and print every token to stdout."""
    lexer = lex.lex()
    with open(file_path, 'r', encoding='utf-8') as source:
        lexer.input(source.read())
    # lexer.token() yields None once the input is exhausted.
    while True:
        token = lexer.token()
        if token is None:
            break
        print(token)
if __name__ == '__main__':
    # Exactly one of --who / --file must be given.
    arg_parser = argparse.ArgumentParser()
    mode = arg_parser.add_mutually_exclusive_group(required=True)
    mode.add_argument('--who', action='store_true', help='print out student IDs and NAMEs of authors')
    mode.add_argument('-f', '--file', help='filename to process')
    cli = arg_parser.parse_args()
    if not cli.who:
        tokenize_file(cli.file)
    else:
        for line in ('Author', ' Student ID: 150189237', ' Name: Oskari Alaranta'):
            print(line)