#!/usr/bin/env python3
"""Lexer for a small programming language, built with PLY.

Run with ``-f FILE`` to print every token found in FILE, or with ``--who``
to print the author information.
"""
import argparse
import datetime

import ply.lex as lex

# Keywords of the language, mapped to their token types.  Any lowercase
# identifier whose text appears here is emitted as the keyword token instead
# of IDENT (see t_IDENT).
reserved = {
    'var': 'VAR',
    'is': 'IS',
    'unless': 'UNLESS',
    'otherwise': 'OTHERWISE',
    'until': 'UNTIL',
    'do': 'DO',
    'done': 'DONE',
    'procedure': 'PROCEDURE',
    'function': 'FUNCTION',
    'return': 'RETURN',
    'print': 'PRINT',
    'end': 'END',
}

# Every token type the lexer can produce (required by PLY).
tokens = [
    'LPAREN',
    'RPAREN',
    'LSQUARE',
    'RSQUARE',
    'LCURLY',
    'RCURLY',
    'APOSTROPHE',
    'AMPERSAND',
    'COMMA',
    'DOT',
    'EQ',
    'LT',
    'PLUS',
    'MINUS',
    'MULT',
    'DIV',
    'STRING',
    'DATE_LITERAL',
    'INT_LITERAL',
    'IDENT',
    'FUNC_IDENT',
    'PROC_IDENT',
] + list(reserved.values())

# NOTE: PLY reads the regular expression of a function rule from the
# function's docstring, so the raw string that opens each t_* function below
# IS the pattern.  The rules are therefore documented with comments, never
# with additional docstrings.  Function rules are tried in definition order
# and before the plain string rules, so their order below is significant.


# Whitespace produces no token; only the line counter is advanced.
def t_whitespace(t):
    r'[ \t\n]+'
    t.lexer.lineno += t.value.count('\n')


# Comments are written (% ... %) and may span several lines.  The body is
# matched non-greedily so that a comment ends at the FIRST '%)'; a greedy
# match would swallow all code between the first and the last comment.
def t_comment(t):
    r'\(%(.|\n)*?%\)'
    t.lexer.lineno += t.value.count('\n')


t_LPAREN = r'\('
t_RPAREN = r'\)'
t_LSQUARE = r'\['
t_RSQUARE = r'\]'
t_LCURLY = r'\{'
t_RCURLY = r'\}'
t_APOSTROPHE = r'\''
t_AMPERSAND = r'&'
t_COMMA = r','
t_DOT = r'\.'
t_EQ = r'='
t_LT = r'<'
t_PLUS = r'\+'
t_MINUS = r'-'
t_MULT = r'\*'
t_DIV = r'/'


# Double-quoted string on a single line; the token value is the text without
# the surrounding quotes.
def t_STRING(t):
    r'".*?"'
    t.value = t.value[1:-1]
    return t


# Date literal in YYYY-MM-DD form, converted to a datetime.date.  Defined
# before t_INT_LITERAL so the leading digits are not consumed as an integer.
def t_DATE_LITERAL(t):
    r'\d{4}-\d{2}-\d{2}'
    try:
        t.value = datetime.date.fromisoformat(t.value)
    except ValueError as err:
        # Right shape but not a real calendar date (e.g. 2023-02-30):
        # report it like any other lexing error instead of a raw traceback.
        print(f'Invalid date literal \'{t.value}\' at line {t.lexer.lineno}')
        raise SystemExit(1) from err
    return t


# Integer literal: optional minus sign, one to three digits, then any number
# of apostrophe-separated three-digit groups (e.g. 1'000'000).  The token
# value is the corresponding int.
def t_INT_LITERAL(t):
    r'-?\d{1,3}(\'\d{3})*'
    t.value = int(t.value.replace('\'', ''))
    return t


# Identifier: a lowercase letter followed by at least one letter, digit or
# underscore.  Reserved words are re-typed to their keyword token.
def t_IDENT(t):
    r'[a-z][a-zA-Z0-9_]+'
    t.type = reserved.get(t.value, 'IDENT')
    return t


# Function identifier: an uppercase letter followed by at least one lowercase
# letter, digit or underscore.
def t_FUNC_IDENT(t):
    r'[A-Z][a-z0-9_]+'
    return t


# Procedure identifier: two uppercase letters followed by any number of
# uppercase letters, digits or underscores.
def t_PROC_IDENT(t):
    r'[A-Z]{2}[A-Z0-9_]*'
    return t


# Called by PLY when no rule matches at the current position.  Lexing stops
# at the first illegal character; the process exits with a non-zero status
# so that callers and scripts can detect the failure.
def t_error(t):
    print(f'Illegal character \'{t.value[0]}\' at line {t.lexer.lineno}')
    raise SystemExit(1)


# Module-level lexer built from the rules above.
lexer = lex.lex()


def tokenize_file(file_path: str) -> None:
    """Print every token found in the file at *file_path*, one per line.

    Args:
        file_path: Path of the UTF-8 encoded source file to tokenize.

    Raises:
        OSError: If the file cannot be opened or read.
        SystemExit: If the input contains an illegal character or an
            invalid date literal.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        source = file.read()

    # lexer.input() does not reset the line counter, so reset it here to keep
    # reported line numbers correct if this function is called more than once.
    lexer.lineno = 1
    lexer.input(source)
    for tok in lexer:
        print(tok)


def main() -> None:
    """Parse the command line and either print author info or tokenize a file."""
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--who', action='store_true',
                       help='print out student IDs and NAMEs of authors')
    group.add_argument('-f', '--file', help='filename to process')
    args = parser.parse_args()

    if args.who:
        print('Author')
        print(' Student ID: 150189237')
        print(' Name: Oskari Alaranta')
    else:
        tokenize_file(args.file)


if __name__ == '__main__':
    main()