63 lines
1.5 KiB
Python
63 lines
1.5 KiB
Python
from dataclasses import dataclass
|
|
from typing import Union
|
|
|
|
|
|
class LexError(Exception):
    """Raised by the lexer when the input contains a character it cannot tokenize."""
|
|
|
|
|
|
@dataclass
class Token:
    """One lexical token: a kind tag together with its payload."""

    # Category tag for the token, e.g. 'NUMBER', 'PLUS' or 'EOF'.
    kind: str
    # Payload carried by the token: a parsed int/float for numbers, the
    # source text for operators and parentheses, or None when there is none.
    value: Union[int, float, str, None]
|
|
|
|
|
|
# Token kind for every single-character operator / parenthesis the lexer knows.
_SINGLE_CHAR_KINDS = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}


def tokenize(src: str) -> list:
    """Split an arithmetic expression into a list of ``Token`` objects.

    Spaces and tabs are skipped. A run of decimal digits becomes a
    ``NUMBER`` token holding an ``int``; if the run contains a ``.``
    (``"1.5"``, ``"1."``, ``".5"``) the token holds a ``float`` instead.
    Each of ``+ - * / ( )`` becomes its own token whose value is the
    character itself. The returned list always ends with ``Token('EOF', None)``.

    Args:
        src: The expression text to scan.

    Returns:
        The tokens in source order, terminated by an ``EOF`` token.

    Raises:
        LexError: If ``src`` contains any other character (this includes
            newlines and digit-like characters such as ``'²'``); the
            message carries the character and its index in ``src``.
    """
    tokens = []
    end = len(src)
    i = 0
    while i < end:
        ch = src[i]

        if ch in ' \t':
            i += 1
            continue

        # Use isdecimal(), not isdigit(): isdigit() is also true for
        # characters like '²' that int()/float() refuse to parse, which would
        # surface as a bare ValueError instead of a LexError.
        starts_number = ch.isdecimal() or (
            ch == '.' and i + 1 < end and src[i + 1].isdecimal()
        )
        if starts_number:
            j = i
            while j < end and src[j].isdecimal():
                j += 1
            if j < end and src[j] == '.':
                # A '.' after the integer digits makes the literal a float;
                # consume the (possibly empty) fractional digits too.
                j += 1
                while j < end and src[j].isdecimal():
                    j += 1
                value = float(src[i:j])
            else:
                value = int(src[i:j])
            tokens.append(Token('NUMBER', value))
            i = j
            continue

        kind = _SINGLE_CHAR_KINDS.get(ch)
        if kind is None:
            raise LexError(f"unexpected character {ch!r} at position {i}")
        tokens.append(Token(kind, ch))
        i += 1

    tokens.append(Token('EOF', None))
    return tokens
|