46 lines
1.1 KiB
Python
46 lines
1.1 KiB
Python
from dataclasses import dataclass
|
|
from typing import List
|
|
|
|
|
|
class LexError(Exception):
    """Error raised by ``tokenize`` when the input cannot be lexed.

    The message names the offending text and the 0-based position in the
    source string where it starts.
    """
|
|
|
|
|
|
@dataclass
class Token:
    """One lexical unit produced by ``tokenize``."""

    # Category tag: 'NUMBER', 'EOF', or one of the kind names in _SINGLE.
    kind: str
    # Payload: an int or float for NUMBER, the operator/paren character for
    # single-character tokens, and None for EOF.
    value: object
|
|
|
|
|
|
# Maps each single-character operator or parenthesis to its token kind.
_SINGLE = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}
|
|
|
|
|
|
def tokenize(src: str) -> List[Token]:
    """Split an arithmetic expression into a list of tokens.

    Spaces, tabs, newlines and carriage returns are skipped. A maximal run
    of digits and dots becomes one ``NUMBER`` token whose value is a
    ``float`` when the run contains a dot and an ``int`` otherwise. Each
    character that is a key of ``_SINGLE`` becomes a token of the mapped
    kind, with the character itself as its value.

    Args:
        src: The text to tokenize.

    Returns:
        The tokens in source order, always terminated by
        ``Token('EOF', None)``.

    Raises:
        LexError: If a digit/dot run is not a valid number (for example
            ``"1.2.3"`` or a lone ``"."``), or if a character is neither
            skipped whitespace, part of a number, nor a key of ``_SINGLE``.
            The message includes the 0-based offset where the problem
            starts. For a bad number the underlying ``ValueError`` is
            attached as ``__cause__``.
    """
    tokens: List[Token] = []
    n = len(src)
    i = 0
    while i < n:
        ch = src[i]

        if ch in ' \t\n\r':
            i += 1
            continue

        if ch.isdigit() or ch == '.':
            # Consume the whole digit/dot run and let int()/float() decide
            # whether it is well-formed; src[i] is already known to match.
            j = i + 1
            while j < n and (src[j].isdigit() or src[j] == '.'):
                j += 1
            raw = src[i:j]
            try:
                value = float(raw) if '.' in raw else int(raw)
            except ValueError as err:
                # Chain explicitly so the traceback shows the conversion
                # failure as the direct cause of the lexing error.
                raise LexError(
                    f"invalid number literal {raw!r} at position {i}"
                ) from err
            tokens.append(Token('NUMBER', value))
            i = j
            continue

        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue

        raise LexError(f"unexpected character {ch!r} at position {i}")

    # Explicit end marker appended after all input has been consumed.
    tokens.append(Token('EOF', None))
    return tokens
|