53 lines
1.0 KiB
Python
53 lines
1.0 KiB
Python
import re
|
|
from dataclasses import dataclass
|
|
from typing import Union
|
|
|
|
|
|
class LexError(Exception):
    """Signals that the lexer met input it cannot turn into a token."""
|
|
|
|
|
|
@dataclass
class Token:
    """One lexical token: a ``kind`` tag paired with an optional numeric value."""

    # Category name of the token, e.g. 'NUMBER', 'PLUS' or 'EOF'.
    kind: str
    # Numeric payload (int or float); None when the token carries no value.
    value: Union[int, float, None]

    def __repr__(self):
        """Return a compact ``KIND(value)`` rendering of the token."""
        return "{}({!r})".format(self.kind, self.value)
|
|
|
|
|
|
# Numeric literal: digits with an optional fractional part ("12", "12.", "12.5"),
# or a fraction written with a leading dot only (".5").
_NUMBER_RE = re.compile(r'\d+\.?\d*|\.\d+')

# Single-character lexemes and the token kind each one produces.
_SINGLE = {
    # arithmetic operators
    '+': 'PLUS', '-': 'MINUS',
    '*': 'STAR', '/': 'SLASH',
    # grouping
    '(': 'LPAREN', ')': 'RPAREN',
}
|
|
|
|
|
|
def tokenize(src: str) -> list:
    """Split an arithmetic expression string into a list of ``Token`` objects.

    Spaces and tabs are skipped. A numeric literal matched by ``_NUMBER_RE``
    becomes a ``NUMBER`` token whose value is a ``float`` when the literal
    contains a ``.`` and an ``int`` otherwise. A character found in
    ``_SINGLE`` becomes a token of the mapped kind with value ``None``.
    The returned list always ends with an ``EOF`` token.

    Raises:
        LexError: on any other character; the message includes the
            character and its index in ``src``.
    """
    out = []
    pos = 0
    end = len(src)

    while pos < end:
        char = src[pos]

        # Only space and tab count as skippable whitespace.
        if char == ' ' or char == '\t':
            pos += 1
            continue

        number = _NUMBER_RE.match(src, pos)
        if number is not None:
            text = number.group()
            if '.' in text:
                parsed = float(text)
            else:
                parsed = int(text)
            out.append(Token('NUMBER', parsed))
            # Resume scanning right after the literal.
            pos = number.end()
        else:
            kind = _SINGLE.get(char)
            if kind is None:
                raise LexError(f"unexpected character {char!r} at position {pos}")
            out.append(Token(kind, None))
            pos += 1

    out.append(Token('EOF', None))
    return out
|