59 lines
1.3 KiB
Python
59 lines
1.3 KiB
Python
"""Lexer for arithmetic expressions."""
|
|
|
|
from __future__ import annotations
|
|
import re
|
|
|
|
|
|
class LexError(Exception):
    """Raised when the lexer meets input it cannot turn into a token."""
|
|
|
|
|
|
class Token:
    """A lexical token: a ``kind`` tag paired with its ``value``.

    Two tokens are equal when both their ``kind`` and ``value`` are equal.
    The hash is derived from the same pair, so tokens can be stored in sets
    and used as dict keys (provided ``value`` itself is hashable). Do not
    mutate a token while it is held in a hashed container.
    """

    __slots__ = ("kind", "value")

    def __init__(self, kind: str, value) -> None:
        """Store the token's ``kind`` tag and its associated ``value``."""
        self.kind = kind
        self.value = value

    def __repr__(self) -> str:
        """Return a constructor-style representation, e.g. ``Token('PLUS', '+')``."""
        return f"Token({self.kind!r}, {self.value!r})"

    def __eq__(self, other: object):
        """Compare by ``kind`` and ``value``; defer to ``other`` for foreign types."""
        if not isinstance(other, Token):
            # Let Python try the reflected comparison; `==` still ends up
            # False when the other operand does not know about Token either.
            return NotImplemented
        return self.kind == other.kind and self.value == other.value

    def __hash__(self) -> int:
        """Hash on the same fields ``__eq__`` compares.

        Defining ``__eq__`` alone sets ``__hash__`` to ``None``; this restores
        hashability in a way that is consistent with equality.
        """
        return hash((self.kind, self.value))
|
|
|
|
|
|
# Numeric literal: digits with an optional fractional part ("12", "12.",
# "12.5"), or a fraction with no leading digits (".5").
_NUMBER_RE = re.compile(r"\d+\.?\d*|\.\d+")

# Single-character operators and parentheses, mapped to their token kinds.
_SINGLE = dict(
    (
        ("+", "PLUS"),
        ("-", "MINUS"),
        ("*", "STAR"),
        ("/", "SLASH"),
        ("(", "LPAREN"),
        (")", "RPAREN"),
    )
)
|
|
|
|
|
|
def tokenize(src: str) -> list[Token]:
    """Split an arithmetic expression into a list of tokens.

    Spaces and tabs are skipped. Each character found in ``_SINGLE`` becomes
    a token of the mapped kind whose value is the character itself. Text
    matched by ``_NUMBER_RE`` becomes a ``NUMBER`` token holding a ``float``
    when the literal contains a ``.`` and an ``int`` otherwise. The returned
    list always ends with an ``EOF`` token whose value is ``None``.

    Raises:
        LexError: if a character cannot start any token; the message names
            the character and its offset in ``src``.
    """
    result: list[Token] = []
    length = len(src)
    i = 0

    while i < length:
        ch = src[i]

        # Blanks separate tokens but produce none themselves.
        if ch == " " or ch == "\t":
            i += 1
            continue

        # Operators and parentheses are exactly one character long.
        kind = _SINGLE.get(ch)
        if kind is not None:
            result.append(Token(kind, ch))
            i += 1
            continue

        # Anything else has to be a numeric literal starting at this offset.
        match = _NUMBER_RE.match(src, i)
        if match is None:
            raise LexError(f"unexpected character {ch!r} at position {i}")

        text = match.group()
        if "." in text:
            number = float(text)
        else:
            number = int(text)
        result.append(Token("NUMBER", number))
        i = match.end()

    result.append(Token("EOF", None))
    return result
|