54 lines
1.4 KiB
Python
54 lines
1.4 KiB
Python
class LexError(Exception):
    """Raised when the input contains text that cannot be tokenized."""
|
|
|
|
|
|
class Token:
    """A lexical token: a ``kind`` tag paired with its ``value``.

    Two tokens are equal when both their ``kind`` and ``value`` are equal,
    and they hash consistently with that equality, so tokens can be stored
    in sets and used as dict keys. The attributes are plain and mutable;
    do not mutate a token while it is held in a hash-based container.
    """

    def __init__(self, kind: str, value):
        """Store the token's ``kind`` tag and its associated ``value``."""
        self.kind = kind
        self.value = value

    def __repr__(self):
        """Return ``KIND(value_repr)``, e.g. ``NUMBER(3)``."""
        return f"{self.kind}({self.value!r})"

    def __eq__(self, other):
        """Compare by ``kind`` and ``value``; defer to ``other`` for foreign types."""
        if not isinstance(other, Token):
            # NotImplemented lets Python try the reflected comparison and
            # finally fall back to identity, which still yields False.
            return NotImplemented
        return self.kind == other.kind and self.value == other.value

    def __hash__(self):
        """Hash on the same fields ``__eq__`` compares.

        Defining ``__eq__`` alone would implicitly set ``__hash__`` to
        ``None`` and make tokens unhashable.
        """
        return hash((self.kind, self.value))
|
|
|
|
|
|
# Maps each one-character operator or parenthesis to its token kind.
_SINGLE = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}
|
|
|
|
|
|
def tokenize(src: str) -> list:
    """Split an arithmetic expression into a list of ``Token`` objects.

    Recognised lexemes:

    * spaces and tabs, which are skipped (no other whitespace is accepted);
    * numbers: a run of decimal digits containing at most one ``.``. The
      token value is an ``int`` when there is no dot and a ``float`` when
      there is one, so both ``".5"`` and ``"5."`` are accepted. A second
      ``.`` ends the current number and begins the next lexeme;
    * the single-character operators and parentheses listed in ``_SINGLE``.

    Args:
        src: The expression text to scan.

    Returns:
        The tokens in source order, always terminated by
        ``Token('EOF', None)``.

    Raises:
        LexError: If a character cannot start any token, or a ``.`` has no
            digit on either side of it.
    """
    tokens = []
    end = len(src)
    i = 0
    while i < end:
        ch = src[i]

        if ch in ' \t':
            i += 1
            continue

        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as superscript two (U+00B2) that int()/float() cannot parse, which
        # would leak a ValueError to the caller instead of a LexError.
        if ch.isdecimal() or ch == '.':
            j = i
            has_dot = False
            while j < end:
                c = src[j]
                if c == '.':
                    if has_dot:
                        # Only one dot per number; stop before the second.
                        break
                    has_dot = True
                elif not c.isdecimal():
                    break
                j += 1
            num_str = src[i:j]
            if num_str == '.':
                # A lone dot is not a number.
                raise LexError(f"Invalid character '.' at position {i}")
            value = float(num_str) if has_dot else int(num_str)
            tokens.append(Token('NUMBER', value))
            i = j
            continue

        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue

        raise LexError(f"Invalid character {ch!r} at position {i}")

    tokens.append(Token('EOF', None))
    return tokens
|