59 lines
1.5 KiB
Python
59 lines
1.5 KiB
Python
class LexError(Exception):
    """Raised when the input text cannot be split into tokens."""
|
|
|
|
|
|
class Token:
    """A lexical token: a ``kind`` tag paired with its ``value``.

    Two tokens are equal when both their ``kind`` and ``value`` are equal.
    Tokens hash consistently with that equality, so they can be stored in
    sets and used as dictionary keys. Because the attributes are assignable,
    do not mutate a token while it is held in a hash-based container.
    """

    # Fixed attribute set: instances carry no per-instance __dict__.
    __slots__ = ('kind', 'value')

    def __init__(self, kind: str, value):
        """Store the token's kind tag and its associated value."""
        self.kind = kind
        self.value = value

    def __repr__(self):
        """Return a constructor-like representation, e.g. ``Token('PLUS', '+')``."""
        return f'Token({self.kind!r}, {self.value!r})'

    def __eq__(self, other):
        """Compare by ``kind`` and ``value``; defer for non-``Token`` operands."""
        if isinstance(other, Token):
            return self.kind == other.kind and self.value == other.value
        # Let Python try the reflected comparison before falling back.
        return NotImplemented

    def __hash__(self):
        """Hash over the same fields that ``__eq__`` compares."""
        # Defining __eq__ alone sets __hash__ to None (unhashable); restore it
        # so that equal tokens always produce equal hashes.
        return hash((self.kind, self.value))
|
|
|
|
|
|
_SINGLE_CHAR = {
|
|
'+': 'PLUS',
|
|
'-': 'MINUS',
|
|
'*': 'STAR',
|
|
'/': 'SLASH',
|
|
'(': 'LPAREN',
|
|
')': 'RPAREN',
|
|
}
|
|
|
|
|
|
def tokenize(src: str) -> list:
    """Split an arithmetic expression into a list of ``Token`` objects.

    Spaces and tabs are skipped. Each character found in ``_SINGLE_CHAR``
    becomes a token of the mapped kind whose value is the character itself.
    A run of digits containing at most one ``.`` becomes a ``NUMBER`` token
    whose value is a ``float`` when a dot is present and an ``int``
    otherwise. The returned list always ends with ``Token('EOF', None)``.

    Args:
        src: The text to tokenize.

    Returns:
        The tokens in source order, terminated by an ``EOF`` token.

    Raises:
        LexError: If a character is neither a space/tab, a key of
            ``_SINGLE_CHAR``, a digit nor ``.``, or if a numeric run cannot
            be converted to a number (for example a lone ``.``).
    """
    tokens = []
    i = 0
    n = len(src)
    while i < n:
        c = src[i]
        if c in ' \t':
            i += 1
        elif c in _SINGLE_CHAR:
            tokens.append(Token(_SINGLE_CHAR[c], c))
            i += 1
        elif c.isdigit() or c == '.':
            start = i
            has_dot = False
            # Consume digits and at most one dot. A second dot ends this
            # number and begins a new one on the next outer iteration, so
            # "1.2.3" yields the two numbers 1.2 and 0.3.
            while i < n and (src[i].isdigit() or (src[i] == '.' and not has_dot)):
                if src[i] == '.':
                    has_dot = True
                i += 1
            num_str = src[start:i]
            try:
                value = float(num_str) if has_dot else int(num_str)
            except ValueError as err:
                # Chain explicitly so the traceback shows the conversion
                # failure as the direct cause of the lexing error.
                raise LexError(f"Invalid number {num_str!r} at position {start}") from err
            tokens.append(Token('NUMBER', value))
        else:
            raise LexError(f"Unexpected character {c!r} at position {i}")
    # Sentinel marking the end of the token stream.
    tokens.append(Token('EOF', None))
    return tokens
|