# agent-orchestrator-benchmark/calculators/builder-solo/run-01/calc/lexer.py

from __future__ import annotations
from dataclasses import dataclass
from typing import Union


class LexError(Exception):
    """Raised by the lexer when the source text cannot be tokenized."""


@dataclass
class Token:
    """A single lexical token: a kind tag paired with an optional payload.

    The repr is the compact form ``KIND(value)``, e.g. ``NUMBER(3)``.
    """

    # Category tag for the token, such as 'NUMBER', 'PLUS' or 'EOF'.
    kind: str
    # Payload carried by the token: a number, a string, or None.
    value: Union[int, float, str, None]

    def __repr__(self) -> str:
        return "{}({!r})".format(self.kind, self.value)


# Lookup table from each single-character symbol to the kind of token it
# produces.
_SINGLE = {
    # arithmetic operators
    "+": "PLUS",
    "-": "MINUS",
    "*": "STAR",
    "/": "SLASH",
    # grouping
    "(": "LPAREN",
    ")": "RPAREN",
}


def tokenize(src: str) -> list[Token]:
    """Split calculator source text into a list of tokens.

    Spaces, tabs, carriage returns and newlines are skipped. Every
    character found in ``_SINGLE`` becomes a token of the mapped kind
    whose value is the character itself. A run of decimal digits
    containing at most one ``.`` becomes a ``NUMBER`` token whose value
    is a ``float`` when a dot is present and an ``int`` otherwise. The
    returned list always ends with an ``EOF`` token whose value is
    ``None``.

    Args:
        src: The text to tokenize.

    Returns:
        The tokens in source order, terminated by ``EOF``.

    Raises:
        LexError: If a character cannot start any token, if a ``.`` has
            no digit next to it, or if a numeric literal cannot be
            converted to a number.
    """
    tokens: list[Token] = []
    i = 0
    n = len(src)
    while i < n:
        ch = src[i]
        if ch in ' \t\r\n':
            i += 1
            continue
        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue
        # isdecimal() rather than isdigit(): isdigit() is also true for
        # characters such as superscript digits that int()/float() reject,
        # which would surface as a ValueError instead of a LexError.
        if ch.isdecimal() or ch == '.':
            j = i
            has_dot = False
            while j < n and (src[j].isdecimal() or (src[j] == '.' and not has_dot)):
                if src[j] == '.':
                    has_dot = True
                j += 1
            raw = src[i:j]
            if raw == '.':
                # A dot with no digit on either side is not a number.
                raise LexError(f"unexpected character {ch!r} at position {i}")
            try:
                value: Union[int, float] = float(raw) if has_dot else int(raw)
            except ValueError as err:
                # E.g. a literal longer than the interpreter's int digit limit.
                raise LexError(f"invalid number {raw!r} at position {i}") from err
            tokens.append(Token('NUMBER', value))
            i = j
            continue
        raise LexError(f"unexpected character {ch!r} at position {i}")
    tokens.append(Token('EOF', None))
    return tokens