# agent-orchestrator-benchmark/calculators/builder-solo/run-05/calc/lexer.py

"""Lexer for arithmetic expressions."""

from dataclasses import dataclass
from typing import Union


class LexError(Exception):
    """Raised by the lexer when the input contains text it cannot tokenize."""


@dataclass
class Token:
    """A single lexical token: a kind tag plus an optional payload."""

    # Token category, e.g. 'NUMBER', 'PLUS', 'LPAREN' or 'EOF'.
    kind: str
    # Payload: the parsed number for NUMBER tokens, the source character for
    # operator/parenthesis tokens, and None for EOF.
    value: Union[int, float, str, None]

    def __repr__(self):
        """Return a compact ``KIND(value)`` form for debugging output."""
        return f"{self.kind}({self.value!r})"


# Token kind for every single-character symbol the lexer recognizes.
_SINGLE = {
    # arithmetic operators
    '+': 'PLUS', '-': 'MINUS',
    '*': 'STAR', '/': 'SLASH',
    # parentheses
    '(': 'LPAREN', ')': 'RPAREN',
}

# End-of-input marker; tokenize() appends this same instance to every list.
EOF = Token(kind='EOF', value=None)


def tokenize(src: str) -> list:
    """Convert an arithmetic expression string into a list of tokens.

    Spaces, tabs, carriage returns and newlines are skipped. Every character
    found in ``_SINGLE`` becomes a token whose value is that character.
    A number is a run of decimal digits containing at most one '.'; it
    yields a ``NUMBER`` token holding an ``int``, or a ``float`` when a dot
    is present. The returned list always ends with the shared ``EOF`` token.

    Args:
        src: The expression text to scan.

    Returns:
        A list of ``Token`` objects terminated by ``EOF``.

    Raises:
        LexError: If a character cannot start any token (this includes a
            '.' that has no digit next to it).
    """
    tokens = []
    i = 0
    end = len(src)
    while i < end:
        ch = src[i]

        if ch in ' \t\r\n':
            i += 1
            continue

        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue

        # isdecimal() rather than isdigit(): isdigit() is also true for
        # characters such as superscripts ('²') that int()/float() cannot
        # parse, which would leak a ValueError instead of a LexError.
        if ch.isdecimal() or ch == '.':
            j = i
            has_dot = False
            # Consume digits and the first '.' only; a second '.' ends this
            # number and is scanned as the start of the next token.
            while j < end and (src[j].isdecimal() or (src[j] == '.' and not has_dot)):
                if src[j] == '.':
                    has_dot = True
                j += 1
            raw = src[i:j]
            if raw == '.':
                raise LexError(f"Invalid character '.' at position {i}")
            value = float(raw) if has_dot else int(raw)
            tokens.append(Token('NUMBER', value))
            i = j
            continue

        raise LexError(f"Invalid character {ch!r} at position {i}")

    tokens.append(EOF)
    return tokens