# agent-orchestrator-benchmark/calculators/builder-adversary/run-01/calc/lexer.py

"""Lexer for arithmetic expressions."""

from dataclasses import dataclass
from typing import Union


class LexError(Exception):
    """Signal that the input contains a character the lexer cannot tokenize."""


@dataclass
class Token:
    """One lexical unit: a token kind paired with an optional payload."""

    # Token category; one of:
    # NUMBER PLUS MINUS STAR SLASH LPAREN RPAREN EOF
    kind: str
    # Payload carried with the token (a number, a string, or None).
    value: Union[int, float, str, None]


# Token kind for every single-character operator and parenthesis.
_SINGLE = {
    # arithmetic operators
    '+': 'PLUS', '-': 'MINUS', '*': 'STAR', '/': 'SLASH',
    # grouping
    '(': 'LPAREN', ')': 'RPAREN',
}


# Only ASCII digits may appear in a number.  str.isdigit() is also true for
# characters such as '²', which int() rejects with a ValueError instead of
# the LexError this module promises for unrecognised characters.
_ASCII_DIGITS = frozenset('0123456789')


def tokenize(src: str) -> list:
    """Return a list of Token for *src*, ending with EOF.

    Spaces, tabs, carriage returns and newlines are skipped.  A number is a
    run of ASCII digits containing at most one '.'; the dot may lead
    (".5") or trail ("5.").  A number with a dot becomes a float NUMBER
    token, otherwise an int NUMBER token.  Scanning of a number stops at a
    second dot, so "1.2.3" yields two NUMBER tokens (1.2 and 0.3).

    Operator and parenthesis tokens carry their source character as value;
    the final EOF token carries None.

    Raises:
        LexError: on any character that cannot start a token, including a
            lone '.' and non-ASCII digit characters.
    """
    tokens = []
    i = 0
    n = len(src)

    while i < n:
        ch = src[i]

        # Skip whitespace
        if ch in ' \t\r\n':
            i += 1
            continue

        # Number: integer or float (leading dot allowed, trailing dot allowed)
        if ch in _ASCII_DIGITS or ch == '.':
            j = i
            has_dot = False
            while j < n:
                nxt = src[j]
                if nxt == '.':
                    # A second dot ends this number; it starts the next token.
                    if has_dot:
                        break
                    has_dot = True
                elif nxt not in _ASCII_DIGITS:
                    break
                j += 1
            raw = src[i:j]
            if raw == '.':
                # A dot with no digits on either side is not a number.
                raise LexError(f"Unexpected character '.' at position {i}")
            value = float(raw) if has_dot else int(raw)
            tokens.append(Token('NUMBER', value))
            i = j
            continue

        # Single-character operators and parentheses
        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue

        # Anything else is an error
        raise LexError(f"Unexpected character {ch!r} at position {i}")

    tokens.append(Token('EOF', None))
    return tokens