140 lines
3.8 KiB
Python
140 lines
3.8 KiB
Python
"""Recursive-descent parser for arithmetic expressions.
|
||
|
||
Grammar (precedence low → high):
|
||
expr ::= term (('+' | '-') term)*
|
||
term ::= unary (('*' | '/') unary)*
|
||
unary ::= '-' unary | primary
|
||
primary ::= NUMBER | '(' expr ')'
|
||
|
||
AST node shapes:
|
||
Num(value) – leaf; value is int or float
|
||
BinOp(op, left, right) – op is '+', '-', '*', or '/'
|
||
Unary(op, operand) – op is '-'
|
||
"""
|
||
from __future__ import annotations
|
||
from dataclasses import dataclass
|
||
from typing import List, Union
|
||
|
||
from calc.lexer import Token
|
||
|
||
|
||
class ParseError(Exception):
    """Raised by the parser when the tokens do not form a valid expression."""
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# AST nodes
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@dataclass
class Num:
    """Leaf AST node holding a numeric literal."""

    # The literal's value; either an int or a float.
    value: Union[int, float]

    def __repr__(self) -> str:
        # Positional form (e.g. ``Num(3)``) rather than the keyword form the
        # dataclass decorator would generate on its own.
        return "Num({!r})".format(self.value)
|
||
|
||
|
||
@dataclass
class BinOp:
    """AST node for a binary operation applied to two sub-expressions."""

    # Operator symbol; the module docstring lists '+', '-', '*' and '/'.
    op: str
    # Left-hand operand.
    left: "Node"
    # Right-hand operand.
    right: "Node"

    def __repr__(self) -> str:
        # Positional form, e.g. ``BinOp('+', Num(1), Num(2))``.
        return "BinOp(%r, %r, %r)" % (self.op, self.left, self.right)
|
||
|
||
|
||
@dataclass
class Unary:
    """AST node for a prefix operator applied to a single sub-expression."""

    # Operator symbol; the module docstring lists only '-'.
    op: str
    # The expression the operator applies to.
    operand: "Node"

    def __repr__(self) -> str:
        # Positional form, e.g. ``Unary('-', Num(1))``.
        return "Unary(%r, %r)" % (self.op, self.operand)
|
||
|
||
|
||
# Any AST node: a numeric leaf, a binary operation, or a unary operation.
Node = Union[Num, BinOp, Unary]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Parser
|
||
# ---------------------------------------------------------------------------
|
||
|
||
class _Parser:
    """Recursive-descent parser over a list of lexer tokens.

    Each grammar rule has a matching method (``_expr``, ``_term``,
    ``_unary``, ``_primary``) that consumes tokens starting at the cursor
    and returns the AST node for what it consumed.  The binary-operator
    loops fold to the left, so ``1 - 2 - 3`` becomes
    ``BinOp('-', BinOp('-', 1, 2), 3)``.

    Tokens are read through their ``kind`` and ``value`` attributes.  The
    list is expected to end with a token whose ``kind`` is ``'EOF'``; if
    the cursor runs past the last token a ``ParseError`` is raised rather
    than an ``IndexError``.
    """

    def __init__(self, tokens: List[Token]) -> None:
        self._tokens = tokens
        self._pos = 0  # index of the next unconsumed token

    def _peek(self) -> Token:
        """Return the current token without consuming it.

        Raises:
            ParseError: if the cursor is past the last token, i.e. the
                token list was empty or not terminated by an EOF token.
        """
        try:
            return self._tokens[self._pos]
        except IndexError:
            # Report malformed token streams through the parser's own
            # exception type so callers only need to catch ParseError.
            raise ParseError(
                "unexpected end of token stream (missing EOF token)"
            ) from None

    def _consume(self, kind: str) -> Token:
        """Consume and return the current token, requiring it to be *kind*.

        Raises:
            ParseError: if the current token has a different kind, or the
                token stream is exhausted.
        """
        tok = self._peek()
        if tok.kind != kind:
            raise ParseError(
                f"expected {kind!r} but got {tok.kind!r} (value={tok.value!r})"
            )
        self._pos += 1
        return tok

    def _advance(self) -> Token:
        """Consume and return the current token, whatever its kind.

        Raises:
            ParseError: if the token stream is exhausted.
        """
        tok = self._peek()
        self._pos += 1
        return tok

    # expr ::= term (('+' | '-') term)*
    def _expr(self) -> Node:
        """Parse a left-associative chain of additions and subtractions."""
        node = self._term()
        while self._peek().kind in ('PLUS', 'MINUS'):
            op_tok = self._advance()
            right = self._term()
            # The token's value becomes the BinOp operator.
            node = BinOp(op_tok.value, node, right)
        return node

    # term ::= unary (('*' | '/') unary)*
    def _term(self) -> Node:
        """Parse a left-associative chain of multiplications and divisions."""
        node = self._unary()
        while self._peek().kind in ('STAR', 'SLASH'):
            op_tok = self._advance()
            right = self._unary()
            node = BinOp(op_tok.value, node, right)
        return node

    # unary ::= '-' unary | primary
    def _unary(self) -> Node:
        """Parse zero or more prefix minus signs followed by a primary."""
        if self._peek().kind == 'MINUS':
            op_tok = self._advance()
            # Recursing here lets stacked signs such as ``--1`` nest.
            return Unary(op_tok.value, self._unary())
        return self._primary()

    # primary ::= NUMBER | '(' expr ')'
    def _primary(self) -> Node:
        """Parse a number literal or a parenthesised sub-expression.

        Raises:
            ParseError: if the current token cannot start a primary, or a
                closing parenthesis is missing.
        """
        tok = self._peek()
        if tok.kind == 'NUMBER':
            self._advance()
            return Num(tok.value)
        if tok.kind == 'LPAREN':
            self._advance()
            node = self._expr()
            self._consume('RPAREN')
            # Parentheses only group; no AST node is created for them.
            return node
        if tok.kind == 'EOF':
            raise ParseError("unexpected end of input")
        raise ParseError(f"unexpected token {tok.kind!r} (value={tok.value!r})")

    def parse(self) -> Node:
        """Parse the whole token list as one expression and return its AST.

        Raises:
            ParseError: if the input is empty, is not a valid expression,
                has tokens left over after the expression, or the token
                list lacks its EOF terminator.
        """
        if self._peek().kind == 'EOF':
            raise ParseError("empty expression")
        node = self._expr()
        tok = self._peek()
        if tok.kind != 'EOF':
            # Leftover tokens, e.g. a second number or a stray ')'.
            raise ParseError(
                f"unexpected token after expression: {tok.kind!r} (value={tok.value!r})"
            )
        return node
|
||
|
||
|
||
def parse(tokens: List[Token]) -> Node:
    """Build an AST from the token list produced by calc.lexer.tokenize.

    Args:
        tokens: the tokens of a single arithmetic expression.

    Returns:
        The root node of the parsed expression.

    Raises:
        ParseError: if the tokens do not form exactly one valid expression.
    """
    parser = _Parser(tokens)
    return parser.parse()
|