114 lines
2.8 KiB
Python
114 lines
2.8 KiB
Python
"""Recursive-descent parser for arithmetic expressions.
|
|
|
|
AST node shapes:
    Num(value)              — numeric literal; value is int or float
    BinOp(op, left, right)  — binary op; op in {'+', '-', '*', '/'}
    Unary(op, operand)      — unary op; op is '-'

Grammar (lowest to highest precedence):
    expr    → term (('+' | '-') term)*
    term    → unary (('*' | '/') unary)*
    unary   → '-' unary | primary
    primary → NUMBER | '(' expr ')'
|
|
"""
|
|
|
|
from dataclasses import dataclass
|
|
from typing import Union
|
|
|
|
|
|
class ParseError(Exception):
    """Signals that a token list does not form a valid expression."""
|
|
|
|
|
|
@dataclass
class Num:
    """Leaf AST node holding a numeric literal.

    The hand-written ``__repr__`` prints the field positionally
    (``Num(3)``) rather than in the keyword form a dataclass would
    generate by default.
    """

    # The literal's value, either an int or a float.
    value: Union[int, float]

    def __repr__(self) -> str:
        return f"Num({self.value!r})"
|
|
|
|
|
|
@dataclass
class BinOp:
    """AST node for a binary operation applied to two sub-expressions.

    The hand-written ``__repr__`` prints the fields positionally, e.g.
    ``BinOp('+', Num(1), Num(2))``.
    """

    # Operator symbol as a string (the parser emits '+', '-', '*' or '/').
    op: str
    # Left-hand operand.
    left: object
    # Right-hand operand.
    right: object

    def __repr__(self) -> str:
        return f"BinOp({self.op!r}, {self.left!r}, {self.right!r})"
|
|
|
|
|
|
@dataclass
class Unary:
    """AST node for a prefix operator applied to a single sub-expression.

    The hand-written ``__repr__`` prints the fields positionally, e.g.
    ``Unary('-', Num(1))``.
    """

    # Operator symbol as a string (the parser only emits '-').
    op: str
    # The expression the operator applies to.
    operand: object

    def __repr__(self) -> str:
        return f"Unary({self.op!r}, {self.operand!r})"
|
|
|
|
|
|
# Any AST node class; used as the return annotation of parse().
Node = Union[Num, BinOp, Unary]
|
|
|
|
|
|
def parse(tokens: list) -> Node:
    """Build an AST from a token list and return its root node.

    Args:
        tokens: Token objects exposing a ``kind`` attribute; the token
            following the parsed expression must have kind ``'EOF'``.

    Returns:
        The root node of the parsed expression.

    Raises:
        ParseError: If ``tokens`` is empty, or if anything other than
            an ``'EOF'`` token follows the expression.
    """
    if not tokens:
        raise ParseError("Empty token list")

    parser = _Parser(tokens)
    root = parser.expr()

    # Whatever follows a complete expression has to be the EOF marker.
    trailing = parser.current()
    if trailing.kind == 'EOF':
        return root
    raise ParseError(f"Unexpected token {trailing!r} after expression")
|
|
|
|
|
|
class _Parser:
    """Token cursor with one method per grammar rule.

    Tokens are inspected through their ``kind`` attribute; NUMBER tokens
    additionally supply ``value``.  Each rule method consumes the tokens
    it matches and returns the AST node built from them.
    """

    def __init__(self, tokens):
        self._tokens = tokens
        self._pos = 0  # index of the next unconsumed token

    def current(self):
        """Return the token under the cursor without consuming it.

        Raises:
            ParseError: If the cursor has moved past the last token,
                e.g. when the list lacks a terminating EOF token.
        """
        try:
            return self._tokens[self._pos]
        except IndexError:
            # Report a truncated token list as a parse failure rather
            # than leaking a bare IndexError to the caller.
            raise ParseError("Unexpected end of input") from None

    def consume(self, kind=None):
        """Advance past the current token and return it.

        Args:
            kind: When given, the current token's ``kind`` must equal it.

        Raises:
            ParseError: If ``kind`` is given and does not match, or if
                there is no current token.
        """
        tok = self.current()
        if kind is not None and tok.kind != kind:
            raise ParseError(f"Expected {kind!r}, got {tok!r}")
        self._pos += 1
        return tok

    def expr(self):
        """Parse ``term (('+' | '-') term)*`` into left-nested BinOp nodes."""
        node = self.term()
        while self.current().kind in ('PLUS', 'MINUS'):
            op = '+' if self.consume().kind == 'PLUS' else '-'
            # Folding into the left operand makes the chain left-associative.
            node = BinOp(op, node, self.term())
        return node

    def term(self):
        """Parse ``unary (('*' | '/') unary)*`` into left-nested BinOp nodes."""
        node = self.unary()
        while self.current().kind in ('STAR', 'SLASH'):
            op = '*' if self.consume().kind == 'STAR' else '/'
            node = BinOp(op, node, self.unary())
        return node

    def unary(self):
        """Parse ``'-' unary | primary``; each leading minus wraps a Unary."""
        if self.current().kind != 'MINUS':
            return self.primary()
        self.consume()
        return Unary('-', self.unary())

    def primary(self):
        """Parse ``NUMBER | '(' expr ')'``.

        Raises:
            ParseError: If the current token starts neither alternative,
                or if a parenthesised expression is not closed by RPAREN.
        """
        tok = self.current()
        if tok.kind == 'NUMBER':
            self.consume()
            return Num(tok.value)
        if tok.kind == 'LPAREN':
            self.consume()
            inner = self.expr()
            self.consume('RPAREN')
            return inner
        raise ParseError(f"Unexpected token {tok!r}")
|