141 lines
3.7 KiB
Python
141 lines
3.7 KiB
Python
from dataclasses import dataclass
|
|
from typing import Union
|
|
from calc.lexer import Token
|
|
|
|
|
|
class ParseError(Exception):
    """Error raised by the parser when the token sequence cannot be parsed."""
|
|
|
|
|
|
@dataclass
class Num:
    """AST leaf holding a single numeric literal."""

    # The literal's numeric value.
    value: Union[int, float]

    def __repr__(self):
        # Compact positional form, e.g. ``Num(3)``, instead of the
        # keyword-style repr a dataclass would generate.
        return "Num({!r})".format(self.value)
|
|
|
|
|
|
@dataclass
class Unary:
    """AST node for a prefix operator applied to one operand.

    ``op`` is the operator symbol ('-') and ``operand`` is the node the
    operator applies to.
    """

    # Operator symbol.
    op: str
    # The inner node being operated on.
    operand: object

    def __repr__(self):
        # Compact positional form, e.g. ``Unary('-', Num(1))``.
        return "Unary({!r}, {!r})".format(self.op, self.operand)
|
|
|
|
|
|
@dataclass
class BinOp:
    """AST node for an infix operation on two operands.

    ``op`` is one of '+', '-', '*', '/'. The node itself carries no
    precedence or associativity information; both are expressed purely by
    how nodes are nested in the tree.
    """

    # Operator symbol.
    op: str
    # Left-hand operand node.
    left: object
    # Right-hand operand node.
    right: object

    def __repr__(self):
        # Compact positional form, e.g. ``BinOp('+', Num(1), Num(2))``.
        return "BinOp({!r}, {!r}, {!r})".format(self.op, self.left, self.right)
|
|
|
|
|
|
# Type alias covering every AST node class defined in this module.
Node = Union[Num, Unary, BinOp]
|
|
|
|
|
|
class _Parser:
    """Recursive-descent parser that turns a token sequence into an AST.

    Grammar (lowest to highest precedence):

        expr    -> term (('+' | '-') term)*
        term    -> unary (('*' | '/') unary)*
        unary   -> '-' unary | primary
        primary -> NUMBER | '(' expr ')'

    Tokens are only inspected through their ``kind`` and ``value``
    attributes. The kinds this parser recognises are 'NUMBER', 'PLUS',
    'MINUS', 'STAR', 'SLASH', 'LPAREN', 'RPAREN' and 'EOF'; the token
    sequence is expected to end with an 'EOF' token.
    """

    def __init__(self, tokens: list):
        """Store the tokens to parse and place the cursor on the first one.

        Args:
            tokens: the tokens to parse. Any iterable is accepted; it is
                copied into a list so it can be indexed by the cursor.
        """
        self._tokens = list(tokens)
        self._pos = 0

    def _peek(self) -> Token:
        """Return the token under the cursor without consuming it.

        Raises:
            ParseError: if the cursor has run past the last token, which
                happens when the sequence is empty or has no EOF token.
        """
        if self._pos >= len(self._tokens):
            # Report a parser-level error instead of leaking an IndexError.
            raise ParseError(
                "unexpected end of token stream (missing EOF token)"
            )
        return self._tokens[self._pos]

    def _consume(self, kind: str) -> Token:
        """Consume and return the current token, requiring it to be *kind*.

        Raises:
            ParseError: if the current token has a different kind.
        """
        tok = self._peek()
        if tok.kind != kind:
            raise ParseError(
                f"expected {kind}, got {tok.kind!r} ({tok.value!r})"
            )
        self._pos += 1
        return tok

    def _advance(self) -> Token:
        """Consume and return the current token, whatever its kind.

        Raises:
            ParseError: if there is no token left to consume.
        """
        tok = self._peek()
        self._pos += 1
        return tok

    def parse(self) -> Node:
        """Parse the whole token sequence and return the root AST node.

        Raises:
            ParseError: if the input is empty, is not a valid expression,
                has tokens left over after a complete expression, or is
                nested too deeply to parse.
        """
        if not self._tokens or self._peek().kind == 'EOF':
            raise ParseError("empty input")
        try:
            node = self._expr()
        except RecursionError:
            # Each '(' adds a level of recursion; report pathological
            # nesting as a parse failure rather than an interpreter error.
            raise ParseError("expression is nested too deeply") from None
        tok = self._peek()
        if tok.kind != 'EOF':
            raise ParseError(
                f"unexpected token {tok.kind!r} ({tok.value!r}) after expression"
            )
        return node

    def _expr(self) -> Node:
        """Parse ``expr -> term (('+' | '-') term)*``.

        Each further operator wraps the tree built so far as its left
        operand, so chains such as ``a - b - c`` nest to the left.
        """
        node = self._term()
        while self._peek().kind in ('PLUS', 'MINUS'):
            op_tok = self._advance()
            # The token's value is stored as the node's operator.
            node = BinOp(op_tok.value, node, self._term())
        return node

    def _term(self) -> Node:
        """Parse ``term -> unary (('*' | '/') unary)*``.

        Built the same way as ``_expr``, so chains nest to the left.
        """
        node = self._unary()
        while self._peek().kind in ('STAR', 'SLASH'):
            op_tok = self._advance()
            node = BinOp(op_tok.value, node, self._unary())
        return node

    def _unary(self) -> Node:
        """Parse ``unary -> '-' unary | primary``.

        Leading minus signs are counted in a loop and then wrapped around
        the operand, which yields the same tree as the recursive grammar
        rule without using one stack frame per '-'.
        """
        minus_count = 0
        while self._peek().kind == 'MINUS':
            self._advance()
            minus_count += 1
        node = self._primary()
        for _ in range(minus_count):
            node = Unary('-', node)
        return node

    def _primary(self) -> Node:
        """Parse ``primary -> NUMBER | '(' expr ')'``.

        Raises:
            ParseError: if the current token cannot start a primary, or a
                parenthesised expression is not closed by ')'.
        """
        tok = self._peek()
        if tok.kind == 'NUMBER':
            self._advance()
            return Num(tok.value)
        if tok.kind == 'LPAREN':
            self._advance()
            node = self._expr()
            closing = self._peek()
            if closing.kind != 'RPAREN':
                raise ParseError(
                    f"expected ')' but got {closing.kind!r}"
                )
            self._advance()
            return node
        if tok.kind == 'EOF':
            raise ParseError("unexpected end of input")
        raise ParseError(
            f"unexpected token {tok.kind!r} ({tok.value!r})"
        )
|
|
|
|
|
|
def parse(tokens: list) -> Node:
    """Build an AST from a list of tokens (as produced by lexer.tokenize).

    The returned tree is made of three node shapes:

        Num(value)              a numeric literal
        Unary('-', operand)     unary negation
        BinOp(op, left, right)  a binary operation, op in {'+','-','*','/'}

    Raises:
        ParseError: if the tokens do not form a valid expression.
    """
    parser = _Parser(tokens)
    tree = parser.parse()
    return tree
|