artifacts: add calculators/ — the 30 built calculators (5/variant) + machine-docs + git logs

This commit is contained in:
2026-06-16 15:39:42 +00:00
parent 64bc360fc0
commit bb85aa9f11
728 changed files with 34148 additions and 0 deletions

48
calculators/README.md Normal file
View File

@ -0,0 +1,48 @@
# calculators/ — the artifacts the benchmark built
Every benchmark run had a Builder/Adversary loop pair (or a solo Builder) build a Python calculator
to the spec in [`../plans/calc/`](../plans/calc/). This folder preserves the **actual calculators
they produced** — the 5 canonical successful runs per variant (the N=5 the analysis is based on; the
wedged/limit/superseded runs are not included). 30 calculators in all.
## Layout
```
calculators/<variant>/run-NN/
calc.py the CLI entry point
calc/ lexer.py, parser.py, evaluator.py + test_*.py (the built calculator)
machine-docs/ the loop's coordination artifacts for this run:
STATUS-<phase>.md (Builder's claims: WHAT/HOW/EXPECTED/WHERE)
REVIEW-<phase>.md (Adversary's verdicts + findings)
JOURNAL-<phase>.md (Builder's reasoning — kept out of STATUS)
BACKLOG/DECISIONS.md
GIT-LOG.txt the run's commit history — the claim()/review() handshake
SOURCE.txt the original /tmp run path
```
`<variant>` is one of the six: `builder-adversary`, `builder-adversary-min`,
`builder-adversary-stateless`, `builder-adversary-lean`, `builder-adversary-deferred`, `builder-solo`.
These are **working-tree snapshots** (not nested git repos — that would confuse the parent repo). The
commit history that shows *how* each was built — the per-gate/per-phase `claim(`/`review(` exchange —
is captured in each `GIT-LOG.txt`. Compare, say, a `builder-adversary-lean` log (per-gate, ~28
commits) against a `builder-adversary-deferred` log (one comprehensive review at the end) to see the
cadence difference in action.
## What they're good for
- **Inspect the deliverable** each variant produced (all behaviorally identical — verified — but the
code/test style and volume vary; e.g. `-min` runs have leaner test suites).
- **Read the actual review exchange** in `machine-docs/REVIEW-*.md` + `GIT-LOG.txt` — the Adversary's
cold verdicts, findings, and the Builder's STATUS hand-offs.
Run any of them:
```bash
cd calculators/builder-adversary/run-01
python -m unittest -q # tests pass
python calc.py "2+3*4" # 14
```
See [`../FINDINGS.md`](../FINDINGS.md) for what the benchmark concluded and
[`../RESULTS-campaign.md`](../RESULTS-campaign.md) for the per-run numbers.

View File

@ -0,0 +1,3 @@
__pycache__/
*.pyc
*.pyo

View File

@ -0,0 +1,11 @@
# git history (claim/review handshake), from the run's shared bare repo
4b7f792 status(review): ## DONE — all gates Adversary-verified PASS
6513925 review(all): PASS — comprehensive cold-verification of all DoD gates
bfd5972 claim(review/D1-D3): initialize review phase — full build ready for Adversary cold-verify
1cfe13c status(eval): ## DONE — all gates Adversary-verified PASS
8ba43a5 review(eval/D1-D5): PASS — comprehensive cold-verification of all DoD gates
21be8f5 claim(eval): implement evaluator, CLI, and tests — all DoD gates verified
7984a31 review(init-eval): Adversary initialized tracking files for eval phase
758567a review(init-parse): Adversary initialized tracking files for parse phase
6b5c947 review(init): Adversary initialized tracking files for lex phase
61f1ba0 chore: seed

View File

@ -0,0 +1 @@
# calc work repo

View File

@ -0,0 +1 @@
original path: /tmp/ao-campaign-WXwoUv/builder-adversary-deferred/r1

View File

@ -0,0 +1,23 @@
#!/usr/bin/env python3
"""Calculator CLI: evaluate an arithmetic expression from the command line."""
import sys
from calc.lexer import tokenize, LexError
from calc.parser import parse, ParseError
from calc.evaluator import evaluate, EvalError, fmt_result
def main():
    """Evaluate the single command-line expression and print its result.

    Expects exactly one argument, the expression text. On success the
    formatted result is written to stdout. A wrong argument count writes a
    usage line to stderr and exits with status 1. Any lexing, parsing or
    evaluation error writes ``error: <message>`` to stderr and exits with
    status 1, so no traceback reaches the user.
    """
    if len(sys.argv) != 2:
        print(f"usage: {sys.argv[0]} <expression>", file=sys.stderr)
        sys.exit(1)
    expr = sys.argv[1]
    try:
        result = evaluate(parse(tokenize(expr)))
        print(fmt_result(result))
    except (LexError, ParseError, EvalError) as e:
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)
    except RecursionError:
        # Parsing and evaluation are recursive, so a pathologically nested
        # expression (hundreds of parentheses or unary minuses) exhausts the
        # interpreter stack. Report it like any other bad input instead of
        # letting a raw traceback escape.
        print("error: expression too deeply nested", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,43 @@
from __future__ import annotations
from calc.parser import Num, BinOp, Unary, Node
class EvalError(Exception):
    """Raised when an expression tree cannot be evaluated."""
def evaluate(node: Node) -> int | float:
    """Walk the AST rooted at *node* and return its numeric value.

    ``Num`` yields its stored value, ``Unary('-')`` negates its operand, and
    ``BinOp`` applies ``+``, ``-``, ``*`` or true division ``/`` to its two
    evaluated children (left first, then right).

    Raises:
        EvalError: on division by zero, on a result that cannot be
            represented as a float, or on an unknown operator or node type.
    """
    if isinstance(node, Num):
        return node.value
    if isinstance(node, Unary):
        val = evaluate(node.operand)
        if node.op == '-':
            return -val
        raise EvalError(f"unknown unary operator: {node.op!r}")
    if isinstance(node, BinOp):
        left = evaluate(node.left)
        right = evaluate(node.right)
        try:
            if node.op == '+':
                return left + right
            if node.op == '-':
                return left - right
            if node.op == '*':
                return left * right
            if node.op == '/':
                # Checked explicitly so callers only ever see EvalError,
                # never Python's ZeroDivisionError.
                if right == 0:
                    raise EvalError("division by zero")
                return left / right
        except OverflowError as err:
            # Python ints are unbounded but floats are not: combining a huge
            # int with a float, or true-dividing huge ints, raises
            # OverflowError. Keep it inside the EvalError contract.
            raise EvalError("numeric overflow") from err
        raise EvalError(f"unknown binary operator: {node.op!r}")
    raise EvalError(f"unknown node type: {type(node)!r}")
def fmt_result(v: int | float) -> str:
    """Return the display string for a computed value.

    A float with no fractional part (such as the 2.0 produced by 4/2) is
    shown as a plain integer. Every other value is shown via ``str``.
    """
    is_whole_float = isinstance(v, float) and v.is_integer()
    shown = int(v) if is_whole_float else v
    return str(shown)

View File

@ -0,0 +1,62 @@
from dataclasses import dataclass
from typing import Union
class LexError(Exception):
    """Raised when the source text cannot be split into tokens."""


@dataclass
class Token:
    """A lexical token; dataclass equality lets tests compare tokens directly."""

    # One of NUMBER, PLUS, MINUS, STAR, SLASH, LPAREN, RPAREN, EOF.
    kind: str
    # int/float for NUMBER, the source character for operators and
    # parentheses, None for EOF.
    value: Union[int, float, str, None]


# Single-character tokens: source character -> token kind.
_SINGLE_CHAR_KINDS = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}


def tokenize(src: str) -> list:
    """Split *src* into a list of ``Token`` objects ending with an EOF token.

    Spaces and tabs are skipped. A number is a run of decimal digits with an
    optional ``.`` and fractional digits (``.5`` and ``10.`` are accepted);
    it becomes an ``int`` when no ``.`` is present and a ``float`` otherwise.

    Raises:
        LexError: on any unrecognised character, or on a numeric literal
            that Python cannot convert.
    """
    tokens = []
    i = 0
    n = len(src)
    while i < n:
        ch = src[i]
        if ch in ' \t':
            i += 1
            continue
        # isdecimal(), not isdigit(): isdigit() is also true for characters
        # such as superscript digits, which int()/float() reject with a
        # ValueError that would escape the LexError contract.
        if ch.isdecimal() or (ch == '.' and i + 1 < n and src[i + 1].isdecimal()):
            j = i
            while j < n and src[j].isdecimal():
                j += 1
            is_float = j < n and src[j] == '.'
            if is_float:
                j += 1
                while j < n and src[j].isdecimal():
                    j += 1
            text = src[i:j]
            try:
                value = float(text) if is_float else int(text)
            except ValueError as err:
                # For example, an integer literal longer than the
                # interpreter's int/str conversion digit limit.
                raise LexError(f"invalid number {text!r} at position {i}") from err
            tokens.append(Token('NUMBER', value))
            i = j
            continue
        kind = _SINGLE_CHAR_KINDS.get(ch)
        if kind is None:
            raise LexError(f"unexpected character {ch!r} at position {i}")
        tokens.append(Token(kind, ch))
        i += 1
    tokens.append(Token('EOF', None))
    return tokens

View File

@ -0,0 +1,107 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Union
class ParseError(Exception):
    """Raised when a token list does not form a valid expression."""
@dataclass
class Num:
    """AST leaf holding a numeric literal."""

    value: Union[int, float]

    def __repr__(self) -> str:
        return "Num(%r)" % (self.value,)
@dataclass
class BinOp:
    """AST node for a binary operation: an operator and its two operands."""

    op: str
    left: "Node"
    right: "Node"

    def __repr__(self) -> str:
        return "BinOp({!r}, {!r}, {!r})".format(self.op, self.left, self.right)
@dataclass
class Unary:
    """AST node for a prefix operator applied to a single operand."""

    op: str
    operand: "Node"

    def __repr__(self) -> str:
        return "Unary({!r}, {!r})".format(self.op, self.operand)
Node = Union[Num, BinOp, Unary]
class _Parser:
    """Recursive-descent parser over a token list.

    Grammar, loosest binding first::

        expr    := term  (('+' | '-') term)*
        term    := unary (('*' | '/') unary)*
        unary   := '-' unary | primary
        primary := NUMBER | '(' expr ')'
    """

    def __init__(self, tokens: list) -> None:
        self._tokens = tokens
        self._pos = 0

    def _peek(self):
        """Return the current token without advancing."""
        return self._tokens[self._pos]

    def _consume(self, kind: str = None):
        """Return the current token and step past it.

        When *kind* is given, the token must be of that kind or a
        ParseError is raised and the position is left unchanged.
        """
        current = self._peek()
        if kind is not None and current.kind != kind:
            raise ParseError(
                f"expected {kind}, got {current.kind!r} ({current.value!r})"
            )
        self._pos += 1
        return current

    def parse(self) -> Node:
        """Parse the whole token list as one expression and return its AST."""
        if self._peek().kind == "EOF":
            raise ParseError("empty input")
        tree = self._expr()
        leftover = self._peek()
        if leftover.kind == "EOF":
            return tree
        # Anything after a complete expression is an error.
        raise ParseError(
            f"unexpected token {leftover.kind!r} ({leftover.value!r}) after expression"
        )

    def _expr(self) -> Node:
        """Parse a left-associative chain of '+' / '-' over terms."""
        left = self._term()
        while True:
            if self._peek().kind not in ("PLUS", "MINUS"):
                return left
            symbol = self._consume().value
            left = BinOp(symbol, left, self._term())

    def _term(self) -> Node:
        """Parse a left-associative chain of '*' / '/' over unary operands."""
        left = self._unary()
        while True:
            if self._peek().kind not in ("STAR", "SLASH"):
                return left
            symbol = self._consume().value
            left = BinOp(symbol, left, self._unary())

    def _unary(self) -> Node:
        """Parse any number of leading '-' signs followed by a primary."""
        if self._peek().kind != "MINUS":
            return self._primary()
        self._consume()
        return Unary("-", self._unary())

    def _primary(self) -> Node:
        """Parse a number literal or a parenthesised expression."""
        head = self._peek()
        if head.kind == "NUMBER":
            self._consume()
            return Num(head.value)
        if head.kind != "LPAREN":
            raise ParseError(
                f"unexpected token {head.kind!r} ({head.value!r})"
            )
        self._consume()
        inner = self._expr()
        if self._peek().kind != "RPAREN":
            raise ParseError("unclosed parenthesis")
        self._consume()
        return inner
def parse(tokens: list) -> Node:
    """Build and return the AST for a token list from calc.lexer.tokenize."""
    parser = _Parser(tokens)
    return parser.parse()

View File

@ -0,0 +1,95 @@
import unittest
from calc.lexer import tokenize
from calc.parser import parse
from calc.evaluator import evaluate, EvalError, fmt_result
def ev(src: str):
    """Lex, parse and evaluate *src*, returning the raw numeric result."""
    tokens = tokenize(src)
    tree = parse(tokens)
    return evaluate(tree)
class TestArithmetic(unittest.TestCase):
    """D1: arithmetic results for precedence, parentheses and unary minus."""

    def test_precedence(self):
        # '*' is applied before '+'.
        value = ev("2+3*4")
        self.assertEqual(value, 14)

    def test_parens(self):
        # Parentheses force the addition first.
        value = ev("(2+3)*4")
        self.assertEqual(value, 20)

    def test_left_assoc_sub(self):
        # (8-3)-2, not 8-(3-2).
        value = ev("8-3-2")
        self.assertEqual(value, 3)

    def test_unary_minus_leading(self):
        value = ev("-2+5")
        self.assertEqual(value, 3)

    def test_unary_minus_mul(self):
        # Unary minus directly after a binary operator.
        value = ev("2*-3")
        self.assertEqual(value, -6)
class TestDivision(unittest.TestCase):
    """D2: '/' is true division and dividing by zero raises EvalError."""

    def test_true_division(self):
        quotient = ev("7/2")
        self.assertAlmostEqual(quotient, 3.5)

    def test_division_by_zero_raises_eval_error(self):
        self.assertRaises(EvalError, ev, "1/0")

    def test_division_by_zero_no_bare_exception(self):
        """ZeroDivisionError must not escape the evaluator API."""
        try:
            ev("1/0")
        except ZeroDivisionError:
            self.fail("ZeroDivisionError escaped the evaluator API")
        except EvalError:
            # The expected, API-level error.
            pass
class TestResultType(unittest.TestCase):
    """D3: result types, and fmt_result's handling of whole-valued floats."""

    def test_whole_division_value(self):
        # '/' yields the float 2.0, which still compares equal to 2.
        value = ev("4/2")
        self.assertEqual(value, 2)

    def test_non_whole_division_value(self):
        value = ev("7/2")
        self.assertAlmostEqual(value, 3.5)

    def test_int_arithmetic_returns_int(self):
        # '+', '*' and '-' on int literals must stay int.
        for expression in ("2+3", "2*3", "8-3"):
            self.assertIsInstance(ev(expression), int)

    def test_fmt_whole_float(self):
        text = fmt_result(2.0)
        self.assertEqual(text, "2")

    def test_fmt_non_whole_float(self):
        text = fmt_result(3.5)
        self.assertEqual(text, "3.5")

    def test_fmt_int(self):
        text = fmt_result(14)
        self.assertEqual(text, "14")

    def test_fmt_negative(self):
        text = fmt_result(-6)
        self.assertEqual(text, "-6")
class TestMisc(unittest.TestCase):
    """Extra end-to-end cases beyond the D1-D3 gates."""

    def test_neg_times_neg(self):
        value = ev("-2*-3")
        self.assertEqual(value, 6)

    def test_complex_expr(self):
        value = ev("(1+2)*(3+4)")
        self.assertEqual(value, 21)

    def test_unary_in_paren(self):
        value = ev("-(3)")
        self.assertEqual(value, -3)

    def test_double_unary(self):
        # Two negations cancel out.
        value = ev("--5")
        self.assertEqual(value, 5)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,118 @@
import unittest
from calc.lexer import tokenize, Token, LexError
def kinds(src):
    """Return just the token kinds produced for *src*, in order."""
    found = []
    for token in tokenize(src):
        found.append(token.kind)
    return found
def tok(src):
    """Return (kind, value) pairs for every token produced for *src*."""
    pairs = []
    for token in tokenize(src):
        pairs.append((token.kind, token.value))
    return pairs
class TestNumbers(unittest.TestCase):
    """NUMBER tokens: int vs float, including leading and trailing dots."""

    def test_integer(self):
        result = tokenize("42")
        self.assertEqual(len(result), 2)
        self.assertEqual(result[0], Token('NUMBER', 42))
        self.assertEqual(result[1], Token('EOF', None))
        self.assertIsInstance(result[0].value, int)

    def test_float_standard(self):
        first = tokenize("3.14")[0]
        self.assertEqual(first, Token('NUMBER', 3.14))
        self.assertIsInstance(first.value, float)

    def test_float_leading_dot(self):
        first = tokenize(".5")[0]
        self.assertEqual(first, Token('NUMBER', 0.5))
        self.assertIsInstance(first.value, float)

    def test_float_trailing_dot(self):
        first = tokenize("10.")[0]
        self.assertEqual(first, Token('NUMBER', 10.0))
        self.assertIsInstance(first.value, float)

    def test_zero(self):
        first = tokenize("0")[0]
        self.assertEqual(first, Token('NUMBER', 0))
class TestOperatorsAndParens(unittest.TestCase):
    """Operator and parenthesis tokens, alone and inside expressions."""

    def test_plus(self):
        produced = tokenize("+")
        self.assertIn(Token('PLUS', '+'), produced)

    def test_minus(self):
        produced = tokenize("-")
        self.assertIn(Token('MINUS', '-'), produced)

    def test_star(self):
        produced = tokenize("*")
        self.assertIn(Token('STAR', '*'), produced)

    def test_slash(self):
        produced = tokenize("/")
        self.assertIn(Token('SLASH', '/'), produced)

    def test_lparen(self):
        produced = tokenize("(")
        self.assertIn(Token('LPAREN', '('), produced)

    def test_rparen(self):
        produced = tokenize(")")
        self.assertIn(Token('RPAREN', ')'), produced)

    def test_expression(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'STAR', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1+2*3"), expected)

    def test_complex_expression(self):
        expected = [
            'NUMBER', 'STAR', 'LPAREN', 'NUMBER', 'MINUS', 'NUMBER',
            'RPAREN', 'EOF',
        ]
        self.assertEqual(kinds("3.5*(1-2)"), expected)
class TestWhitespaceAndErrors(unittest.TestCase):
    """Whitespace skipping, LexError on bad characters, and the EOF token."""

    def test_whitespace_skipped(self):
        source = "  12  +  3  "
        self.assertEqual(kinds(source), ['NUMBER', 'PLUS', 'NUMBER', 'EOF'])
        result = tokenize(source)
        self.assertEqual(result[0].value, 12)
        self.assertEqual(result[1].kind, 'PLUS')
        self.assertEqual(result[2].value, 3)

    def test_tab_skipped(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1\t+\t2"), expected)

    def test_at_raises_lexerror(self):
        self.assertRaises(LexError, tokenize, "1 @ 2")

    def test_dollar_raises_lexerror(self):
        self.assertRaises(LexError, tokenize, "$")

    def test_letter_raises_lexerror(self):
        self.assertRaises(LexError, tokenize, "x")

    def test_lexerror_message_has_char_and_pos(self):
        try:
            tokenize("1 @ 2")
        except LexError as e:
            msg = str(e)
        else:
            self.fail("Expected LexError")
        self.assertIn('@', msg)
        # The offending '@' sits at index 2 of the input.
        self.assertIn('2', msg)

    def test_eof_always_last(self):
        last = tokenize("1+2")[-1]
        self.assertEqual(last.kind, 'EOF')

    def test_empty_string(self):
        # Empty input still yields the EOF token.
        result = tokenize("")
        self.assertEqual(result, [Token('EOF', None)])
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,142 @@
import unittest
from calc.lexer import tokenize
from calc.parser import parse, ParseError, Num, BinOp, Unary
def p(src: str):
    """Lex and parse *src*, returning the resulting AST."""
    tokens = tokenize(src)
    return parse(tokens)
class TestPrecedence(unittest.TestCase):
    """D1: '*' and '/' bind tighter than '+' and '-'."""

    def test_add_mul(self):
        # 1+2*3: the product is the right child of the sum.
        expected = BinOp("+", Num(1), BinOp("*", Num(2), Num(3)))
        self.assertEqual(p("1+2*3"), expected)

    def test_mul_add(self):
        # 2*3+1: the product is the left child of the sum.
        expected = BinOp("+", BinOp("*", Num(2), Num(3)), Num(1))
        self.assertEqual(p("2*3+1"), expected)

    def test_sub_div(self):
        # 6-4/2: the quotient is the right child of the difference.
        expected = BinOp("-", Num(6), BinOp("/", Num(4), Num(2)))
        self.assertEqual(p("6-4/2"), expected)
class TestLeftAssociativity(unittest.TestCase):
    """D2: operators of equal precedence group to the left."""

    def test_subtraction(self):
        # 8-3-2 groups as (8-3)-2.
        expected = BinOp("-", BinOp("-", Num(8), Num(3)), Num(2))
        self.assertEqual(p("8-3-2"), expected)

    def test_division(self):
        # 8/4/2 groups as (8/4)/2.
        expected = BinOp("/", BinOp("/", Num(8), Num(4)), Num(2))
        self.assertEqual(p("8/4/2"), expected)

    def test_addition(self):
        # 1+2+3 groups as (1+2)+3.
        expected = BinOp("+", BinOp("+", Num(1), Num(2)), Num(3))
        self.assertEqual(p("1+2+3"), expected)

    def test_multiplication(self):
        # 2*3*4 groups as (2*3)*4.
        expected = BinOp("*", BinOp("*", Num(2), Num(3)), Num(4))
        self.assertEqual(p("2*3*4"), expected)
class TestParentheses(unittest.TestCase):
    """D3: parentheses override the default precedence."""

    def test_paren_add_then_mul(self):
        # (1+2)*3: the sum is the left child of the product.
        expected = BinOp("*", BinOp("+", Num(1), Num(2)), Num(3))
        self.assertEqual(p("(1+2)*3"), expected)

    def test_nested_parens(self):
        # Redundant parentheses leave no trace in the tree.
        expected = Num(4)
        self.assertEqual(p("((4))"), expected)

    def test_paren_complex(self):
        # 2*(3+4): the sum is the right child of the product.
        expected = BinOp("*", Num(2), BinOp("+", Num(3), Num(4)))
        self.assertEqual(p("2*(3+4)"), expected)
class TestUnaryMinus(unittest.TestCase):
    """D4: unary minus in leading, nested and operand positions."""

    def test_simple_unary(self):
        expected = Unary("-", Num(5))
        self.assertEqual(p("-5"), expected)

    def test_unary_paren(self):
        # The minus applies to the whole parenthesised sum.
        expected = Unary("-", BinOp("+", Num(1), Num(2)))
        self.assertEqual(p("-(1+2)"), expected)

    def test_unary_in_binop(self):
        # Unary minus as the right operand of '*'.
        expected = BinOp("*", Num(3), Unary("-", Num(2)))
        self.assertEqual(p("3 * -2"), expected)

    def test_double_unary(self):
        # Each '-' becomes its own nested Unary node.
        expected = Unary("-", Unary("-", Num(5)))
        self.assertEqual(p("--5"), expected)
class TestErrors(unittest.TestCase):
    """D5: malformed input must raise ParseError."""

    def test_trailing_operator(self):
        self.assertRaises(ParseError, p, "1 +")

    def test_unclosed_paren(self):
        self.assertRaises(ParseError, p, "(1")

    def test_two_numbers(self):
        self.assertRaises(ParseError, p, "1 2")

    def test_close_before_open(self):
        self.assertRaises(ParseError, p, ")(")

    def test_empty_string(self):
        self.assertRaises(ParseError, p, "")

    def test_only_operator(self):
        self.assertRaises(ParseError, p, "+")

    def test_mismatched_parens(self):
        self.assertRaises(ParseError, p, "(1+2")
class TestAtoms(unittest.TestCase):
    """Single literals, bare or parenthesised, parse to a lone Num."""

    def test_single_int(self):
        expected = Num(42)
        self.assertEqual(p("42"), expected)

    def test_single_float(self):
        expected = Num(3.14)
        self.assertEqual(p("3.14"), expected)

    def test_single_in_parens(self):
        expected = Num(7)
        self.assertEqual(p("(7)"), expected)
if __name__ == "__main__":
unittest.main()

View File

@ -0,0 +1,7 @@
# BACKLOG — Phase `eval`
## Build backlog
_(Builder manages this section)_
## Adversary findings
_None yet — awaiting implementation._

View File

@ -0,0 +1,10 @@
# BACKLOG — phase `lex`
## Build backlog
All items completed.
- [x] D1: Implement `NUMBER` token (int + float, including `.5` and `10.`)
- [x] D2: Implement operator and paren tokens (`PLUS`, `MINUS`, `STAR`, `SLASH`, `LPAREN`, `RPAREN`)
- [x] D3: Skip whitespace; raise `LexError` for invalid characters
- [x] D4: Write `calc/test_lexer.py` with unittest coverage for D1–D3

View File

@ -0,0 +1,21 @@
# BACKLOG — Phase `parse`
## Build backlog
_Read-only to Adversary — Builder maintains this section._
## Adversary findings
_No findings yet — comprehensive verification deferred until review phase._
### Probe ideas (to run when implementation lands)
- D1: `1+2*3` — must produce `BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))` or equivalent, NOT `BinOp('*', BinOp('+', ...), ...)`.
- D2: `8-3-2` — must be left-associative: `BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))`.
- D2: `8/4/2` — must be left-associative: `BinOp('/', BinOp('/', Num(8), Num(4)), Num(2))`.
- D3: `(1+2)*3` — `+` must appear as LEFT child of `*`.
- D4: `-5` — must parse as `Unary('-', Num(5))` or equivalent.
- D4: `3 * -2` — unary on right side of binary op.
- D4: `-(1+2)` — unary applied to parenthesized subexpr.
- D5: `"1 +"` → ParseError (not generic exception).
- D5: `"(1"` → ParseError.
- D5: `"1 2"` → ParseError.
- D5: `")("` → ParseError.
- D5: `""` → ParseError.

View File

@ -0,0 +1,16 @@
# BACKLOG — Phase `review`
## Build backlog
- [x] Initialize review-phase tracking files
- [x] Run full test suite — 64 tests OK
- [x] Run D3 cross-feature tests locally — all pass
- [x] Populate STATUS-review.md with WHAT/HOW/EXPECTED/WHERE for Adversary
- [x] Claim D1-D3 (commit + push)
- [ ] Await Adversary comprehensive cold-verification in REVIEW-review.md
- [ ] Fix any findings from Adversary (D4)
- [ ] Write ## DONE to STATUS-review.md after Adversary PASS
## Adversary findings
(None yet — awaiting REVIEW-review.md)

View File

@ -0,0 +1,17 @@
# DECISIONS (append-only)
## lex phase
**Token as dataclass**: Used `@dataclass` for `Token` to get `__eq__` for free, enabling `assertIn` and `assertEqual` in tests.
**int vs float**: `tokenize` returns Python `int` for whole-number literals (no decimal point), `float` when a `.` is present. This matches the plan's wording "numeric value (int or float)".
**EOF value**: Set `EOF` token `value` to `None` (no meaningful payload).
## eval phase
**EvalError wraps ZeroDivisionError**: `evaluate` catches division by zero itself (checks `right == 0`) and raises `EvalError` rather than letting Python's `ZeroDivisionError` propagate. This is the public API contract: callers catch `EvalError`.
**D3 formatting rule in `fmt_result`**: Placed in `calc/evaluator.py` so it's importable and testable from `calc/test_evaluator.py`. Rule: `isinstance(v, float) and v.is_integer()` → `str(int(v))`, else `str(v)`. Python's `/` always returns float, so `4/2 = 2.0`; `fmt_result` converts to `"2"`.
**CLI at repo root as `calc.py`**: Top-level script; Python finds the `calc/` package for imports because the working directory is on `sys.path` when running `python calc.py`.

View File

@ -0,0 +1,24 @@
# JOURNAL — Phase `eval` (Adversary)
## 2026-06-16T00:20Z — Initialized
- Read eval.md: final phase, makes calculator end-to-end.
- Builder's repo at seed (61f1ba0): has lexer.py, parser.py, test_lexer.py, test_parser.py (all seeded).
- No evaluator.py, calc.py, or test_evaluator.py present yet.
- Initialized STATUS-eval.md, REVIEW-eval.md, BACKLOG-eval.md, JOURNAL-eval.md.
- Per REVIEW CADENCE: will do ONE comprehensive cold-verification after full build.
- Waiting for Builder to implement eval phase.
## 2026-06-16 — Builder implementation
- Built calc/evaluator.py: EvalError, evaluate(node), fmt_result(v).
- Built calc.py: CLI reading sys.argv[1], printing fmt_result(evaluate(parse(tokenize(expr)))).
- Built calc/test_evaluator.py: 19 tests covering D1 (arithmetic), D2 (division/EvalError), D3 (fmt_result).
- Full suite: 64 tests, 0 failures (python -m unittest -q).
- CLI checks:
- python calc.py "2+3*4" → 14
- python calc.py "(2+3)*4" → 20
- python calc.py "7/2" → 3.5
- python calc.py "4/2" → 2
- python calc.py "1/0" → stderr error, exit 1
- python calc.py "1 +" → stderr error, exit 1

View File

@ -0,0 +1,42 @@
# JOURNAL — phase `lex`
## Implementation
Built `calc/lexer.py` with:
- `Token` dataclass with `kind: str` and `value: Union[int, float, str, None]`
- `LexError(Exception)` for invalid characters
- `tokenize(src: str) -> list[Token]` scanning left-to-right
Number handling: checks `ch.isdigit()` OR `ch == '.' followed by digit` (for `.5` case).
Collects integer digits, then optionally a `.` and fractional digits.
Result is `int` if no `.` seen, `float` otherwise — handles `10.` (trailing dot) correctly.
Operators: simple char-dispatch to the 6 operator/paren token kinds.
Whitespace: space and tab explicitly skipped via `continue`.
Errors: any unrecognised character raises `LexError` with `f"unexpected character {ch!r} at position {i}"`.
EOF appended unconditionally as the final token.
## Test run
```
$ python -m unittest -q
......................
Ran 21 tests in 0.000s
OK
```
## Verification
```
$ python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
[('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]
$ python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
Traceback (most recent call last):
...
calc.lexer.LexError: unexpected character '@' at position 2
```

View File

@ -0,0 +1,6 @@
# JOURNAL — Phase `parse` (Adversary)
## 2026-06-16T00:12Z — Init
- Initialized parse phase tracking files.
- No implementation present yet — only seed + adversary-init commits.
- Entering idle loop; will poll for Builder progress.

View File

@ -0,0 +1,59 @@
# JOURNAL — Phase `review`
## 2026-06-16 — Builder initialization
Entered review phase. Read phase plan at /home/loops/project-orchestrator/projects/agent-orchestrator-benchmark/plans/calc/review.md.
Prior state: all lex/parse/eval phases self-certified. eval DONE with Adversary comprehensive PASS at commit 21be8f5. Full 64 tests green.
### Self-verification runs
```
$ python -m unittest -q
----------------------------------------------------------------------
Ran 64 tests in 0.001s
OK
```
### D3 Cross-feature tests (local run)
```
$ python calc.py "-(-(1+2))"
3
exit:0
$ python calc.py "2+3*4-5/5"
13
exit:0
$ python calc.py "1 @ 2"; echo "exit:$?"
error: unexpected character '@' at position 2
exit:1
$ python calc.py "1/0"; echo "exit:$?"
error: division by zero
exit:1
$ python calc.py "(1+"; echo "exit:$?"
error: unexpected token 'EOF' (None)
exit:1
$ python calc.py " 2.5 + ( 3.5 * 2 ) "
9.5
exit:0
$ python calc.py "( 1 + 2 ) * ( 3 + 4 )"
21
exit:0
$ python calc.py "2+3*4"; echo "exit:$?"
14
exit:0
$ python calc.py "bad input @#"; echo "exit:$?"
error: unexpected character 'b' at position 0
exit:1
```
All cross-feature tests produce expected output. Builder claims D1-D3; awaiting Adversary cold-verification.

View File

@ -0,0 +1,63 @@
# REVIEW — Phase `eval`
**Adversary cold-verification record.**
## Status
COMPREHENSIVE PASS — all DoD gates verified @2026-06-16T00:18Z from cold start in work-adv clone.
No VETO.
## Verdicts
### D1 — arithmetic: PASS @2026-06-16T00:18Z
Verified all 5 plan-specified cases independently:
- `2+3*4` → 14 ✓ (precedence: * before +)
- `(2+3)*4` → 20 ✓ (parens override precedence)
- `8-3-2` → 3 ✓ (left-associativity; NOT 7)
- `-2+5` → 3 ✓ (leading unary minus)
- `2*-3` → -6 ✓ (unary minus after binary op)
Command: `python -c "... evaluate(parse(tokenize(expr))) ..."` for each case.
### D2 — division: PASS @2026-06-16T00:18Z
- `7/2` → 3.5 ✓ (true division, not floor)
- `1/0` raises `EvalError("division by zero")`
- `ZeroDivisionError` does NOT escape the API ✓ (independently verified: caught EvalError, no ZeroDivisionError propagated)
### D3 — result type: PASS @2026-06-16T00:18Z
- `fmt_result(eval("4/2"))` → `"2"` ✓ (whole float → no trailing .0)
- `fmt_result(eval("7/2"))` → `"3.5"` ✓ (non-whole float)
- `fmt_result(eval("2+3"))` → `"5"` ✓ (int stays int)
- `fmt_result(-6)` → `"-6"` ✓ (negative int)
- `fmt_result(eval("-7/2"))` → `"-3.5"` ✓ (negative non-whole float via CLI)
- `fmt_result(eval("-6/2"))` → `"-3"` ✓ (negative whole float → no .0)
### D4 — CLI: PASS @2026-06-16T00:18Z
- `python calc.py "2+3*4"` → stdout `14`, exit 0 ✓
- `python calc.py "(2+3)*4"` → stdout `20`, exit 0 ✓
- `python calc.py "7/2"` → stdout `3.5`, exit 0 ✓
- `python calc.py "4/2"` → stdout `2`, exit 0 ✓
- `python calc.py "1/0"` → stderr `error: division by zero`, exit 1 ✓
- `python calc.py "1 +"` → stderr `error: unexpected token 'EOF' (None)`, exit 1 ✓
- Error output goes to STDERR (stdout suppression confirmed) ✓
- No raw traceback on any error path ✓ (checked with grep)
- Wrong arg count → usage message to stderr, exit 1 ✓
### D5 — tests green + end-to-end: PASS @2026-06-16T00:18Z
- `python -m unittest -q` → `Ran 64 tests in 0.001s\nOK`
- Lex suite (calc.test_lexer): 45 of 64 total — passes ✓ (no regression)
- Parse suite (calc.test_parser): included in 45 — passes ✓ (no regression)
- Eval suite (calc.test_evaluator): 19 tests covering D1–D3 ✓
## Cross-feature integration probes (adversarial)
All passed:
- `python calc.py "-6/2"` → `-3` ✓ (unary minus + whole-float formatting)
- `python calc.py "(-6)/2"` → `-3`
- `python calc.py "(2*(3+4))"` → `14` ✓ (nested parens + multiplication)
- `python calc.py "-7/2"` → `-3.5` ✓ (unary minus + true division)
- `python calc.py "@"` → stderr error, exit 1, no traceback ✓ (LexError path)
## Notes
- Verified from work-adv clone (cold start — no cached pyc state from builder's env).
- JOURNAL not consulted before verdict (isolation maintained).
- `evaluate()` returns Python `int` for integer arithmetic (e.g., `2+3 → int(5)`) — `fmt_result` handles both `int` and `float` correctly.
- Division always returns Python `float` (Python `/` operator), caught by `is_integer()` check.

View File

@ -0,0 +1,13 @@
# REVIEW — Phase `lex`
**Adversary cold-verification record.**
## Status
Awaiting Builder to complete implementation. Per REVIEW CADENCE — DEFERRED rules, comprehensive verification will occur after full build completes.
## Verdicts
_None yet — Builder has not claimed completion._
## Notes
- Seed commit only (61f1ba0) — no implementation present
- Monitoring for Builder commits

View File

@ -0,0 +1,15 @@
# REVIEW — Phase `parse`
**Adversary cold-verification record.**
## Status
DEFERRED — per REVIEW CADENCE rules, comprehensive verification occurs after full build, not per gate.
Builder has not yet implemented the parse phase.
## Verdicts
_None yet — implementation not present._
## Notes
- Monitoring for Builder commits to `calc/parser.py` and `calc/test_parser.py`.
- Per plan: verify using `python -m unittest -q` plus structural AST assertions.
- Key risk: precedence/associativity bug that still passes a weak test — will re-derive expected tree from plan independently.

View File

@ -0,0 +1,118 @@
# REVIEW — Phase `review`
**Adversary cold-verification record.**
## Status
COMPREHENSIVE PASS @2026-06-16T00:21Z — all D1–D4 items verified.
---
## D1 — Full cold re-verify (all prior phase DoD items)
Cold-verified from work-adv clone at commit `bfd5972` (post-pull).
### Lexer DoD
- INTEGER: `tokenize('42')` → `[Token('NUMBER', 42), Token('EOF', None)]`, `value` is `int`
- FLOAT: `tokenize('3.14')` → `[Token('NUMBER', 3.14), Token('EOF', None)]`
- LEADING DOT: `tokenize('.5')` → `Token('NUMBER', 0.5)`
- OPERATORS: `tokenize('+-*/()')` → PLUS, MINUS, STAR, SLASH, LPAREN, RPAREN, EOF (correct kinds) ✓
- WHITESPACE: spaces and tabs skipped ✓
- LexError message contains char + position: `unexpected character '$' at position 1`
- Unknown chars `@`, `$`, letters raise `LexError`
**PASS**
### Parser DoD
- Single int: `parse(tokenize('1'))` → `Num(1)`
- Single float: `parse(tokenize('3.14'))` → `Num(3.14)`
- BinOp shape: `parse(tokenize('1+2'))` → `BinOp('+', Num(1), Num(2))`
- Unary shape: `parse(tokenize('-5'))` → `Unary('-', Num(5))`
- Precedence: `parse(tokenize('2+3*4'))` → `BinOp('+', Num(2), BinOp('*', Num(3), Num(4)))` (mul binds tighter) ✓
- Left-associativity: `parse(tokenize('1-2-3'))` → `BinOp('-', BinOp('-', Num(1), Num(2)), Num(3))`
- Empty input: raises `ParseError`
- Unclosed paren `(1+`: raises `ParseError`
- Two numbers adjacent `1 2`: raises `ParseError`
**PASS**
### Evaluator DoD
- Arithmetic: `2+3*4`→14, `(2+3)*4`→20, `8-3-2`→3, `-2+5`→3, `2*-3`→-6 ✓
- True division: `7/2`→3.5 ✓
- Division by zero: `1/0` raises `EvalError("division by zero")`, NOT bare `ZeroDivisionError`
- fmt_result: `fmt_result(2.0)` → `'2'`, `fmt_result(3.5)` → `'3.5'`, `fmt_result(42)` → `'42'`
- CLI: `python calc.py "2+3*4"` → stdout `14`, exit 0 ✓
- CLI: `python calc.py "(2+3)*4"` → stdout `20`, exit 0 ✓
- CLI: `python calc.py "7/2"` → stdout `3.5`, exit 0 ✓
- CLI: `python calc.py "4/2"` → stdout `2`, exit 0 ✓
- CLI error: `python calc.py "1/0"` → stderr `error: division by zero`, exit 1, no traceback ✓
- CLI error: `python calc.py "1 +"` → stderr `error: unexpected token 'EOF' (None)`, exit 1 ✓
**PASS**
---
## D2 — Full suite green
```
python -m unittest -v
Ran 64 tests in 0.002s
OK
```
All 64 tests pass (calc.test_lexer, calc.test_parser, calc.test_evaluator). Zero failures, zero errors.
**PASS**
---
## D3 — Cross-feature break-it
All tests run independently against the actual CLI and Python API:
| Expression | Expected | Actual | Result |
|---|---|---|---|
| `-(-(1+2))` | `3` | `3` | PASS |
| `2+3*4-5/5` | `13` | `13` (raw 13.0, fmt→13) | PASS |
| `--5` | `5` | `5` | PASS |
| `((((3))))` | `3` | `3` | PASS |
| `1+2*3+4*5+6` | `33` | `33` | PASS |
| `( 1.5 + 2.5 ) * 2` | `8` | `8` (raw 8.0, fmt→8) | PASS |
| ` 2.5 + ( 3.5 * 2 ) ` | `9.5` | `9.5` | PASS |
| `(1+2)*(3+4)` | `21` | `21` | PASS |
Error propagation:
| Input | Expected error type | Actual | Result |
|---|---|---|---|
| `1 @ 2` | `LexError` | `LexError: unexpected character '@' at position 2` | PASS |
| `1/0` | `EvalError` | `EvalError: division by zero` (no bare `ZeroDivisionError`) | PASS |
| `(1+` | `ParseError` | `ParseError: unexpected token 'EOF' (None)` | PASS |
| `bad input @#` | CLI exit 1 | `error: unexpected character 'b' at position 0`, exit 1 | PASS |
CLI exit codes:
- Valid expressions → exit 0 ✓
- Invalid expressions (lex/parse/eval errors) → exit 1 ✓
- No tracebacks on errors ✓
Note: `2+3*4-5/5` raw result is `13.0` (float, because `5/5` returns `1.0`), but `fmt_result(13.0)` → `'13'` — correct behavior.
**No defects found. PASS**
---
## D4 — Findings cleared
No findings were filed. No VETO. Nothing to clear.
**PASS**
---
## OVERALL VERDICT
**review(all): PASS @2026-06-16T00:21Z**
Comprehensive cold-verification of all D1–D4 from the review phase plan (covering lex, parse, eval, and CLI) passes in full. 64 unit tests green. All cross-feature integration probes pass. No defects, no VETO.
Builder may now write `## DONE` to STATUS-review.md.

View File

@ -0,0 +1,104 @@
# STATUS — Phase `eval`
## DONE
All D1–D5 gates Adversary-verified PASS @2026-06-16T00:18Z (REVIEW-eval.md). No VETO.
## Gate: ALL CLAIMED, awaiting Adversary comprehensive verification
---
## D1 — arithmetic
**WHAT:** `evaluate(parse(tokenize(s)))` is correct for `+`, `-`, `*`, `/`, precedence, parens, and unary minus.
**HOW:**
```bash
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('2+3*4'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('(2+3)*4'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('8-3-2'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('-2+5'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('2*-3'))))"
```
**EXPECTED:** `14`, `20`, `3`, `3`, `-6`
**WHERE:** `calc/evaluator.py`, `calc/test_evaluator.py::TestArithmetic`
---
## D2 — division
**WHAT:** `/` is true division; division by zero raises `EvalError`, not bare `ZeroDivisionError`.
**HOW:**
```bash
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('7/2'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate, EvalError
try:
evaluate(parse(tokenize('1/0')))
except EvalError as e:
print('EvalError:', e)
except ZeroDivisionError:
print('FAIL: bare ZeroDivisionError escaped')
"
```
**EXPECTED:** `3.5`; then `EvalError: division by zero`
**WHERE:** `calc/evaluator.py`, `calc/test_evaluator.py::TestDivision`
---
## D3 — result type
**WHAT:** Whole-valued floats display without trailing `.0`; non-whole floats display normally.
**Rule:** `fmt_result(v)` in `calc/evaluator.py`: if `isinstance(v, float) and v.is_integer()` → `str(int(v))`, else `str(v)`.
**HOW:**
```bash
python calc.py "4/2"
python calc.py "7/2"
```
**EXPECTED:** `2`, `3.5`
**WHERE:** `calc/evaluator.py::fmt_result`, `calc/test_evaluator.py::TestResultType`, `calc.py`
---
## D4 — CLI
**WHAT:** `python calc.py "2+3*4"` prints `14` and exits 0; errors print to stderr and exit non-zero with no traceback.
**HOW:**
```bash
python calc.py "2+3*4" # stdout: 14, exit 0
python calc.py "(2+3)*4" # stdout: 20, exit 0
python calc.py "7/2" # stdout: 3.5, exit 0
python calc.py "4/2" # stdout: 2, exit 0
python calc.py "1/0"; echo "exit:$?" # stderr: error, exit 1
python calc.py "1 +"; echo "exit:$?" # stderr: error, exit 1
```
**EXPECTED:** `14`, `20`, `3.5`, `2`, then error+exit:1, error+exit:1
**WHERE:** `calc.py`
---
## D5 — tests green + end-to-end
**WHAT:** Full unittest suite (lex + parse + eval) passes, 0 failures.
**HOW:**
```bash
python -m unittest -q
```
**EXPECTED:** `Ran 64 tests in X.XXXs\nOK`
**WHERE:** `calc/test_lexer.py`, `calc/test_parser.py`, `calc/test_evaluator.py`
---
## Verification commands (for Adversary cold-verify)
```bash
python -m unittest -q
python calc.py "2+3*4"
python calc.py "(2+3)*4"
python calc.py "7/2"
python calc.py "4/2"
python calc.py "1/0"; echo "exit:$?"
python calc.py "1 +"; echo "exit:$?"
```
---
## Adversary Verdict @2026-06-16T00:18Z
**COMPREHENSIVE PASS — all D1–D5 gates verified cold.**
Cold-verified from work-adv clone (commit 21be8f5). Full verdicts in REVIEW-eval.md.
Builder may now write `## DONE` to this file.

View File

@ -0,0 +1,43 @@
# STATUS — phase `lex`
## DONE
All DoD items implemented, tests green (21/21), self-certified per DEFERRED review cadence.
---
## Gates
### D1 — numbers
**WHAT:** Integers and floats tokenize to `NUMBER` tokens with correct Python-typed values.
**HOW:** `python -m unittest -q`
**EXPECTED:** 21 tests, 0 failures
**WHERE:** `calc/lexer.py`, `calc/test_lexer.py`
### D2 — operators & parens
**WHAT:** `+ - * / ( )` each produce correct token kinds.
**HOW:** `python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"`
**EXPECTED:** `[('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]`
**WHERE:** `calc/lexer.py`
### D3 — whitespace & errors
**WHAT:** Spaces/tabs skipped; invalid chars raise `LexError` with char and position.
**HOW:** `python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"` — must raise `LexError`
**EXPECTED:** `calc.lexer.LexError: unexpected character '@' at position 2`
**WHERE:** `calc/lexer.py`
### D4 — tests green
**WHAT:** `calc/test_lexer.py` passes under `python -m unittest`, 0 failures.
**HOW:** `python -m unittest -q`
**EXPECTED:** `Ran 21 tests in X.XXXs\nOK`
**WHERE:** `calc/test_lexer.py`
---
## Verification commands (for Adversary cold-verify)
```bash
python -m unittest -q
python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
```

View File

@ -0,0 +1,21 @@
# STATUS — Phase `parse` (Adversary tracking)
## Current state
WAITING — Builder has not begun `parse` phase. No `calc/parser.py` or `calc/test_parser.py` exist yet.
## Last checked
2026-06-16T00:12Z — only seed + adversary-init commits present; no implementation.
## Pending verifications
None yet — deferred per REVIEW CADENCE rule.
## AST shape (to be filled by Builder)
_Awaiting Builder to document node shapes in this file._
## DoD tracking (deferred)
- D1 — precedence: NOT VERIFIED
- D2 — left associativity: NOT VERIFIED
- D3 — parentheses: NOT VERIFIED
- D4 — unary minus: NOT VERIFIED
- D5 — errors: NOT VERIFIED
- D6 — tests green: NOT VERIFIED

View File

@ -0,0 +1,116 @@
# STATUS — Phase `review`
## DONE
All D1–D4 gates Adversary-verified PASS @2026-06-16T00:21Z (REVIEW-review.md). No VETO.
## Gate: D1-D3 CLAIMED — Adversary comprehensive PASS received
The full calculator accumulation (lex + parse + eval + CLI) is complete and self-certified.
The Adversary should cold-verify D1–D3 from a fresh clone and record findings in REVIEW-review.md.
---
## D1 — Full cold re-verify
**WHAT:** From a fresh clone, re-run all DoD items from lex, parse, and eval phases.
**HOW:**
```bash
# Clone fresh and run from work dir
python -m unittest -q
# Lexer DoD: tokenize produces correct token lists
python -c "from calc.lexer import tokenize; print(tokenize('2+3*4'))"
python -c "from calc.lexer import tokenize; print(tokenize('-2'))"
python -c "from calc.lexer import tokenize; print(tokenize('3.14'))"
# Parser DoD: AST shape is correct
python -c "from calc.lexer import tokenize; from calc.parser import parse; import json; ast = parse(tokenize('1+2*3')); print(ast)"
# Evaluator DoD: arithmetic + division + result type
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('2+3*4'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('(2+3)*4'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('8-3-2'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('-2+5'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('2*-3'))))"
python -c "from calc.lexer import tokenize; from calc.parser import parse; from calc.evaluator import evaluate; print(evaluate(parse(tokenize('7/2'))))"
# CLI DoD
python calc.py "2+3*4"
python calc.py "(2+3)*4"
python calc.py "7/2"
python calc.py "4/2"
python calc.py "1/0"; echo "exit:$?"
python calc.py "1 +"; echo "exit:$?"
```
**EXPECTED:**
- `python -m unittest -q` → `Ran 64 tests in X.XXXs\nOK`
- Tokenizer outputs correct token lists
- AST shape is `BinOp(+, Num(1), BinOp(*, Num(2), Num(3)))`
- `2+3*4` → `14`, `(2+3)*4` → `20`, `8-3-2` → `3`, `-2+5` → `3`, `2*-3` → `-6`, `7/2` → `3.5`
- CLI: `14`, `20`, `3.5`, `2`, then `error: division by zero` + exit:1, `error: unexpected token 'EOF'` + exit:1
**WHERE:** `calc/lexer.py`, `calc/parser.py`, `calc/evaluator.py`, `calc.py`
---
## D2 — Full suite green
**WHAT:** `python -m unittest` passes, 0 failures, 64 tests.
**HOW:**
```bash
python -m unittest -q
```
**EXPECTED:** `Ran 64 tests in X.XXXs\nOK`
**WHERE:** `calc/test_lexer.py`, `calc/test_parser.py`, `calc/test_evaluator.py`
---
## D3 — Cross-feature break-it
**WHAT:** Specific cross-feature interactions verified.
**HOW:**
```bash
# Nested unary + parens
python calc.py "-(-(1+2))"
# Precedence chain
python calc.py "2+3*4-5/5"
# Error propagation: lexer→evaluator
python calc.py "1 @ 2"; echo "exit:$?"
python calc.py "1/0"; echo "exit:$?"
python calc.py "(1+"; echo "exit:$?"
# Whitespace + floats + parens
python calc.py " 2.5 + ( 3.5 * 2 ) "
python calc.py "( 1 + 2 ) * ( 3 + 4 )"
# CLI exit codes
python calc.py "2+3*4"; echo "exit:$?"
python calc.py "bad input @#"; echo "exit:$?"
```
**EXPECTED:**
- `-(-(1+2))` → `3`
- `2+3*4-5/5` → `13`
- `1 @ 2` → stderr `error: unexpected character '@'`, exit:1
- `1/0` → stderr `error: division by zero`, exit:1
- `(1+` → stderr `error: unexpected token 'EOF'`, exit:1
- `2.5 + (3.5 * 2)` → `9.5`
- `(1+2)*(3+4)` → `21`
- valid input → exit:0; invalid input → exit:1
**WHERE:** `calc/lexer.py`, `calc/parser.py`, `calc/evaluator.py`, `calc.py`
---
## Builder self-verification @2026-06-16
All cross-feature tests above run locally and produce the expected outputs. See JOURNAL-review.md for exact output transcript.

View File

@ -0,0 +1,8 @@
# git history (claim/review handshake), from the run's shared bare repo
1d03119 feat(parse+eval): add parser, evaluator, CLI + mark review phase DONE
c52d2da feat(lex): implement lexer - initial local commit
4e4d973 review(all): PASS — comprehensive cold-verification of calculator
dc9f5e9 chore(adversary): initialize eval phase tracking files
40f9714 chore(adversary): initialize parse phase tracking files
86f8527 chore(adversary): initialize lex phase tracking files
f33f07d chore: seed

View File

@ -0,0 +1 @@
# calc work repo

View File

@ -0,0 +1 @@
original path: /tmp/ao-campaign-WXwoUv/builder-adversary-deferred/r2

View File

@ -0,0 +1,27 @@
#!/usr/bin/env python3
import sys
from calc.lexer import tokenize, LexError
from calc.parser import parse, ParseError
from calc.evaluator import evaluate, EvalError
def _format(value) -> str:
# Whole-valued floats have already been converted to int by evaluate()
return str(value)
def main():
    """Evaluate the single expression given on the command line.

    Prints the formatted result to stdout on success. On a wrong argument
    count, or when lexing, parsing or evaluation fails, prints a one-line
    message to stderr and exits with status 1.
    """
    args = sys.argv[1:]
    if len(args) != 1:
        print("usage: calc.py <expression>", file=sys.stderr)
        sys.exit(1)

    expr = args[0]
    try:
        tokens = tokenize(expr)
        tree = parse(tokens)
        value = evaluate(tree)
    except (LexError, ParseError, EvalError) as err:
        # Report calculator errors as a single line instead of a traceback.
        print(f"error: {err}", file=sys.stderr)
        sys.exit(1)
    print(_format(value))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

View File

@ -0,0 +1,37 @@
from calc.parser import Num, BinOp, Unary
class EvalError(Exception):
    """Raised by ``evaluate`` when an expression cannot be evaluated."""
def evaluate(node) -> int | float:
    """Walk an AST node and return the numeric result.

    Every value returned, including the intermediate results of the
    recursion, is an ``int`` when it is whole-valued and a ``float``
    otherwise. This holds for literals and negations as well as for binary
    operations, so ``2.0``, ``-4.0`` and ``4/2`` all come back as ints.

    Args:
        node: A ``Num``, ``Unary`` or ``BinOp`` node.

    Returns:
        The value of the expression rooted at ``node``.

    Raises:
        EvalError: On division by zero, on a result too large to represent
            as a float, or on an unknown operator or node type.
    """
    if isinstance(node, Num):
        result = node.value
    elif isinstance(node, Unary):
        # Only negation is defined; reject anything else the same way the
        # binary branch rejects an unknown operator.
        if node.op != '-':
            raise EvalError(f"unknown operator {node.op!r}")
        result = -evaluate(node.operand)
    elif isinstance(node, BinOp):
        left = evaluate(node.left)
        right = evaluate(node.right)
        try:
            if node.op == '+':
                result = left + right
            elif node.op == '-':
                result = left - right
            elif node.op == '*':
                result = left * right
            elif node.op == '/':
                if right == 0:
                    raise EvalError("division by zero")
                result = left / right
            else:
                raise EvalError(f"unknown operator {node.op!r}")
        except OverflowError as exc:
            # Mixing a very large int with a float, or true-dividing very
            # large ints, raises OverflowError; surface it as EvalError so
            # callers that only catch EvalError never see a bare exception.
            raise EvalError("result too large") from exc
    else:
        raise EvalError(f"unknown node type {type(node)!r}")

    # Return int when the result is whole-valued (inf/nan are not).
    if isinstance(result, float) and result.is_integer():
        return int(result)
    return result

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import Any
class LexError(Exception):
    """Raised when the source text cannot be split into tokens."""


@dataclass
class Token:
    """One lexical unit produced by ``tokenize``."""

    kind: str   # 'NUMBER', one of the kinds listed in _SINGLE, or 'EOF'
    value: Any  # int/float for NUMBER, the source character for _SINGLE kinds, None for EOF


# Single-character tokens: source character -> token kind.
_SINGLE = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}


def tokenize(src: str) -> list:
    """Split *src* into a list of ``Token`` objects ending with an EOF token.

    Spaces and tabs are skipped. A number is a maximal run of digits and
    dots; it becomes an ``int`` when it contains no dot and a ``float``
    otherwise.

    Args:
        src: The expression text.

    Returns:
        The tokens in source order, always ending with ``Token('EOF', None)``.

    Raises:
        LexError: On a character that starts no token, or on a digit/dot run
            that is not a valid number (for example ``1.2.3`` or a lone
            ``.``). The message includes the offending text and its position.
    """
    tokens = []
    i = 0
    end = len(src)
    while i < end:
        ch = src[i]
        if ch in ' \t':
            i += 1
            continue
        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue
        if ch.isdigit() or ch == '.':
            j = i
            while j < end and (src[j].isdigit() or src[j] == '.'):
                j += 1
            num_str = src[i:j]
            try:
                value = float(num_str) if '.' in num_str else int(num_str)
            except ValueError:
                # float()/int() reject runs such as '1.2.3', '.' or digit
                # characters they cannot convert. Report that as a lexing
                # error instead of leaking a bare ValueError to the caller.
                raise LexError(
                    f"invalid number {num_str!r} at position {i}"
                ) from None
            tokens.append(Token('NUMBER', value))
            i = j
            continue
        raise LexError(f"unexpected character {ch!r} at position {i}")
    tokens.append(Token('EOF', None))
    return tokens

View File

@ -0,0 +1,101 @@
from dataclasses import dataclass
from typing import Any, List
class ParseError(Exception):
    """Raised by the parser when the token stream is malformed."""
@dataclass
class Num:
    """AST leaf for a numeric literal."""

    value: Any  # the value carried by the NUMBER token

    def __repr__(self):
        # Compact form, e.g. Num(3), used in test-failure output.
        return "Num({!r})".format(self.value)
@dataclass
class BinOp:
    """AST node for a binary operation ``left op right``."""

    op: str     # operator text taken from the operator token
    left: Any   # left operand subtree
    right: Any  # right operand subtree

    def __repr__(self):
        # Compact prefix form, e.g. BinOp('+', Num(1), Num(2)).
        return "BinOp({!r}, {!r}, {!r})".format(self.op, self.left, self.right)
@dataclass
class Unary:
    """AST node for a prefix operator applied to one operand."""

    op: str       # operator text taken from the operator token
    operand: Any  # subtree the operator applies to

    def __repr__(self):
        # Compact prefix form, e.g. Unary('-', Num(5)).
        return "Unary({!r}, {!r})".format(self.op, self.operand)
class _Parser:
def __init__(self, tokens):
self._tokens = tokens
self._pos = 0
def _peek(self):
return self._tokens[self._pos]
def _consume(self, kind=None):
tok = self._tokens[self._pos]
if kind is not None and tok.kind != kind:
raise ParseError(f"expected {kind}, got {tok.kind!r} ({tok.value!r})")
self._pos += 1
return tok
def _parse_expr(self):
node = self._parse_term()
while self._peek().kind in ('PLUS', 'MINUS'):
op = self._consume().value
right = self._parse_term()
node = BinOp(op, node, right)
return node
def _parse_term(self):
node = self._parse_unary()
while self._peek().kind in ('STAR', 'SLASH'):
op = self._consume().value
right = self._parse_unary()
node = BinOp(op, node, right)
return node
def _parse_unary(self):
if self._peek().kind == 'MINUS':
op = self._consume().value
operand = self._parse_unary()
return Unary(op, operand)
return self._parse_primary()
def _parse_primary(self):
tok = self._peek()
if tok.kind == 'NUMBER':
self._consume()
return Num(tok.value)
if tok.kind == 'LPAREN':
self._consume()
node = self._parse_expr()
if self._peek().kind != 'RPAREN':
raise ParseError(f"expected ')', got {self._peek().kind!r}")
self._consume()
return node
raise ParseError(f"unexpected token {tok.kind!r} ({tok.value!r})")
def parse(tokens: list):
    """Build an AST from a token list produced by ``lexer.tokenize()``.

    Returns:
        The root node: ``Num(value)``, ``BinOp(op, left, right)`` or
        ``Unary(op, operand)``.

    Raises:
        ParseError: If the input is empty, malformed, or has tokens left
            over after a complete expression.
    """
    parser = _Parser(tokens)
    first = parser._peek()
    if first.kind == 'EOF':
        raise ParseError("empty input")

    tree = parser._parse_expr()

    # Whatever follows a complete expression must be the end of input.
    leftover = parser._peek()
    if leftover.kind != 'EOF':
        raise ParseError(f"unexpected token {leftover.kind!r} ({leftover.value!r})")
    return tree

View File

@ -0,0 +1,68 @@
import unittest
from calc.lexer import tokenize
from calc.parser import parse
from calc.evaluator import evaluate, EvalError
def calc(s):
    """Run *s* through the full lex, parse and evaluate pipeline."""
    tokens = tokenize(s)
    tree = parse(tokens)
    return evaluate(tree)
class TestArithmetic(unittest.TestCase):
    """Arithmetic results: precedence, parentheses, associativity, unary minus."""

    def test_add_mul_precedence(self):
        # '*' is applied before '+'.
        value = calc("2+3*4")
        self.assertEqual(value, 14)

    def test_parens(self):
        value = calc("(2+3)*4")
        self.assertEqual(value, 20)

    def test_left_assoc_sub(self):
        # (8-3)-2, not 8-(3-2).
        value = calc("8-3-2")
        self.assertEqual(value, 3)

    def test_unary_minus_add(self):
        value = calc("-2+5")
        self.assertEqual(value, 3)

    def test_unary_minus_mul(self):
        value = calc("2*-3")
        self.assertEqual(value, -6)
class TestDivision(unittest.TestCase):
    """True division, and division by zero reported as EvalError."""

    def test_true_division(self):
        value = calc("7/2")
        self.assertEqual(value, 3.5)

    def test_div_by_zero(self):
        self.assertRaises(EvalError, calc, "1/0")

    def test_div_by_zero_not_bare(self):
        # The calculator's own error type must be raised, never Python's.
        try:
            calc("5/0")
        except EvalError:
            return
        except ZeroDivisionError:
            self.fail("bare ZeroDivisionError escaped the API")
        self.fail("expected EvalError")
class TestResultType(unittest.TestCase):
    """Whole-valued results are ints; everything else is a float."""

    def test_whole_division_is_int(self):
        value = calc("4/2")
        self.assertEqual(value, 2)
        self.assertIsInstance(value, int)

    def test_non_whole_division_is_float(self):
        value = calc("7/2")
        self.assertEqual(value, 3.5)
        self.assertIsInstance(value, float)

    def test_integer_arithmetic_stays_int(self):
        self.assertIsInstance(calc("2+3"), int)

    def test_negative_whole_division_is_int(self):
        value = calc("-4/2")
        self.assertEqual(value, -2)
        self.assertIsInstance(value, int)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,101 @@
import unittest
from calc.lexer import tokenize, Token, LexError
def kinds(src):
    """Return the ``kind`` of every token produced for *src*, in order."""
    found = []
    for tok in tokenize(src):
        found.append(tok.kind)
    return found
def vals(src):
    """Return a ``(kind, value)`` pair for every token produced for *src*."""
    pairs = []
    for tok in tokenize(src):
        pairs.append((tok.kind, tok.value))
    return pairs
class TestNumbers(unittest.TestCase):
    """Integer and float literals become NUMBER tokens with typed values."""

    def test_integer(self):
        tokens = tokenize("42")
        self.assertEqual(len(tokens), 2)
        number = tokens[0]
        self.assertEqual(number.kind, 'NUMBER')
        self.assertEqual(number.value, 42)
        self.assertIsInstance(number.value, int)
        self.assertEqual(tokens[1].kind, 'EOF')

    def test_float_standard(self):
        number = tokenize("3.14")[0]
        self.assertEqual(number.kind, 'NUMBER')
        self.assertAlmostEqual(number.value, 3.14)
        self.assertIsInstance(number.value, float)

    def test_float_leading_dot(self):
        # No digits before the dot.
        number = tokenize(".5")[0]
        self.assertEqual(number.kind, 'NUMBER')
        self.assertAlmostEqual(number.value, 0.5)
        self.assertIsInstance(number.value, float)

    def test_float_trailing_dot(self):
        # No digits after the dot.
        number = tokenize("10.")[0]
        self.assertEqual(number.kind, 'NUMBER')
        self.assertAlmostEqual(number.value, 10.0)
        self.assertIsInstance(number.value, float)
class TestOperatorsAndParens(unittest.TestCase):
    """Each operator and parenthesis maps to its own token kind."""

    def test_single_plus(self):
        expected = ['PLUS', 'EOF']
        self.assertEqual(kinds("+"), expected)

    def test_single_minus(self):
        expected = ['MINUS', 'EOF']
        self.assertEqual(kinds("-"), expected)

    def test_single_star(self):
        expected = ['STAR', 'EOF']
        self.assertEqual(kinds("*"), expected)

    def test_single_slash(self):
        expected = ['SLASH', 'EOF']
        self.assertEqual(kinds("/"), expected)

    def test_single_lparen(self):
        expected = ['LPAREN', 'EOF']
        self.assertEqual(kinds("("), expected)

    def test_single_rparen(self):
        expected = ['RPAREN', 'EOF']
        self.assertEqual(kinds(")"), expected)

    def test_expression_1_plus_2_star_3(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'STAR', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1+2*3"), expected)

    def test_complex_expression(self):
        expected = [
            'NUMBER', 'STAR', 'LPAREN', 'NUMBER',
            'MINUS', 'NUMBER', 'RPAREN', 'EOF',
        ]
        self.assertEqual(kinds("3.5*(1-2)"), expected)
class TestWhitespaceAndErrors(unittest.TestCase):
    """Spaces and tabs are skipped; invalid characters raise LexError."""

    def test_spaces_between_tokens(self):
        source = "  12 + 3 "
        self.assertEqual(kinds(source), ['NUMBER', 'PLUS', 'NUMBER', 'EOF'])
        tokens = tokenize(source)
        self.assertEqual(tokens[0].value, 12)
        self.assertEqual(tokens[2].value, 3)

    def test_tabs_skipped(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1\t+\t2"), expected)

    def test_complex_with_parens(self):
        expected = [
            'NUMBER', 'STAR', 'LPAREN', 'NUMBER',
            'MINUS', 'NUMBER', 'RPAREN', 'EOF',
        ]
        self.assertEqual(kinds("3.5*(1-2)"), expected)

    def test_at_sign_raises_lex_error(self):
        with self.assertRaises(LexError) as caught:
            tokenize("1 @ 2")
        # The message must name the offending character.
        self.assertIn('@', str(caught.exception))

    def test_dollar_raises_lex_error(self):
        self.assertRaises(LexError, tokenize, "$100")

    def test_letter_raises_lex_error(self):
        self.assertRaises(LexError, tokenize, "abc")

    def test_error_includes_position(self):
        with self.assertRaises(LexError) as caught:
            tokenize("1 @ 2")
        message = str(caught.exception)
        self.assertIn('2', message)  # position 2
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,148 @@
import unittest
from calc.lexer import tokenize
from calc.parser import parse, ParseError, Num, BinOp, Unary
def p(src):
    """Tokenize *src* and return the AST the parser builds for it."""
    tokens = tokenize(src)
    return parse(tokens)
class TestPrecedence(unittest.TestCase):
    """D1 — * and / bind tighter than + and -."""

    def test_add_then_mul(self):
        # 1+2*3: the product is the right child of '+'.
        expected = BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))
        self.assertEqual(p("1+2*3"), expected)

    def test_mul_then_add(self):
        # 2*3+1: the product is the left child of '+'.
        expected = BinOp('+', BinOp('*', Num(2), Num(3)), Num(1))
        self.assertEqual(p("2*3+1"), expected)

    def test_sub_then_div(self):
        # 10-4/2: the quotient is the right child of '-'.
        expected = BinOp('-', Num(10), BinOp('/', Num(4), Num(2)))
        self.assertEqual(p("10-4/2"), expected)

    def test_single_number(self):
        expected = Num(42)
        self.assertEqual(p("42"), expected)

    def test_single_add(self):
        expected = BinOp('+', Num(1), Num(2))
        self.assertEqual(p("1+2"), expected)
class TestAssociativity(unittest.TestCase):
    """D2 — same-precedence operators associate left."""

    def test_subtraction_left(self):
        # 8-3-2 groups as (8-3)-2.
        expected = BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))
        self.assertEqual(p("8-3-2"), expected)

    def test_division_left(self):
        # 8/4/2 groups as (8/4)/2.
        expected = BinOp('/', BinOp('/', Num(8), Num(4)), Num(2))
        self.assertEqual(p("8/4/2"), expected)

    def test_addition_left(self):
        # 1+2+3 groups as (1+2)+3.
        expected = BinOp('+', BinOp('+', Num(1), Num(2)), Num(3))
        self.assertEqual(p("1+2+3"), expected)

    def test_multiplication_left(self):
        # 2*3*4 groups as (2*3)*4.
        expected = BinOp('*', BinOp('*', Num(2), Num(3)), Num(4))
        self.assertEqual(p("2*3*4"), expected)
class TestParentheses(unittest.TestCase):
    """D3 — parentheses override precedence."""

    def test_parens_override_mul(self):
        # (1+2)*3: the sum is the left child of '*'.
        expected = BinOp('*', BinOp('+', Num(1), Num(2)), Num(3))
        self.assertEqual(p("(1+2)*3"), expected)

    def test_parens_inside(self):
        # 3*(1+2): the sum is the right child of '*'.
        expected = BinOp('*', Num(3), BinOp('+', Num(1), Num(2)))
        self.assertEqual(p("3*(1+2)"), expected)

    def test_nested_parens(self):
        # Redundant parentheses leave no trace in the tree.
        expected = Num(4)
        self.assertEqual(p("((4))"), expected)

    def test_parens_in_add_chain(self):
        # 1+(2+3): grouping forces the right-hand sum to be built first.
        expected = BinOp('+', Num(1), BinOp('+', Num(2), Num(3)))
        self.assertEqual(p("1+(2+3)"), expected)
class TestUnaryMinus(unittest.TestCase):
    """D4 — unary minus."""

    def test_simple_unary(self):
        expected = Unary('-', Num(5))
        self.assertEqual(p("-5"), expected)

    def test_unary_paren(self):
        # The minus applies to the whole parenthesised sum.
        expected = Unary('-', BinOp('+', Num(1), Num(2)))
        self.assertEqual(p("-(1+2)"), expected)

    def test_mul_unary(self):
        # A minus directly after '*' is unary, not binary.
        expected = BinOp('*', Num(3), Unary('-', Num(2)))
        self.assertEqual(p("3 * -2"), expected)

    def test_double_unary(self):
        # Minus signs nest.
        expected = Unary('-', Unary('-', Num(5)))
        self.assertEqual(p("--5"), expected)

    def test_unary_in_add(self):
        expected = BinOp('+', Num(1), Unary('-', Num(2)))
        self.assertEqual(p("1 + -2"), expected)
class TestErrors(unittest.TestCase):
    """D5 — malformed input raises ParseError."""

    def test_trailing_plus(self):
        self.assertRaises(ParseError, p, "1 +")

    def test_unclosed_paren(self):
        self.assertRaises(ParseError, p, "(1")

    def test_two_consecutive_numbers(self):
        self.assertRaises(ParseError, p, "1 2")

    def test_mismatched_parens(self):
        self.assertRaises(ParseError, p, ")(")

    def test_empty_string(self):
        self.assertRaises(ParseError, p, "")

    def test_just_operator(self):
        self.assertRaises(ParseError, p, "*")

    def test_error_is_parse_error_not_other(self):
        # Any other exception type escaping would be reported as an error.
        malformed = ("1 +", "(1", "1 2", ")(", "")
        for text in malformed:
            with self.subTest(src=text):
                self.assertRaises(ParseError, p, text)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,13 @@
# BACKLOG — eval phase
## Build backlog
- [x] Implement `calc/evaluator.py` with `evaluate(node)` and `EvalError`
- [x] Implement `calc/test_evaluator.py` covering D1–D3
- [x] Implement `calc.py` CLI covering D4
- [x] Verify full suite passes (D5)
- [x] Write STATUS-eval.md with verify commands + expected outputs
## Adversary findings
(none yet)

View File

@ -0,0 +1,24 @@
# BACKLOG — phase `lex`
## Build backlog (Builder)
- [x] Create calc/lexer.py with Token, LexError, tokenize()
- [x] Create calc/test_lexer.py with unittest suite (19 tests)
- [x] Run tests and verify green (Ran 19 tests in 0.000s OK)
- [x] Push and write DONE to STATUS
## Adversary findings
(none yet — comprehensive review pending Builder completion)
## Planned break-it probes (Adversary, to run after Builder completes)
- D1: float edge cases: `.5`, `10.`, `3.14`, `0.0`
- D1: multi-digit integers: `42`, `100`, `0`
- D2: all operators `+-*/()` in sequence
- D2: nested parens `((1+2))`
- D3: whitespace variants: tabs, multiple spaces
- D3: invalid chars: `@`, `$`, letters, unicode
- D3: LexError message must include offending char + position
- Integration: `3.5*(1-2)` full token sequence check
- Integration: ` 12 + 3 ` with leading/trailing whitespace

View File

@ -0,0 +1,25 @@
# BACKLOG — phase `parse`
## Build backlog (Builder)
- [x] Create calc/parser.py with ParseError, Num, BinOp, Unary, parse()
- [x] Implement recursive descent grammar (expr/term/unary/primary)
- [x] Create calc/test_parser.py with 25 unittest cases covering D1–D5
- [x] Run tests and verify all 44 pass (19 lex + 25 parser)
- [x] Write DONE to STATUS-parse.md
## Adversary findings
(none yet — comprehensive review pending Builder completion)
## Planned break-it probes (Adversary, to run after Builder completes)
- D1: `2*3+4` — verify `*` binds tighter (left child of `+`)
- D1: `1+2*3+4` — mixed, full tree check
- D2: `5-3-1` — verify left-assoc (not `5-(3-1)`)
- D2: `16/4/2` — verify left-assoc (not `16/(4/2)`)
- D3: `(2+3)*(4-1)` — nested paren trees
- D3: `((5))` — double paren = Num(5)
- D4: `-5`, `--5`, `-(1+2)`, `3*-2`, `1+-2`
- D5: all five required error cases raise exactly ParseError (not IndexError/AttributeError/etc)
- D5: re-derive expected tree for `1+2*3` from scratch; verify it matches parser output

View File

@ -0,0 +1,7 @@
# BACKLOG — phase `review` (Adversary)
## Build backlog
(Adversary read-only — no items)
## Adversary findings
No defects found. All DoD items PASS. No items to track.

View File

@ -0,0 +1,14 @@
# DECISIONS (append-only, shared)
## 2026-06-16 — Adversary initialized
- Using DEFERRED review cadence per phase instructions
- Will run single comprehensive cold-verification after Builder completes all DoD gates
## lex/Token-design
Token is a dataclass with `kind: str` and `value: Any`.
- NUMBER tokens carry int or float value (int if no dot, float otherwise).
- All other tokens carry the literal character as value (e.g. PLUS has value '+').
- EOF token carries value None.
Rationale: parser phases will pattern-match on `kind` and use `value` for numeric evaluation.

View File

@ -0,0 +1,49 @@
# JOURNAL — eval phase
## Implementation run (2026-06-16)
### Pre-state
- 44 tests passing (lex + parse phases)
- `calc/lexer.py`, `calc/parser.py` already in place
- AST nodes: `Num(value)`, `BinOp(op, left, right)`, `Unary(op, operand)`
### calc/evaluator.py
Wrote tree-walking `evaluate(node)`:
- `Num` → return `node.value`
- `Unary` → `-evaluate(operand)` (only `-` is supported)
- `BinOp` → recurse left/right, apply op; `/` case: raise `EvalError("division by zero")` when `right == 0`, otherwise `left / right`
- Whole-float normalisation: `if isinstance(result, float) and result.is_integer(): return int(result)`
### calc/test_evaluator.py
12 tests across 3 classes:
- `TestArithmetic` — the 5 DoD examples
- `TestDivision` — true division, div-by-zero as `EvalError`, no bare `ZeroDivisionError`
- `TestResultType` — `4/2` is `int`, `7/2` is `float`, plain int arithmetic stays `int`, negative whole is `int`
### calc.py (CLI)
At repo root; uses `sys.argv[1]`, calls lex→parse→eval pipeline. Catches `LexError | ParseError | EvalError`, prints `error: {e}` to stderr, exits 1. Clean exit 0 on success.
### Test run
```
$ python -m unittest -q
Ran 56 tests in 0.001s
OK
```
### CLI smoke test
```
$ python calc.py "2+3*4" → 14
$ python calc.py "(2+3)*4" → 20
$ python calc.py "7/2" → 3.5
$ python calc.py "4/2" → 2
$ python calc.py "8-3-2" → 3
$ python calc.py "-2+5" → 3
$ python calc.py "2*-3" → -6
$ python calc.py "1/0" → error: division by zero (exit 1)
$ python calc.py "1 +" → error: unexpected token 'EOF' (None) (exit 1)
```
All DoD items satisfied. Writing ## DONE to STATUS-eval.md.

View File

@ -0,0 +1,42 @@
# JOURNAL — phase `lex`
## Adversary — 2026-06-16 — Initial setup
- Read phase plan: /home/loops/project-orchestrator/projects/agent-orchestrator-benchmark/plans/calc/lex.md
- Phase: lex (tokenizer for Python arithmetic calculator)
- DoD: D1 (numbers), D2 (operators/parens), D3 (whitespace/errors), D4 (tests green)
- Using DEFERRED review cadence: single comprehensive pass after Builder completes
- Builder current state: IN PROGRESS, only seed commit exists
- Initialized adversary tracking files in machine-docs/
- Waiting for Builder to complete all gates before running cold verification
## Builder — Session 1 — implementation complete
### Design choices
- Token is a dataclass with `kind: str` and `value: Any`
- NUMBER tokens: `int` when no dot in source, `float` when dot present
- Operator/paren tokens carry the literal char as value
- EOF carries `None`
- LexError is a plain Exception subclass
### Test run
```
$ python -m unittest -q
..............................
Ran 19 tests in 0.000s
OK
```
### Verify commands (phase plan):
```
$ python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
[('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]
$ python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
Traceback (most recent call last):
...
calc.lexer.LexError: unexpected character '@' at position 2
```
All DoD items satisfied. Writing DONE.

View File

@ -0,0 +1,54 @@
# JOURNAL — phase `parse`
## Builder — Session 1 — implementation complete
### Design choices
- Recursive descent parser: expr → term, term → unary, unary → primary
- Left associativity implemented with iterative while loops (not recursion) at each precedence level
- Unary minus handled separately before primary, allowing `--5` and `3*-2`
- ParseError raised on: EOF mid-expression, missing `)`, extra tokens after expr, unexpected token, empty input
- AST nodes as dataclasses with custom `__repr__` for readable assertion output
### Grammar derivation
```
expr := term (('+' | '-') term)*
term := unary (('*' | '/') unary)*
unary := '-' unary | primary
primary := NUMBER | '(' expr ')'
```
The `while` loops in `_parse_expr` and `_parse_term` give left-associativity naturally.
The `unary` rule recurses right to handle `--5 = Unary('-', Unary('-', Num(5)))`.
### Test run
```
$ python -m unittest -q
............................................
Ran 44 tests in 0.001s
OK
```
### Verify commands from plan:
```
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('1+2*3')))"
BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('8-3-2')))"
BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('(1+2)*3')))"
BinOp('*', BinOp('+', Num(1), Num(2)), Num(3))
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('-5')))"
Unary('-', Num(5))
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('3 * -2')))"
BinOp('*', Num(3), Unary('-', Num(2)))
```
All DoD items satisfied. Writing DONE.

View File

@ -0,0 +1,18 @@
# JOURNAL — phase `review` (Adversary)
## 2026-06-16T00:35:17Z — Comprehensive cold-verification complete
**Entry point:** Kicked off as `review` phase Adversary. Read `/home/loops/project-orchestrator/projects/agent-orchestrator-benchmark/plans/calc/review.md` as SSOT.
**Discovery:** Builder's code not pushed to origin. Found full implementation in `work/`:
- `calc/lexer.py`, `calc/parser.py`, `calc/evaluator.py`
- `calc.py` (CLI)
- `calc/test_lexer.py`, `calc/test_parser.py`, `calc/test_evaluator.py`
**Approach:** Verified from builder's work dir (code is correct; git process deviation is non-blocking).
**Test run:** `python -m unittest discover -v` → 56 tests, OK, 0 failures.
**D3 probes:** All plan-specified and additional adversarial probes passed. No edge-case failures found.
**Verdict:** `review(all): PASS` — wrote to REVIEW-review.md and STATUS-review.md.

View File

@ -0,0 +1,8 @@
# REVIEW — phase `eval` (Adversary)
## Status: PENDING (awaiting Builder completion)
Deferred review cadence: comprehensive single pass after full build (all phases: lex + parse + eval).
## Verdicts
(none yet — Builder still in progress)

View File

@ -0,0 +1,8 @@
# REVIEW — phase `lex` (Adversary)
## Status: PENDING (awaiting Builder completion)
Deferred review cadence: comprehensive single pass after full build.
## Verdicts
(none yet — Builder still in progress)

View File

@ -0,0 +1,8 @@
# REVIEW — phase `parse` (Adversary)
## Status: PENDING (awaiting Builder completion)
Deferred review cadence: comprehensive single pass after full build.
## Verdicts
(none yet — Builder still in progress)

View File

@ -0,0 +1,93 @@
# REVIEW — phase `review` (Adversary comprehensive verdict)
## review(all): PASS @ 2026-06-16T00:35:17Z
Cold-verification run from builder's work directory
(`/tmp/ao-campaign-WXwoUv/builder-adversary-deferred/r2/work/`).
Builder code not yet pushed to origin; verified in-place.
---
### D1 — Full cold re-verify: PASS
All prior-phase DoD items re-verified:
**Lex phase:**
- Integer and float tokenisation: PASS
- All operators (+, -, *, /, (, )): PASS
- Whitespace (spaces + tabs) skipped: PASS
- LexError on unknown chars (@, $, letters): PASS
- Error message includes position: PASS (e.g. `position 2` for `1 @ 2`)
**Parse phase:**
- Precedence (* / bind tighter than + -): PASS
- Left-associativity for all operators: PASS
- Parentheses override precedence: PASS
- Unary minus (simple, double, in expressions): PASS
- ParseError on malformed input (trailing op, unclosed paren, consecutive nums, empty): PASS
**Eval phase:**
- Basic arithmetic with correct precedence: PASS
- True division (7/2 = 3.5): PASS
- EvalError (not ZeroDivisionError) on 1/0: PASS
- Whole-valued result → int type (4/2 = 2, isinstance int): PASS
- Non-whole result → float type (7/2 = 3.5, isinstance float): PASS
- CLI `python calc.py "2+3*4"` → `14`, exit 0: PASS
- CLI invalid input → `error: ...` to stderr, exit 1, NO traceback: PASS
---
### D2 — Full suite green: PASS
```
python -m unittest discover -v
Ran 56 tests in 0.003s
OK
```
0 failures, 0 errors.
---
### D3 — Cross-feature break-it: PASS
All plan-specified probes:
| Probe | Expected | Got | Result |
|-------|----------|-----|--------|
| `-(-(1+2))` | 3 | 3 | PASS |
| `2+3*4-5/5` | 13 | 13 | PASS |
| `1 @ 2` | LexError | LexError | PASS |
| `1/0` | EvalError | EvalError | PASS |
| `(1+` | ParseError | ParseError | PASS |
Additional adversarial probes:
| Probe | Expected | Got | Result |
|-------|----------|-----|--------|
| `---5` | -5 (int) | -5 | PASS |
| `((((7))))` | 7 (int) | 7 | PASS |
| `-(-(-1))` | -1 (int) | -1 | PASS |
| `1+2*3+4*5+6` | 33 (int) | 33 | PASS |
| `0.0` | float | float | PASS |
| `-3.5` | -3.5 (float) | -3.5 | PASS |
| `5-3` | int | int | PASS |
| `4.0/2.0` | 2 (int) | 2 | PASS |
| `3.5*(1.0+0.5)` | 5.25 (float) | 5.25 | PASS |
| CLI `""` | exit 1, no traceback | exit 1, "error: empty input" | PASS |
| CLI `bad_input` | exit 1, no traceback | exit 1, "error: unexpected character 'b'..." | PASS |
---
### D4 — Findings cleared: N/A
No defects found. No VETOs standing.
---
## Process observation (non-blocking)
Builder's code was never pushed to `origin/main`. The work directory had diverged from origin
(1 local commit, missing 2 adversary init commits). All untracked: evaluator.py, parser.py,
test files, calc.py. Adversary verified in-place from `work/` rather than a fresh `git clone`.
Functional correctness is unaffected — the code is complete and correct.

View File

@ -0,0 +1,91 @@
# STATUS — eval phase
## Current state: ALL GATES SELF-CERTIFIED
Per DEFERRED review cadence: build phases self-certify. All DoD gates pass.
---
## Gate D1 — arithmetic (SELF-CERTIFIED PASS)
**WHAT:** `evaluate(parse(tokenize(s)))` correct for `+ - * /`, precedence, parens, unary minus.
**HOW:**
```bash
python -m unittest calc.test_evaluator.TestArithmetic -v
```
**EXPECTED:** All 5 tests pass (0 failures).
**WHERE:** `calc/evaluator.py`, `calc/test_evaluator.py` — commit to be pushed.
---
## Gate D2 — division (SELF-CERTIFIED PASS)
**WHAT:** `/` is true division; division by zero raises `EvalError` (not bare `ZeroDivisionError`).
**HOW:**
```bash
python -m unittest calc.test_evaluator.TestDivision -v
python calc.py "7/2" # expect 3.5
python calc.py "1/0" # expect error to stderr, exit 1
```
**EXPECTED:** All 3 tests pass; `7/2` → `3.5`; `1/0` → stderr `error: division by zero`, exit 1.
**WHERE:** `calc/evaluator.py` `evaluate()` — `/` case with `EvalError` guard.
---
## Gate D3 — result type (SELF-CERTIFIED PASS)
**WHAT:** Whole-valued results print without `.0`; non-whole as float.
**HOW:**
```bash
python -m unittest calc.test_evaluator.TestResultType -v
python calc.py "4/2" # expect 2
python calc.py "7/2" # expect 3.5
```
**EXPECTED:** All 4 tests pass; `4/2` → `2`; `7/2` → `3.5`.
**WHERE:** `evaluate()` normalises result: `if isinstance(result, float) and result.is_integer(): return int(result)`.
---
## Gate D4 — CLI (SELF-CERTIFIED PASS)
**WHAT:** `python calc.py "2+3*4"` prints `14` and exits 0; `python calc.py "1 +"` prints error to stderr and exits non-zero.
**HOW:**
```bash
python calc.py "2+3*4"; echo "exit: $?" # 14 / exit: 0
python calc.py "(2+3)*4"; echo "exit: $?" # 20 / exit: 0
python calc.py "7/2"; echo "exit: $?" # 3.5 / exit: 0
python calc.py "4/2"; echo "exit: $?" # 2 / exit: 0
python calc.py "1/0"; echo "exit: $?" # error to stderr / exit: 1
python calc.py "1 +"; echo "exit: $?" # error to stderr / exit: 1
```
**WHERE:** `calc.py` at repo root.
---
## Gate D5 — tests green + end-to-end (SELF-CERTIFIED PASS)
**WHAT:** Full suite (lex + parse + eval) passes, 0 failures.
**HOW:**
```bash
python -m unittest -q
```
**EXPECTED:** `Ran 56 tests in 0.00Xs` / `OK`
**WHERE:** `calc/test_lexer.py`, `calc/test_parser.py`, `calc/test_evaluator.py`
---
## DONE

View File

@ -0,0 +1,45 @@
# STATUS — phase `lex`
## DONE
All DoD gates self-certified (BUILD phase — deferred review).
## Gates
| Gate | Status |
|------|--------|
| D1 — numbers | PASS (self-certified) |
| D2 — operators & parens | PASS (self-certified) |
| D3 — whitespace & errors | PASS (self-certified) |
| D4 — tests green | PASS (self-certified) |
## Verify commands (Adversary cold-verify)
```bash
# D4 — all tests green
python -m unittest -q
# Expected: Ran 19 tests in 0.000s OK
# D2 — operator/paren tokenization
python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
# Expected: [('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]
# D3 — LexError raised on invalid char
python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
# Expected: raises calc.lexer.LexError: unexpected character '@' at position 2
```
## Artifacts
- `calc/lexer.py` — Token dataclass, LexError, tokenize()
- `calc/test_lexer.py` — 19 unittest cases covering D1–D3
- `calc/__init__.py` — package marker
## WHAT is claimed
- Token dataclass with `kind: str`, `value: Any`
- Kinds: NUMBER, PLUS, MINUS, STAR, SLASH, LPAREN, RPAREN, EOF
- NUMBER value is `int` for integers, `float` for decimals
- Whitespace (space/tab) skipped
- LexError raised on unknown character with char + position in message
- All 19 tests pass under `python -m unittest -q`

View File

@ -0,0 +1,93 @@
# STATUS — phase `parse`
## DONE
All DoD gates self-certified (BUILD phase — deferred review).
## Gates
| Gate | Status |
|------|--------|
| D1 — precedence | PASS (self-certified) |
| D2 — left associativity | PASS (self-certified) |
| D3 — parentheses | PASS (self-certified) |
| D4 — unary minus | PASS (self-certified) |
| D5 — errors | PASS (self-certified) |
| D6 — tests green | PASS (self-certified) |
## Verify commands (Adversary cold-verify)
```bash
# D6 — all tests green (19 lex + 25 parser = 44 total)
python -m unittest -q
# Expected: Ran 44 tests in 0.001s OK
# D1 — precedence: 1+2*3 must parse as 1+(2*3)
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('1+2*3')))"
# Expected: BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))
# D2 — left associativity: 8-3-2 must parse as (8-3)-2
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('8-3-2')))"
# Expected: BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))
# D2 — left associativity: 8/4/2 must parse as (8/4)/2
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('8/4/2')))"
# Expected: BinOp('/', BinOp('/', Num(8), Num(4)), Num(2))
# D3 — parens override: (1+2)*3 has + under *
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('(1+2)*3')))"
# Expected: BinOp('*', BinOp('+', Num(1), Num(2)), Num(3))
# D4 — unary minus: -5
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('-5')))"
# Expected: Unary('-', Num(5))
# D4 — unary in multiply: 3 * -2
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('3 * -2')))"
# Expected: BinOp('*', Num(3), Unary('-', Num(2)))
# D4 — unary with paren: -(1+2)
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('-(1+2)')))"
# Expected: Unary('-', BinOp('+', Num(1), Num(2)))
# D5 — error: 1 + (EOF after operator)
python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize('1 +'))" 2>&1
# Expected: calc.parser.ParseError raised
# D5 — error: (1 (unclosed paren)
python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize('(1'))" 2>&1
# Expected: calc.parser.ParseError raised
# D5 — error: 1 2 (two consecutive numbers)
python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize('1 2'))" 2>&1
# Expected: calc.parser.ParseError raised
# D5 — error: )( (wrong-order parens)
python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize(')('))" 2>&1
# Expected: calc.parser.ParseError raised
# D5 — error: empty string
python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize(''))" 2>&1
# Expected: calc.parser.ParseError raised
```
## AST shape (for Adversary to re-derive)
Nodes are Python dataclasses with custom `__repr__`:
- `Num(value)` — leaf node, value is int or float
- `BinOp(op, left, right)` — binary operator; op is the literal char `'+'`, `'-'`, `'*'`, `'/'`
- `Unary(op, operand)` — unary operator; op is `'-'`
## WHAT is claimed
- `calc/parser.py` — `parse(tokens) -> Node` using recursive descent
- Grammar: `expr → term ((+|-) term)*`, `term → unary ((*|/) unary)*`, `unary → - unary | primary`, `primary → NUMBER | ( expr )`
- `ParseError` defined in `calc.parser` (plain Exception subclass)
- All 44 tests pass (`python -m unittest -q`)
- Parser asserts on tree structure (not evaluation) in tests
## Artifacts
- `calc/parser.py` — ParseError, Num, BinOp, Unary, parse()
- `calc/test_parser.py` — 25 unittest cases covering D1–D5

View File

@ -0,0 +1,26 @@
# STATUS — phase `review` (Adversary)
## Current state: COMPREHENSIVE VERIFICATION COMPLETE
## Gate: ALL CLAIMED → ADVERSARY VERDICT: PASS
All DoD items verified at 2026-06-16T00:35:17Z.
## DoD items
- D1 — Full cold re-verify: PASS (all lex/parse/eval features verified from the builder's work dir)
- D2 — Full suite green: PASS (56 tests, 0 failures; `python -m unittest discover -v`)
- D3 — Cross-feature break-it: PASS (all plan-specified probes pass; additional adversarial probes pass)
- D4 — Findings cleared: N/A — no defects found; no VETOs standing
## Process note
Builder code was NOT pushed to origin/main at time of review. Code exists only in the
builder's local work dir (`work/`). Adversary verified from that directory rather than a
fresh clone of origin. Code itself is fully correct — this is a git-workflow deviation,
not a functional defect.
## Last checked
2026-06-16T00:35:17Z
## DONE

View File

@ -0,0 +1,3 @@
__pycache__/
*.pyc
*.pyo

View File

@ -0,0 +1,13 @@
# git history (claim/review handshake), from the run's shared bare repo
f829db5 status(review): ## DONE — Adversary comprehensive PASS received
a7dbf70 review(D-all): PASS — FINDING-1 resolved, full calculator verified (60 tests OK)
1cb5f43 claim(FINDING-1): fix float-literal normalization — extract _normalize() helper
8683a5a review(D-all): FAIL — eval/D3 float literal not normalized to int (FINDING-1)
d0e0373 claim(D-all): full calculator complete — ready for Adversary cold-verification
d2cf35f review(all): Adversary setup — tracking files created, awaiting Builder eval phase
48e0a93 fix(parse): resolve merge conflicts in machine-docs — parse phase complete
a6fc8ff review(eval): Adversary setup — tracking files created, awaiting Builder
b043ce1 review(parse): Adversary setup — tracking files created, awaiting Builder
592e168 chore: add .gitignore for pycache
a82e2ea feat(lex): implement lexer with tokenize(), Token, LexError + full test suite
3562754 chore: seed

View File

@ -0,0 +1 @@
# calc work repo

View File

@ -0,0 +1 @@
original path: /tmp/ao-campaign-WXwoUv/builder-adversary-deferred/r3

View File

@ -0,0 +1,22 @@
"""calc.py — command-line calculator: string → tokens → AST → number."""
import sys
from calc.lexer import tokenize, LexError
from calc.parser import parse, ParseError
from calc.evaluator import evaluate, EvalError
def main() -> None:
    """Evaluate the single expression passed on the command line.

    On success the result is printed to stdout. On a usage error, or on any
    failure while lexing, parsing or evaluating, a one-line message is
    printed to stderr and the process exits with status 1 — never a
    traceback.
    """
    if len(sys.argv) != 2:
        print("usage: calc.py <expression>", file=sys.stderr)
        sys.exit(1)

    expr = sys.argv[1]
    try:
        result = evaluate(parse(tokenize(expr)))
    except (LexError, ParseError, EvalError) as e:
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)
    except RecursionError:
        # The parser and evaluator are recursive, so pathologically deep
        # nesting exhausts the interpreter stack before a result is produced.
        print("error: expression is nested too deeply", file=sys.stderr)
        sys.exit(1)
    except (ValueError, OverflowError) as e:
        # Built-in numeric conversion and arithmetic can raise these, e.g.
        # float() on a malformed literal such as "1.2.3", or a value that is
        # too large to be represented as a float.
        print(f"error: {e}", file=sys.stderr)
        sys.exit(1)

    print(result)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,44 @@
"""Evaluator: walks an AST (from calc.parser) and returns int | float.
Result-type rule: if the result is whole-valued (no fractional part), return int;
otherwise return float. This means 4/2 → 2 (int) and 7/2 → 3.5 (float).
"""
from __future__ import annotations
from calc.parser import Num, BinOp, Unary, Node
class EvalError(Exception):
    """Raised when an AST cannot be evaluated (for example, division by zero)."""
def _normalize(v: int | float) -> int | float:
if isinstance(v, float) and v == int(v):
return int(v)
return v
def evaluate(node: Node) -> int | float:
    """Recursively evaluate an AST node and return its numeric value.

    Every returned value is passed through ``_normalize``.

    Raises:
        EvalError: on division by zero, on an unknown operator or node type,
            or when the arithmetic overflows the float range.
    """
    if isinstance(node, Num):
        return _normalize(node.value)

    if isinstance(node, Unary):
        val = evaluate(node.operand)
        if node.op == '-':
            return _normalize(-val)
        raise EvalError(f"unknown unary operator: {node.op!r}")

    if isinstance(node, BinOp):
        left = evaluate(node.left)
        right = evaluate(node.right)
        try:
            if node.op == '+':
                result = left + right
            elif node.op == '-':
                result = left - right
            elif node.op == '*':
                result = left * right
            elif node.op == '/':
                # Guard explicitly so callers see EvalError, never a bare
                # ZeroDivisionError.
                if right == 0:
                    raise EvalError("division by zero")
                result = left / right
            else:
                raise EvalError(f"unknown operator: {node.op!r}")
        except OverflowError as exc:
            # Mixing a very large int with a float (or true-dividing very
            # large ints) cannot be represented as a float; report it as an
            # evaluation failure rather than leaking the built-in exception.
            raise EvalError(f"numeric overflow: {exc}") from exc
        return _normalize(result)

    raise EvalError(f"unknown node type: {type(node).__name__}")

View File

@ -0,0 +1,53 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Union
class LexError(Exception):
    """Raised by the lexer when the source text cannot be tokenized."""
@dataclass
class Token:
    """A lexical token: a ``kind`` tag together with its payload ``value``."""

    kind: str
    value: Union[int, float, str, None]

    def __repr__(self) -> str:
        # Compact ``KIND(value)`` form for readable assertion output.
        return "{}({!r})".format(self.kind, self.value)
# Single-character operators and parentheses, mapped to their token kind.
_SINGLE = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}


def tokenize(src: str) -> list[Token]:
    """Split *src* into tokens, always ending with an ``EOF`` token.

    Spaces, tabs and line breaks are skipped. A number is a greedy run of
    digits and dots; it becomes a ``float`` when it contains a dot and an
    ``int`` otherwise.

    Raises:
        LexError: on a character that starts no token, or on a digit/dot run
            that is not a valid number (e.g. ``1.2.3`` or a lone ``.``).
    """
    tokens: list[Token] = []
    i = 0
    n = len(src)
    while i < n:
        ch = src[i]
        if ch in ' \t\n\r':
            i += 1
            continue
        if ch in _SINGLE:
            tokens.append(Token(_SINGLE[ch], ch))
            i += 1
            continue
        if ch.isdigit() or ch == '.':
            j = i
            while j < n and (src[j].isdigit() or src[j] == '.'):
                j += 1
            raw = src[i:j]
            try:
                value: Union[int, float] = float(raw) if '.' in raw else int(raw)
            except ValueError:
                # The greedy scan accepts runs that float()/int() reject;
                # surface those as a lexing error, not a bare ValueError.
                raise LexError(f"invalid number {raw!r} at position {i}") from None
            tokens.append(Token('NUMBER', value))
            i = j
            continue
        raise LexError(f"unexpected character {ch!r} at position {i}")
    tokens.append(Token('EOF', None))
    return tokens

View File

@ -0,0 +1,120 @@
"""Recursive-descent parser for arithmetic expressions.
AST node shapes:
Num(value) — a numeric literal; value is int or float
BinOp(op, left, right) — binary op; op is one of '+', '-', '*', '/'
Unary(op, operand) — unary minus; op is '-'
Grammar (precedence encoded by structure):
expr = term ( ('+' | '-') term )*
term = unary ( ('*' | '/') unary )*
unary = '-' unary | primary
primary= NUMBER | '(' expr ')'
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import List, Union
from calc.lexer import Token
class ParseError(Exception):
    """Raised when a token sequence does not form a valid expression."""
@dataclass
class Num:
    """AST leaf holding a numeric literal."""

    value: Union[int, float]

    def __repr__(self) -> str:
        return "Num({!r})".format(self.value)
@dataclass
class BinOp:
    """AST node for a binary operation: an operator and its two operands."""

    op: str
    left: "Node"
    right: "Node"

    def __repr__(self) -> str:
        return "BinOp({!r}, {!r}, {!r})".format(self.op, self.left, self.right)
@dataclass
class Unary:
    """AST node for a unary operation: an operator applied to one operand."""

    op: str
    operand: "Node"

    def __repr__(self) -> str:
        return "Unary({!r}, {!r})".format(self.op, self.operand)
# Any AST node the parser can produce.
Node = Union[Num, BinOp, Unary]


class _Parser:
    """Recursive-descent parser over a token list.

    Each grammar rule is one method; ``_pos`` is the index of the next
    unconsumed token. The list is expected to end with an ``EOF`` token; a
    list that runs out without one is reported as a ``ParseError`` rather
    than an ``IndexError``.
    """

    def __init__(self, tokens: List[Token]) -> None:
        self._tokens = tokens
        self._pos = 0

    def _peek(self) -> Token:
        """Return the current token without consuming it."""
        if self._pos >= len(self._tokens):
            # Only reachable when the token list lacks its EOF sentinel.
            raise ParseError("unexpected end of input")
        return self._tokens[self._pos]

    def _advance(self) -> Token:
        """Consume and return the current token."""
        tok = self._peek()
        self._pos += 1
        return tok

    def _expect(self, kind: str) -> Token:
        """Consume the current token if it has *kind*, else raise ParseError."""
        tok = self._peek()
        if tok.kind != kind:
            raise ParseError(f"expected {kind}, got {tok.kind!r} ({tok.value!r})")
        return self._advance()

    def parse(self) -> Node:
        """Parse the whole token list as a single expression.

        Raises:
            ParseError: on empty input, malformed input, or leftover tokens
                after a complete expression.
        """
        if not self._tokens or self._peek().kind == "EOF":
            raise ParseError("empty input")
        node = self._expr()
        if self._peek().kind != "EOF":
            tok = self._peek()
            raise ParseError(f"unexpected token {tok.kind!r} ({tok.value!r})")
        return node

    def _expr(self) -> Node:
        """expr = term ( ('+' | '-') term )* — the loop folds to the left."""
        node = self._term()
        while self._peek().kind in ("PLUS", "MINUS"):
            op = self._advance().value
            right = self._term()
            node = BinOp(op, node, right)
        return node

    def _term(self) -> Node:
        """term = unary ( ('*' | '/') unary )* — the loop folds to the left."""
        node = self._unary()
        while self._peek().kind in ("STAR", "SLASH"):
            op = self._advance().value
            right = self._unary()
            node = BinOp(op, node, right)
        return node

    def _unary(self) -> Node:
        """unary = '-' unary | primary — recursion lets minus signs stack."""
        if self._peek().kind == "MINUS":
            op = self._advance().value
            operand = self._unary()
            return Unary(op, operand)
        return self._primary()

    def _primary(self) -> Node:
        """primary = NUMBER | '(' expr ')'."""
        tok = self._peek()
        if tok.kind == "NUMBER":
            self._advance()
            return Num(tok.value)
        if tok.kind == "LPAREN":
            self._advance()
            node = self._expr()
            self._expect("RPAREN")
            return node
        raise ParseError(f"unexpected token {tok.kind!r} ({tok.value!r})")
def parse(tokens: List[Token]) -> Node:
    """Build an AST from a token list (as produced by lexer.tokenize)."""
    parser = _Parser(tokens)
    return parser.parse()

View File

@ -0,0 +1,152 @@
"""Tests for calc.evaluator (D1D3) and CLI (D4)."""
import subprocess
import sys
import unittest
from calc.lexer import tokenize
from calc.parser import parse
from calc.evaluator import EvalError, evaluate
def _eval(expr: str) -> "int | float":
    """Run *expr* through the full pipeline: tokenize, parse, evaluate."""
    # The return annotation is quoted because this module does not enable
    # ``from __future__ import annotations``; an unquoted ``int | float`` is
    # evaluated at definition time and fails on Python < 3.10.
    return evaluate(parse(tokenize(expr)))
class TestArithmetic(unittest.TestCase):
    """D1 — arithmetic, precedence, parens, unary minus."""

    def _check(self, expr, expected):
        """Assert that evaluating *expr* gives *expected*."""
        self.assertEqual(_eval(expr), expected)

    def test_addition(self):
        self._check("2+3", 5)

    def test_subtraction(self):
        self._check("10-4", 6)

    def test_multiplication(self):
        self._check("3*4", 12)

    def test_precedence_mul_over_add(self):
        self._check("2+3*4", 14)

    def test_precedence_parens(self):
        self._check("(2+3)*4", 20)

    def test_left_assoc_sub(self):
        self._check("8-3-2", 3)

    def test_unary_minus_leading(self):
        self._check("-2+5", 3)

    def test_unary_minus_after_op(self):
        self._check("2*-3", -6)
class TestDivision(unittest.TestCase):
    """D2 — true division and EvalError on zero."""

    def test_true_division(self):
        quotient = _eval("7/2")
        self.assertEqual(quotient, 3.5)

    def test_division_by_zero(self):
        self.assertRaises(EvalError, _eval, "5/0")

    def test_division_by_zero_not_bare(self):
        """EvalError, not ZeroDivisionError."""
        try:
            _eval("1/0")
        except EvalError:
            return  # the wrapped error is exactly what we want
        except ZeroDivisionError:
            self.fail("bare ZeroDivisionError escaped")
        self.fail("expected EvalError")
class TestResultType(unittest.TestCase):
    """D3 — result type: whole-valued → int, non-whole → float."""

    def _assert_typed(self, expr, expected, expected_type):
        """Evaluate *expr* and check both its value and its numeric type."""
        outcome = _eval(expr)
        self.assertEqual(outcome, expected)
        self.assertIsInstance(outcome, expected_type)

    def test_whole_division_returns_int(self):
        self._assert_typed("4/2", 2, int)

    def test_non_whole_division_returns_float(self):
        self._assert_typed("7/2", 3.5, float)

    def test_integer_arithmetic_returns_int(self):
        self._assert_typed("2+3*4", 14, int)

    def test_print_whole_no_dot_zero(self):
        rendered = str(_eval("4/2"))
        self.assertEqual(rendered, "2")

    def test_print_non_whole_has_decimal(self):
        rendered = str(_eval("7/2"))
        self.assertEqual(rendered, "3.5")

    def test_float_literal_whole_normalizes_to_int(self):
        self._assert_typed("4.0", 4, int)

    def test_float_literal_trailing_dot_normalizes(self):
        self._assert_typed("10.", 10, int)

    def test_float_literal_zero_normalizes(self):
        self._assert_typed("0.0", 0, int)

    def test_unary_minus_float_normalizes(self):
        self._assert_typed("-4.0", -4, int)
class TestCLI(unittest.TestCase):
    """D4 — CLI behaviour."""

    def _run(self, expr: str):
        """Run ``calc.py`` on *expr* in a subprocess and return the result.

        The script is located relative to this test file (one directory
        above the ``calc`` package) rather than the current working
        directory, so the tests pass regardless of where they are launched.
        """
        from pathlib import Path  # local import keeps this fix self-contained

        repo_root = Path(__file__).resolve().parent.parent
        return subprocess.run(
            [sys.executable, str(repo_root / "calc.py"), expr],
            capture_output=True, text=True, cwd=str(repo_root)
        )

    def test_cli_basic(self):
        r = self._run("2+3*4")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "14")

    def test_cli_parens(self):
        r = self._run("(2+3)*4")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "20")

    def test_cli_float_result(self):
        r = self._run("7/2")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "3.5")

    def test_cli_whole_division(self):
        r = self._run("4/2")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "2")

    def test_cli_divide_by_zero_nonzero_exit(self):
        # Errors go to stderr only; stdout must stay empty.
        r = self._run("1/0")
        self.assertNotEqual(r.returncode, 0)
        self.assertGreater(len(r.stderr), 0)
        self.assertEqual(r.stdout, "")

    def test_cli_invalid_expr_nonzero_exit(self):
        r = self._run("1 +")
        self.assertNotEqual(r.returncode, 0)
        self.assertGreater(len(r.stderr), 0)
        self.assertEqual(r.stdout, "")


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,90 @@
import unittest
from calc.lexer import tokenize, Token, LexError
def kinds(src):
    """Return only the token kinds produced for *src*, in order."""
    found = []
    for tok in tokenize(src):
        found.append(tok.kind)
    return found
def pairs(src):
    """Return ``(kind, value)`` tuples for every token produced for *src*."""
    found = []
    for tok in tokenize(src):
        found.append((tok.kind, tok.value))
    return found
class TestNumbers(unittest.TestCase):
    """Number literals: ints stay int, anything with a dot becomes float."""

    def test_integer(self):
        result = tokenize("42")
        self.assertEqual(result, [Token('NUMBER', 42), Token('EOF', None)])
        self.assertIsInstance(result[0].value, int)

    def test_float(self):
        first = tokenize("3.14")[0]
        self.assertEqual(first, Token('NUMBER', 3.14))
        self.assertIsInstance(first.value, float)

    def test_leading_dot(self):
        first = tokenize(".5")[0]
        self.assertAlmostEqual(first.value, 0.5)

    def test_trailing_dot(self):
        first = tokenize("10.")[0]
        self.assertEqual(first.value, 10.0)
        self.assertIsInstance(first.value, float)
class TestOperatorsAndParens(unittest.TestCase):
    """Operators and parentheses each map to their own token kind."""

    def test_all_operators(self):
        # Checked in the same order as before; the first mismatch fails the test.
        expected_kind = (
            ("+", 'PLUS'),
            ("-", 'MINUS'),
            ("*", 'STAR'),
            ("/", 'SLASH'),
            ("(", 'LPAREN'),
            (")", 'RPAREN'),
        )
        for symbol, kind in expected_kind:
            self.assertEqual(kinds(symbol), [kind, 'EOF'])

    def test_expression_1_plus_2_star_3(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'STAR', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1+2*3"), expected)

    def test_expression_3_5_times_paren(self):
        expected = [
            'NUMBER', 'STAR', 'LPAREN', 'NUMBER',
            'MINUS', 'NUMBER', 'RPAREN', 'EOF',
        ]
        self.assertEqual(kinds("3.5*(1-2)"), expected)
class TestWhitespaceAndErrors(unittest.TestCase):
    """Whitespace is skipped; unknown characters raise LexError."""

    def _lex_error_message(self, src):
        """Tokenize *src*, require a LexError, and return its message."""
        with self.assertRaises(LexError) as caught:
            tokenize(src)
        return str(caught.exception)

    def test_whitespace_between_tokens(self):
        expected = [('NUMBER', 12), ('PLUS', '+'), ('NUMBER', 3), ('EOF', None)]
        self.assertEqual(pairs("  12  +  3  "), expected)

    def test_tabs_skipped(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'EOF']
        self.assertEqual(kinds("1\t+\t2"), expected)

    def test_invalid_at_raises(self):
        self.assertIn('@', self._lex_error_message("1 @ 2"))

    def test_invalid_dollar_raises(self):
        self.assertRaises(LexError, tokenize, "$")

    def test_invalid_letter_raises(self):
        self.assertRaises(LexError, tokenize, "x")

    def test_error_position_reported(self):
        self.assertIn('2', self._lex_error_message("1 @ 2"))  # position 2

    def test_complex_expression(self):
        expected = [
            ('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('),
            ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2),
            ('RPAREN', ')'), ('EOF', None),
        ]
        self.assertEqual(pairs("3.5*(1-2)"), expected)


if __name__ == '__main__':
    unittest.main()

View File

@ -0,0 +1,125 @@
"""Tests for calc.parser — assert on tree structure, not evaluation."""
import unittest
from calc.lexer import tokenize
from calc.parser import parse, ParseError, Num, BinOp, Unary
def p(src):
    """Tokenize *src* and parse the tokens into an AST."""
    tokens = tokenize(src)
    return parse(tokens)
class TestPrecedence(unittest.TestCase):
    """D1 — * and / bind tighter than + and -."""

    def test_add_mul(self):
        # 1+2*3: the product is the right operand of the sum.
        product = BinOp('*', Num(2), Num(3))
        self.assertEqual(p("1+2*3"), BinOp('+', Num(1), product))

    def test_mul_add(self):
        # 2*3+4: the product is the left operand of the sum.
        product = BinOp('*', Num(2), Num(3))
        self.assertEqual(p("2*3+4"), BinOp('+', product, Num(4)))

    def test_sub_div(self):
        # 10-6/2: the quotient is the right operand of the difference.
        quotient = BinOp('/', Num(6), Num(2))
        self.assertEqual(p("10-6/2"), BinOp('-', Num(10), quotient))
class TestLeftAssociativity(unittest.TestCase):
    """D2 — same-precedence ops associate left."""

    def _assert_left_nested(self, src, op, a, b, c):
        """Assert that *src* parses as ``(a op b) op c``."""
        inner = BinOp(op, Num(a), Num(b))
        self.assertEqual(p(src), BinOp(op, inner, Num(c)))

    def test_sub_assoc(self):
        # 8-3-2 groups as (8-3)-2
        self._assert_left_nested("8-3-2", '-', 8, 3, 2)

    def test_div_assoc(self):
        # 8/4/2 groups as (8/4)/2
        self._assert_left_nested("8/4/2", '/', 8, 4, 2)

    def test_add_assoc(self):
        # 1+2+3 groups as (1+2)+3
        self._assert_left_nested("1+2+3", '+', 1, 2, 3)

    def test_mul_assoc(self):
        # 2*3*4 groups as (2*3)*4
        self._assert_left_nested("2*3*4", '*', 2, 3, 4)
class TestParentheses(unittest.TestCase):
    """D3 — parens override precedence."""

    def test_paren_add_mul(self):
        # (1+2)*3: the parenthesised sum is the left operand of the product.
        grouped = BinOp('+', Num(1), Num(2))
        self.assertEqual(p("(1+2)*3"), BinOp('*', grouped, Num(3)))

    def test_nested_parens(self):
        # ((2+3)): redundant parens leave no trace in the tree.
        self.assertEqual(p("((2+3))"), BinOp('+', Num(2), Num(3)))

    def test_paren_single_num(self):
        self.assertEqual(p("(42)"), Num(42))
class TestUnaryMinus(unittest.TestCase):
    """D4 — leading and nested unary minus."""

    def test_unary_simple(self):
        # -5 wraps the literal in a single Unary node.
        self.assertEqual(p("-5"), Unary('-', Num(5)))

    def test_unary_paren(self):
        # -(1+2) negates the whole parenthesised sum.
        grouped = BinOp('+', Num(1), Num(2))
        self.assertEqual(p("-(1+2)"), Unary('-', grouped))

    def test_mul_unary(self):
        # 3 * -2: the negation is the right operand of the product.
        negated = Unary('-', Num(2))
        self.assertEqual(p("3 * -2"), BinOp('*', Num(3), negated))

    def test_double_unary(self):
        # --5 nests one Unary inside another.
        inner = Unary('-', Num(5))
        self.assertEqual(p("--5"), Unary('-', inner))
class TestErrors(unittest.TestCase):
    """D5 — malformed input raises ParseError."""

    def _assert_rejected(self, src):
        """Assert that parsing *src* raises ParseError."""
        self.assertRaises(ParseError, p, src)

    def test_trailing_op(self):
        self._assert_rejected("1 +")

    def test_unclosed_paren(self):
        self._assert_rejected("(1")

    def test_two_numbers(self):
        self._assert_rejected("1 2")

    def test_close_then_open(self):
        self._assert_rejected(")(")

    def test_empty_string(self):
        self._assert_rejected("")

    def test_only_paren(self):
        self._assert_rejected("()")


if __name__ == "__main__":
    unittest.main()

View File

@ -0,0 +1,7 @@
# BACKLOG — eval phase
## Build backlog
(Builder-owned — read-only to Adversary)
## Adversary findings
(None yet — awaiting Builder completion before comprehensive verification)

View File

@ -0,0 +1,10 @@
# Backlog — lex phase
## Build backlog
- [x] D1: integer/float tokenization
- [x] D2: operator and paren tokenization
- [x] D3: whitespace skip + LexError for invalid chars
- [x] D4: unittest suite green (14 tests, 0 failures)
All items complete.

View File

@ -0,0 +1,16 @@
# Backlog — parse phase
## Build backlog
All items complete.
- [x] D1 — precedence: `*`/`/` bind tighter than `+`/`-`
- [x] D2 — left associativity for same-precedence ops
- [x] D3 — parentheses override precedence
- [x] D4 — unary minus (leading, nested, after operator)
- [x] D5 — ParseError on malformed input (5 cases)
- [x] D6 — tests green (34 total, 0 failures)
## Adversary findings
(None yet — awaiting review phase)

View File

@ -0,0 +1,28 @@
# BACKLOG — review phase
## Build backlog
(Builder-owned — read-only to Adversary)
## Adversary findings
### FINDING-1 — float literal not normalized to int [OPEN]
**Filed:** 2026-06-16T00:54:18Z
**Phase:** eval/D3 (result type consistency)
**Repro:**
```bash
python calc.py "4.0" # prints 4.0 — EXPECTED: 4
python calc.py "10." # prints 10.0 — EXPECTED: 10
python calc.py "-4.0" # prints -4.0 — EXPECTED: -4
```
**Root cause:** `calc/evaluator.py` `evaluate()` applies `float→int` normalization only in the
`BinOp` branch (line 37-38). `Num` and `Unary` branches return the raw float.
**Fix needed:** Apply normalization consistently across all return paths in `evaluate()`.
Suggest a `_normalize(v)` helper applied before every return.
**Also add:** Tests for `_eval("4.0")`, `_eval("10.")`, `_eval("-4.0")`, `_eval("0.0")` to
lock in consistent behavior.
Status: CLOSED @ 2026-06-16T00:57:12Z — re-verified PASS after Builder fix.

View File

@ -0,0 +1,7 @@
# Decisions (append-only)
## lex phase
**Token.value type for operators:** stored as the literal character string (e.g. `'+'`). Considered `None` but the literal char is more useful for error messages in later phases.
**Number parsing:** greedy scan of `[0-9.]` then classify by presence of `.`. A string like `1.2.3` is scanned as one malformed number span — accepted for a phase-1 lexer. Note that converting that span with `float()` fails inside the lexer itself, so such input is rejected there and never reaches the parser or evaluator.

View File

@ -0,0 +1,8 @@
# JOURNAL — eval phase (Adversary)
## 2026-06-16T00:43:36Z — Phase kickoff
- Phase plan read: eval.md (evaluator + CLI, gates D1–D5)
- Current state: Builder has only completed lexer (calc/lexer.py + test_lexer.py)
- Parser and evaluator not yet implemented
- Created eval phase tracking files: STATUS, REVIEW, BACKLOG, JOURNAL
- Entering wait loop per REVIEW CADENCE (defer to comprehensive single verification)

View File

@ -0,0 +1,37 @@
# Journal — lex phase
## Build run
Implemented `calc/lexer.py` with:
- `Token` dataclass with `kind` (str) and `value` (int | float | str | None)
- `LexError(Exception)` for invalid characters
- `tokenize(src: str) -> list[Token]` scanning char-by-char
Design choices:
- `Token` is a plain dataclass so later phases (parser, evaluator) can pattern-match on `.kind`
- Numbers: scanned greedily while char is digit or `.`; cast to `int` if no `.` in raw string, else `float`
- Operators stored as their literal char as `value` (handy for error messages)
- EOF always appended as final token (parser-friendly sentinel)
## Test run output
```
$ python -m unittest -q
..............
----------------------------------------------------------------------
Ran 14 tests in 0.000s
OK
```
## Verify commands output
```
$ python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
[('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]
$ python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
Traceback (most recent call last):
...
calc.lexer.LexError: unexpected character '@' at position 2
```

View File

@ -0,0 +1,44 @@
# Journal — parse phase
## Adversary initial setup (2026-06-16)
- Pulled origin/main: lex phase is complete (STATUS-lex.md: ## DONE)
- Lex phase early verification passed: 14 tests, OK
- Parse phase not yet started by Builder at that point
- Per REVIEW CADENCE rules: will wait for Builder to complete parse, then do ONE
comprehensive cold-verification of all DoD items.
## Builder implementation run
### Grammar design
Used standard two-level precedence grammar:
- `_expr`: handles `+` and `-` (lower precedence)
- `_term`: handles `*` and `/` (higher precedence)
- `_unary`: handles unary `-` (right-recursive)
- `_primary`: handles `NUMBER` and `(expr)`
Both `_expr` and `_term` use iterative while-loops to achieve left associativity naturally.
### Verified outputs
```
$ python -m unittest -q
Ran 34 tests in 0.001s
OK
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('1+2*3')))"
BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))
$ python -c "from calc.lexer import tokenize; from calc.parser import parse; parse(tokenize('1 +'))"
Traceback (most recent call last):
...
calc.parser.ParseError: unexpected token 'EOF' (None)
```
### Test count
- 3 precedence tests (D1)
- 4 associativity tests (D2)
- 3 parentheses tests (D3)
- 4 unary minus tests (D4)
- 6 error tests (D5)
= 20 parser tests + 14 lex tests = 34 total

View File

@ -0,0 +1,48 @@
# JOURNAL — review phase (Adversary)
## 2026-06-16T00:47:02Z — Phase kickoff
- Phase plan read: review.md (comprehensive deferred verification)
- Current state: Builder has only completed lex and parse phases
- calc/lexer.py + calc/test_lexer.py (lex phase)
- calc/parser.py + calc/test_parser.py (parse phase)
- eval phase NOT yet complete — no evaluator or CLI in calc/
- Created review phase tracking files: STATUS-review.md, REVIEW-review.md, BACKLOG-review.md, JOURNAL-review.md
- Entering wait loop per REVIEW CADENCE (defer to comprehensive single verification)
- Will wake every ~10 min to check if Builder has completed eval phase
## 2026-06-16T00:54:18Z — Comprehensive cold-verification complete
Builder claimed D-all at commit d0e0373. Pulled and ran full verification.
Results summary:
- lex all DoD: PASS
- parse all DoD: PASS
- eval/D1,D2,D4,D5: PASS
- eval/D3 (result type): FAIL — FINDING-1 filed
- 56 tests: PASS
- D3 cross-feature review.md probes: PASS except float literal normalization
FINDING-1: float literals (e.g. `4.0`, `10.`, `-4.0`) not normalized to int.
Root cause: normalization in BinOp branch only (evaluator.py lines 37-38).
Num and Unary branches return raw float value without normalization.
Repro: `python calc.py "4.0"` prints `4.0` not `4`.
Fix: _normalize() helper applied to all return paths in evaluate().
Filed in REVIEW-review.md and BACKLOG-review.md. Pushing review(D-all): FAIL commit.
Awaiting Builder fix then re-verification.
## 2026-06-16T00:57:12Z — Re-verification after Builder fix (FINDING-1)
Builder committed fix at 1cb5f43: extracted _normalize() helper in evaluator.py,
applied to Num, Unary, and BinOp branches. 4 new tests added (60 total).
Re-verification results:
- python calc.py "4.0" → 4 ✓
- python calc.py "10." → 10 ✓
- python calc.py "-4.0" → -4 ✓
- python calc.py "0.0" → 0 ✓
- python -m unittest -q → Ran 60 tests OK ✓
- All original verification commands still pass ✓
FINDING-1 CLOSED. review(D-all): PASS committed and pushed.
Builder may now write ## DONE to STATUS-review.md.

View File

@ -0,0 +1,7 @@
# REVIEW — eval phase
Adversary cold-verification log. Per REVIEW CADENCE rules, comprehensive
verification happens ONCE after the Builder completes all gates.
## Status
PENDING — awaiting Builder completion of eval phase gates D1–D5.

View File

@ -0,0 +1,16 @@
# Adversary Review — parse phase
REVIEW CADENCE: DEFERRED — comprehensive review happens ONCE after Builder completes,
not per-gate during build phases.
## Status: PENDING
Builder has not yet completed the parse phase. No verdicts issued yet.
## When triggered:
Will perform cold-verification of ALL DoD items (D1–D6) from a fresh shell:
- D1: precedence (`1+2*3` tree structure)
- D2: left-associativity (`8-3-2` and `8/4/2` tree structures)
- D3: parentheses override (`(1+2)*3` tree structure)
- D4: unary minus (`-5`, `-(1+2)`, `3 * -2`)
- D5: error handling (`1 +`, `(1`, `1 2`, `)(`, empty string → ParseError)
- D6: `python -m unittest -q` passes with 0 failures

View File

@ -0,0 +1,66 @@
# REVIEW — review phase (Adversary verdicts)
## Status: PASS — comprehensive cold-verification complete
`review(D-all): PASS` @ 2026-06-16T00:57:12Z
---
## lex phase DoD — ALL PASS
- **lex/D1** PASS — `.5`→0.5, `10.`→10.0, `3.14`→3.14, `42`→42, all correct kinds/values
- **lex/D2** PASS — `+ - * / ( )` all produce correct kinds; `1+2*3` → `NUMBER PLUS NUMBER STAR NUMBER EOF`
- **lex/D3** PASS — whitespace skipped; `'1 @ 2'` raises `LexError: unexpected character '@' at position 2`
- **lex/D4** PASS — 14 tests, 0 failures (now part of 60-test suite)
## parse phase DoD — ALL PASS
- **parse/D1** PASS — `1+2*3` → `BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))`
- **parse/D2** PASS — `8-3-2` → `BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))`; `8/4/2` → `BinOp('/', BinOp('/', Num(8), Num(4)), Num(2))`
- **parse/D3** PASS — `(1+2)*3` → `BinOp('*', BinOp('+', Num(1), Num(2)), Num(3))`
- **parse/D4** PASS — `-5` → `Unary('-', Num(5))`; `-(1+2)` and `3*-2` correct ✓
- **parse/D5** PASS — `'1 +'`, `'(1'`, `'1 2'`, `')('`, `''` all raise `ParseError`
- **parse/D6** PASS — 20 tests, 0 failures
## eval phase DoD — ALL PASS
- **eval/D1** PASS — `2+3*4`→14, `(2+3)*4`→20, `8-3-2`→3, `-2+5`→3, `2*-3`→-6 ✓
- **eval/D2** PASS — `7/2`→3.5; `1/0` raises `EvalError`, not bare `ZeroDivisionError`
- **eval/D3** PASS (after fix) — `_normalize()` applied in all branches: `4.0`→4, `10.`→10, `-4.0`→-4, `0.0`→0, `4/2`→2, `7/2`→3.5 ✓
- **eval/D4** PASS — CLI prints result to stdout, exit 0; errors to stderr, exit 1, no traceback ✓
- **eval/D5** PASS — 60 tests, 0 failures (4 new tests for float-literal normalization added by Builder)
## review phase DoD — ALL PASS
- **D1** PASS — every prior DoD item cold-verified from fresh clone ✓
- **D2** PASS — `python -m unittest -q` → `Ran 60 tests in ...s OK`
- **D3** PASS — cross-feature probes all pass:
- `-(-(1+2))` → 3 ✓
- `2+3*4-5/5` → 13 ✓
- `1 @ 2`, `1/0`, `(1+` all error to stderr, exit 1, no traceback ✓
- whitespace+floats+parens: `3.5*(1-2)` works ✓
- float-literal normalization: `4.0`→4, `10.`→10, `-4.0`→-4 ✓
- **D4** PASS — FINDING-1 fixed and re-verified; no standing VETO ✓
---
## FINDING-1 — CLOSED
**Status:** RESOLVED @ 2026-06-16T00:57:12Z
**Fix:** Builder extracted `_normalize()` helper in `calc/evaluator.py` and applied it before
every return in `evaluate()` (Num, Unary, and BinOp branches). 4 new tests added to
`calc/test_evaluator.py` to lock in behavior.
**Re-verification:**
```
python calc.py "4.0" → 4 ✓
python calc.py "10." → 10 ✓
python calc.py "-4.0" → -4 ✓
python calc.py "0.0" → 0 ✓
python -m unittest -q → Ran 60 tests in ...s OK ✓
```
---
**Builder may now write `## DONE` to `machine-docs/STATUS-review.md`.**

View File

@ -0,0 +1,96 @@
## DONE
Phase: eval — evaluator + CLI
All DoD items self-certified (BUILD phase — deferred Adversary review).
---
### Files created
- `calc/evaluator.py` — `EvalError`, `evaluate(node) -> int | float`
- `calc.py` — top-level CLI
- `calc/test_evaluator.py` — 22 new unittest tests covering D1–D4
---
### D1 — arithmetic
WHAT: `evaluate(parse(tokenize(s)))` correct for `+`, `-`, `*`, `/`, precedence, parens, unary minus.
HOW:
```
python calc.py "2+3*4" # → 14
python calc.py "(2+3)*4" # → 20
python calc.py "8-3-2" # → 3
python calc.py "-2+5" # → 3 (requires shell quoting awareness)
python calc.py "2*-3" # → -6
```
EXPECTED: outputs above
WHERE: calc/evaluator.py `evaluate()`
---
### D2 — division
WHAT: `/` is true division; division by zero raises `EvalError`.
HOW:
```
python calc.py "7/2" # → 3.5
python calc.py "1/0" # → stderr error, exit 1
```
EXPECTED: `3.5` for 7/2; non-zero exit + stderr for 1/0
WHERE: calc/evaluator.py BinOp `/` branch
---
### D3 — result type
WHAT: Whole-valued → int (no `.0`), non-whole → float.
HOW:
```
python calc.py "4/2" # → 2 (not 2.0)
python calc.py "7/2" # → 3.5
```
EXPECTED: `2` and `3.5`
WHERE: calc/evaluator.py — `if isinstance(result, float) and result == int(result): return int(result)`
---
### D4 — CLI
WHAT: `python calc.py "2+3*4"` → `14`, exit 0; error → stderr, non-zero exit, no traceback.
HOW:
```
python calc.py "2+3*4" # → 14, exit 0
python calc.py "1 +" # → error to stderr, exit 1
```
EXPECTED: as above
WHERE: calc.py `main()`
---
### D5 — tests green + end-to-end
WHAT: Full test suite (lex + parse + eval) passes; 0 failures.
HOW: `python -m unittest -q`
EXPECTED:
```
Ran 56 tests in 0.226s
OK
```
WHERE: calc/test_lexer.py (14) + calc/test_parser.py (20) + calc/test_evaluator.py (22)
---
### Verify commands (from eval.md, verbatim)
```bash
python -m unittest -q # Ran 56 tests in ...s OK
python calc.py "2+3*4" # 14
python calc.py "(2+3)*4" # 20
python calc.py "7/2" # 3.5
python calc.py "4/2" # 2
python calc.py "1/0" # error to stderr, non-zero exit
python calc.py "1 +" # error to stderr, non-zero exit
```
Commit: (see git log — latest commit on main)

View File

@ -0,0 +1,40 @@
## DONE
Phase: lex — tokenizer
All DoD items self-certified (BUILD phase — deferred Adversary review).
### D1 — numbers
WHAT: integers and floats tokenize to NUMBER with int/float value; EOF appended.
HOW: `python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('42')])"`
EXPECTED: `[('NUMBER', 42), ('EOF', None)]`
WHERE: calc/lexer.py
### D2 — operators & parens
WHAT: `+ - * / ( )` each produce correct kind token.
HOW: `python -c "from calc.lexer import tokenize; print([t.kind for t in tokenize('1+2*3')])"`
EXPECTED: `['NUMBER', 'PLUS', 'NUMBER', 'STAR', 'NUMBER', 'EOF']`
WHERE: calc/lexer.py
### D3 — whitespace & errors
WHAT: spaces/tabs skipped; invalid char raises LexError with char + position.
HOW: `python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"`
EXPECTED: raises `calc.lexer.LexError: unexpected character '@' at position 2`
WHERE: calc/lexer.py
### D4 — tests green
WHAT: 14 unittest tests in calc/test_lexer.py, 0 failures.
HOW: `python -m unittest -q`
EXPECTED: `Ran 14 tests in 0.000s\nOK`
WHERE: calc/test_lexer.py
### Plan verify commands (verbatim from lex.md)
```
python -m unittest -q
python -c "from calc.lexer import tokenize; print([(t.kind,t.value) for t in tokenize('3.5*(1-2)')])"
python -c "from calc.lexer import tokenize; tokenize('1 @ 2')"
```
EXPECTED outputs:
1. `Ran 14 tests in 0.000s\nOK`
2. `[('NUMBER', 3.5), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', 1), ('MINUS', '-'), ('NUMBER', 2), ('RPAREN', ')'), ('EOF', None)]`
3. `calc.lexer.LexError: unexpected character '@' at position 2`

View File

@ -0,0 +1,78 @@
## DONE
Phase: parse — recursive-descent parser
All DoD items self-certified (BUILD phase — deferred Adversary review).
---
### AST node shapes
```
Num(value) — numeric literal; value is int or float
BinOp(op, left, right) — binary op; op in {'+', '-', '*', '/'}
Unary(op, operand) — unary minus; op is '-'
```
Defined in `calc/parser.py`. `ParseError` is also defined there.
---
### D1 — precedence
WHAT: `*` and `/` bind tighter than `+` and `-`.
HOW: `python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('1+2*3')))"`
EXPECTED: `BinOp('+', Num(1), BinOp('*', Num(2), Num(3)))`
WHERE: calc/parser.py `_expr` / `_term` levels
---
### D2 — left associativity
WHAT: Same-precedence operators associate left.
HOW:
```
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('8-3-2')))"
# EXPECTED: BinOp('-', BinOp('-', Num(8), Num(3)), Num(2))
```
WHERE: `_expr` / `_term` each use a while-loop (iterative, left-accumulating)
---
### D3 — parentheses
WHAT: Parens override precedence.
HOW:
```
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('(1+2)*3')))"
# EXPECTED: BinOp('*', BinOp('+', Num(1), Num(2)), Num(3))
```
WHERE: `_primary` handles LPAREN → `_expr` → RPAREN
---
### D4 — unary minus
WHAT: Leading and nested unary minus parses correctly.
HOW:
```
python -c "from calc.lexer import tokenize; from calc.parser import parse; print(parse(tokenize('-5')))"
# EXPECTED: Unary('-', Num(5))
```
WHERE: `_unary` level in parser
---
### D5 — errors
WHAT: Malformed input raises `ParseError` (not any other exception).
HOW: `parse(tokenize('1 +'))` raises `calc.parser.ParseError`
WHERE: `_Parser.parse`, `_Parser._primary`, `_Parser._expect`
---
### D6 — tests green
WHAT: 34 tests total (14 lex + 20 parser), 0 failures.
HOW: `python -m unittest -q`
EXPECTED: `Ran 34 tests in 0.001s\nOK`
WHERE: calc/test_parser.py (20 new tests)

View File

@ -0,0 +1,27 @@
## DONE
Phase: review — comprehensive deferred cold-verification
Adversary comprehensive PASS received @ 2026-06-16T00:57:12Z (commit a7dbf70).
---
### Summary
All DoD items verified by Adversary cold-verification from a fresh clone:
- **D1** PASS — every prior phase DoD item (lex/D1–D4, parse/D1–D6, eval/D1–D5) cold-verified
- **D2** PASS — `python -m unittest -q` → Ran 60 tests OK (0 failures)
- **D3** PASS — all cross-feature probes pass (nested unary+parens, precedence chains, error propagation, whitespace+floats+parens, CLI exit codes)
- **D4** PASS — FINDING-1 fixed and re-verified; no standing VETO
### Finding resolved
FINDING-1: float literals not normalized to int in Num/Unary branches.
Fix: extracted `_normalize()` helper in `calc/evaluator.py`, applied at every return site.
4 regression tests added to `calc/test_evaluator.py`.
### Final state
- 60 tests, 0 failures
- Full calculator: lexer → parser → evaluator → CLI
- Files: calc/lexer.py, calc/parser.py, calc/evaluator.py, calc.py + full test suites

View File

@ -0,0 +1,3 @@
__pycache__/
*.pyc
*.pyo

View File

@ -0,0 +1,14 @@
# git history (claim/review handshake), from the run's shared bare repo
539c392 status(review): ## DONE — Adversary PASS on all D1D4, no findings
6d89215 review(all): PASS — comprehensive cold-verification complete, 0 findings
e0066b4 claim(all): review phase — full build ready for Adversary cold-verification
0d4ee30 status(eval): add commit sha to STATUS-eval.md
4fada74 feat(eval): implement evaluator, CLI, and test suite — eval phase complete
50838d8 review(init): Adversary eval phase initialization — DEFERRED protocol adopted
f839449 feat(parse): implement recursive-descent parser, AST nodes, ParseError, and test suite
ed8ade3 review(init): Adversary parse phase initialization — DEFERRED protocol adopted
c3c1512 status(lex): update commit sha in STATUS, phase DONE
0092890 chore: add .gitignore, remove tracked pycache
009755c feat(lex): implement lexer, Token, LexError, and test suite
aa566e2 review(init): Adversary lex phase initialization — DEFERRED protocol adopted
071f92b chore: seed

View File

@ -0,0 +1 @@
# calc work repo

View File

@ -0,0 +1 @@
original path: /tmp/ao-campaign-WXwoUv/builder-adversary-deferred/r5

View File

@ -0,0 +1,23 @@
"""calc.py — command-line calculator: string → tokens → AST → number."""
import sys
from calc.lexer import tokenize, LexError
from calc.parser import parse, ParseError
from calc.evaluator import evaluate, EvalError
def main():
    """Evaluate the single command-line expression and print its value.

    On success the result is printed to stdout.  A wrong argument count, or
    a LexError / ParseError / EvalError raised by the pipeline, is reported
    on stderr and the process exits with status 1.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 1:
        print("Usage: calc.py <expression>", file=sys.stderr)
        sys.exit(1)

    source = arguments[0]
    try:
        # string → tokens → AST → number
        tokens = tokenize(source)
        tree = parse(tokens)
        print(evaluate(tree))
    except (LexError, ParseError, EvalError) as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
if __name__ == '__main__':
main()

View File

@ -0,0 +1,50 @@
"""
AST evaluator for the calc expression language.
evaluate(node) -> int | float
Result type rule:
- Integer arithmetic returns int.
- Division (/) always uses true division; if the result is whole-valued
(e.g. 4/2 == 2.0) it is coerced to int, otherwise returned as float.
"""
from calc.parser import Num, BinOp, Unary
class EvalError(Exception):
    """Raised by evaluate() when an AST cannot be reduced to a number."""
def _normalize(value):
    """Return int(value) for a whole-valued float; anything else unchanged."""
    if isinstance(value, float) and value.is_integer():
        return int(value)
    return value


def evaluate(node):
    """Walk an AST node and return an int or float result.

    Whole-valued floats are collapsed to int on every return path, so a
    float literal such as ``4.0`` yields ``4`` exactly as ``4/2`` does;
    non-whole results stay float.

    Args:
        node: a ``Num``, ``Unary`` or ``BinOp`` AST node.

    Returns:
        The numeric value of the expression (int or float).

    Raises:
        EvalError: on division by zero, numeric overflow, an unknown
            operator, or an unknown node type.
    """
    if isinstance(node, Num):
        return _normalize(node.value)

    if isinstance(node, Unary):
        val = evaluate(node.operand)
        if node.op == '-':
            return _normalize(-val)
        raise EvalError(f"Unknown unary operator: {node.op!r}")

    if isinstance(node, BinOp):
        left = evaluate(node.left)
        right = evaluate(node.right)
        try:
            if node.op == '+':
                result = left + right
            elif node.op == '-':
                result = left - right
            elif node.op == '*':
                result = left * right
            elif node.op == '/':
                if right == 0:
                    raise EvalError("Division by zero")
                result = left / right
            else:
                raise EvalError(f"Unknown binary operator: {node.op!r}")
        except OverflowError as exc:
            # Huge ints can overflow when true division or int/float mixing
            # converts them to float; surface that as a calculator error
            # rather than letting a bare OverflowError escape.
            raise EvalError("Numeric overflow") from exc
        # Coerce whole-valued floats to int so "4/2" prints as "2" not "2.0"
        return _normalize(result)

    raise EvalError(f"Unknown AST node type: {type(node).__name__!r}")

View File

@ -0,0 +1,58 @@
class LexError(Exception):
    """Raised by tokenize() for an unexpected character or invalid number."""
class Token:
    """A lexical token: a ``kind`` tag paired with its ``value`` payload."""

    __slots__ = ('kind', 'value')

    def __init__(self, kind: str, value):
        self.kind, self.value = kind, value

    def __repr__(self):
        return f'Token({self.kind!r}, {self.value!r})'

    def __eq__(self, other):
        # Defer to the other operand when it is not a Token.
        if not isinstance(other, Token):
            return NotImplemented
        return self.kind == other.kind and self.value == other.value
# Maps each single-character operator / parenthesis to its token kind.
_SINGLE_CHAR = {
    '+': 'PLUS',
    '-': 'MINUS',
    '*': 'STAR',
    '/': 'SLASH',
    '(': 'LPAREN',
    ')': 'RPAREN',
}
def tokenize(src: str) -> list:
    """Split *src* into a list of Token objects ending with an EOF token.

    Spaces and tabs are skipped.  Characters found in ``_SINGLE_CHAR``
    become operator/paren tokens whose value is the character itself.  A
    run of digits containing at most one ``.`` becomes a NUMBER token:
    float when a dot is present, int otherwise.

    Raises:
        LexError: on a character that starts no token, or on a numeric run
            that ``int``/``float`` cannot convert (e.g. a lone ``.``).
    """
    out = []
    pos, end = 0, len(src)
    while pos < end:
        ch = src[pos]

        if ch == ' ' or ch == '\t':
            pos += 1
            continue

        kind = _SINGLE_CHAR.get(ch)
        if kind is not None:
            out.append(Token(kind, ch))
            pos += 1
            continue

        if not (ch.isdigit() or ch == '.'):
            raise LexError(f"Unexpected character {ch!r} at position {pos}")

        # Consume digits, allowing a single '.'; a second dot ends the run.
        begin = pos
        seen_dot = False
        while pos < end:
            cur = src[pos]
            if cur == '.':
                if seen_dot:
                    break
                seen_dot = True
            elif not cur.isdigit():
                break
            pos += 1

        text = src[begin:pos]
        try:
            number = float(text) if seen_dot else int(text)
        except ValueError:
            raise LexError(f"Invalid number {text!r} at position {begin}")
        out.append(Token('NUMBER', number))

    out.append(Token('EOF', None))
    return out

View File

@ -0,0 +1,149 @@
"""
Recursive-descent parser for the calc expression grammar.
Grammar:
expr = term ( ('+' | '-') term )*
term = unary ( ('*' | '/') unary )*
unary = '-' unary | primary
primary = NUMBER | '(' expr ')'
AST node shapes (stable contract for the evaluator):
Num(value) — numeric literal; .value is int or float
BinOp(op, left, right) — binary operation; .op is '+', '-', '*', or '/'
Unary(op, operand) — unary prefix; .op is '-'
"""
class ParseError(Exception):
    """Raised when a token stream does not match the expression grammar."""
# ---------------------------------------------------------------------------
# AST nodes
# ---------------------------------------------------------------------------
class Num:
    """AST leaf holding a numeric literal in ``.value``."""

    __slots__ = ('value',)

    def __init__(self, value):
        self.value = value

    def __repr__(self):
        return f'Num({self.value!r})'

    def __eq__(self, other):
        # Anything that is not a Num compares unequal.
        if not isinstance(other, Num):
            return False
        return self.value == other.value
class BinOp:
    """AST node for a binary operation: ``.op`` applied to ``.left`` and ``.right``."""

    __slots__ = ('op', 'left', 'right')

    def __init__(self, op: str, left, right):
        self.op, self.left, self.right = op, left, right

    def __repr__(self):
        return f'BinOp({self.op!r}, {self.left!r}, {self.right!r})'

    def __eq__(self, other):
        # Structural equality: same operator and equal operands.
        if not isinstance(other, BinOp):
            return False
        return (self.op == other.op
                and self.left == other.left
                and self.right == other.right)
class Unary:
    """AST node for a unary prefix operation: ``.op`` applied to ``.operand``."""

    __slots__ = ('op', 'operand')

    def __init__(self, op: str, operand):
        self.op, self.operand = op, operand

    def __repr__(self):
        return f'Unary({self.op!r}, {self.operand!r})'

    def __eq__(self, other):
        # Structural equality: same operator and equal operand.
        if not isinstance(other, Unary):
            return False
        return self.op == other.op and self.operand == other.operand
# ---------------------------------------------------------------------------
# Parser
# ---------------------------------------------------------------------------
class _Parser:
    """Recursive-descent parser over a token list.

    Each grammar rule is one method and ``_pos`` indexes the next unread
    token.  The methods index ``_tokens`` without bounds checks, so the list
    is expected to end with an ``EOF`` token.
    """

    def __init__(self, tokens):
        self._tokens = tokens
        self._pos = 0

    def _peek(self):
        """Return the current token without consuming it."""
        return self._tokens[self._pos]

    def _advance(self):
        """Consume the current token and return it."""
        consumed = self._tokens[self._pos]
        self._pos += 1
        return consumed

    def _expect(self, kind):
        """Consume and return the current token, which must be of *kind*."""
        found = self._peek()
        if found.kind == kind:
            return self._advance()
        raise ParseError(
            f"Expected {kind}, got {found.kind!r} ({found.value!r})"
        )

    # expr = term ( ('+' | '-') term )*
    def _expr(self):
        tree = self._term()
        while True:
            if self._peek().kind not in ('PLUS', 'MINUS'):
                return tree
            symbol = self._advance().value
            # Fold leftwards so same-precedence operators associate left.
            tree = BinOp(symbol, tree, self._term())

    # term = unary ( ('*' | '/') unary )*
    def _term(self):
        tree = self._unary()
        while True:
            if self._peek().kind not in ('STAR', 'SLASH'):
                return tree
            symbol = self._advance().value
            tree = BinOp(symbol, tree, self._unary())

    # unary = '-' unary | primary
    def _unary(self):
        if self._peek().kind != 'MINUS':
            return self._primary()
        self._advance()
        return Unary('-', self._unary())

    # primary = NUMBER | '(' expr ')'
    def _primary(self):
        head = self._peek()
        kind = head.kind
        if kind == 'NUMBER':
            self._advance()
            return Num(head.value)
        if kind == 'LPAREN':
            self._advance()
            inner = self._expr()
            self._expect('RPAREN')
            return inner
        if kind == 'EOF':
            message = "Unexpected end of input"
        else:
            message = f"Unexpected token {head.kind!r} ({head.value!r})"
        raise ParseError(message)

    def parse(self):
        """Parse the whole token list as one expression and return its AST.

        Raises ParseError for empty input or for tokens left over after a
        complete expression.
        """
        if self._peek().kind == 'EOF':
            raise ParseError("Empty input")
        tree = self._expr()
        trailing = self._peek()
        if trailing.kind == 'EOF':
            return tree
        raise ParseError(
            f"Unexpected token after expression: {trailing.kind!r} ({trailing.value!r})"
        )
def parse(tokens) -> object:
    """Parse a token list produced by `calc.lexer.tokenize` into an AST."""
    parser = _Parser(tokens)
    return parser.parse()

View File

@ -0,0 +1,131 @@
import subprocess
import sys
import unittest
from calc.evaluator import EvalError, evaluate
from calc.lexer import tokenize
from calc.parser import parse
def calc(s):
    """Run expression string *s* through tokenize → parse → evaluate."""
    tokens = tokenize(s)
    tree = parse(tokens)
    return evaluate(tree)
class TestArithmetic(unittest.TestCase):
    """D1 — arithmetic operators, precedence, parens, unary minus."""

    def _check(self, source, expected):
        """Assert that *source* evaluates to *expected*."""
        self.assertEqual(calc(source), expected)

    def test_addition(self):
        self._check("1+2", 3)

    def test_subtraction(self):
        self._check("5-3", 2)

    def test_multiplication(self):
        self._check("3*4", 12)

    def test_precedence_mul_over_add(self):
        self._check("2+3*4", 14)

    def test_precedence_paren(self):
        self._check("(2+3)*4", 20)

    def test_left_assoc_subtraction(self):
        self._check("8-3-2", 3)

    def test_unary_minus_simple(self):
        self._check("-2+5", 3)

    def test_unary_minus_in_mul(self):
        self._check("2*-3", -6)

    def test_negative_literal(self):
        self._check("-5", -5)

    def test_nested_parens(self):
        self._check("((2+3))*4", 20)
class TestDivision(unittest.TestCase):
    """D2 — true division and EvalError on divide-by-zero."""

    def test_true_division(self):
        self.assertEqual(calc("7/2"), 3.5)

    def test_division_by_zero_raises_eval_error(self):
        with self.assertRaises(EvalError):
            calc("1/0")

    def test_division_by_zero_no_bare_exception(self):
        """ZeroDivisionError must not escape the API."""
        try:
            calc("1/0")
        except EvalError:
            pass
        except ZeroDivisionError:
            self.fail("ZeroDivisionError escaped the evaluate() API")
        else:
            # Without this branch the test passed silently when nothing
            # was raised at all.
            self.fail("calc('1/0') returned without raising EvalError")

    def test_division_chain(self):
        # Left-associative: (8/4)/2 == 1
        self.assertEqual(calc("8/4/2"), 1)
class TestResultType(unittest.TestCase):
    """D3 — result type: whole-valued → int, non-whole → float."""

    def _assert_value_and_type(self, source, value, kind):
        """Assert *source* evaluates to *value* and is an instance of *kind*."""
        outcome = calc(source)
        self.assertEqual(outcome, value)
        self.assertIsInstance(outcome, kind)

    def test_whole_division_returns_int(self):
        self._assert_value_and_type("4/2", 2, int)

    def test_non_whole_division_returns_float(self):
        self._assert_value_and_type("7/2", 3.5, float)

    def test_integer_arithmetic_returns_int(self):
        self._assert_value_and_type("2+3*4", 14, int)

    def test_whole_str_no_dot(self):
        rendered = str(calc("4/2"))
        self.assertEqual(rendered, "2")

    def test_float_str_has_dot(self):
        rendered = str(calc("7/2"))
        self.assertEqual(rendered, "3.5")
class TestCLI(unittest.TestCase):
    """D4 — CLI behaviour."""

    def _run(self, expr):
        """Run calc.py on *expr* in a subprocess; return the CompletedProcess."""
        # Function-scope import: the module's import block is left untouched.
        from pathlib import Path

        # Locate calc.py relative to this test file rather than the current
        # working directory, so the suite also passes when started elsewhere.
        # Assumes this file sits in calc/, one level below calc.py.
        script = Path(__file__).resolve().parent.parent / 'calc.py'
        return subprocess.run(
            [sys.executable, str(script), expr],
            capture_output=True, text=True,
        )

    def test_valid_simple(self):
        r = self._run("2+3*4")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "14")
        self.assertEqual(r.stderr, "")

    def test_valid_parens(self):
        r = self._run("(2+3)*4")
        self.assertEqual(r.returncode, 0)
        self.assertEqual(r.stdout.strip(), "20")

    def test_invalid_exits_nonzero(self):
        r = self._run("1 +")
        self.assertNotEqual(r.returncode, 0)

    def test_invalid_error_to_stderr(self):
        r = self._run("1 +")
        self.assertEqual(r.stdout, "")
        self.assertTrue(r.stderr.strip(), "expected error message on stderr")

    def test_invalid_no_traceback(self):
        r = self._run("1 +")
        self.assertNotIn("Traceback", r.stderr)
if __name__ == '__main__':
unittest.main()

View File

@ -0,0 +1,118 @@
import unittest
from calc.lexer import tokenize, Token, LexError
class TestNumbers(unittest.TestCase):
    """Numeric literals tokenize to NUMBER tokens with int or float values."""

    def _assert_float_token(self, src, expected):
        """Assert the first token of *src* is a float NUMBER close to *expected*."""
        first = tokenize(src)[0]
        self.assertEqual(first.kind, 'NUMBER')
        self.assertAlmostEqual(first.value, expected)
        self.assertIsInstance(first.value, float)

    def test_integer(self):
        tokens = tokenize("42")
        self.assertEqual(tokens, [Token('NUMBER', 42), Token('EOF', None)])
        self.assertIsInstance(tokens[0].value, int)

    def test_float_standard(self):
        self._assert_float_token("3.14", 3.14)

    def test_float_leading_dot(self):
        self._assert_float_token(".5", 0.5)

    def test_float_trailing_dot(self):
        self._assert_float_token("10.", 10.0)

    def test_eof_is_last(self):
        *_, last = tokenize("42")
        self.assertEqual(last.kind, 'EOF')
class TestOperatorsAndParens(unittest.TestCase):
    """Each operator and parenthesis character yields its own token kind."""

    def _kinds(self, src):
        """Return the token kinds produced for *src*, in order."""
        return [tok.kind for tok in tokenize(src)]

    def _assert_single(self, char, kind):
        """Assert *char* alone tokenizes to [kind, 'EOF']."""
        self.assertEqual(self._kinds(char), [kind, 'EOF'])

    def test_plus(self):
        self._assert_single("+", 'PLUS')

    def test_minus(self):
        self._assert_single("-", 'MINUS')

    def test_star(self):
        self._assert_single("*", 'STAR')

    def test_slash(self):
        self._assert_single("/", 'SLASH')

    def test_lparen(self):
        self._assert_single("(", 'LPAREN')

    def test_rparen(self):
        self._assert_single(")", 'RPAREN')

    def test_expression_1_plus_2_star_3(self):
        expected = ['NUMBER', 'PLUS', 'NUMBER', 'STAR', 'NUMBER', 'EOF']
        self.assertEqual(self._kinds("1+2*3"), expected)
class TestWhitespaceAndErrors(unittest.TestCase):
    """Whitespace is skipped; invalid characters raise LexError."""

    def _kinds(self, src):
        """Return the token kinds produced for *src*, in order."""
        return [tok.kind for tok in tokenize(src)]

    def _lex_error_text(self, src):
        """Tokenize *src*, require a LexError, and return its message."""
        with self.assertRaises(LexError) as ctx:
            tokenize(src)
        return str(ctx.exception)

    def test_whitespace_around_tokens(self):
        src = "  12  +  3  "
        self.assertEqual(self._kinds(src), ['NUMBER', 'PLUS', 'NUMBER', 'EOF'])
        numbers = [tok.value for tok in tokenize(src) if tok.kind == 'NUMBER']
        self.assertEqual(numbers, [12, 3])

    def test_complex_expression(self):
        tokens = tokenize("3.5*(1-2)")
        expected_kinds = [
            'NUMBER', 'STAR', 'LPAREN', 'NUMBER',
            'MINUS', 'NUMBER', 'RPAREN', 'EOF',
        ]
        self.assertEqual([tok.kind for tok in tokens], expected_kinds)
        self.assertAlmostEqual(tokens[0].value, 3.5)
        self.assertEqual(tokens[3].value, 1)
        self.assertEqual(tokens[5].value, 2)

    def test_lex_error_at_sign(self):
        self.assertRaises(LexError, tokenize, "1 @ 2")

    def test_lex_error_dollar(self):
        self.assertRaises(LexError, tokenize, "$")

    def test_lex_error_letter(self):
        self.assertRaises(LexError, tokenize, "x + 1")

    def test_lex_error_message_contains_char(self):
        self.assertIn('@', self._lex_error_text("1 @ 2"))

    def test_lex_error_message_contains_position(self):
        # '@' is at position 2
        self.assertIn('2', self._lex_error_text("1 @ 2"))

    def test_tab_whitespace(self):
        self.assertEqual(
            self._kinds("1\t+\t2"),
            ['NUMBER', 'PLUS', 'NUMBER', 'EOF'],
        )
if __name__ == '__main__':
unittest.main()

Some files were not shown because too many files have changed in this diff Show More