Unit tests (no agents/tmux): config load + defaults merge, kickoff-template assembly, phase machine (advance/idempotent-complete/append-resumes), limit reset-banner parsing, WAITING-UNTIL/stall parsing, claude+opencode activity detectors. Live smokes bring a throwaway project up THROUGH agents.py on each real backend in an isolated sandbox (unique prefix, opencode on a non-4096 port), verify attach + status + down, and clean up. tests/run.sh runs unit always + smokes when backends present; README documents it. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
125 lines
5.0 KiB
Bash
Executable File
125 lines
5.0 KiB
Bash
Executable File
#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# Isolated LIVE smoke of the CLAUDE backend, driven entirely through the harness.
#
# Brings a throwaway scratch project (its OWN session_prefix "aotest-c-<pid>-" and a temporary
# log_dir) up through `agents.py up`, on the real `claude` CLI:
#   • the harness builds the claude launch command (arg delivery + remote-control + model flag),
#   • the agent attaches in tmux (claude TUI alive, not an instant crash),
#   • `agents.py status` reports it RUNNING,
#   • `agents.py down` tears it down cleanly — no leftover sessions.
#
# SAFE BY CONSTRUCTION — never touches the live cc-ci-* sessions:
#   • a unique per-run session prefix (NOT "cc-ci-")
#   • cleans up everything it creates on exit (even on Ctrl+C / error).
#
# Usage: bash tests/smoke_claude.sh
# Env:   CLAUDE_BIN   (default: `claude` on PATH, else ~/.local/bin/claude)
#        AOTEST_MODEL (default: claude-haiku-4-5 — a cheap model for the trivial probe)
# Exit:  0 = PASS or SKIP (claude or tmux unavailable); 1 = FAIL.
# ─────────────────────────────────────────────────────────────────────────────
set -uo pipefail   # note: -e is not set; checks record failures through fail() instead of aborting

# Locations: this script's directory and the repository root one level above it.
HERE="$(cd "$(dirname "$0")" && pwd)"
REPO="$(cd "$HERE/.." && pwd)"

# claude executable: $CLAUDE_BIN if set, else `claude` from PATH, else ~/.local/bin/claude.
CLAUDE_BIN="${CLAUDE_BIN:-$(command -v claude 2>/dev/null || echo "$HOME/.local/bin/claude")}"
MODEL="${AOTEST_MODEL:-claude-haiku-4-5}"

# Per-run tmux session prefix; $$ (this shell's PID) keeps concurrent runs apart.
PREFIX="aotest-c-$$-"

# Scratch directory holding the generated config and the harness log_dir.
# Stop right away if it cannot be created: with an empty SANDBOX the config
# path below would become /agents.toml.
SANDBOX="$(mktemp -d)" || { echo "FAIL: mktemp -d could not create a sandbox directory" >&2; exit 1; }
CFG="$SANDBOX/agents.toml"

FAILED=0   # set to 1 by fail(); read by the final verdict

# Print a PASS line for one check.
# Arguments: the message; all arguments are joined with spaces.
# Outputs:   " PASS: <message>" on stdout.
pass() {
  printf ' PASS: %s\n' "$*"
}
# Print a FAIL line for one check and mark the run as failed.
# Globals:   FAILED (written) — set to 1.
# Arguments: the message; all arguments are joined with spaces.
# Outputs:   " FAIL: <message>" on stdout.
fail() {
  printf ' FAIL: %s\n' "$*"
  FAILED=1
}

# Tear down everything this run created, then exit with the status the script
# was already finishing with. Meant to be installed as a trap handler.
# Globals: REPO, CFG, PREFIX, SANDBOX (all read)
# Exits:   with the value $? had when the handler was entered
cleanup() {
  local rc=$?   # must be the first command, before anything overwrites $?

  # Make the teardown run once and run to completion:
  #  - drop the EXIT trap, otherwise the `exit` at the bottom of a
  #    signal-triggered call fires it and the whole teardown runs again;
  #  - ignore further INT/TERM so a second Ctrl+C cannot cut the teardown short.
  trap - EXIT
  trap '' INT TERM

  # Ask the harness to stop the probe first (best effort: it may never have started).
  python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1 || true

  # Then sweep any tmux session that still carries this run's prefix. Each
  # `tmux ls` line is cut at its first colon to leave the session name, and the
  # leading "=" asks tmux for an exact-name match.
  if command -v tmux >/dev/null 2>&1; then
    tmux ls 2>/dev/null | sed 's/:.*//' | grep "^${PREFIX}" | while read -r s; do
      tmux kill-session -t "=$s" 2>/dev/null || true
    done || true
  fi

  # Nothing to remove when no sandbox path was ever set.
  if [ -n "${SANDBOX:-}" ]; then
    rm -rf -- "$SANDBOX"
  fi
  exit "$rc"
}

# Teardown is attached to EXIT only. The signal traps just turn the signal into
# an exit with the conventional 128+N status (ignoring any repeat signal first);
# the EXIT trap then runs cleanup, which exits with that same status. This way
# an interrupted run performs one teardown and never finishes with status 0,
# which this script reserves for PASS or SKIP.
trap cleanup EXIT
trap 'trap "" INT TERM; exit 130' INT    # 128 + SIGINT (2)
trap 'trap "" INT TERM; exit 143' TERM   # 128 + SIGTERM (15)

echo "=== claude backend smoke (isolated: prefix=${PREFIX}) ==="

# 0 — preconditions (SKIP, not FAIL, when claude/tmux can't run here)
if ! command -v tmux >/dev/null 2>&1; then
  echo "SKIP: tmux not on PATH (run inside 'nix develop')"
  exit 0
fi
# CLAUDE_BIN may be a path to an executable file or a command name resolvable on PATH.
if ! [ -x "$CLAUDE_BIN" ] && ! command -v "$CLAUDE_BIN" >/dev/null 2>&1; then
  echo "SKIP: claude binary not found ($CLAUDE_BIN)"
  exit 0
fi

# 1 — isolated sandbox config (unique prefix + temp log_dir; one trivial persistent probe)
# The heredoc delimiter is unquoted on purpose: $REPO, $PREFIX, $SANDBOX, $MODEL and
# $CLAUDE_BIN are expanded by the shell while the file is written. Every line between
# the markers lands in the TOML file verbatim, so nothing but config belongs there.
cat > "$CFG" <<EOF
[defaults]
project_dir = "$REPO"
session_prefix = "$PREFIX"
log_dir = "$SANDBOX/state"
backend = "claude"
model = "$MODEL"
watch = "none"

[backend.claude]
bin = "$CLAUDE_BIN"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
prompt_delivery = "arg"
process_name = "claude"
submit_key = "Enter"
stall_idle = 300
active_re = "esc to interrupt|Running tool|bypass permissions"
limit_re = "usage limit|limit reached"

[[agent]]
name = "probe"
kind = "persistent"
prompt = "You are a harness self-test. Reply with the single word READY and then wait silently. Do nothing else."
EOF

# 2 — bring the probe up THROUGH the harness
# None of the later checks can run without a started probe, so a failing `up`
# ends the run here with status 1.
if ! python3 "$REPO/agents.py" --config "$CFG" up probe; then
  fail "agents.py up probe errored"
  echo "=== RESULT: FAIL ==="
  exit 1
fi

# 3 — session created?
sleep 6   # fixed wait before looking for the tmux session
# "=" requests an exact session-name match rather than a prefix/pattern match.
if ! tmux has-session -t "=${PREFIX}probe" 2>/dev/null; then
  # Without a session the remaining checks have nothing to inspect: stop here.
  fail "${PREFIX}probe session was not created"
  echo "=== RESULT: FAIL ==="
  exit 1
fi
# Foreground command of the session's pane — reported for information only at this step.
cmd=$(tmux display-message -p -t "=${PREFIX}probe:" '#{pane_current_command}' 2>/dev/null)
pass "session ${PREFIX}probe created via agents.py (pane command: ${cmd})"

# 4 — claude actually attached (TUI alive), not an instant crash
sleep 6   # second fixed wait, giving the TUI time to draw
cmd=$(tmux display-message -p -t "=${PREFIX}probe:" '#{pane_current_command}' 2>/dev/null)
pane=$(tmux capture-pane -p -t "=${PREFIX}probe:" 2>/dev/null)
# Alive when the pane's foreground command is claude OR the captured screen shows
# TUI text. The screen is fed to grep through a here-string rather than
# `echo "$pane" | grep -q`: with `set -o pipefail`, grep -q quitting on an early
# match can kill the writer with SIGPIPE and turn a real match into a failure.
# NOTE(review): the bare "claude" alternative also matches shell errors such as
# "claude: command not found" — confirm that is acceptable for this probe.
if [ "$cmd" = "claude" ] \
  || grep -qiE "esc to interrupt|bypass permissions|READY|claude|❯|welcome" <<<"$pane"; then
  pass "claude TUI attached + alive (driven entirely by agents.py)"
else
  # Include the last three non-blank pane lines to show what was there instead.
  fail "no claude TUI in pane (cmd=${cmd}); tail: $(grep -vE '^\s*$' <<<"$pane" | tail -3)"
fi

# 5 — status reports it RUNNING
# Passes only when `agents.py status` itself exits 0 AND its output has a line
# that starts (after optional whitespace) with the word "probe" and contains RUNNING.
if status_out=$(python3 "$REPO/agents.py" --config "$CFG" status) \
  && grep -qE '^\s*probe\b.*RUNNING' <<<"$status_out"; then
  pass "agents.py status reports probe RUNNING"
else
  fail "agents.py status did not report probe RUNNING"
fi

# 6 — lifecycle: down removes it cleanly
# The exit status of `down` is not inspected; the check is whether the tmux
# session is gone two seconds later.
python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1
sleep 2
if ! tmux has-session -t "=${PREFIX}probe" 2>/dev/null; then
  pass "agents.py down cleanly removed the session"
else
  fail "${PREFIX}probe still alive after agents.py down"
fi

# Final verdict: status 0 only when no check called fail() (FAILED still 0).
if [ "$FAILED" != 0 ]; then
  echo "=== CLAUDE BACKEND SMOKE: FAIL ==="
  exit 1
fi
echo "=== CLAUDE BACKEND SMOKE: PASS ==="
exit 0