Files
agent-orchestrator/tests/smoke_claude.sh
autonomic-bot cdcece9a9a test: add tests/ — unit suite + isolated live claude/opencode smokes + runner
Unit tests (no agents/tmux): config load + defaults merge, kickoff-template
assembly, phase machine (advance/idempotent-complete/append-resumes), limit
reset-banner parsing, WAITING-UNTIL/stall parsing, claude+opencode activity
detectors. Live smokes bring a throwaway project up THROUGH agents.py on each
real backend in an isolated sandbox (unique prefix, opencode on a non-4096
port), verify attach + status + down, and clean up. tests/run.sh runs unit
always + smokes when backends present; README documents it.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 18:55:34 +00:00

125 lines
5.0 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# Isolated LIVE smoke of the CLAUDE backend, driven entirely through the harness.
#
# Brings a throwaway scratch project (its OWN session_prefix "aotest-c-<pid>-" and a temporary
# log_dir) up through `agents.py up`, on the real `claude` CLI:
# • the harness builds the claude launch command (arg delivery + remote-control + model flag),
# • the agent attaches in tmux (claude TUI alive, not an instant crash),
# • `agents.py status` reports it RUNNING,
# • `agents.py down` tears it down cleanly — no leftover sessions.
#
# SAFE BY CONSTRUCTION — never touches the live cc-ci-* sessions:
# • a unique per-run session prefix (NOT "cc-ci-")
# • cleans up everything it creates on exit (even on Ctrl+C / error).
#
# Usage: bash tests/smoke_claude.sh
# Env: CLAUDE_BIN (default: `claude` on PATH, else ~/.local/bin/claude)
# AOTEST_MODEL (default: claude-haiku-4-5 — a cheap model for the trivial probe)
# Exit: 0 = PASS or SKIP (claude or tmux unavailable); 1 = FAIL.
# ─────────────────────────────────────────────────────────────────────────────
# Unset variables are errors and a pipeline fails if any stage fails.
# `-e` is not set: the checks further down record failures through fail()
# and keep going, so one failed assertion does not hide the ones after it.
set -uo pipefail

# Directory holding this script, and the repository root one level above it
# (the harness is invoked as "$REPO/agents.py").
HERE="$(cd "$(dirname "$0")" && pwd)"
REPO="$(cd "$HERE/.." && pwd)"

# claude executable: $CLAUDE_BIN if set, else `claude` from PATH, else the
# ~/.local/bin fallback path.
CLAUDE_BIN="${CLAUDE_BIN:-$(command -v claude 2>/dev/null || echo "$HOME/.local/bin/claude")}"
MODEL="${AOTEST_MODEL:-claude-haiku-4-5}"

# Per-run tmux session prefix; embedding the shell PID keeps runs apart.
PREFIX="aotest-c-$$-"

# Private scratch directory for the generated config and the log_dir.
# Abort when it cannot be created: with an empty $SANDBOX the config would be
# written to "/agents.toml" and log_dir would be "/state" — outside any sandbox.
if ! SANDBOX="$(mktemp -d)" || [ ! -d "$SANDBOX" ]; then
  echo " FAIL: could not create a temporary sandbox directory (mktemp -d)"
  echo "=== RESULT: FAIL ==="
  exit 1
fi
CFG="$SANDBOX/agents.toml"

# Latched to 1 by fail(); decides the final verdict.
FAILED=0
# Report one successful check on stdout; all arguments together form the message.
pass() {
  printf ' PASS: %s\n' "$*"
}
# Report one failed check on stdout and latch the global FAILED flag to 1.
# Does not exit, so the caller decides whether to continue.
fail() {
  printf ' FAIL: %s\n' "$*"
  FAILED=1
}
# Tear down everything this run created, then exit with the status the script
# was already exiting with. Meant to run as the script's trap handler.
# Globals (read): REPO, CFG, PREFIX, SANDBOX
# Exits:          with the value $? had when the handler was entered
cleanup() {
  local rc=$?   # must stay the first statement: anything before it overwrites $?
  local session

  # Disarm the traps first: the `exit` at the bottom would otherwise fire the
  # EXIT trap from inside a signal-triggered run of this handler and repeat
  # the whole teardown.
  trap - EXIT INT TERM

  # Best effort: ask the harness to stop the probe. Errors are ignored because
  # the agent may never have been started.
  python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1 || true

  # Safety net: kill any tmux session that still carries this run's prefix.
  # Session names come from a format string (not the human-readable listing)
  # and the prefix is compared literally, not as a regex.
  if command -v tmux >/dev/null 2>&1; then
    tmux list-sessions -F '#{session_name}' 2>/dev/null \
      | while IFS= read -r session; do
          [[ "$session" == "$PREFIX"* ]] || continue
          tmux kill-session -t "=$session" 2>/dev/null || true
        done
  fi

  rm -rf -- "$SANDBOX"
  exit "$rc"
}
# Always tear down on the way out. cleanup is bound to EXIT only; INT and TERM
# are turned into ordinary exits with the conventional 128+signal status, so
# the teardown runs through the EXIT trap and an interrupted run never exits 0.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "=== claude backend smoke (isolated: prefix=${PREFIX}) ==="

# 0 — preconditions (SKIP, not FAIL, when claude/tmux can't run here)
if ! command -v tmux >/dev/null 2>&1; then
  echo "SKIP: tmux not on PATH (run inside 'nix develop')"
  exit 0
fi
# $CLAUDE_BIN may be an executable path or a bare command name found on PATH.
if [ ! -x "$CLAUDE_BIN" ] && ! command -v "$CLAUDE_BIN" >/dev/null 2>&1; then
  echo "SKIP: claude binary not found ($CLAUDE_BIN)"
  exit 0
fi
# 1 — isolated sandbox config (unique prefix + temp log_dir; one trivial persistent probe)
# The heredoc delimiter is unquoted, so $REPO, $PREFIX, $SANDBOX, $MODEL and
# $CLAUDE_BIN are expanded by the shell while the file is written. The values
# are placed verbatim between TOML double quotes; no escaping is applied.
# project_dir points at the repository itself; only log_dir lives in the sandbox.
cat > "$CFG" <<EOF
[defaults]
project_dir = "$REPO"
session_prefix = "$PREFIX"
log_dir = "$SANDBOX/state"
backend = "claude"
model = "$MODEL"
watch = "none"
[backend.claude]
bin = "$CLAUDE_BIN"
flags = "--dangerously-skip-permissions"
remote_control = true
supports_resume = true
prompt_delivery = "arg"
process_name = "claude"
submit_key = "Enter"
stall_idle = 300
active_re = "esc to interrupt|Running tool|bypass permissions"
limit_re = "usage limit|limit reached"
[[agent]]
name = "probe"
kind = "persistent"
prompt = "You are a harness self-test. Reply with the single word READY and then wait silently. Do nothing else."
EOF
# 2 — start the probe agent via the harness; a failure here ends the run at once
python3 "$REPO/agents.py" --config "$CFG" up probe || {
  fail "agents.py up probe errored"
  echo "=== RESULT: FAIL ==="
  exit 1
}
# 3 — after a short grace period the tmux session "<prefix>probe" must exist
sleep 6
if ! tmux has-session -t "=${PREFIX}probe" 2>/dev/null; then
  fail "${PREFIX}probe session was not created"
  echo "=== RESULT: FAIL ==="
  exit 1
fi
# Only informational: which command is currently in the pane's foreground.
cmd=$(tmux display-message -p -t "=${PREFIX}probe:" '#{pane_current_command}' 2>/dev/null)
pass "session ${PREFIX}probe created via agents.py (pane command: ${cmd})"
# 4 — claude actually attached (TUI alive), not an instant crash
sleep 6
cmd=$(tmux display-message -p -t "=${PREFIX}probe:" '#{pane_current_command}' 2>/dev/null)
pane=$(tmux capture-pane -p -t "=${PREFIX}probe:" 2>/dev/null)
# Alive when the pane's foreground command is claude, or the captured screen
# shows one of the known markers. Every alternative in the pattern must be
# non-empty: an empty one ("claude||welcome") matches every line with GNU grep
# and would make this check pass no matter what the pane contains.
# The pane text is fed through a here-string rather than `echo | grep -q`, so
# an early grep exit cannot turn into a pipefail failure.
# NOTE(review): "claude" and "READY" are loose markers — they would also match
# an error line naming the binary or an echoed prompt; tighten if that matters.
if [ "$cmd" = "claude" ] \
  || grep -qiE "esc to interrupt|bypass permissions|READY|claude|welcome" <<<"$pane"; then
  pass "claude TUI attached + alive (driven entirely by agents.py)"
else
  # Show the last three non-blank pane lines to make the failure diagnosable.
  fail "no claude TUI in pane (cmd=${cmd}); tail: $(grep -vE '^[[:space:]]*$' <<<"$pane" | tail -3)"
fi
# 5 — the harness's own status listing must show the probe line as RUNNING
if ! python3 "$REPO/agents.py" --config "$CFG" status \
  | grep -E '^\s*probe\b' \
  | grep -q RUNNING; then
  fail "agents.py status did not report probe RUNNING"
else
  pass "agents.py status reports probe RUNNING"
fi
# 6 — lifecycle: after `down` (plus a short settle time) the session must be gone
python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1
sleep 2
if ! tmux has-session -t "=${PREFIX}probe" 2>/dev/null; then
  pass "agents.py down cleanly removed the session"
else
  fail "${PREFIX}probe still alive after agents.py down"
fi
# Final verdict: any failure recorded in FAILED turns the whole run into FAIL.
if [ "$FAILED" != 0 ]; then
  echo "=== CLAUDE BACKEND SMOKE: FAIL ==="
  exit 1
fi
echo "=== CLAUDE BACKEND SMOKE: PASS ==="
exit 0