Files
agent-orchestrator/tests/smoke_opencode.sh
autonomic-bot cdcece9a9a test: add tests/ — unit suite + isolated live claude/opencode smokes + runner
Unit tests (no agents/tmux): config load + defaults merge, kickoff-template
assembly, phase machine (advance/idempotent-complete/append-resumes), limit
reset-banner parsing, WAITING-UNTIL/stall parsing, claude+opencode activity
detectors. Live smokes bring a throwaway project up THROUGH agents.py on each
real backend in an isolated sandbox (unique prefix, opencode on a non-4096
port), verify attach + status + down, and clean up. tests/run.sh runs unit
always + smokes when backends present; README documents it.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-13 18:55:34 +00:00

157 lines
6.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# ─────────────────────────────────────────────────────────────────────────────
# Isolated LIVE smoke of the OPENCODE backend, driven entirely through the harness.
#
# Generalizes the cc-ci `test-opencode.sh` isolation pattern onto the agent-orchestrator harness:
# stands up a DEDICATED opencode server on its own port (≠ 4096), then brings a throwaway scratch
# project up through `agents.py up` on the opencode backend:
# • the harness builds the opencode attach command + the post-connect bootstrap ping,
# • the agent attaches to the server (opencode TUI alive),
# • `agents.py status` reports it RUNNING,
# • `agents.py down` tears it down cleanly — server killed, no leftover sessions, port freed.
#
# SAFE BY CONSTRUCTION — never touches the live cc-ci-* sessions or the live opencode server:
# • a unique per-run session prefix (NOT "cc-ci-")
# • its OWN opencode server on AOTEST_OC_PORT (default 4097, never 4096)
# • cleans up everything it creates on exit (even on Ctrl+C / error).
#
# Usage: bash tests/smoke_opencode.sh
# Env: OPENCODE_BIN (default: `opencode` on PATH, else ~/.local/bin/opencode)
# AOTEST_OC_PORT (default 4097 — MUST differ from the live 4096)
# AOTEST_OC_CREDS (default /srv/cc-ci/.testenv — sourced as the backend preamble)
# AOTEST_MODEL (default: opencode's own configured default)
# Exit: 0 = PASS or SKIP (opencode / creds / server unavailable); 1 = FAIL.
# ─────────────────────────────────────────────────────────────────────────────
# Unset variables and failing pipeline stages are errors. `-e` is not enabled; the steps below
# test the status of the commands they care about themselves.
set -uo pipefail

# This script's directory, and the directory above it (agents.py is invoked from there).
HERE="$(cd "$(dirname "$0")" && pwd)"
REPO="$(cd "$HERE/.." && pwd)"

# Tunables, each overridable from the environment (names are listed in the header above).
OCBIN="${OPENCODE_BIN:-$(command -v opencode 2>/dev/null || echo "$HOME/.local/bin/opencode")}"
PORT="${AOTEST_OC_PORT:-4097}"
SERVER="http://127.0.0.1:${PORT}"
CREDS="${AOTEST_OC_CREDS:-/srv/cc-ci/.testenv}"
MODEL="${AOTEST_MODEL:-}"

# Per-run isolation: the shell PID makes the session prefix unique, and every file this run
# writes lives under a fresh temp directory.
PREFIX="aotest-o-$$-"
# Stop right here if the sandbox cannot be created: with an empty SANDBOX the paths derived
# from it would collapse to "/agents.toml", "/server.log" and "/state".
SANDBOX="$(mktemp -d)" || { echo "FAIL: could not create sandbox directory (mktemp -d failed)"; exit 1; }
CFG="$SANDBOX/agents.toml"
SRVLOG="$SANDBOX/server.log"

SERVER_PID=""   # filled in once the dedicated server has been launched
FAILED=0        # set to 1 by fail()
# Report a passed check on stdout; all arguments are joined into a single message.
pass() {
  printf ' PASS: %s\n' "$*"
}
# Report a failed check on stdout and mark the whole run as failed by setting FAILED=1.
fail() {
  printf ' FAIL: %s\n' "$*"
  FAILED=1
}
# Trap handler: tears down everything this run may have created, then exits with the status
# that was current when the handler was entered.
#   Globals read: REPO, CFG, PREFIX, SERVER_PID, PORT, SANDBOX
#   Every step is best-effort (errors ignored) so one failed step cannot block the others.
cleanup(){
  local rc=$?   # must stay first: any later command would overwrite $?
  local session
  # One-shot: when entered from a signal trap, the `exit` at the bottom would otherwise fire the
  # EXIT trap and run the whole teardown a second time.
  trap - EXIT INT TERM

  python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1 || true

  if command -v tmux >/dev/null 2>&1; then
    # Kill only sessions whose name starts with this run's unique prefix.
    tmux ls -F '#{session_name}' 2>/dev/null | while IFS= read -r session; do
      case "$session" in
        "${PREFIX}"*) tmux kill-session -t "=$session" 2>/dev/null || true ;;
      esac
    done || true
  fi

  # Touch server processes ONLY if this run launched one. The handler also runs on the early
  # exits (SKIPs, and the "refusing port 4096" guard); killing by port there would take down a
  # server this run does not own — including the live one the guard exists to protect.
  if [ -n "$SERVER_PID" ]; then
    # kill the server subshell AND the opencode serve child it forked (the subshell is not the
    # listener — target the listener by our unique port so the port is actually freed).
    kill "$SERVER_PID" 2>/dev/null || true
    # "( |$)" instead of "\b": plain ERE, so the port must be a whole argument on any regex library.
    pkill -f "opencode serve.*--port ${PORT}( |\$)" 2>/dev/null || true
    # Give the listener up to ~5s to release the port. The ss output is captured before it is
    # searched so an early grep exit cannot turn into a pipeline failure under pipefail.
    for _ in 1 2 3 4 5; do
      grep -q ":${PORT} " <<<"$(ss -ltn 2>/dev/null)" || break
      sleep 1
    done
  fi

  rm -rf -- "$SANDBOX"
  exit "$rc"
}
# Teardown hangs off the EXIT trap only. INT/TERM merely turn the signal into the conventional
# 128+signal exit status; that exit then fires the EXIT trap. Registering the handler directly
# on INT/TERM would make it exit with the status of whatever command was interrupted — possibly
# 0, i.e. a terminated run reported as PASS/SKIP — and would run the teardown twice.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

echo "=== opencode backend smoke (isolated: prefix=${PREFIX} port=${PORT}) ==="

# 0 — preconditions (SKIP, not FAIL, when the environment can't run opencode)
command -v tmux >/dev/null 2>&1 || { echo "SKIP: tmux not on PATH (run inside 'nix develop')"; exit 0; }
# A misconfigured port is a hard FAIL, not a SKIP: 4096 belongs to the live server.
[ "$PORT" != "4096" ] || { echo "FAIL: refusing port 4096 (the live cc-ci opencode port)"; exit 1; }
# OCBIN may be a path to an executable or a bare command name resolvable on PATH.
[ -x "$OCBIN" ] || command -v "$OCBIN" >/dev/null 2>&1 \
  || { echo "SKIP: opencode binary not found ($OCBIN)"; exit 0; }
[ -f "$CREDS" ] || { echo "SKIP: opencode creds file missing ($CREDS)"; exit 0; }
# 1 — isolated sandbox config (unique prefix + temp log_dir + dedicated server)
# Writes the throwaway harness config to $CFG. The heredoc delimiter is unquoted, so the shell
# expands $REPO, $PREFIX, $SANDBOX, $MODEL, $OCBIN, $SERVER and $CREDS while writing the file.
# The {bin}/{server}/{dir} placeholders contain no `$` and therefore reach the file verbatim
# (presumably agents.py fills them in — confirm against the harness).
# $MODEL may be empty, in which case `model = ""` is written.
# NOTE(review): the values are pasted into TOML strings unescaped and $CREDS is unquoted inside
# the preamble command, so paths containing quotes, backslashes or spaces would likely break —
# confirm before pointing these at unusual paths.
cat > "$CFG" <<EOF
[defaults]
project_dir = "$REPO"
session_prefix = "$PREFIX"
log_dir = "$SANDBOX/state"
backend = "opencode"
model = "$MODEL"
watch = "none"
[backend.opencode]
bin = "$OCBIN"
attach = "{bin} attach {server} --dir {dir}"
server = "$SERVER"
supports_resume = false
prompt_delivery = "ping"
process_name = "opencode"
footer_ui = true
log_grace = 180
connect_delay = 12
submit_key = "C-m"
preamble = "set -a; . $CREDS; set +a"
stall_idle = 900
active_re = "esc interrupt|thinking|inferring|running tool|tool call|preparing patch|reading|searching|working"
limit_re = "usage limit|limit reached"
[[agent]]
name = "probe"
kind = "persistent"
prompt = "You are a harness self-test. Reply with the single word READY and then wait silently. Do nothing else."
EOF
# 2 — bring up a dedicated opencode server on our own port

# True when something is listening on $PORT. The ss output is captured before it is searched:
# piping ss straight into `grep -q` under pipefail can report failure when grep exits on its
# first match while ss is still writing (SIGPIPE), which would read as "not listening".
port_listening() {
  local listeners
  listeners="$(ss -ltn 2>/dev/null)" || return 1
  grep -q ":${PORT} " <<<"$listeners"
}

# A listener that is already there is not ours: our own server could not bind, and every check
# below would then exercise somebody else's server. Treat it as "server unavailable".
if port_listening; then
  echo "SKIP: port ${PORT} is already in use; cannot start a dedicated opencode server"; exit 0
fi

# The credentials are exported only inside the server's subshell; all server output goes to $SRVLOG.
( set -a; . "$CREDS"; set +a; NO_COLOR=1 "$OCBIN" serve --hostname 127.0.0.1 --port "$PORT" ) >"$SRVLOG" 2>&1 &
SERVER_PID=$!

# Wait up to ~30s for the listener, but stop early once the server process has already exited.
for _ in {1..30}; do
  port_listening && break
  kill -0 "$SERVER_PID" 2>/dev/null || break
  sleep 1
done
if ! port_listening; then
  echo "SKIP: opencode server did not come up on :${PORT} (see ${SRVLOG})"
  # The log lives in the sandbox, which the exit handler deletes — show its tail now.
  echo " (server log tail:) $(tail -3 "$SRVLOG" 2>/dev/null)"
  exit 0
fi
pass "dedicated opencode server listening on :${PORT}"
# 3 — start the probe agent via the harness; an error here ends the run immediately
python3 "$REPO/agents.py" --config "$CFG" up probe || {
  fail "agents.py up probe errored"
  echo "=== RESULT: FAIL ==="
  exit 1
}

# 4 — after a short settle time the probe's tmux session must exist; without it nothing
#     further can be checked, so a missing session also ends the run
sleep 4
probe_session="=${PREFIX}probe"
if ! tmux has-session -t "$probe_session" 2>/dev/null; then
  fail "${PREFIX}probe session was not created"
  echo "=== RESULT: FAIL ==="
  exit 1
fi
pane_cmd=$(tmux display-message -p -t "${probe_session}:" '#{pane_current_command}' 2>/dev/null)
pass "session ${PREFIX}probe created via agents.py (pane command: ${pane_cmd})"
# 5 — opencode TUI attached + alive, not an instant crash
# Wait, then snapshot the probe pane and look for any marker the check accepts as a sign of life.
# A miss is recorded via fail() but does not end the run.
sleep 12
pane=$(tmux capture-pane -p -t "=${PREFIX}probe:" 2>/dev/null)
# The captured text is fed to grep with here-strings rather than `echo "$pane" | grep`: echo is
# not safe for arbitrary text, and under pipefail a `grep -q` that exits before the writer has
# finished would turn a match into a pipeline failure.
if grep -qiE "opencode|build ·|gpt|claude|READY|esc interrupt|ctrl\+p|ctrl\+" <<<"$pane"; then
  pass "opencode TUI attached + alive (driven entirely by agents.py)"
else
  # Last three non-blank pane lines, for the failure message.
  pane_tail=$(grep -vE '^\s*$' <<<"$pane" | tail -3)
  fail "no opencode TUI/response in pane; tail: ${pane_tail}"
  echo " (server log tail:) $(tail -3 "$SRVLOG" 2>/dev/null)"
fi
# 6 — the harness's own status listing must show the probe line as RUNNING
status_cmd=(python3 "$REPO/agents.py" --config "$CFG" status)
if ! "${status_cmd[@]}" | grep -E '^\s*probe\b' | grep -q RUNNING; then
  fail "agents.py status did not report probe RUNNING"
else
  pass "agents.py status reports probe RUNNING"
fi
# 7 — teardown through the harness: after `down`, the probe's tmux session must be gone.
#     The exit status of `down` itself is not checked; only the resulting session state is.
python3 "$REPO/agents.py" --config "$CFG" down probe >/dev/null 2>&1
sleep 2
if ! tmux has-session -t "=${PREFIX}probe" 2>/dev/null; then
  pass "agents.py down cleanly removed the session"
else
  fail "${PREFIX}probe still alive after agents.py down"
fi
# Final verdict: PASS (exit 0) only if no check called fail(); anything else is FAIL (exit 1).
case "$FAILED" in
  0)
    echo "=== OPENCODE BACKEND SMOKE: PASS ==="
    exit 0
    ;;
  *)
    echo "=== OPENCODE BACKEND SMOKE: FAIL ==="
    exit 1
    ;;
esac