resource safety: MAX_TESTS=capacity=1 + per-build 60m timeout (orchestrator design change)
All checks were successful
continuous-integration/drone/push Build is passing

Bound live test apps on the single 28GiB node. DRONE_RUNNER_CAPACITY=1 (MAX_TESTS)
caps concurrent builds; Drone auto-queues the rest natively. deploy-drone reconcile
sets the cc-ci repo build timeout to 60m (best-effort PATCH, non-fatal) so a hung
build is killed and frees its slot. Janitor remains the backstop for SIGKILL'd builds.

Verified on host: DRONE_RUNNER_CAPACITY=1; repo timeout=60 via Drone API.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-27 02:53:29 +01:00
parent 7addb9686c
commit 72ff8e213d
3 changed files with 52 additions and 2 deletions

View File

@ -8,9 +8,14 @@
{ pkgs, ... }:
let
giteaClientId = "ab4cdb9d-ee96-4867-875f-87384505fc52";
# Per-build TIMEOUT (plan §4.2/§4.3 resource safety): if a CI build runs longer than this, Drone
# cancels it (the exec runner kills the process), freeing the MAX_TESTS slot so the queue advances.
# The killed build can't run its own teardown — the run-start janitor reaps its orphaned app
# (modules/drone-runner.nix MAX_TESTS note). Configurable here; reconciled best-effort below.
buildTimeoutMinutes = "60";
reconcile = pkgs.writeShellApplication {
name = "cc-ci-reconcile-drone";
runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git ];
runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
text = ''
DRONE_DOMAIN="drone.ci.commoninternet.net"
ENV_FILE="$HOME/.abra/servers/default/$DRONE_DOMAIN.env"
@ -44,6 +49,19 @@ let
have_secret client_secret || abra app secret insert "$DRONE_DOMAIN" client_secret v1 /run/secrets/drone_gitea_client_secret -f -n
abra app deploy "$DRONE_DOMAIN" -n -C
# Best-effort: set the cc-ci repo's build timeout (resource safety). Non-fatal never break
# the core server reconcile if Drone/token isn't ready. Uses the bridge's Drone admin token and
# hits the local traefik (hairpin-free) keeping SNI=drone... so the wildcard cert validates.
if [ -r /run/secrets/bridge_drone_token ]; then
DT="$(cat /run/secrets/bridge_drone_token)"
curl -fsS -k --resolve "$DRONE_DOMAIN:443:127.0.0.1" \
-X PATCH -H "Authorization: Bearer $DT" -H "Content-Type: application/json" \
-d '{"timeout": ${buildTimeoutMinutes}}' \
"https://$DRONE_DOMAIN/api/repos/recipe-maintainers/cc-ci" >/dev/null \
&& echo "set cc-ci build timeout = ${buildTimeoutMinutes}m" \
|| echo "WARN: could not set build timeout (non-fatal)" >&2
fi
'';
};
in