Files
cc-ci/modules/drone.nix
autonomic-bot 72ff8e213d
All checks were successful
continuous-integration/drone/push Build is passing
resource safety: MAX_TESTS=capacity=1 + per-build 60m timeout (orchestrator design change)
Bound live test apps on the single 28GiB node. DRONE_RUNNER_CAPACITY=1 (MAX_TESTS)
caps concurrent builds; Drone auto-queues the rest natively. deploy-drone reconcile
sets the cc-ci repo build timeout to 60m (best-effort PATCH, non-fatal) so a hung
build is killed and frees its slot. Janitor remains the backstop for SIGKILL'd builds.

Verified on host: DRONE_RUNNER_CAPACITY=1; repo timeout=60 via Drone API.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 02:53:29 +01:00

83 lines
4.0 KiB
Nix

# Drone CI server = coop-cloud `drone` recipe via abra (swarm, traefik-routed at
# drone.ci.commoninternet.net, Gitea SSO, wildcard cert / no ACME). The exec *runner* is a
# separate host systemd service (modules/drone-runner.nix). See DECISIONS.md "CI engine"/"Drone
# deployment shape".
#
# Idempotent-RECONCILE oneshot (same pattern as proxy/swarm-init): converges every boot/activation.
# RPC + OAuth-client secrets come from sops (/run/secrets), inserted as swarm secrets here.
{ pkgs, ... }:
let
# OAuth client ID written into the Drone app env as GITEA_CLIENT_ID by the reconcile script.
# Only the ID is kept in this file; the matching client secret is read from
# /run/secrets/drone_gitea_client_secret and inserted as a swarm secret.
giteaClientId = "ab4cdb9d-ee96-4867-875f-87384505fc52";
# Per-build TIMEOUT (plan §4.2/§4.3 resource safety): if a CI build runs longer than this, Drone
# cancels it (the exec runner kills the process), freeing the MAX_TESTS slot so the queue advances.
# The killed build can't run its own teardown — the run-start janitor reaps its orphaned app
# (modules/drone-runner.nix MAX_TESTS note). Configurable here; reconciled best-effort below.
# Kept as a string because it is interpolated verbatim into the shell script: once as the bare
# JSON number in the PATCH body and once in the log message.
buildTimeoutMinutes = "60";
# Reconcile script executed by the deploy-drone oneshot. In order it: checks that the sops secrets
# are readable, makes sure the local abra server and the drone app env exist, rewrites the managed
# env keys, inserts the two swarm secrets if absent, deploys the app, and finally tries to set the
# cc-ci repo build timeout through the Drone API (best-effort).
# writeShellApplication runs the text under errexit/nounset/pipefail and shellchecks it at build
# time, so every command below is fatal unless it is explicitly guarded.
reconcile = pkgs.writeShellApplication {
  name = "cc-ci-reconcile-drone";
  runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
  text = ''
    DRONE_DOMAIN="drone.ci.commoninternet.net"
    ENV_FILE="$HOME/.abra/servers/default/$DRONE_DOMAIN.env"

    # Both files are inserted as swarm secrets further down; bail out early if either is missing.
    if [ ! -r /run/secrets/drone_rpc_secret ] || [ ! -r /run/secrets/drone_gitea_client_secret ]; then
      echo "FATAL: drone sops secrets missing at /run/secrets (rebuild ordering?)" >&2
      exit 1
    fi

    # Add the local server when listing fails; this step is never fatal.
    abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
    abra recipe fetch drone -n >/dev/null
    # Create the app only when its env file does not exist yet.
    [ -f "$ENV_FILE" ] || abra app new drone -s default -D "$DRONE_DOMAIN" -n

    # set_env KEY VALUE: delete every existing KEY= line (commented-out ones included), then
    # append KEY=VALUE, so repeated runs converge on exactly one assignment per key.
    set_env() {
      sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
      printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
    }
    set_env LETS_ENCRYPT_ENV ""
    set_env EXTRA_DOMAINS ""
    set_env DRONE_USER_CREATE "username:autonomic-bot,admin:true"
    set_env GITEA_DOMAIN "git.autonomic.zone"
    set_env GITEA_CLIENT_ID "${giteaClientId}"
    set_env RPC_SECRET_VERSION "v1"
    set_env CLIENT_SECRET_VERSION "v1"
    set_env DRONE_ENV_VERSION "v1"
    set_env COMPOSE_FILE '"compose.yml:compose.gitea.yml"'

    # have_secret NAME: succeeds when a swarm secret whose name ends in _NAME_v1 exists.
    # The listing is captured before matching: piping docker straight into grep -q can fail under
    # pipefail when grep exits on the first match and docker receives SIGPIPE, which would report
    # an existing secret as missing and make the insert below abort the whole reconcile.
    have_secret() {
      local secret_names
      secret_names="$(docker secret ls --format '{{.Name}}')"
      grep -q "_$1_v1$" <<<"$secret_names"
    }
    have_secret rpc_secret || abra app secret insert "$DRONE_DOMAIN" rpc_secret v1 /run/secrets/drone_rpc_secret -f -n
    have_secret client_secret || abra app secret insert "$DRONE_DOMAIN" client_secret v1 /run/secrets/drone_gitea_client_secret -f -n

    abra app deploy "$DRONE_DOMAIN" -n -C

    # Best-effort: set the cc-ci repo build timeout (resource safety). Non-fatal: a failure here
    # only logs a warning and never breaks the core server reconcile.
    # --resolve sends the request to the local traefik on 127.0.0.1 while keeping the real
    # hostname in the URL. The connect/total timeouts bound the call so a stalled request cannot
    # hang the oneshot. The bearer token is handed to curl as a config line on stdin instead of
    # argv so it does not show up in the process list (assumes the token contains no double
    # quote or backslash).
    # NOTE(review): -k disables certificate verification, so the wildcard cert is NOT validated
    # on this request; drop -k if the cert chains to a CA trusted on this host.
    if [ -r /run/secrets/bridge_drone_token ]; then
      drone_token="$(cat /run/secrets/bridge_drone_token)"
      if printf 'header = "Authorization: Bearer %s"\n' "$drone_token" \
        | curl -fsS -k --connect-timeout 5 --max-time 30 \
            --resolve "$DRONE_DOMAIN:443:127.0.0.1" \
            --config - \
            -X PATCH -H "Content-Type: application/json" \
            -d '{"timeout": ${buildTimeoutMinutes}}' \
            "https://$DRONE_DOMAIN/api/repos/recipe-maintainers/cc-ci" >/dev/null; then
        echo "set cc-ci build timeout = ${buildTimeoutMinutes}m"
      else
        echo "WARN: could not set build timeout (non-fatal)" >&2
      fi
    fi
  '';
};
in
{
# Oneshot unit that runs the reconcile script at boot/activation as part of multi-user.target.
systemd.services.deploy-drone = {
  description = "Reconcile the Drone CI server (coop-cloud recipe, Gitea SSO) via abra";
  wantedBy = [ "multi-user.target" ];

  # Hard dependencies: the unit is not started without swarm-init and docker.
  requires = [ "swarm-init.service" "docker.service" ];
  wants = [ "network-online.target" ];
  # Ordering only: deploy-proxy is waited for when it is part of the same transaction, but it is
  # not pulled in by this unit.
  after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];

  # The script resolves the abra env file relative to $HOME.
  environment = { HOME = "/root"; };

  # Runs once to completion; RemainAfterExit keeps the unit reported as active afterwards.
  serviceConfig.Type = "oneshot";
  serviceConfig.RemainAfterExit = true;
  serviceConfig.ExecStart = "${reconcile}/bin/cc-ci-reconcile-drone";
};
}