All checks were successful
continuous-integration/drone/push Build is passing
Clean-room finding caught by the e2e: DRONE_USER_CREATE had no token: => a fresh-DB rebuild's Drone auto-generates a random bot token, so the committed (sops) bridge_drone_token gets 401 and the bridge can't trigger builds. The original cc-ci only matched because its token was captured out-of-band. Now the bot's machine token == bridge_drone_token deterministically on every rebuild. (Evolves the toplevel again; re-establish byte-identical on cc-ci after the e2e + Adversary re-verifies C1.) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
91 lines
4.8 KiB
Nix
91 lines
4.8 KiB
Nix
# Drone CI server = the coop-cloud `drone` recipe deployed via abra (swarm, traefik-routed at
# drone.ci.commoninternet.net, Gitea SSO, wildcard cert / no ACME). The exec *runner* is a
# separate host systemd service (modules/drone-runner.nix). See DECISIONS.md "CI engine" and
# "Drone deployment shape".
#
# Idempotent-RECONCILE oneshot (same pattern as proxy/swarm-init): converges on every
# boot/activation. The RPC and OAuth-client secrets come from sops (/run/secrets) and are
# inserted as swarm secrets here.
|
|
{ pkgs, ... }:
|
|
let
|
|
# Client ID written into the recipe env as GITEA_CLIENT_ID by the reconcile script (Gitea SSO).
# The matching client secret is not kept here; it is read from /run/secrets at reconcile time.
giteaClientId = "ab4cdb9d-ee96-4867-875f-87384505fc52";

# Per-build timeout in minutes (plan §4.2/§4.3 resource safety). When a CI build exceeds it,
# Drone cancels the build and the exec runner kills the process, which frees the MAX_TESTS slot
# so the queue can advance. A killed build cannot run its own teardown; the run-start janitor
# reaps its orphaned app (see the MAX_TESTS note in modules/drone-runner.nix).
# Kept as a string because it is interpolated into the reconcile script's shell text, where it
# is applied best-effort.
buildTimeoutMinutes = "60";
|
|
# Reconcile script for the Drone server (coop-cloud `drone` recipe managed through abra).
# Written to be re-run on every boot/activation: it (re)writes the recipe env file, inserts the
# swarm secrets only when absent, redeploys, and finally tries (best-effort) to set the cc-ci
# repo's build timeout.
#
# Requires these readable files under /run/secrets: drone_rpc_secret,
# drone_gitea_client_secret, bridge_drone_token. Exits 1 if any is missing or if the bridge
# token is empty.
reconcile = pkgs.writeShellApplication {
  name = "cc-ci-reconcile-drone";
  runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
  text = ''
    DRONE_DOMAIN="drone.ci.commoninternet.net"
    ENV_FILE="$HOME/.abra/servers/default/$DRONE_DOMAIN.env"

    # Preflight: every sops secret consumed below must be readable. bridge_drone_token is part of
    # this check because it is baked into DRONE_USER_CREATE; a failing command substitution inside
    # a command argument does not abort the script, so without this check a missing file would
    # silently produce an empty `token:` and the bot token would no longer be reproducible.
    if [ ! -r /run/secrets/drone_rpc_secret ] \
      || [ ! -r /run/secrets/drone_gitea_client_secret ] \
      || [ ! -r /run/secrets/bridge_drone_token ]; then
      echo "FATAL: drone sops secrets missing at /run/secrets (rebuild ordering?)" >&2
      exit 1
    fi

    # Read the bridge token once; it is used for both DRONE_USER_CREATE and the timeout API call.
    BRIDGE_TOKEN="$(cat /run/secrets/bridge_drone_token)"
    if [ -z "$BRIDGE_TOKEN" ]; then
      echo "FATAL: /run/secrets/bridge_drone_token is empty" >&2
      exit 1
    fi

    # Register the local server if it is not listed yet; failure to add is tolerated.
    abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
    abra recipe fetch drone -n >/dev/null

    # Create the app (and its env file) only on first run.
    [ -f "$ENV_FILE" ] || abra app new drone -s default -D "$DRONE_DOMAIN" -n

    # set_env KEY VALUE
    # Removes every existing KEY= line (including commented-out ones) from $ENV_FILE, then appends
    # KEY=VALUE, so repeated runs converge on exactly one line per key.
    set_env() {
      sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
      # ensure trailing newline before append (a recipe .env.sample may end without one, which
      # would glue the var onto the last line — see modules/backupbot.nix for the bite).
      if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
      printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
    }

    set_env LETS_ENCRYPT_ENV ""
    set_env EXTRA_DOMAINS ""
    # Inject the bridge's Drone token as the bot's MACHINE TOKEN so it is reproducible on a fresh
    # Drone DB. Without `token:`, Drone auto-generates a random token that the committed (sops)
    # bridge_drone_token can't match → on a clean-room rebuild the bridge gets 401 and can't trigger
    # builds (the original only matched because its token was captured out-of-band post-hoc). Caught
    # by the E2E-TESTME acceptance test. With `token:`, every rebuild's bot carries the sops token.
    set_env DRONE_USER_CREATE "username:autonomic-bot,admin:true,token:$BRIDGE_TOKEN"
    set_env GITEA_DOMAIN "git.autonomic.zone"
    set_env GITEA_CLIENT_ID "${giteaClientId}"
    set_env RPC_SECRET_VERSION "v1"
    set_env CLIENT_SECRET_VERSION "v1"
    set_env DRONE_ENV_VERSION "v1"
    set_env COMPOSE_FILE '"compose.yml:compose.gitea.yml"'

    # have_secret NAME: succeeds when a swarm secret whose name ends in _NAME_v1 exists.
    # grep runs without -q so it drains all of docker's output: writeShellApplication enables
    # pipefail, and an early-exiting `grep -q` could SIGPIPE the producer and make an existing
    # secret look absent.
    have_secret() { docker secret ls --format '{{.Name}}' | grep "_$1_v1$" >/dev/null; }
    have_secret rpc_secret || abra app secret insert "$DRONE_DOMAIN" rpc_secret v1 /run/secrets/drone_rpc_secret -f -n
    have_secret client_secret || abra app secret insert "$DRONE_DOMAIN" client_secret v1 /run/secrets/drone_gitea_client_secret -f -n

    abra app deploy "$DRONE_DOMAIN" -n -C

    # Best-effort: set the cc-ci repo's build timeout (resource safety). Non-fatal — never break
    # the core server reconcile if Drone/token isn't ready. Uses the bridge's Drone admin token and
    # hits the local traefik (hairpin-free) via --resolve, so the request still carries the
    # drone... host name. NOTE(review): -k disables certificate verification, so the wildcard cert
    # is not actually checked on this call — confirm whether -k is still needed.
    curl -fsS -k --resolve "$DRONE_DOMAIN:443:127.0.0.1" \
      -X PATCH -H "Authorization: Bearer $BRIDGE_TOKEN" -H "Content-Type: application/json" \
      -d '{"timeout": ${buildTimeoutMinutes}}' \
      "https://$DRONE_DOMAIN/api/repos/recipe-maintainers/cc-ci" >/dev/null \
      && echo "set cc-ci build timeout = ${buildTimeoutMinutes}m" \
      || echo "WARN: could not set build timeout (non-fatal)" >&2
  '';
};
|
|
in
|
|
{
|
|
# Oneshot unit that runs the Drone reconcile script at boot/activation and then stays "active"
# (RemainAfterExit).
systemd.services.deploy-drone = {
  description = "Reconcile the Drone CI server (coop-cloud recipe, Gitea SSO) via abra";
  wantedBy = [ "multi-user.target" ];

  # Hard dependencies: swarm and docker must be up. The network is only wanted, and the proxy
  # deploy is ordered before this unit without being required by it.
  requires = [ "swarm-init.service" "docker.service" ];
  wants = [ "network-online.target" ];
  after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];

  # The reconcile script derives the abra env-file path from $HOME.
  environment = {
    HOME = "/root";
  };

  serviceConfig = {
    ExecStart = "${reconcile}/bin/cc-ci-reconcile-drone";
    Type = "oneshot";
    RemainAfterExit = true;
  };
};
|
|
}
|