Files
cc-ci/modules/drone-runner.nix
autonomic-bot 72ff8e213d
All checks were successful
continuous-integration/drone/push Build is passing
resource safety: MAX_TESTS=capacity=1 + per-build 60m timeout (orchestrator design change)
Bound live test apps on the single 28GiB node. DRONE_RUNNER_CAPACITY=1 (MAX_TESTS)
caps concurrent builds; Drone auto-queues the rest natively. deploy-drone reconcile
sets the cc-ci repo build timeout to 60m (best-effort PATCH, non-fatal) so a hung
build is killed and frees its slot. Janitor remains the backstop for SIGKILL'd builds.

Verified on host: DRONE_RUNNER_CAPACITY=1; repo timeout=60 via Drone API.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 02:53:29 +01:00

54 lines
2.9 KiB
Nix

# Drone exec runner (M2). Runs on cc-ci itself (not in a container) so CI pipelines can drive
# host `abra` to deploy real recipes onto the swarm (plan §4.2, §8: exec runner). The Drone
# *server* is deployed separately via abra (scripts/deploy-drone.sh) as a swarm service.
#
# The exec runner is drone-runner-exec (the only exec runner upstream ever shipped; see
# DECISIONS.md "CI engine"). It connects to the server over RPC at drone.ci.commoninternet.net,
# sharing DRONE_RPC_SECRET with the server via the sops-rendered EnvironmentFile.
{ pkgs, config, lib, ... }:
let
  # MAX_TESTS (plan §4.2/§4.3 resource safety): upper bound on CI builds the exec runner executes
  # concurrently. Anything beyond this waits in Drone's own pending-build queue; there is no
  # custom queue. This is THE cap on how many test apps can be live at the same time, and it is
  # deliberately kept at 1 because the single 28GiB node hosts heavy recipes (immich/matrix with
  # large volumes).
  #
  # At capacity 1 no two runs are ever in flight together, so the run-start janitor may reap *any*
  # orphan it finds (a SIGKILL'd build never runs its teardown) and the "at most MAX_TESTS apps
  # live" bound holds exactly. Only raise this to 2 once the node has been shown to cope with two
  # light recipes at once; in that case the janitor MUST remain age-based so it does not reap a
  # concurrent run (see DECISIONS.md "Resource safety").
  #
  # Kept as a string because it is passed straight through as an environment variable value.
  runnerCapacity = "1";

  # Target used both for start ordering and as a weak dependency of the runner unit.
  networkTargets = [ "network-online.target" ];

  # Names of the unfree packages this configuration is allowed to build.
  unfreeAllowlist = [ "drone-runner-exec" ];

  # Environment passed to the runner process (DRONE_RPC_SECRET is NOT here; it is supplied by the
  # EnvironmentFile in serviceConfig).
  runnerEnvironment = {
    # RPC endpoint of the Drone server.
    DRONE_RPC_HOST = "drone.ci.commoninternet.net";
    DRONE_RPC_PROTO = "https";

    DRONE_RUNNER_NAME = "cc-ci-exec";
    # MAX_TESTS concurrency cap (see runnerCapacity in the let-block).
    DRONE_RUNNER_CAPACITY = runnerCapacity;
    # The exec runner needs a writable root for build workspaces.
    # NOTE(review): presumably the directory systemd creates for StateDirectory below — confirm.
    DRONE_RUNNER_ROOT = "/var/lib/drone-runner";

    # Pipeline steps shell out to abra/docker/git, all of which live in the system path, so the
    # unit's PATH is overridden with mkForce.
    PATH = lib.mkForce "/run/current-system/sw/bin:/run/wrappers/bin";
  };
in
{
  # Drone is distributed under the Polyform Small Business license, which nixpkgs marks as
  # unfree; our internal CI use is permitted. Only the allowlisted package is let through.
  nixpkgs.config.allowUnfreePredicate = pkg:
    builtins.elem (lib.getName pkg) unfreeAllowlist;

  systemd.services.drone-runner-exec = {
    description = "Drone exec runner (drives host abra/swarm)";

    wantedBy = [ "multi-user.target" ];
    wants = networkTargets;
    after = networkTargets;

    environment = runnerEnvironment;

    serviceConfig = {
      ExecStart = "${pkgs.drone-runner-exec}/bin/drone-runner-exec";

      # Supplies DRONE_RPC_SECRET from the sops-rendered env file (the secret is shared with the
      # Drone server).
      EnvironmentFile = config.sops.templates."drone-runner.env".path;

      # Pipelines run as this unit's user. root is required to drive docker/abra and to read the
      # abra config under /root/.abra (same as manual deploys).
      User = "root";
      StateDirectory = "drone-runner";

      # Always bring the runner back after an exit, waiting 5s between attempts.
      Restart = "always";
      RestartSec = "5s";
    };
  };
}