refactor(1b): RL5 — consolidate Nix code under nix/ (modules->nix/modules, hosts->nix/hosts)

flake.nix/flake.lock STAY at root so the build ref #cc-ci is unchanged; only flake's internal
configuration.nix path updated. Root-relative refs inside moved modules re-based ../X -> ../../X
(secrets/bridge/dashboard); configuration.nix's ../../modules imports unchanged (both dirs under nix/).
Living docs (README, architecture/install/secrets/enroll) + .drone.yml comment updated to nix/...;
append-only history logs left as-is. DECISIONS.md records RL5 + the deferred-coordinated RL6.

Verified on cc-ci: nixos-rebuild build 'path:#cc-ci' -> toplevel 8i3jcad9 (BYTE-IDENTICAL to the
pre-move build — store derivations are content-addressed on file contents, module .nix not in the
runtime closure); scripts/lint.sh -> lint: PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-27 21:19:09 +01:00
parent 5a811e4ae4
commit 433ec9de30
21 changed files with 63 additions and 29 deletions

6
nix/modules/abra.nix Normal file
View File

@ -0,0 +1,6 @@
# abra — the Co-op Cloud CLI. Installed system-wide; it is the tool the harness and the
# proxy/drone reconcile oneshots drive.
# `pkgs.abra` is supplied by the project overlay in nix/modules/packages.nix, which pins the
# release tarball by hash (D8).
{ pkgs, ... }:
{
  environment.systemPackages = with pkgs; [ abra ];
}

55
nix/modules/backupbot.nix Normal file
View File

@ -0,0 +1,55 @@
# backup-bot-two (M5): the Co-op Cloud backup service. `abra app backup create <app>` / restore
# talk to it; it snapshots volumes labelled `backupbot.backup=true` into a local restic repo.
# Idempotent-reconcile oneshot (same pattern as proxy/drone). restic_password is abra-generated
# (class-B-style internal secret) and kept stable across reconciles (only generated if missing).
{ pkgs, ... }:
let
  # Script executed by the deploy-backupbot unit below. In order it:
  #   1. registers the local abra server when `abra server ls` fails,
  #   2. fetches the backup-bot-two recipe,
  #   3. creates the app (and its .env file) only if the .env file does not exist yet,
  #   4. rewrites three env vars through set_env (delete any existing or commented-out line for
  #      the key, make sure the file ends in a newline, append KEY=VALUE),
  #   5. generates the app's secrets only when no swarm secret ending in `_restic_password_v1`
  #      is listed,
  #   6. runs `abra app deploy`.
  # NOTE(review): have_secret matches ANY swarm secret whose name ends in `_restic_password_v1`,
  # not just this stack's — confirm no recipe under test can create a secret with that suffix.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-backupbot";
    runtimeInputs = with pkgs; [ abra docker gnused gnugrep coreutils git ];
    text = ''
      DOMAIN="backups.ci.commoninternet.net"   # identity/stack name only; no web route
      ENV_FILE="$HOME/.abra/servers/default/$DOMAIN.env"
      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch backup-bot-two -n >/dev/null
      [ -f "$ENV_FILE" ] || abra app new backup-bot-two -s default -D "$DOMAIN" -n
      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # Ensure the file ends in a newline before appending backup-bot-two's .env.sample ends
        # with a newline-less comment line, so a bare append would glue the var onto that comment
        # (commenting it out). `$(tail -c1)` is empty iff the last byte is already a newline.
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env RESTIC_REPOSITORY /backups/restic
      set_env SECRET_RESTIC_PASSWORD_VERSION v1
      set_env CRONJOB_VERSION v1
      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      # -m avoids the TTY/table (ioctl) path; redirect stdout so generated values never hit logs (D6).
      have_secret restic_password || abra app secret generate "$DOMAIN" --all -m -n >/dev/null
      abra app deploy "$DOMAIN" -n -C
    '';
  };
in
{
  # Oneshot that runs the reconcile script once per boot/activation and then stays "active"
  # (RemainAfterExit), so dependants and `systemctl status` see its last result.
  systemd.services.deploy-backupbot = {
    description = "Reconcile backup-bot-two (volume backups via restic) via abra";
    # Serialized last (chain proxy→drone→bridge→dashboard→backupbot) to avoid the concurrent abra-init
    # race on a fresh host (see bridge.nix). Ordering-only; transitively after deploy-proxy.
    after = [ "deploy-dashboard.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    # The script builds ENV_FILE from $HOME, so HOME must be set explicitly for the unit.
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-backupbot";
    };
  };
}

120
nix/modules/bridge.nix Normal file
View File

@ -0,0 +1,120 @@
# Comment-bridge (§4.1): the `!testme` webhook receiver. Packaged as a Nix-built OCI image
# (no Docker Hub pull) and run as a swarm service on `proxy`, routed by traefik at
# ci.commoninternet.net/hook. Deployed by an idempotent-reconcile oneshot (same pattern as
# proxy/drone). Secrets come from sops (/run/secrets) → swarm secrets the container mounts.
{ pkgs, ... }:
let
  # bridge.py placed at /app/bridge.py inside the image.
  # The source file is taken from the repo-root bridge/ directory (two levels up from this module).
  bridgeApp = pkgs.runCommand "cc-ci-bridge-app" { } ''
    mkdir -p $out/app
    cp ${../../bridge/bridge.py} $out/app/bridge.py
  '';
  # Content-derived tag so `docker stack deploy` rolls the service whenever bridge.py changes
  # (a fixed `:latest` + unchanged stack spec does NOT roll — swarm sees no change).
  # The tag is the first 12 hex characters of the SHA-256 of bridge.py's contents.
  # NOTE(review): the tag depends on bridge.py only, while the image also contains pkgs.python3
  # and pkgs.cacert — a nixpkgs bump produces a different image under the same tag; confirm
  # whether the running service is expected to pick that up.
  imageTag = builtins.substring 0 12 (builtins.hashString "sha256"
    (builtins.readFile ../../bridge/bridge.py));
  # Layered image holding python3, the CA bundle and the app; the container runs
  # `python3 /app/bridge.py` with SSL_CERT_FILE pointing at the bundled CA certificates.
  image = pkgs.dockerTools.buildLayeredImage {
    name = "cc-ci-bridge";
    tag = imageTag;
    contents = [ pkgs.python3 pkgs.cacert bridgeApp ];
    config = {
      Cmd = [ "${pkgs.python3}/bin/python3" "/app/bridge.py" ];
      Env = [ "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" ];
      ExposedPorts = { "8080/tcp" = { }; };
    };
  };
  # Compose/stack file for `docker stack deploy`. The three secrets are declared `external`, so
  # they must already exist as swarm secrets under the listed names — the reconcile script below
  # creates them before deploying. The `proxy` network is external as well.
  stack = pkgs.writeText "cc-ci-bridge-stack.yml" ''
    version: "3.8"
    services:
      app:
        image: cc-ci-bridge:${imageTag}
        environment:
          - GITEA_API=https://git.autonomic.zone/api/v1
          - DRONE_URL=https://drone.ci.commoninternet.net
          - CI_REPO=recipe-maintainers/cc-ci
          - BRIDGE_LISTEN=0.0.0.0:8080
          # Polling is PRIMARY (outbound, read-only, always on); the /hook webhook is an optional
          # admin-registered push optimization deduped against the poller (§4.1). Enrollment = add
          # the repo to POLL_REPOS (csv) + ensure tests/<recipe>/ exists.
          - POLL_INTERVAL=30
          - POLL_REPOS=recipe-maintainers/cc-ci,recipe-maintainers/custom-html,recipe-maintainers/keycloak,recipe-maintainers/cryptpad,recipe-maintainers/matrix-synapse,recipe-maintainers/lasuite-docs,recipe-maintainers/n8n
          - HMAC_FILE=/run/secrets/webhook_hmac
          - DRONE_TOKEN_FILE=/run/secrets/drone_token
          - GITEA_TOKEN_FILE=/run/secrets/gitea_token
        secrets:
          - webhook_hmac
          - drone_token
          - gitea_token
        networks:
          - proxy
        deploy:
          replicas: 1
          restart_policy:
            condition: any
          labels:
            - "traefik.enable=true"
            - "traefik.http.services.ccci-bridge.loadbalancer.server.port=8080"
            - "traefik.http.routers.ccci-bridge.rule=Host(`ci.commoninternet.net`) && PathPrefix(`/hook`)"
            - "traefik.http.routers.ccci-bridge.entrypoints=web-secure"
            - "traefik.http.routers.ccci-bridge.tls=true"
    networks:
      proxy:
        external: true
    secrets:
      webhook_hmac:
        external: true
        name: cc_ci_bridge_webhook_hmac_v1
      drone_token:
        external: true
        name: cc_ci_bridge_drone_token_v1
      gitea_token:
        external: true
        name: cc_ci_bridge_gitea_token_v1
  '';
  # Script executed by the deploy-bridge unit. It fails fast if any of the three sops-decrypted
  # files under /run/secrets is unreadable, loads the image tarball into the local docker,
  # creates each swarm secret only if `docker secret inspect` does not find it (so an existing
  # v1 secret is never replaced), then deploys the stack as `ccci-bridge`.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-bridge";
    runtimeInputs = with pkgs; [ docker coreutils ];
    text = ''
      for s in webhook_hmac drone_token gitea_token; do
        if [ ! -r "/run/secrets/bridge_$s" ]; then
          echo "FATAL: /run/secrets/bridge_$s missing (rebuild ordering?)" >&2
          exit 1
        fi
      done
      # Load the Nix-built image into the local docker (idempotent; layers cached).
      docker load -i ${image}
      # Materialise swarm secrets from sops (immutable; create once at v1).
      ensure_secret() {
        docker secret inspect "$2" >/dev/null 2>&1 || docker secret create "$2" "$1" >/dev/null
      }
      ensure_secret /run/secrets/bridge_webhook_hmac cc_ci_bridge_webhook_hmac_v1
      ensure_secret /run/secrets/bridge_drone_token  cc_ci_bridge_drone_token_v1
      ensure_secret /run/secrets/bridge_gitea_token  cc_ci_bridge_gitea_token_v1
      docker stack deploy --detach=true -c ${stack} ccci-bridge
    '';
  };
in
{
  # Oneshot that runs the reconcile script and stays "active" afterwards (RemainAfterExit).
  systemd.services.deploy-bridge = {
    description = "Reconcile the cc-ci comment-bridge (!testme webhook) swarm service";
    # Serialized after deploy-drone (chain proxy→drone→bridge→dashboard→backupbot): on a FRESH host the
    # abra-driven reconcilers otherwise run concurrently against an uninitialised ~/.abra and race on
    # catalogue/recipe init, leaving units failed after a blank-VM rebuild. Ordering-only `after` fixes it.
    after = [ "deploy-drone.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-bridge";
    };
  };
}

94
nix/modules/dashboard.nix Normal file
View File

@ -0,0 +1,94 @@
# Results dashboard (§4.5, D7): the YunoHost-CI-like overview at ci.commoninternet.net. Reads the
# Drone API (read-only) and renders latest-run-per-recipe + SVG badges. Packaged as a Nix-built OCI
# image and run as a swarm service on `proxy`, routed by traefik at Host(ci.commoninternet.net) — the
# comment-bridge's Host && PathPrefix(`/hook`) rule is longer, so /hook still wins (priority by rule
# length). Deployed by an idempotent-reconcile oneshot (same pattern as bridge/drone).
{ pkgs, ... }:
let
  # dashboard.py, taken from the repo-root dashboard/ directory, placed at /app/dashboard.py.
  dashApp = pkgs.runCommand "cc-ci-dashboard-app" { } ''
    mkdir -p $out/app
    cp ${../../dashboard/dashboard.py} $out/app/dashboard.py
  '';
  # Content-derived tag: changes whenever dashboard.py changes, so `docker stack deploy` actually
  # rolls the service to the new image (a fixed `:latest` tag + unchanged stack spec does NOT roll —
  # swarm sees no change). Reproducible + self-healing.
  # The tag is the first 12 hex characters of the SHA-256 of dashboard.py's contents.
  # NOTE(review): only dashboard.py feeds the tag; a change to pkgs.python3/pkgs.cacert alone
  # yields a different image under the same tag — confirm that is acceptable.
  imageTag = builtins.substring 0 12 (builtins.hashString "sha256"
    (builtins.readFile ../../dashboard/dashboard.py));
  # Layered image holding python3, the CA bundle and the app; the container runs
  # `python3 /app/dashboard.py`.
  image = pkgs.dockerTools.buildLayeredImage {
    name = "cc-ci-dashboard";
    tag = imageTag;
    contents = [ pkgs.python3 pkgs.cacert dashApp ];
    config = {
      Cmd = [ "${pkgs.python3}/bin/python3" "/app/dashboard.py" ];
      Env = [ "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" ];
      ExposedPorts = { "8080/tcp" = { }; };
    };
  };
  # Compose/stack file for `docker stack deploy`. The drone_token secret and the `proxy` network
  # are declared external, so both must exist before the deploy (the reconcile script creates
  # the secret).
  stack = pkgs.writeText "cc-ci-dashboard-stack.yml" ''
    version: "3.8"
    services:
      app:
        image: cc-ci-dashboard:${imageTag}
        environment:
          - DRONE_URL=https://drone.ci.commoninternet.net
          - CI_REPO=recipe-maintainers/cc-ci
          - DASH_LISTEN=0.0.0.0:8080
          - DRONE_TOKEN_FILE=/run/secrets/drone_token
        secrets:
          - drone_token
        networks:
          - proxy
        deploy:
          replicas: 1
          restart_policy:
            condition: any
          labels:
            - "traefik.enable=true"
            - "traefik.http.services.ccci-dashboard.loadbalancer.server.port=8080"
            - "traefik.http.routers.ccci-dashboard.rule=Host(`ci.commoninternet.net`)"
            - "traefik.http.routers.ccci-dashboard.entrypoints=web-secure"
            - "traefik.http.routers.ccci-dashboard.tls=true"
    networks:
      proxy:
        external: true
    secrets:
      drone_token:
        external: true
        name: cc_ci_dashboard_drone_token_v1
  '';
  # Script executed by the deploy-dashboard unit. It fails fast if the sops-decrypted
  # /run/secrets/bridge_drone_token is unreadable, loads the image tarball, creates the
  # dashboard's own swarm secret from that same file only if it does not exist yet, then deploys
  # the stack as `ccci-dashboard`.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-dashboard";
    runtimeInputs = with pkgs; [ docker coreutils ];
    text = ''
      if [ ! -r /run/secrets/bridge_drone_token ]; then
        echo "FATAL: /run/secrets/bridge_drone_token missing (rebuild ordering?)" >&2
        exit 1
      fi
      docker load -i ${image}
      # Dashboard reads the Drone API read-only; reuse the same Drone token value as the bridge.
      docker secret inspect cc_ci_dashboard_drone_token_v1 >/dev/null 2>&1 \
        || docker secret create cc_ci_dashboard_drone_token_v1 /run/secrets/bridge_drone_token >/dev/null
      docker stack deploy --detach=true -c ${stack} ccci-dashboard
    '';
  };
in
{
  # Oneshot that runs the reconcile script and stays "active" afterwards (RemainAfterExit).
  systemd.services.deploy-dashboard = {
    description = "Reconcile the cc-ci results dashboard (overview + badges) swarm service";
    # Serialized after deploy-bridge (chain proxy→drone→bridge→dashboard→backupbot) to avoid the
    # concurrent abra-init race on a fresh host (see bridge.nix). Ordering-only.
    after = [ "deploy-bridge.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-dashboard";
    };
  };
}

View File

@ -0,0 +1,53 @@
# Drone exec runner (M2): a host-level systemd service — deliberately not a container — so CI
# pipeline steps can call the host's `abra` and deploy real recipes onto the swarm (plan §4.2,
# §8: exec runner). The Drone *server* is a separate swarm service deployed via abra
# (nix/modules/drone.nix).
#
# The binary is drone-runner-exec (the only exec runner upstream ever shipped; see DECISIONS.md
# "CI engine"). It reaches the server over RPC at drone.ci.commoninternet.net; the shared
# DRONE_RPC_SECRET is delivered through the sops-rendered EnvironmentFile.
{ pkgs, config, lib, ... }:
let
  # MAX_TESTS (plan §4.2/§4.3 resource safety): how many CI builds the exec runner executes at
  # once; anything beyond that waits in Drone's own pending-build queue (no custom queue). This
  # is THE cap on how many test apps can be live simultaneously. It is kept at 1 on this single
  # 28GiB node because recipes are heavy (immich/matrix large volumes). At capacity 1 no two runs
  # are ever in flight together, so the run-start janitor may reap *any* orphan (a SIGKILL'd
  # build runs no teardown) and "at most MAX_TESTS apps live" holds exactly. Only raise it to 2
  # once the node is shown to cope with two light recipes — and then the janitor MUST stay
  # age-based so it never reaps a concurrent run (DECISIONS.md "Resource safety").
  runnerCapacity = "1";

  # Used for both ordering (`after`) and the soft dependency (`wants`).
  networkOnline = [ "network-online.target" ];
in
{
  # Drone is distributed under the Polyform Small Business license and nixpkgs flags it unfree;
  # our internal CI use is permitted. Whitelist this single package and nothing else.
  nixpkgs.config.allowUnfreePredicate = pkg: lib.getName pkg == "drone-runner-exec";

  systemd.services.drone-runner-exec = {
    description = "Drone exec runner (drives host abra/swarm)";
    wantedBy = [ "multi-user.target" ];
    wants = networkOnline;
    after = networkOnline;

    serviceConfig = {
      ExecStart = "${pkgs.drone-runner-exec}/bin/drone-runner-exec";
      # Supplies DRONE_RPC_SECRET (same value the server uses), rendered by sops.
      EnvironmentFile = config.sops.templates."drone-runner.env".path;
      # Pipelines execute as the service user. Root is required to drive docker/abra and to read
      # the abra config in /root/.abra, exactly like a manual deploy.
      User = "root";
      StateDirectory = "drone-runner";
      Restart = "always";
      RestartSec = "5s";
    };

    environment = {
      DRONE_RPC_HOST = "drone.ci.commoninternet.net";
      DRONE_RPC_PROTO = "https";
      DRONE_RUNNER_NAME = "cc-ci-exec";
      # MAX_TESTS concurrency cap — see runnerCapacity in the let-block.
      DRONE_RUNNER_CAPACITY = runnerCapacity;
      # Writable root under which the exec runner creates build workspaces.
      DRONE_RUNNER_ROOT = "/var/lib/drone-runner";
      # Pipeline commands shell out to abra/docker/git, all of which live in the system profile.
      PATH = lib.mkForce "/run/current-system/sw/bin:/run/wrappers/bin";
    };
  };
}

90
nix/modules/drone.nix Normal file
View File

@ -0,0 +1,90 @@
# Drone CI server = coop-cloud `drone` recipe via abra (swarm, traefik-routed at
# drone.ci.commoninternet.net, Gitea SSO, wildcard cert / no ACME). The exec *runner* is a
# separate host systemd service (nix/modules/drone-runner.nix). See DECISIONS.md "CI engine"/"Drone
# deployment shape".
#
# Idempotent-RECONCILE oneshot (same pattern as proxy/swarm-init): converges every boot/activation.
# RPC + OAuth-client secrets come from sops (/run/secrets), inserted as swarm secrets here.
{ pkgs, ... }:
let
  giteaClientId = "ab4cdb9d-ee96-4867-875f-87384505fc52";
  # Per-build TIMEOUT (plan §4.2/§4.3 resource safety): if a CI build runs longer than this, Drone
  # cancels it (the exec runner kills the process), freeing the MAX_TESTS slot so the queue advances.
  # The killed build can't run its own teardown — the run-start janitor reaps its orphaned app
  # (nix/modules/drone-runner.nix MAX_TESTS note). Configurable here; reconciled best-effort below.
  buildTimeoutMinutes = "60";
  # Script executed by the deploy-drone unit. It verifies the required sops secrets are readable,
  # makes sure the abra server/recipe/app exist, rewrites the app's env vars via set_env, inserts
  # the two swarm secrets if absent, deploys the app, and finally makes a best-effort API call to
  # set the cc-ci repo's build timeout.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-drone";
    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
    text = ''
      DRONE_DOMAIN="drone.ci.commoninternet.net"
      ENV_FILE="$HOME/.abra/servers/default/$DRONE_DOMAIN.env"
      # bridge_drone_token is checked here too: it is written into DRONE_USER_CREATE below, and
      # an unreadable file would otherwise produce an empty "token:" instead of a failed unit
      # (a failing command substitution inside an argument does not trip errexit).
      if [ ! -r /run/secrets/drone_rpc_secret ] || [ ! -r /run/secrets/drone_gitea_client_secret ] \
        || [ ! -r /run/secrets/bridge_drone_token ]; then
        echo "FATAL: drone sops secrets missing at /run/secrets (rebuild ordering?)" >&2
        exit 1
      fi
      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch drone -n >/dev/null
      [ -f "$ENV_FILE" ] || abra app new drone -s default -D "$DRONE_DOMAIN" -n
      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # ensure trailing newline before append (a recipe .env.sample may end without one, which
        # would glue the var onto the last line see modules/backupbot.nix for the bite).
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env LETS_ENCRYPT_ENV ""
      set_env EXTRA_DOMAINS ""
      # Inject the bridge's Drone token as the bot's MACHINE TOKEN so it is reproducible on a fresh
      # Drone DB. Without `token:`, Drone auto-generates a random token that the committed (sops)
      # bridge_drone_token can't match on a clean-room rebuild the bridge gets 401 and can't trigger
      # builds (the original only matched because its token was captured out-of-band post-hoc). Caught
      # by the E2E-TESTME acceptance test. With `token:`, every rebuild's bot carries the sops token.
      set_env DRONE_USER_CREATE "username:autonomic-bot,admin:true,token:$(cat /run/secrets/bridge_drone_token)"
      set_env GITEA_DOMAIN "git.autonomic.zone"
      set_env GITEA_CLIENT_ID "${giteaClientId}"
      set_env RPC_SECRET_VERSION "v1"
      set_env CLIENT_SECRET_VERSION "v1"
      set_env DRONE_ENV_VERSION "v1"
      set_env COMPOSE_FILE '"compose.yml:compose.gitea.yml"'
      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      have_secret rpc_secret    || abra app secret insert "$DRONE_DOMAIN" rpc_secret    v1 /run/secrets/drone_rpc_secret -f -n
      have_secret client_secret || abra app secret insert "$DRONE_DOMAIN" client_secret v1 /run/secrets/drone_gitea_client_secret -f -n
      abra app deploy "$DRONE_DOMAIN" -n -C
      # Best-effort: set the cc-ci repo's build timeout (resource safety). Non-fatal never break
      # the core server reconcile if Drone/token isn't ready. Uses the bridge's Drone admin token and
      # hits the local traefik (hairpin-free) keeping SNI=drone... so the wildcard cert validates.
      if [ -r /run/secrets/bridge_drone_token ]; then
        DT="$(cat /run/secrets/bridge_drone_token)"
        curl -fsS -k --resolve "$DRONE_DOMAIN:443:127.0.0.1" \
          -X PATCH -H "Authorization: Bearer $DT" -H "Content-Type: application/json" \
          -d '{"timeout": ${buildTimeoutMinutes}}' \
          "https://$DRONE_DOMAIN/api/repos/recipe-maintainers/cc-ci" >/dev/null \
          && echo "set cc-ci build timeout = ${buildTimeoutMinutes}m" \
          || echo "WARN: could not set build timeout (non-fatal)" >&2
      fi
    '';
  };
in
{
  # Oneshot that runs the reconcile script and stays "active" afterwards (RemainAfterExit).
  # Ordered after deploy-proxy; HOME is set because the script derives ENV_FILE from $HOME.
  systemd.services.deploy-drone = {
    description = "Reconcile the Drone CI server (coop-cloud recipe, Gitea SSO) via abra";
    after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-drone";
    };
  };
}

20
nix/modules/harness.nix Normal file
View File

@ -0,0 +1,20 @@
# CI harness runtime (M4). Provides `cc-ci-run` on the host: a wrapper around a reproducible
# Python environment (pytest + Playwright) so the Drone exec pipeline — or a developer by hand —
# can run the harness as `cc-ci-run runner/run_recipe_ci.py`. On NixOS, Playwright must use the
# browsers built by nixpkgs rather than a downloaded copy, hence PLAYWRIGHT_BROWSERS_PATH.
{ pkgs, ... }:
let
  # Interpreter with the two Python libraries the harness needs.
  pythonEnv = pkgs.python3.withPackages (pyPkgs: [ pyPkgs.pytest pyPkgs.playwright ]);

  # Host tools made available on the wrapper's PATH alongside the interpreter.
  hostTools = with pkgs; [ abra docker git coreutils util-linux ];

  # The wrapper: point Playwright at the Nix-provided browsers, forbid downloads, then hand all
  # arguments to python3.
  runner = pkgs.writeShellApplication {
    name = "cc-ci-run";
    runtimeInputs = [ pythonEnv ] ++ hostTools;
    text = ''
      export PLAYWRIGHT_BROWSERS_PATH=${pkgs.playwright-driver.browsers}
      export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
      exec ${pythonEnv}/bin/python3 "$@"
    '';
  };
in
{
  environment.systemPackages = [ runner ];
}

25
nix/modules/packages.nix Normal file
View File

@ -0,0 +1,25 @@
# Project package overlay. Exposes `abra` (the Co-op Cloud CLI) as `pkgs.abra`, so everything
# that needs it — systemPackages and the reconcile oneshots alike — uses one pinned build.
_:
let
  # Overlay adding a single attribute, `abra`, built from the upstream release tarball.
  abraOverlay = _final: prev:
    let
      version = "0.13.0-beta";
    in
    {
      abra = prev.stdenv.mkDerivation {
        pname = "abra";
        inherit version;

        # Prebuilt linux/amd64 release archive, pinned by hash.
        src = prev.fetchurl {
          url = "https://git.coopcloud.tech/toolshed/abra/releases/download/${version}/abra_${version}_linux_amd64.tar.gz";
          sha256 = "12csk6wp1pk9cspzqfl4a6h5jdz8p055sf0ggxw9k7ljhpd5qvc6";
        };
        # The archive unpacks into the current directory rather than a top-level folder.
        sourceRoot = ".";

        # Patch the prebuilt binary's ELF interpreter/rpath against the C++ runtime library.
        nativeBuildInputs = [ prev.autoPatchelfHook ];
        buildInputs = [ prev.stdenv.cc.cc.lib ];

        installPhase = ''
          runHook preInstall
          install -Dm755 abra "$out/bin/abra"
          runHook postInstall
        '';
      };
    };
in
{
  nixpkgs.overlays = [ abraOverlay ];
}

67
nix/modules/proxy.nix Normal file
View File

@ -0,0 +1,67 @@
# Reverse proxy = the canonical Co-op Cloud `traefik` recipe, deployed via abra in
# wildcard / file-provider mode (wildcard cert as ssl_cert/ssl_key swarm secrets,
# LETS_ENCRYPT_ENV empty => NO ACME, no DNS token). See DECISIONS.md "Proxy: real coop-cloud/traefik".
# Phase-1c: the cert at CERT_DIR is sops-decrypted from git (cc-ci-secrets) at activation
# (nix/modules/secrets.nix wildcard_cert/wildcard_key), NOT an out-of-band operator file drop.
#
# Declared as an idempotent-RECONCILE systemd oneshot (like swarm-init): it inspects current
# state and converges every activation/boot, self-healing drift (redeploys if the stack is gone,
# re-inserts secrets if missing). No run-once sentinel. So a from-scratch install is just
# `nixos-rebuild switch` + operator preconditions (D8) — no manual post-steps.
{ pkgs, ... }:
let
  # Script executed by the deploy-proxy unit. It fails fast when the wildcard cert/key are not
  # readable, makes sure the abra server/recipe/app exist, rewrites the app's env vars via
  # set_env, inserts the ssl_cert/ssl_key swarm secrets if absent, and deploys the app.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-proxy";
    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git ];
    text = ''
      PROXY_DOMAIN="traefik.ci.commoninternet.net"
      CERT_DIR="/var/lib/ci-certs/live"
      ENV_FILE="$HOME/.abra/servers/default/$PROXY_DOMAIN.env"
      # Fail visibly (failed unit) if the cert is missing do NOT silently skip. It is
      # sops-decrypted from git (cc-ci-secrets) at activation; a miss here means the sops decrypt
      # path is broken (e.g. age identity not present), which must surface, not be papered over.
      if [ ! -r "$CERT_DIR/fullchain.pem" ] || [ ! -r "$CERT_DIR/privkey.pem" ]; then
        echo "FATAL: wildcard cert missing at $CERT_DIR (sops decrypt from cc-ci-secrets failed?)" >&2
        exit 1
      fi
      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch traefik -n >/dev/null
      [ -f "$ENV_FILE" ] || abra app new traefik -s default -D "$PROXY_DOMAIN" -n
      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # Ensure the file ends in a newline before appending (same guard as the backupbot and
        # drone reconcilers): if the last line has no trailing newline, a bare append would glue
        # the var onto it. "$(tail -c1)" is empty iff the last byte is already a newline.
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env LETS_ENCRYPT_ENV ""
      set_env WILDCARDS_ENABLED "1"
      set_env SECRET_WILDCARD_CERT_VERSION "v1"
      set_env SECRET_WILDCARD_KEY_VERSION "v1"
      set_env COMPOSE_FILE '"compose.yml:compose.wildcard.yml"'
      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      have_secret ssl_cert || abra app secret insert "$PROXY_DOMAIN" ssl_cert v1 "$CERT_DIR/fullchain.pem" -f -n
      have_secret ssl_key  || abra app secret insert "$PROXY_DOMAIN" ssl_key  v1 "$CERT_DIR/privkey.pem" -f -n
      # Converge the stack (idempotent: no-op if already at desired state).
      abra app deploy "$PROXY_DOMAIN" -n -C
    '';
  };
in
{
  # Oneshot that runs the reconcile script and stays "active" afterwards (RemainAfterExit).
  # HOME is set because the script derives ENV_FILE from $HOME.
  systemd.services.deploy-proxy = {
    description = "Reconcile the Co-op Cloud traefik proxy (wildcard/no-ACME) via abra";
    after = [ "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-proxy";
    };
  };
}

58
nix/modules/secrets.nix Normal file
View File

@ -0,0 +1,58 @@
# sops-nix wiring (D6 infra secrets). At activation cc-ci decrypts its secrets with its own
# ed25519 SSH host key acting as the age identity, so there is no extra key file to manage on
# the box. The encrypted material is the repo-root `secrets/` git SUBMODULE (the private
# `cc-ci-secrets` repo, Phase-1c). Since RL5 this module lives in nix/modules/, which makes the
# relative path ../../secrets/secrets.yaml. Only the recipients listed in secrets/.sops.yaml
# (host key + off-box master recovery key) can read it.
{ config, ... }:
{
  sops.defaultSopsFile = ../../secrets/secrets.yaml;

  sops.age = {
    # Primary identity: the SSH host key, which sops-nix converts to an age identity.
    sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ];
    # Phase-1c: a bootstrap age key at a fixed path is accepted as well — THE one out-of-band
    # secret, placed on the host before the first rebuild. On the canonical cc-ci it contains the
    # host-derived age identity (== the sshKeyPaths recipient, no new exposure). On a fresh or
    # cloned host (e.g. the throwaway-VM rebuild) it contains the off-box recovery key, so a host
    # whose SSH host key is NOT a sops recipient can still decrypt everything.
    # NOTE: sops-install-secrets aborts activation if this file is set but missing, so it must
    # exist before `nixos-rebuild`.
    keyFile = "/var/lib/sops-nix/key.txt";
  };

  # No GPG key lookup in addition to age.
  sops.gnupg.sshKeyPaths = [ ];

  sops.secrets = {
    # M0 proof secret: shows the decrypt path works end to end.
    test_secret = { };

    # M2 Drone (A2 internal secrets). drone_rpc_secret is shared by the swarm-deployed Drone
    # server (the deploy-drone reconcile in nix/modules/drone.nix inserts it as the `rpc_secret`
    # swarm secret) and the host exec runner (which reads it through the env template below).
    # drone_gitea_client_secret is the Gitea OAuth app secret, inserted as the server's
    # `client_secret` swarm secret.
    drone_rpc_secret = { };
    drone_gitea_client_secret = { };

    # M3 comment-bridge (A2). The reconcile oneshot in nix/modules/bridge.nix copies these into
    # the swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook.
    bridge_webhook_hmac = { };
    bridge_drone_token = { };
    bridge_gitea_token = { };

    # Phase-1c C2: the wildcard TLS cert and key are sops secrets too (in cc-ci-secrets). They
    # are decrypted at activation to /var/lib/ci-certs/live/{fullchain.pem,privkey.pem} — the
    # very path the traefik reconcile (nix/modules/proxy.nix) reads — replacing the earlier
    # operator-drops-a-cert-file step.
    wildcard_cert = {
      mode = "0444"; # leaf+intermediate chain — not secret
      path = "/var/lib/ci-certs/live/fullchain.pem";
    };
    wildcard_key = {
      mode = "0400"; # private key — root only
      path = "/var/lib/ci-certs/live/privkey.pem";
    };
  };

  # EnvironmentFile consumed by the host exec runner: DRONE_RPC_SECRET filled in from the sops
  # secret when the template is rendered.
  sops.templates."drone-runner.env".content = ''
    DRONE_RPC_SECRET=${config.sops.placeholder.drone_rpc_secret}
  '';
}

45
nix/modules/swarm.nix Normal file
View File

@ -0,0 +1,45 @@
# Docker + single-node Swarm — the deploy target for recipes under test (M1).
# Traefik (nix/modules/proxy.nix) and abra layer on top; recipes attach to the `proxy`
# overlay network, exactly as a real Co-op Cloud host expects.
{ pkgs, ... }:
{
  virtualisation.docker = {
    enable = true;
    # Reclaim disk from churning per-run images/containers/networks (cc-ci root is ~28 GiB).
    # These flags are passed to `docker system prune`. `--volumes` must NOT be combined with an
    # `until` filter: docker rejects that pair ('The "until" filter is not supported with
    # "--volumes"'), which makes the whole daily prune fail and reclaim nothing. The age filter
    # is kept; unused volumes would need a separate `docker volume prune` if that is wanted.
    autoPrune = {
      enable = true;
      dates = "daily";
      flags = [ "--all" "--filter" "until=24h" ];
    };
  };
  environment.systemPackages = [ pkgs.docker ];
  # Gateway forwards 80/443 to cc-ci over the public interface (enp5s0); the coop-cloud
  # traefik stack (deployed via abra, see docs/install.md) publishes these ports.
  networking.firewall.allowedTCPPorts = [ 80 443 ];
  # Bring up a single-node swarm + the shared `proxy` overlay network. Idempotent:
  # safe to re-run every boot/rebuild. advertise-addr 127.0.0.1 is fine for a lone node.
  systemd.services.swarm-init = {
    description = "Initialise single-node Docker Swarm + proxy overlay network";
    after = [ "docker.service" ];
    requires = [ "docker.service" ];
    wantedBy = [ "multi-user.target" ];
    path = [ pkgs.docker ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
    };
    # `docker info` failing is mapped to the state "error", which (like any non-"active" state)
    # triggers `docker swarm init`; the network is created only when inspect cannot find it.
    script = ''
      set -eu
      state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || echo error)"
      if [ "$state" != "active" ]; then
        docker swarm init --advertise-addr 127.0.0.1
      fi
      if ! docker network inspect proxy >/dev/null 2>&1; then
        docker network create --driver overlay --attachable proxy
      fi
    '';
  };
}