refactor(1b): RL5 — consolidate Nix code under nix/ (modules->nix/modules, hosts->nix/hosts)
flake.nix/flake.lock STAY at root so the build ref #cc-ci is unchanged; only flake's internal configuration.nix path updated. Root-relative refs inside moved modules re-based ../X -> ../../X (secrets/bridge/dashboard); configuration.nix's ../../modules imports unchanged (both dirs under nix/). Living docs (README, architecture/install/secrets/enroll) + .drone.yml comment updated to nix/...; append-only history logs left as-is. DECISIONS.md records RL5 + the deferred-coordinated RL6. Verified on cc-ci: nixos-rebuild build 'path:#cc-ci' -> toplevel 8i3jcad9 (BYTE-IDENTICAL to the pre-move build — store derivations are content-addressed on file contents, module .nix not in the runtime closure); scripts/lint.sh -> lint: PASS. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
6
nix/modules/abra.nix
Normal file
6
nix/modules/abra.nix
Normal file
@ -0,0 +1,6 @@
|
||||
# abra — the Co-op Cloud CLI used by the harness and the proxy/drone reconcile oneshots.
|
||||
# The package is defined as an overlay in nix/modules/packages.nix (pkgs.abra), pinned by hash (D8).
|
||||
{ pkgs, ... }:
{
  # Add the overlay-provided abra build to the system-wide package set.
  environment.systemPackages = with pkgs; [ abra ];
}
|
||||
55
nix/modules/backupbot.nix
Normal file
55
nix/modules/backupbot.nix
Normal file
@ -0,0 +1,55 @@
|
||||
# backup-bot-two (M5): the Co-op Cloud backup service. `abra app backup create <app>` / restore
|
||||
# talk to it; it snapshots volumes labelled `backupbot.backup=true` into a local restic repo.
|
||||
# Idempotent-reconcile oneshot (same pattern as proxy/drone). restic_password is abra-generated
|
||||
# (class-B-style internal secret) and kept stable across reconciles (only generated if missing).
|
||||
{ pkgs, ... }:
let
  # Script run by the deploy-backupbot unit below. In order: register the local abra server if
  # it is not listed, fetch the recipe, create the app env file when absent, pin three env
  # values, generate secrets only when no *_restic_password_v1 swarm secret exists, then deploy.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-backupbot";
    runtimeInputs = [
      pkgs.abra
      pkgs.docker
      pkgs.gnused
      pkgs.gnugrep
      pkgs.coreutils
      pkgs.git
    ];
    text = ''
      DOMAIN="backups.ci.commoninternet.net" # identity/stack name only; no web route
      ENV_FILE="$HOME/.abra/servers/default/$DOMAIN.env"

      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch backup-bot-two -n >/dev/null

      [ -f "$ENV_FILE" ] || abra app new backup-bot-two -s default -D "$DOMAIN" -n

      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # Ensure the file ends in a newline before appending — backup-bot-two's .env.sample ends
        # with a newline-less comment line, so a bare append would glue the var onto that comment
        # (commenting it out). `$(tail -c1)` is empty iff the last byte is already a newline.
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env RESTIC_REPOSITORY /backups/restic
      set_env SECRET_RESTIC_PASSWORD_VERSION v1
      set_env CRONJOB_VERSION v1

      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      # -m avoids the TTY/table (ioctl) path; redirect stdout so generated values never hit logs (D6).
      have_secret restic_password || abra app secret generate "$DOMAIN" --all -m -n >/dev/null

      abra app deploy "$DOMAIN" -n -C
    '';
  };
in
{
  systemd.services.deploy-backupbot = {
    description = "Reconcile backup-bot-two (volume backups via restic) via abra";

    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    # Last link of the proxy→drone→bridge→dashboard→backupbot chain: ordering after
    # deploy-dashboard keeps the abra-driven reconcilers from running concurrently on a fresh
    # host. These are ordering-only entries (nothing here is pulled in via `after`).
    after = [
      "deploy-dashboard.service"
      "deploy-proxy.service"
      "swarm-init.service"
      "docker.service"
      "network-online.target"
    ];

    # The script resolves the abra env file under $HOME/.abra, so HOME must be set.
    environment.HOME = "/root";

    serviceConfig = {
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-backupbot";
      Type = "oneshot";
      RemainAfterExit = true;
    };
  };
}
|
||||
120
nix/modules/bridge.nix
Normal file
120
nix/modules/bridge.nix
Normal file
@ -0,0 +1,120 @@
|
||||
# Comment-bridge (§4.1): the `!testme` webhook receiver. Packaged as a Nix-built OCI image
|
||||
# (no Docker Hub pull) and run as a swarm service on `proxy`, routed by traefik at
|
||||
# ci.commoninternet.net/hook. Deployed by an idempotent-reconcile oneshot (same pattern as
|
||||
# proxy/drone). Secrets come from sops (/run/secrets) → swarm secrets the container mounts.
|
||||
{ pkgs, ... }:
|
||||
let
|
||||
# bridge.py placed at /app/bridge.py inside the image.
# Built with runCommand: the only output is $out/app/bridge.py, copied from the repo's bridge/ dir.
|
||||
bridgeApp = pkgs.runCommand "cc-ci-bridge-app" { } ''
|
||||
mkdir -p $out/app
|
||||
cp ${../../bridge/bridge.py} $out/app/bridge.py
|
||||
'';
|
||||
|
||||
# Content-derived tag so `docker stack deploy` rolls the service whenever bridge.py changes
|
||||
# (a fixed `:latest` + unchanged stack spec does NOT roll — swarm sees no change).
# The tag is the first 12 hex characters of the sha256 of bridge.py's contents.
|
||||
imageTag = builtins.substring 0 12 (builtins.hashString "sha256"
|
||||
(builtins.readFile ../../bridge/bridge.py));
|
||||
|
||||
# Layered image holding python3, the CA bundle and the app directory; tagged with imageTag.
# The container command runs /app/bridge.py and SSL_CERT_FILE points at the bundled CA file.
image = pkgs.dockerTools.buildLayeredImage {
|
||||
name = "cc-ci-bridge";
|
||||
tag = imageTag;
|
||||
contents = [ pkgs.python3 pkgs.cacert bridgeApp ];
|
||||
config = {
|
||||
Cmd = [ "${pkgs.python3}/bin/python3" "/app/bridge.py" ];
|
||||
Env = [ "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" ];
|
||||
ExposedPorts = { "8080/tcp" = { }; };
|
||||
};
|
||||
};
|
||||
|
||||
# Stack spec written to the store as a plain text file. It references the image by imageTag and
# the three external swarm secrets that the reconcile script below creates.
# NOTE(review): the YAML nesting is whitespace-significant and this view shows every line
# flush-left — confirm indentation against the checked-in file before editing this string.
stack = pkgs.writeText "cc-ci-bridge-stack.yml" ''
|
||||
version: "3.8"
|
||||
services:
|
||||
app:
|
||||
image: cc-ci-bridge:${imageTag}
|
||||
environment:
|
||||
- GITEA_API=https://git.autonomic.zone/api/v1
|
||||
- DRONE_URL=https://drone.ci.commoninternet.net
|
||||
- CI_REPO=recipe-maintainers/cc-ci
|
||||
- BRIDGE_LISTEN=0.0.0.0:8080
|
||||
# Polling is PRIMARY (outbound, read-only, always on); the /hook webhook is an optional
|
||||
# admin-registered push optimization deduped against the poller (§4.1). Enrollment = add
|
||||
# the repo to POLL_REPOS (csv) + ensure tests/<recipe>/ exists.
|
||||
- POLL_INTERVAL=30
|
||||
- POLL_REPOS=recipe-maintainers/cc-ci,recipe-maintainers/custom-html,recipe-maintainers/keycloak,recipe-maintainers/cryptpad,recipe-maintainers/matrix-synapse,recipe-maintainers/lasuite-docs,recipe-maintainers/n8n
|
||||
- HMAC_FILE=/run/secrets/webhook_hmac
|
||||
- DRONE_TOKEN_FILE=/run/secrets/drone_token
|
||||
- GITEA_TOKEN_FILE=/run/secrets/gitea_token
|
||||
secrets:
|
||||
- webhook_hmac
|
||||
- drone_token
|
||||
- gitea_token
|
||||
networks:
|
||||
- proxy
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.ccci-bridge.loadbalancer.server.port=8080"
|
||||
- "traefik.http.routers.ccci-bridge.rule=Host(`ci.commoninternet.net`) && PathPrefix(`/hook`)"
|
||||
- "traefik.http.routers.ccci-bridge.entrypoints=web-secure"
|
||||
- "traefik.http.routers.ccci-bridge.tls=true"
|
||||
networks:
|
||||
proxy:
|
||||
external: true
|
||||
secrets:
|
||||
webhook_hmac:
|
||||
external: true
|
||||
name: cc_ci_bridge_webhook_hmac_v1
|
||||
drone_token:
|
||||
external: true
|
||||
name: cc_ci_bridge_drone_token_v1
|
||||
gitea_token:
|
||||
external: true
|
||||
name: cc_ci_bridge_gitea_token_v1
|
||||
'';
|
||||
|
||||
# Reconcile script, in order: (1) exit 1 if any of the three /run/secrets/bridge_* files is
# unreadable, (2) `docker load` the image, (3) create each swarm secret only when
# `docker secret inspect` does not find it, (4) `docker stack deploy` the stack as ccci-bridge.
# NOTE(review): because step 3 only creates missing secrets, a changed value in /run/secrets is
# not copied into an already-existing *_v1 swarm secret.
reconcile = pkgs.writeShellApplication {
|
||||
name = "cc-ci-reconcile-bridge";
|
||||
runtimeInputs = with pkgs; [ docker coreutils ];
|
||||
text = ''
|
||||
for s in webhook_hmac drone_token gitea_token; do
|
||||
if [ ! -r "/run/secrets/bridge_$s" ]; then
|
||||
echo "FATAL: /run/secrets/bridge_$s missing (rebuild ordering?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Load the Nix-built image into the local docker (idempotent; layers cached).
|
||||
docker load -i ${image}
|
||||
|
||||
# Materialise swarm secrets from sops (immutable; create once at v1).
|
||||
ensure_secret() {
|
||||
docker secret inspect "$2" >/dev/null 2>&1 || docker secret create "$2" "$1" >/dev/null
|
||||
}
|
||||
ensure_secret /run/secrets/bridge_webhook_hmac cc_ci_bridge_webhook_hmac_v1
|
||||
ensure_secret /run/secrets/bridge_drone_token cc_ci_bridge_drone_token_v1
|
||||
ensure_secret /run/secrets/bridge_gitea_token cc_ci_bridge_gitea_token_v1
|
||||
|
||||
docker stack deploy --detach=true -c ${stack} ccci-bridge
|
||||
'';
|
||||
};
|
||||
in
|
||||
{
|
||||
# Oneshot unit that runs the reconcile script; wanted by multi-user.target.
systemd.services.deploy-bridge = {
|
||||
description = "Reconcile the cc-ci comment-bridge (!testme webhook) swarm service";
|
||||
# Serialized after deploy-drone (chain proxy→drone→bridge→dashboard→backupbot): on a FRESH host the
|
||||
# abra-driven reconcilers otherwise run concurrently against an uninitialised ~/.abra and race on
|
||||
# catalogue/recipe init, leaving units failed after a blank-VM rebuild. Ordering-only `after` fixes it.
|
||||
after = [ "deploy-drone.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
|
||||
# Hard dependencies are limited to swarm-init and docker; the deploy-* entries above only order.
requires = [ "swarm-init.service" "docker.service" ];
|
||||
wants = [ "network-online.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
ExecStart = "${reconcile}/bin/cc-ci-reconcile-bridge";
|
||||
};
|
||||
};
|
||||
}
|
||||
94
nix/modules/dashboard.nix
Normal file
94
nix/modules/dashboard.nix
Normal file
@ -0,0 +1,94 @@
|
||||
# Results dashboard (§4.5, D7): the YunoHost-CI-like overview at ci.commoninternet.net. Reads the
|
||||
# Drone API (read-only) and renders latest-run-per-recipe + SVG badges. Packaged as a Nix-built OCI
|
||||
# image and run as a swarm service on `proxy`, routed by traefik at Host(ci.commoninternet.net) — the
|
||||
# comment-bridge's Host && PathPrefix(`/hook`) rule is longer, so /hook still wins (priority by rule
|
||||
# length). Deployed by an idempotent-reconcile oneshot (same pattern as bridge/drone).
|
||||
{ pkgs, ... }:
|
||||
let
|
||||
# App directory for the image: the only output is $out/app/dashboard.py, copied from dashboard/.
dashApp = pkgs.runCommand "cc-ci-dashboard-app" { } ''
|
||||
mkdir -p $out/app
|
||||
cp ${../../dashboard/dashboard.py} $out/app/dashboard.py
|
||||
'';
|
||||
|
||||
# Content-derived tag: changes whenever dashboard.py changes, so `docker stack deploy` actually
|
||||
# rolls the service to the new image (a fixed `:latest` tag + unchanged stack spec does NOT roll —
|
||||
# swarm sees no change). Reproducible + self-healing.
# The tag is the first 12 hex characters of the sha256 of dashboard.py's contents.
|
||||
imageTag = builtins.substring 0 12 (builtins.hashString "sha256"
|
||||
(builtins.readFile ../../dashboard/dashboard.py));
|
||||
|
||||
# Layered image holding python3, the CA bundle and the app directory; tagged with imageTag.
image = pkgs.dockerTools.buildLayeredImage {
|
||||
name = "cc-ci-dashboard";
|
||||
tag = imageTag;
|
||||
contents = [ pkgs.python3 pkgs.cacert dashApp ];
|
||||
config = {
|
||||
Cmd = [ "${pkgs.python3}/bin/python3" "/app/dashboard.py" ];
|
||||
Env = [ "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" ];
|
||||
ExposedPorts = { "8080/tcp" = { }; };
|
||||
};
|
||||
};
|
||||
|
||||
# Stack spec written to the store as a plain text file; references the image by imageTag and one
# external swarm secret (cc_ci_dashboard_drone_token_v1) created by the reconcile script below.
# NOTE(review): the YAML nesting is whitespace-significant and this view shows every line
# flush-left — confirm indentation against the checked-in file before editing this string.
stack = pkgs.writeText "cc-ci-dashboard-stack.yml" ''
|
||||
version: "3.8"
|
||||
services:
|
||||
app:
|
||||
image: cc-ci-dashboard:${imageTag}
|
||||
environment:
|
||||
- DRONE_URL=https://drone.ci.commoninternet.net
|
||||
- CI_REPO=recipe-maintainers/cc-ci
|
||||
- DASH_LISTEN=0.0.0.0:8080
|
||||
- DRONE_TOKEN_FILE=/run/secrets/drone_token
|
||||
secrets:
|
||||
- drone_token
|
||||
networks:
|
||||
- proxy
|
||||
deploy:
|
||||
replicas: 1
|
||||
restart_policy:
|
||||
condition: any
|
||||
labels:
|
||||
- "traefik.enable=true"
|
||||
- "traefik.http.services.ccci-dashboard.loadbalancer.server.port=8080"
|
||||
- "traefik.http.routers.ccci-dashboard.rule=Host(`ci.commoninternet.net`)"
|
||||
- "traefik.http.routers.ccci-dashboard.entrypoints=web-secure"
|
||||
- "traefik.http.routers.ccci-dashboard.tls=true"
|
||||
networks:
|
||||
proxy:
|
||||
external: true
|
||||
secrets:
|
||||
drone_token:
|
||||
external: true
|
||||
name: cc_ci_dashboard_drone_token_v1
|
||||
'';
|
||||
|
||||
# Reconcile script, in order: exit 1 if /run/secrets/bridge_drone_token is unreadable, `docker load`
# the image, create the dashboard's swarm secret from that same file only when it does not exist
# yet, then `docker stack deploy` the stack as ccci-dashboard.
# NOTE(review): the secret is created only when missing, so a changed token value is not copied
# into an existing cc_ci_dashboard_drone_token_v1.
reconcile = pkgs.writeShellApplication {
|
||||
name = "cc-ci-reconcile-dashboard";
|
||||
runtimeInputs = with pkgs; [ docker coreutils ];
|
||||
text = ''
|
||||
if [ ! -r /run/secrets/bridge_drone_token ]; then
|
||||
echo "FATAL: /run/secrets/bridge_drone_token missing (rebuild ordering?)" >&2
|
||||
exit 1
|
||||
fi
|
||||
docker load -i ${image}
|
||||
# Dashboard reads the Drone API read-only; reuse the same Drone token value as the bridge.
|
||||
docker secret inspect cc_ci_dashboard_drone_token_v1 >/dev/null 2>&1 \
|
||||
|| docker secret create cc_ci_dashboard_drone_token_v1 /run/secrets/bridge_drone_token >/dev/null
|
||||
docker stack deploy --detach=true -c ${stack} ccci-dashboard
|
||||
'';
|
||||
};
|
||||
in
|
||||
{
|
||||
# Oneshot unit that runs the reconcile script; wanted by multi-user.target.
systemd.services.deploy-dashboard = {
|
||||
description = "Reconcile the cc-ci results dashboard (overview + badges) swarm service";
|
||||
# Serialized after deploy-bridge (chain proxy→drone→bridge→dashboard→backupbot) to avoid the
|
||||
# concurrent abra-init race on a fresh host (see bridge.nix). Ordering-only.
|
||||
after = [ "deploy-bridge.service" "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
|
||||
# Hard dependencies are limited to swarm-init and docker; the deploy-* entries above only order.
requires = [ "swarm-init.service" "docker.service" ];
|
||||
wants = [ "network-online.target" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
ExecStart = "${reconcile}/bin/cc-ci-reconcile-dashboard";
|
||||
};
|
||||
};
|
||||
}
|
||||
53
nix/modules/drone-runner.nix
Normal file
53
nix/modules/drone-runner.nix
Normal file
@ -0,0 +1,53 @@
|
||||
# Drone exec runner (M2). Runs on cc-ci itself (not in a container) so CI pipelines can drive
|
||||
# host `abra` to deploy real recipes onto the swarm (plan §4.2, §8: exec runner). The Drone
|
||||
# *server* is deployed separately via abra (the deploy-drone reconcile oneshot in nix/modules/drone.nix) as a swarm service.
|
||||
#
|
||||
# The exec runner is drone-runner-exec (the only exec runner upstream ever shipped; see
|
||||
# DECISIONS.md "CI engine"). It connects to the server over RPC at drone.ci.commoninternet.net,
|
||||
# sharing DRONE_RPC_SECRET with the server via the sops-rendered EnvironmentFile.
|
||||
{ pkgs, config, lib, ... }:
let
  # MAX_TESTS (plan §4.2/§4.3 resource safety): how many CI builds the exec runner executes at
  # once; Drone keeps the remainder in its own pending-build queue. This is the cap on how many
  # test apps can be live simultaneously. It stays at 1 on this single 28GiB node because recipes
  # are heavy (immich/matrix large volumes). With a capacity of 1 no other run is ever in flight,
  # so the run-start janitor may reap any orphan (a SIGKILL'd build runs no teardown) and the
  # "at most MAX_TESTS apps live" bound holds exactly. Only raise to 2 once the node is shown to
  # cope with two light recipes; the janitor must then stay age-based so it does not reap a
  # concurrent run (see DECISIONS.md "Resource safety").
  maxTests = "1";
in
{
  # Drone is under the Polyform Small Business license and nixpkgs marks it unfree; internal CI
  # use is permitted. The predicate admits exactly this one package name.
  nixpkgs.config.allowUnfreePredicate = pkg: lib.getName pkg == "drone-runner-exec";

  systemd.services.drone-runner-exec = {
    description = "Drone exec runner (drives host abra/swarm)";
    wantedBy = [ "multi-user.target" ];
    wants = [ "network-online.target" ];
    after = [ "network-online.target" ];

    serviceConfig = {
      # Pipelines run as the service user; root is needed to drive docker/abra and to read the
      # abra config under /root/.abra (same as manual deploys).
      User = "root";
      StateDirectory = "drone-runner";
      # Supplies DRONE_RPC_SECRET from the sops-rendered env file (shared with the server).
      EnvironmentFile = config.sops.templates."drone-runner.env".path;
      ExecStart = "${pkgs.drone-runner-exec}/bin/drone-runner-exec";
      Restart = "always";
      RestartSec = "5s";
    };

    environment = {
      DRONE_RPC_HOST = "drone.ci.commoninternet.net";
      DRONE_RPC_PROTO = "https";
      DRONE_RUNNER_NAME = "cc-ci-exec";
      # Concurrency cap; value comes from the maxTests let-binding.
      DRONE_RUNNER_CAPACITY = maxTests;
      # Writable root for build workspaces.
      DRONE_RUNNER_ROOT = "/var/lib/drone-runner";
      # Pipeline commands shell out to abra/docker/git, all of which live in the system path.
      PATH = lib.mkForce "/run/current-system/sw/bin:/run/wrappers/bin";
    };
  };
}
|
||||
90
nix/modules/drone.nix
Normal file
90
nix/modules/drone.nix
Normal file
@ -0,0 +1,90 @@
|
||||
# Drone CI server = coop-cloud `drone` recipe via abra (swarm, traefik-routed at
|
||||
# drone.ci.commoninternet.net, Gitea SSO, wildcard cert / no ACME). The exec *runner* is a
|
||||
# separate host systemd service (modules/drone-runner.nix). See DECISIONS.md "CI engine"/"Drone
|
||||
# deployment shape".
|
||||
#
|
||||
# Idempotent-RECONCILE oneshot (same pattern as proxy/swarm-init): converges every boot/activation.
|
||||
# RPC + OAuth-client secrets come from sops (/run/secrets), inserted as swarm secrets here.
|
||||
{ pkgs, ... }:
let
  giteaClientId = "ab4cdb9d-ee96-4867-875f-87384505fc52";
  # Per-build TIMEOUT (plan §4.2/§4.3 resource safety): if a CI build runs longer than this, Drone
  # cancels it (the exec runner kills the process), freeing the MAX_TESTS slot so the queue advances.
  # The killed build can't run its own teardown — the run-start janitor reaps its orphaned app
  # (nix/modules/drone-runner.nix MAX_TESTS note). Configurable here; reconciled best-effort below.
  buildTimeoutMinutes = "60";

  # Script run by the deploy-drone unit. Steps: verify the sops-decrypted inputs, make sure the
  # local abra server and the recipe exist, create the app env file if absent, pin env values,
  # insert the two swarm secrets when missing, deploy, then best-effort set the repo build timeout.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-drone";
    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git curl ];
    text = ''
      DRONE_DOMAIN="drone.ci.commoninternet.net"
      ENV_FILE="$HOME/.abra/servers/default/$DRONE_DOMAIN.env"

      # All three files are hard requirements. bridge_drone_token is read unconditionally below
      # when DRONE_USER_CREATE is written; a failed command substitution inside an argument does
      # not trip errexit, so without this check a missing file would silently write an empty
      # token into the env file.
      for s in drone_rpc_secret drone_gitea_client_secret bridge_drone_token; do
        if [ ! -r "/run/secrets/$s" ]; then
          echo "FATAL: /run/secrets/$s missing (rebuild ordering?)" >&2
          exit 1
        fi
      done

      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch drone -n >/dev/null

      [ -f "$ENV_FILE" ] || abra app new drone -s default -D "$DRONE_DOMAIN" -n

      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # ensure trailing newline before append (a recipe .env.sample may end without one, which
        # would glue the var onto the last line — see nix/modules/backupbot.nix for the bite).
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env LETS_ENCRYPT_ENV ""
      set_env EXTRA_DOMAINS ""
      # Inject the bridge's Drone token as the bot's MACHINE TOKEN so it is reproducible on a fresh
      # Drone DB. Without `token:`, Drone auto-generates a random token that the committed (sops)
      # bridge_drone_token can't match → on a clean-room rebuild the bridge gets 401 and can't trigger
      # builds (the original only matched because its token was captured out-of-band post-hoc). Caught
      # by the E2E-TESTME acceptance test. With `token:`, every rebuild's bot carries the sops token.
      set_env DRONE_USER_CREATE "username:autonomic-bot,admin:true,token:$(cat /run/secrets/bridge_drone_token)"
      set_env GITEA_DOMAIN "git.autonomic.zone"
      set_env GITEA_CLIENT_ID "${giteaClientId}"
      set_env RPC_SECRET_VERSION "v1"
      set_env CLIENT_SECRET_VERSION "v1"
      set_env DRONE_ENV_VERSION "v1"
      set_env COMPOSE_FILE '"compose.yml:compose.gitea.yml"'

      # NOTE(review): the match is by name suffix only and is not scoped to this stack, so a
      # same-named v1 secret belonging to another app would also satisfy it — confirm acceptable.
      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      have_secret rpc_secret || abra app secret insert "$DRONE_DOMAIN" rpc_secret v1 /run/secrets/drone_rpc_secret -f -n
      have_secret client_secret || abra app secret insert "$DRONE_DOMAIN" client_secret v1 /run/secrets/drone_gitea_client_secret -f -n

      abra app deploy "$DRONE_DOMAIN" -n -C

      # Best-effort: set the cc-ci repo's build timeout (resource safety). Non-fatal — never break
      # the core server reconcile if Drone/token isn't ready. Uses the bridge's Drone admin token and
      # hits the local traefik via --resolve (hairpin-free) while keeping SNI=drone...
      # NOTE(review): -k disables certificate verification for this request — confirm intended.
      if [ -r /run/secrets/bridge_drone_token ]; then
        DT="$(cat /run/secrets/bridge_drone_token)"
        curl -fsS -k --resolve "$DRONE_DOMAIN:443:127.0.0.1" \
          -X PATCH -H "Authorization: Bearer $DT" -H "Content-Type: application/json" \
          -d '{"timeout": ${buildTimeoutMinutes}}' \
          "https://$DRONE_DOMAIN/api/repos/recipe-maintainers/cc-ci" >/dev/null \
          && echo "set cc-ci build timeout = ${buildTimeoutMinutes}m" \
          || echo "WARN: could not set build timeout (non-fatal)" >&2
      fi
    '';
  };
in
{
  systemd.services.deploy-drone = {
    description = "Reconcile the Drone CI server (coop-cloud recipe, Gitea SSO) via abra";
    # Ordered after the proxy reconcile; only swarm-init and docker are hard requirements.
    after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    # The script resolves the abra env file under $HOME/.abra, so HOME must be set.
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-drone";
    };
  };
}
|
||||
20
nix/modules/harness.nix
Normal file
20
nix/modules/harness.nix
Normal file
@ -0,0 +1,20 @@
|
||||
# CI harness runtime (M4): a reproducible Python env with pytest + Playwright and the
|
||||
# Nix-provided browsers, exposed as `cc-ci-run` on the host so the Drone exec pipeline (and
|
||||
# manual dev) can run the harness with `cc-ci-run runner/run_recipe_ci.py`. Playwright on NixOS
|
||||
# needs the browsers from nixpkgs (not a downloaded copy) via PLAYWRIGHT_BROWSERS_PATH.
|
||||
{ pkgs, ... }:
let
  # Python interpreter bundled with the two libraries the harness imports.
  pyEnv = pkgs.python3.withPackages (ps: [ ps.pytest ps.playwright ]);

  # `cc-ci-run <args>`: points Playwright at the nixpkgs-provided browsers, disables its own
  # browser download, and replaces itself with pyEnv's python3 running the given arguments.
  ccciRun = pkgs.writeShellApplication {
    name = "cc-ci-run";
    runtimeInputs = [ pyEnv ] ++ (with pkgs; [ abra docker git coreutils util-linux ]);
    text = ''
      export PLAYWRIGHT_BROWSERS_PATH=${pkgs.playwright-driver.browsers}
      export PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
      exec ${pyEnv}/bin/python3 "$@"
    '';
  };
in
{
  environment.systemPackages = [ ccciRun ];
}
|
||||
25
nix/modules/packages.nix
Normal file
25
nix/modules/packages.nix
Normal file
@ -0,0 +1,25 @@
|
||||
# Project package overlay. `abra` (the Co-op Cloud CLI) is exposed as `pkgs.abra` so every
|
||||
# module (systemPackages, the proxy/drone reconcile oneshots) can use the same pinned build.
|
||||
_:
let
  # Overlay adding `abra` to pkgs: the upstream linux/amd64 release tarball, fetched by fixed
  # hash, patched with autoPatchelfHook, and installed as $out/bin/abra.
  abraOverlay = _final: prev:
    let
      version = "0.13.0-beta";
    in
    {
      abra = prev.stdenv.mkDerivation {
        pname = "abra";
        inherit version;

        src = prev.fetchurl {
          url = "https://git.coopcloud.tech/toolshed/abra/releases/download/${version}/abra_${version}_linux_amd64.tar.gz";
          sha256 = "12csk6wp1pk9cspzqfl4a6h5jdz8p055sf0ggxw9k7ljhpd5qvc6";
        };
        # Unpacked sources are used from the unpack directory itself.
        sourceRoot = ".";

        nativeBuildInputs = [ prev.autoPatchelfHook ];
        buildInputs = [ prev.stdenv.cc.cc.lib ];

        installPhase = ''
          runHook preInstall
          install -Dm755 abra "$out/bin/abra"
          runHook postInstall
        '';
      };
    };
in
{
  nixpkgs.overlays = [ abraOverlay ];
}
|
||||
67
nix/modules/proxy.nix
Normal file
67
nix/modules/proxy.nix
Normal file
@ -0,0 +1,67 @@
|
||||
# Reverse proxy = the canonical Co-op Cloud `traefik` recipe, deployed via abra in
|
||||
# wildcard / file-provider mode (wildcard cert as ssl_cert/ssl_key swarm secrets,
|
||||
# LETS_ENCRYPT_ENV empty => NO ACME, no DNS token). See DECISIONS.md "Proxy: real coop-cloud/traefik".
|
||||
# Phase-1c: the cert at CERT_DIR is sops-decrypted from git (cc-ci-secrets) at activation
|
||||
# (modules/secrets.nix wildcard_cert/wildcard_key), NOT an out-of-band operator file drop.
|
||||
#
|
||||
# Declared as an idempotent-RECONCILE systemd oneshot (like swarm-init): it inspects current
|
||||
# state and converges every activation/boot, self-healing drift (redeploys if the stack is gone,
|
||||
# re-inserts secrets if missing). No run-once sentinel. So a from-scratch install is just
|
||||
# `nixos-rebuild switch` + operator preconditions (D8) — no manual post-steps.
|
||||
{ pkgs, ... }:
let
  # Script run by the deploy-proxy unit. Steps: fail if the wildcard cert/key are unreadable,
  # make sure the local abra server and the traefik recipe exist, create the app env file if
  # absent, pin env values, insert the cert/key swarm secrets when missing, then deploy.
  reconcile = pkgs.writeShellApplication {
    name = "cc-ci-reconcile-proxy";
    runtimeInputs = with pkgs; [ abra docker jq gnused gnugrep coreutils git ];
    text = ''
      PROXY_DOMAIN="traefik.ci.commoninternet.net"
      CERT_DIR="/var/lib/ci-certs/live"
      ENV_FILE="$HOME/.abra/servers/default/$PROXY_DOMAIN.env"

      # Fail visibly (failed unit) if the cert is missing — do NOT silently skip. It is
      # sops-decrypted from git (cc-ci-secrets) at activation; a miss here means the sops decrypt
      # path is broken (e.g. age identity not present), which must surface, not be papered over.
      if [ ! -r "$CERT_DIR/fullchain.pem" ] || [ ! -r "$CERT_DIR/privkey.pem" ]; then
        echo "FATAL: wildcard cert missing at $CERT_DIR (sops decrypt from cc-ci-secrets failed?)" >&2
        exit 1
      fi

      abra server ls -m -n >/dev/null 2>&1 || abra server add --local -n || true
      abra recipe fetch traefik -n >/dev/null

      [ -f "$ENV_FILE" ] || abra app new traefik -s default -D "$PROXY_DOMAIN" -n

      set_env() {
        sed -i -E "/^[[:space:]]*#?[[:space:]]*$1=/d" "$ENV_FILE"
        # Ensure the file ends in a newline before appending, so the new var cannot be glued onto
        # a final line that lacks one (same guard as the backupbot and drone reconcilers).
        # `$(tail -c1)` is empty iff the last byte is already a newline.
        if [ -s "$ENV_FILE" ] && [ -n "$(tail -c1 "$ENV_FILE")" ]; then printf '\n' >> "$ENV_FILE"; fi
        printf '%s=%s\n' "$1" "$2" >> "$ENV_FILE"
      }
      set_env LETS_ENCRYPT_ENV ""
      set_env WILDCARDS_ENABLED "1"
      set_env SECRET_WILDCARD_CERT_VERSION "v1"
      set_env SECRET_WILDCARD_KEY_VERSION "v1"
      set_env COMPOSE_FILE '"compose.yml:compose.wildcard.yml"'

      have_secret() { docker secret ls --format '{{.Name}}' | grep -q "_$1_v1$"; }
      have_secret ssl_cert || abra app secret insert "$PROXY_DOMAIN" ssl_cert v1 "$CERT_DIR/fullchain.pem" -f -n
      have_secret ssl_key || abra app secret insert "$PROXY_DOMAIN" ssl_key v1 "$CERT_DIR/privkey.pem" -f -n

      # Converge the stack (idempotent: no-op if already at desired state).
      abra app deploy "$PROXY_DOMAIN" -n -C
    '';
  };
in
{
  systemd.services.deploy-proxy = {
    description = "Reconcile the Co-op Cloud traefik proxy (wildcard/no-ACME) via abra";
    after = [ "swarm-init.service" "docker.service" "network-online.target" ];
    requires = [ "swarm-init.service" "docker.service" ];
    wants = [ "network-online.target" ];
    wantedBy = [ "multi-user.target" ];
    # The script resolves the abra env file under $HOME/.abra, so HOME must be set.
    environment.HOME = "/root";
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = "${reconcile}/bin/cc-ci-reconcile-proxy";
    };
  };
}
|
||||
58
nix/modules/secrets.nix
Normal file
58
nix/modules/secrets.nix
Normal file
@ -0,0 +1,58 @@
|
||||
# sops-nix wiring (D6 infra secrets). cc-ci decrypts secrets at activation using its own
|
||||
# ed25519 SSH host key as the age identity (no separate key file to manage on the box).
|
||||
# Encrypted material lives in the repo-root `secrets/` git SUBMODULE (the private `cc-ci-secrets`
|
||||
# repo, Phase-1c). RL5 put this module under nix/modules/, so the relative path is
|
||||
# ../../secrets/secrets.yaml. Readable only by the recipients in secrets/.sops.yaml (host key +
|
||||
# off-box master recovery key).
|
||||
{ config, ... }:
{
  sops.defaultSopsFile = ../../secrets/secrets.yaml;

  sops.age = {
    # The host's SSH host key is the primary decryption identity (sops-nix converts it to age).
    sshKeyPaths = [ "/etc/ssh/ssh_host_ed25519_key" ];
    # Phase-1c bootstrap key at a fixed path — the single out-of-band secret, placed on the host
    # before the first rebuild. On the canonical cc-ci it holds the host-derived age identity
    # (same recipient as sshKeyPaths, so no new exposure). On a fresh or cloned host (e.g. the
    # throwaway-VM rebuild) it holds the off-box recovery key, letting a host whose SSH host key
    # is not a sops recipient decrypt everything. NOTE: sops-install-secrets aborts activation
    # when this option is set but the file is missing, so it must exist before `nixos-rebuild`.
    keyFile = "/var/lib/sops-nix/key.txt";
  };

  # Do not additionally look for a GPG key.
  sops.gnupg.sshKeyPaths = [ ];

  sops.secrets = {
    # M0 proof secret: shows the decrypt path works end to end.
    test_secret = { };

    # M2 Drone (A2 internal secrets). drone_rpc_secret is shared by the swarm-deployed Drone
    # server (inserted as its `rpc_secret` swarm secret by the Drone reconcile) and the host exec
    # runner (via the env template at the bottom). drone_gitea_client_secret is the Gitea OAuth
    # app secret, inserted as the server's `client_secret` swarm secret.
    drone_rpc_secret = { };
    drone_gitea_client_secret = { };

    # M3 comment-bridge (A2). The bridge reconcile oneshot copies these into the swarm secrets
    # the bridge container mounts. webhook_hmac is also set on the Gitea webhook.
    bridge_webhook_hmac = { };
    bridge_drone_token = { };
    bridge_gitea_token = { };

    # Phase-1c C2: wildcard TLS cert and key are sops secrets (in cc-ci-secrets), decrypted at
    # activation to the paths under /var/lib/ci-certs/live that the traefik reconcile reads.
    # This replaces the earlier step where an operator dropped a cert file in place.
    wildcard_cert = {
      path = "/var/lib/ci-certs/live/fullchain.pem";
      mode = "0444"; # leaf+intermediate chain — not secret
    };
    wildcard_key = {
      path = "/var/lib/ci-certs/live/privkey.pem";
      mode = "0400"; # private key — root only
    };
  };

  # EnvironmentFile for the host exec runner: one DRONE_RPC_SECRET line whose value is the
  # placeholder for the drone_rpc_secret sops secret.
  sops.templates."drone-runner.env".content = ''
    DRONE_RPC_SECRET=${config.sops.placeholder.drone_rpc_secret}
  '';
}
|
||||
45
nix/modules/swarm.nix
Normal file
45
nix/modules/swarm.nix
Normal file
@ -0,0 +1,45 @@
|
||||
# Docker + single-node Swarm — the deploy target for recipes under test (M1).
|
||||
# Traefik (nix/modules/proxy.nix) and abra layer on top; recipes attach to the `proxy`
|
||||
# overlay network, exactly as a real Co-op Cloud host expects.
|
||||
{ pkgs, ... }:
{
  virtualisation.docker = {
    enable = true;
    # Reclaim disk from churning per-run images/containers (cc-ci root is ~28 GiB).
    # The flags are appended to `docker system prune`. That command refuses the `until` filter
    # when `--volumes` is also given, so combining the two makes every daily prune run exit with
    # an error and reclaim nothing. `--volumes` is therefore omitted here.
    # NOTE(review): unused volumes are not pruned by this job; if they need reclaiming too, add
    # a separate volume-prune step — confirm against how test-app volumes are torn down.
    autoPrune = {
      enable = true;
      dates = "daily";
      flags = [ "--all" "--filter" "until=24h" ];
    };
  };

  environment.systemPackages = [ pkgs.docker ];

  # Gateway forwards 80/443 to cc-ci over the public interface (enp5s0); the coop-cloud
  # traefik stack (deployed via abra, see docs/install.md) publishes these ports.
  networking.firewall.allowedTCPPorts = [ 80 443 ];

  # Bring up a single-node swarm + the shared `proxy` overlay network. Both steps check current
  # state first, so the unit is safe to re-run every boot/rebuild. advertise-addr 127.0.0.1 is
  # fine for a lone node.
  systemd.services.swarm-init = {
    description = "Initialise single-node Docker Swarm + proxy overlay network";
    after = [ "docker.service" ];
    requires = [ "docker.service" ];
    wantedBy = [ "multi-user.target" ];
    path = [ pkgs.docker ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
    };
    script = ''
      set -eu
      state="$(docker info --format '{{.Swarm.LocalNodeState}}' 2>/dev/null || echo error)"
      if [ "$state" != "active" ]; then
        docker swarm init --advertise-addr 127.0.0.1
      fi
      if ! docker network inspect proxy >/dev/null 2>&1; then
        docker network create --driver overlay --attachable proxy
      fi
    '';
  };
}
|
||||
Reference in New Issue
Block a user