diff --git a/JOURNAL.md b/JOURNAL.md index d1ba103..56dbaac 100644 --- a/JOURNAL.md +++ b/JOURNAL.md @@ -285,3 +285,29 @@ posts a PR comment linking the run. Secrets read from mounted files; config via traefik at `ci.commoninternet.net/hook` via a reconcile oneshot (modules/bridge.nix); register a per-repo webhook with the HMAC; demo on a scratch PR (!testme triggers; non-!testme + non-collab rejected). That's the M3 gate. + +## 2026-05-26 — M3: bridge deployed + verified; webhook DELIVERY blocked (Gitea-side) + +**Deployed** the comment-bridge as a Nix-built OCI image (no Docker Hub pull) → swarm service on +`proxy`, behind traefik at `ci.commoninternet.net/hook`, via reconcile oneshot `modules/bridge.nix`. +Swarm secrets (webhook_hmac/drone_token/gitea_token) materialised from /run/secrets. + +**Verified working (bridge side):** +- `docker service ls` → ccci-bridge_app 1/1. +- `GET /hook/healthz` → 200 **from the sandbox over real public DNS** (ci.commoninternet.net → + 143.244.213.108); also 200 via gateway from cc-ci. +- HMAC logic: bad sig → 401; a manually openssl-HMAC-signed body → 204 (passes sig, ignored as + non-trigger); wrong event → 204. (Debug log added: `got=/want=/bodylen/seclen`.) +- Registered per-repo `issue_comment` webhook (id 210) on recipe-maintainers/cc-ci → ci.../hook with + the HMAC. Created scratch PR #1. + +**Blocker found:** commenting `!testme` (×several) and Gitea's "Test Delivery" (UI returns 200) yield +ZERO requests at the bridge container. Bridge is publicly reachable by hostname from a 3rd network; +gateway accepts public sources; public DNS correct → Gitea is not *sending* the delivery. Deliveries +panel is AJAX (uninspectable via curl); bot is not Gitea admin (can't read `ALLOWED_HOST_LIST`). +Conclusion: git.autonomic.zone webhook policy (likely `ALLOWED_HOST_LIST`) blocks ci.commoninternet.net. +Recorded in STATUS ## Blocked with operator options (whitelist host, or I pivot bridge to polling). 
+ +**Plan:** surface to operator; meanwhile proceed to M4 (harness + install stage) which doesn't depend +on the webhook (dev recipe-CI builds triggerable directly via the Drone API). Revisit M3 gate once the +host is whitelisted or via the polling fallback. diff --git a/STATUS.md b/STATUS.md index d92c074..17c6e95 100644 --- a/STATUS.md +++ b/STATUS.md @@ -24,7 +24,20 @@ `scripts/bootstrap-drone-oauth.sh`. Starting M3 as independent work; won't flip M3 gate until M2 PASS. ## Blocked -- (none) +- **M3 gate — Gitea→bridge webhook delivery not arriving (suspect Gitea `ALLOWED_HOST_LIST`).** + The comment-bridge is built, deployed (swarm service behind traefik), and **publicly reachable**: + `https://ci.commoninternet.net/hook/healthz` → 200 from the sandbox over *real public DNS* + (ci.commoninternet.net → gateway 143.244.213.108). HMAC logic verified (a manually openssl-signed + POST is accepted; bad sig → 401). BUT Gitea never delivers: commenting `!testme` on PR #1 and even + Gitea's "Test Delivery" (UI returns 200/queued) produce **zero** requests at the bridge container + (and traefik accessLog is off, so unobservable there). Bridge is reachable from a 3rd network, gateway + accepts public sources, public DNS is correct → Gitea is not *sending* the HTTP request. Most likely + git.autonomic.zone's `[webhook] ALLOWED_HOST_LIST` excludes `ci.commoninternet.net` (bot is not Gitea + admin, can't inspect/change). **Operator options:** (a) add `ci.commoninternet.net` to Gitea's webhook + allowed-host list; or (b) tell me to pivot the bridge to **poll** the Gitea API for `!testme` comments + (self-service, satisfies D1's 60s; recorded as the fallback). **Not globally blocking** — M4 (harness + + install stage) is independent of the trigger path (dev builds triggerable via the Drone API), so I + proceed there meanwhile. ## Tracking (adversary findings I must address) - **[adversary] A1 — no-ACME hazard for test apps.** Acknowledged (valid). 
The harness (M4) MUST diff --git a/bridge/bridge.py b/bridge/bridge.py index 33f2e78..7a7711f 100644 --- a/bridge/bridge.py +++ b/bridge/bridge.py @@ -116,7 +116,9 @@ class Handler(BaseHTTPRequestHandler): sig = self.headers.get("X-Gitea-Signature", "") expected = hmac.new(HMAC_SECRET, body, hashlib.sha256).hexdigest() if not hmac.compare_digest(sig, expected): - log("rejected: bad signature") + log(f"rejected: bad signature event={self.headers.get('X-Gitea-Event')} " + f"got={sig[:12]} want={expected[:12]} bodylen={len(body)} seclen={len(HMAC_SECRET)} " + f"hub256={(self.headers.get('X-Hub-Signature-256') or '')[:20]}") return self._send(401, "bad signature") if self.headers.get("X-Gitea-Event") != "issue_comment": diff --git a/hosts/cc-ci/configuration.nix b/hosts/cc-ci/configuration.nix index 8aeff6d..910ddaf 100644 --- a/hosts/cc-ci/configuration.nix +++ b/hosts/cc-ci/configuration.nix @@ -12,6 +12,7 @@ ../../modules/proxy.nix ../../modules/drone.nix ../../modules/drone-runner.nix + ../../modules/bridge.nix ]; # --- Tailscale (ACCESS-CRITICAL: do not break, this is the only route in) --- diff --git a/modules/bridge.nix b/modules/bridge.nix new file mode 100644 index 0000000..96a999e --- /dev/null +++ b/modules/bridge.nix @@ -0,0 +1,107 @@ +# Comment-bridge (§4.1): the `!testme` webhook receiver. Packaged as a Nix-built OCI image +# (no Docker Hub pull) and run as a swarm service on `proxy`, routed by traefik at +# ci.commoninternet.net/hook. Deployed by an idempotent-reconcile oneshot (same pattern as +# proxy/drone). Secrets come from sops (/run/secrets) → swarm secrets the container mounts. +{ pkgs, ... }: +let + # bridge.py placed at /app/bridge.py inside the image. 
+ bridgeApp = pkgs.runCommand "cc-ci-bridge-app" { } '' + mkdir -p $out/app + cp ${../bridge/bridge.py} $out/app/bridge.py + ''; + + image = pkgs.dockerTools.buildLayeredImage { + name = "cc-ci-bridge"; + # tag unset → store-hash tag; a new build changes the stack's image ref, so swarm redeploys + contents = [ pkgs.python3 pkgs.cacert bridgeApp ]; + config = { + Cmd = [ "${pkgs.python3}/bin/python3" "/app/bridge.py" ]; + Env = [ "SSL_CERT_FILE=${pkgs.cacert}/etc/ssl/certs/ca-bundle.crt" ]; + ExposedPorts = { "8080/tcp" = { }; }; + }; + }; + + stack = pkgs.writeText "cc-ci-bridge-stack.yml" '' + version: "3.8" + services: + app: + image: ${image.imageName}:${image.imageTag} + environment: + - GITEA_API=https://git.autonomic.zone/api/v1 + - DRONE_URL=https://drone.ci.commoninternet.net + - CI_REPO=recipe-maintainers/cc-ci + - BRIDGE_LISTEN=0.0.0.0:8080 + - HMAC_FILE=/run/secrets/webhook_hmac + - DRONE_TOKEN_FILE=/run/secrets/drone_token + - GITEA_TOKEN_FILE=/run/secrets/gitea_token + secrets: + - webhook_hmac + - drone_token + - gitea_token + networks: + - proxy + deploy: + replicas: 1 + restart_policy: + condition: any + labels: + - "traefik.enable=true" + - "traefik.http.services.ccci-bridge.loadbalancer.server.port=8080" + - "traefik.http.routers.ccci-bridge.rule=Host(`ci.commoninternet.net`) && PathPrefix(`/hook`)" + - "traefik.http.routers.ccci-bridge.entrypoints=web-secure" + - "traefik.http.routers.ccci-bridge.tls=true" + networks: + proxy: + external: true + secrets: + webhook_hmac: + external: true + name: cc_ci_bridge_webhook_hmac_v1 + drone_token: + external: true + name: cc_ci_bridge_drone_token_v1 + gitea_token: + external: true + name: cc_ci_bridge_gitea_token_v1 + ''; + + reconcile = pkgs.writeShellApplication { + name = "cc-ci-reconcile-bridge"; + runtimeInputs = with pkgs; [ docker coreutils ]; + text = '' + for s in webhook_hmac drone_token gitea_token; do + if [ ! 
-r "/run/secrets/bridge_$s" ]; then + echo "FATAL: /run/secrets/bridge_$s missing (rebuild ordering?)" >&2 + exit 1 + fi + done + + # Load the Nix-built image into the local docker (idempotent; layers cached). + docker load -i ${image} + + # Materialise swarm secrets from sops (immutable; create once at v1). + ensure_secret() { + docker secret inspect "$2" >/dev/null 2>&1 || docker secret create "$2" "$1" >/dev/null + } + ensure_secret /run/secrets/bridge_webhook_hmac cc_ci_bridge_webhook_hmac_v1 + ensure_secret /run/secrets/bridge_drone_token cc_ci_bridge_drone_token_v1 + ensure_secret /run/secrets/bridge_gitea_token cc_ci_bridge_gitea_token_v1 + + docker stack deploy --detach=true -c ${stack} ccci-bridge + ''; + }; +in +{ + systemd.services.deploy-bridge = { + description = "Reconcile the cc-ci comment-bridge (!testme webhook) swarm service"; + after = [ "deploy-proxy.service" "swarm-init.service" "docker.service" "network-online.target" ]; + requires = [ "swarm-init.service" "docker.service" ]; + wants = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; + serviceConfig = { + Type = "oneshot"; + RemainAfterExit = true; + ExecStart = "${reconcile}/bin/cc-ci-reconcile-bridge"; + }; + }; +} diff --git a/modules/secrets.nix b/modules/secrets.nix index a4c4212..31236e4 100644 --- a/modules/secrets.nix +++ b/modules/secrets.nix @@ -21,6 +21,12 @@ secrets.drone_rpc_secret = { }; secrets.drone_gitea_client_secret = { }; + # M3 comment-bridge (A2). Read by modules/bridge.nix's reconcile oneshot, which copies them + # into swarm secrets the bridge container mounts. webhook_hmac is also set on the Gitea webhook. + secrets.bridge_webhook_hmac = { }; + secrets.bridge_drone_token = { }; + secrets.bridge_gitea_token = { }; + # EnvironmentFile for the host exec runner: DRONE_RPC_SECRET rendered from the sops secret. templates."drone-runner.env".content = '' DRONE_RPC_SECRET=${config.sops.placeholder.drone_rpc_secret}