cc-ci-upgrade-all now reads an optional EnvironmentFile so the weekly run can switch backend/model (e.g. LOOP_BACKEND=opencode LOOP_MODEL=opencode-go/glm-5.2) without a rebuild. Absent file → claude/sonnet (unchanged). Built+switched on cc-ci-orchestrator-hetzner, host verified healthy. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
228 lines
11 KiB
Nix
228 lines
11 KiB
Nix
# cc-ci-orchestrator-hetzner — NixOS config for the Hetzner loops runtime host.
|
|
#
|
|
# Purpose: run the cc-ci Builder/Adversary/Watchdog loops + orchestrator/assistant sessions
|
|
# on a Hetzner cpx11 (2 vCPU / 2 GB dedicated AMD / 40 GB NVMe), replacing the slow b1 Incus VM.
|
|
#
|
|
# Provision with terraform/ then converge with: nixos-rebuild switch --flake .#cc-ci-orchestrator-hetzner
|
|
# See terraform/README.md for the full Stage 2 procedure.
|
|
{ config, pkgs, lib, ... }:
|
|
{
|
|
# hardware.nix is the nixos-infect generated hardware-configuration.nix (see README Stage 2a).
|
|
|
|
# OpenSSH daemon. Root logs in with public keys only: "prohibit-password"
# keeps the key-based root access configured via users.users.root while
# refusing password / keyboard-interactive root logins on port 22, which the
# firewall opens on every interface.
services.openssh = {
  enable = true;
  settings.PermitRootLogin = "prohibit-password";
};
|
|
|
|
# Public keys allowed to SSH in as root. Per the original note, these are all
# the keys from the current orchestrator VM's /root/.ssh/authorized_keys.
users.users.root.openssh.authorizedKeys = {
  keys = [
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOk8NaeBdPbS2gfUvbny8h0AkZlVjGYHzx4QPXSJ38gd claude@claude-vm"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJVlfoLBPseQ9fA9534KmRg2KWcksKZGzAJIpHJ2JpsI mfowler.email@protonmail.com"
    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAcyTGb/wVgdhg5oBCZZvBaR1RuUQRY/3WHnOQpNDCsp claude-cc-ci-sandbox@20260526"
  ];
};
|
|
# Addressing comes from DHCP; resolvers are pinned to public DNS servers.
networking.useDHCP = true;
networking.nameservers = [ "1.1.1.1" "8.8.8.8" ];

# Firewall: only SSH (22) is opened on every interface. tailscale0 is a
# trusted interface, so port 80 (nginx for oc.commoninternet.net) is reachable
# over the tailnet without being listed in allowedTCPPorts.
networking.firewall.enable = true;
networking.firewall.allowedTCPPorts = [ 22 ];
networking.firewall.trustedInterfaces = [ "tailscale0" ];
|
|
# NixOS release whose stateful defaults this host was installed with.
system.stateVersion = "24.11";

# Enable the `nix` CLI and flakes (the host is converged with --flake).
nix.settings.experimental-features = [
  "nix-command"
  "flakes"
];
|
|
|
|
# Tailscale client. The auth key is read from /etc/ts-auth-key, which is
# placed by hand during Stage 2 and deliberately kept out of git.
services.tailscale.enable = true;
services.tailscale.authKeyFile = "/etc/ts-auth-key";
services.tailscale.extraUpFlags = [ "--hostname=cc-ci-orchestrator" ];
|
|
|
|
# Swap file of 4096 MiB (4 GB) at /swapfile — a memory safety net for claude
# sessions, since 2 GB of RAM is tight for 3+ concurrent sessions.
swapDevices = [
  {
    device = "/swapfile";
    size = 4096;
  }
];
|
|
|
|
# nix-ld lets foreign dynamically linked ELF binaries run on NixOS; it is
# needed here for the standalone Claude Code CLI (a Bun-based binary). The
# libraries below are the shared objects exposed to such binaries.
programs.nix-ld = {
  enable = true;
  libraries = [
    pkgs.stdenv.cc.cc.lib
    pkgs.zlib
    pkgs.openssl
    pkgs.curl
    pkgs.glibc
  ];
};
|
|
|
|
# System-wide tools available to every user and under /run/current-system/sw/bin.
environment.systemPackages = [
  # general tooling
  pkgs.git
  pkgs.tmux
  pkgs.python3
  pkgs.jq
  pkgs.curl
  pkgs.cacert
  # core userland
  pkgs.gnused
  pkgs.gawk
  pkgs.coreutils
  pkgs.gnugrep
  pkgs.findutils
  pkgs.util-linux
  # networking
  pkgs.nettools
  pkgs.openssh
  # key management (same toolchain as cc-ci server)
  pkgs.age
  pkgs.sops
];
|
|
|
|
# The `loops` account: claude sessions run as this non-root user because
# --dangerously-skip-permissions is blocked for root.
users.users.loops.isNormalUser = true;
users.users.loops.home = "/home/loops";
users.users.loops.shell = pkgs.bash;
users.users.loops.extraGroups = [ "wheel" ];

# Passwordless sudo: for the wheel group in general, plus an explicit
# NOPASSWD rule covering every command for `loops`.
security.sudo = {
  wheelNeedsPassword = false;
  extraRules = [
    {
      users = [ "loops" ];
      commands = [
        {
          command = "ALL";
          options = [ "NOPASSWD" ];
        }
      ];
    }
  ];
};
|
|
|
|
# Force the global PATH so /home/loops/.local/bin (where claude and opencode
# live) comes first for the loops user.
# opencode is installed there by hand (not yet in nixpkgs); to re-install it:
#   curl -sL https://github.com/anomalyco/opencode/releases/download/v1.15.13/opencode-linux-x64.tar.gz \
#     | tar -xz -C /home/loops/.local/bin opencode && chmod +x /home/loops/.local/bin/opencode
environment.variables.PATH = lib.mkForce (lib.concatStringsSep ":" [
  "/home/loops/.local/bin"
  "/run/current-system/sw/bin"
  "/run/wrappers/bin"
  "/usr/bin"
  "/bin"
]);
|
|
|
|
# SSH client config for the loops user — points at the cc-ci Hetzner server via
# the tailnet. HostName is that server's tailnet IP (cutover settled 2026-05-31).
#
# Declared with explicit deps so it runs after the "users" and "groups"
# activation steps: the script chowns to loops:users, which can only succeed
# once that account and group exist (matters on the very first activation).
system.activationScripts.loopsSshConfig = {
  deps = [ "users" "groups" ];
  text = ''
    mkdir -p /home/loops/.ssh && chown loops:users /home/loops/.ssh && chmod 700 /home/loops/.ssh
    # Only write if not already present (preserves manual customisation).
    if [ ! -f /home/loops/.ssh/config ]; then
      # The heredoc terminator must sit at the string's minimum indentation so
      # it lands in column 0 of the generated script.
      cat > /home/loops/.ssh/config <<'SSHCFG'
    Host cc-ci
      HostName 100.95.31.88
      User root
      IdentityFile /home/loops/.ssh/cc-ci-root-ed25519
      IdentitiesOnly yes
      StrictHostKeyChecking accept-new
      ServerAliveInterval 30
    SSHCFG
      chmod 600 /home/loops/.ssh/config
      chown loops:users /home/loops/.ssh/config
    fi
  '';
};
|
|
|
|
# claude-install — fetch the standalone Claude Code CLI for the loops user if
# it is not already present at ~/.local/bin/claude. Best-effort: a failed
# install is logged but does not fail the unit.
systemd.services.claude-install = {
  description = "Install Claude Code CLI for loops user (idempotent)";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
  };
  environment = { HOME = "/home/loops"; };
  path = [ pkgs.curl pkgs.bash pkgs.coreutils pkgs.gnutar pkgs.gzip ];
  script = ''
    # pipefail makes a failed download count as a failed pipeline. Without it,
    # a curl error feeds bash an empty script, bash exits 0, and the failure
    # message below is never printed.
    set -o pipefail
    if [ ! -x "$HOME/.local/bin/claude" ]; then
      echo "installing Claude Code CLI for loops user..."
      curl -fsSL https://claude.ai/install.sh | bash || echo "install failed — retry on next activation"
    fi
  '';
};
|
|
|
|
# opencode web server — a single shared instance that agent sessions attach to
# for web visibility. The UI is served at http://oc.commoninternet.net through
# the tailscale-only nginx vhost; opencode itself listens on 127.0.0.1:4096.
# Provider credentials are loaded from /srv/cc-ci/.testenv when the unit starts.
systemd.services.opencode-web = {
  description = "opencode web server for cc-ci agents";

  wantedBy = [ "multi-user.target" ];
  wants = [ "network-online.target" ];
  after = [ "network-online.target" "tailscaled.service" ];

  serviceConfig.Type = "simple";
  serviceConfig.User = "loops";
  serviceConfig.Group = "users";
  serviceConfig.WorkingDirectory = "/srv/cc-ci-orch/cc-ci";
  serviceConfig.EnvironmentFile = "/srv/cc-ci/.testenv";
  # Remove /tmp/opencode before each start.
  serviceConfig.ExecStartPre = "${pkgs.coreutils}/bin/rm -rf /tmp/opencode";
  serviceConfig.ExecStart = "/home/loops/.local/bin/opencode serve --hostname 127.0.0.1 --port 4096";
  serviceConfig.Restart = "on-failure";
  serviceConfig.RestartSec = "5s";

  # NOTE(review): PATH is forced here, which presumably takes precedence over
  # the PATH that the `path` list below would otherwise generate — confirm
  # whether `path` is still needed.
  environment.HOME = "/home/loops";
  environment.PATH = lib.mkForce "/run/wrappers/bin:/home/loops/.local/bin:/run/current-system/sw/bin:/usr/bin:/bin:/home/loops/.nix-profile/bin:/nix/profile/bin:/home/loops/.local/state/nix/profile/bin:/etc/profiles/per-user/loops/bin:/nix/var/nix/profiles/default/bin";

  path = [
    pkgs.bash
    pkgs.coreutils
    pkgs.git
    pkgs.python3
    pkgs.openssh
    pkgs.tmux
    pkgs.nettools
  ];
};
|
|
|
|
# nginx — reverse proxy from oc.commoninternet.net to the opencode web server
# on 127.0.0.1:4096. The vhost listens on the tailscale address only (not the
# public IP), so it is reachable solely over the tailnet.
# DNS: add A record oc.commoninternet.net → 100.84.190.30 (operator step if
# hostname access is wanted).
# NOTE(review): binding to the tailnet IP presumably requires tailscale0 to be
# up when nginx starts — confirm boot ordering is adequate.
services.nginx.enable = true;
services.nginx.recommendedProxySettings = true;
services.nginx.virtualHosts."oc.commoninternet.net" = {
  listen = [
    {
      addr = "100.84.190.30";
      port = 80;
      ssl = false;
    }
  ];
  locations."/".proxyPass = "http://127.0.0.1:4096";
  locations."/".proxyWebsockets = true;
};
|
|
|
|
# cc-ci-loops supervisor — workspace staged 2026-05-31, so ENABLED for
# reboot-resilience. Starts the Builder/Adversary loops and watchdog through
# launch.sh once networking, tailscale and the Claude CLI install have run.
systemd.services.cc-ci-loops = {
  description = "cc-ci Builder/Adversary loops + watchdog (launch.sh start)";
  wantedBy = [ "multi-user.target" ]; # enabled after workspace staged (Hetzner cutover)
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
    WorkingDirectory = "/srv/cc-ci/cc-ci";
    # Append one line to REBOOTS.md per genuine reboot (boot_id-gated; not on manual restart).
    ExecStartPre = "${pkgs.bash}/bin/bash /srv/cc-ci/cc-ci-plan/reboot-log.sh";
  };
  # CLAUDE_BIN points at the standalone CLI installed by claude-install.service; the loops
  # backend defaults to claude (persisted in .loop-backend). Without this, launch.py's preflight
  # `which(claude)` fails because the systemd `path` below has no /home/loops/.local/bin.
  environment = {
    RESUME_PHASE = "1";
    HOME = "/home/loops";
    CLAUDE_BIN = "/home/loops/.local/bin/claude";
  };
  path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
  script = ''
    # Put the standalone claude/opencode binaries on PATH. On a cold boot this is the env the
    # tmux server (and thus every agent session) inherits, so bare `claude` resolves everywhere.
    export PATH="/home/loops/.local/bin:$PATH"

    # Explicit if/else rather than `test && start || echo`: with the chained
    # form, a failing `launch.sh start` also ran the echo, so it was reported
    # as "workspace not staged" and the unit still exited 0. Now a launch
    # failure fails the unit, and only a missing launch.sh is skipped.
    if [ -x /srv/cc-ci/cc-ci-plan/launch.sh ]; then
      /srv/cc-ci/cc-ci-plan/launch.sh start
    else
      echo "workspace not staged yet — skipping loop start"
    fi
  '';
};
|
|
|
|
# Weekly recipe upgrade — runs /upgrade-all across every enrolled recipe. It
# opens recipe PRs that are verified by !testme and never merges them.
# This reboot-safe systemd timer/service pair replaces the boot-fragile
# busybox-crond-in-tmux setup from phase 5 §4. The service has no wantedBy on
# multi-user.target, so it is started only by its timer.
systemd.services.cc-ci-upgrade-all = {
  description = "cc-ci weekly /upgrade-all run (recipe upgrade survey + PRs, never merges)";

  wants = [ "network-online.target" ];
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];

  # oneshot: launch-upgrader.py spawns the cc-ci-upgrader tmux session and returns.
  serviceConfig.Type = "oneshot";
  serviceConfig.User = "loops";
  serviceConfig.Group = "users";
  serviceConfig.WorkingDirectory = "/srv/cc-ci";

  # Optional per-run overrides for backend/model (LOOP_BACKEND, LOOP_MODEL,
  # OPENCODE_SHARE, UPGRADER_ARGS, …). The leading "-" makes the file optional:
  # when it is absent the job keeps the claude/sonnet defaults. To run the
  # weekly job on e.g. opencode-go/glm-5.2, create the file containing
  #     LOOP_BACKEND=opencode
  #     LOOP_MODEL=opencode-go/glm-5.2
  # The file is read at each timer fire, so switching needs no rebuild. It
  # holds no secrets (the opencode-go API key lives in
  # ~/.local/share/opencode/auth.json, mode 600).
  serviceConfig.EnvironmentFile = "-/srv/cc-ci/upgrader.env";

  environment.HOME = "/home/loops";
  environment.CLAUDE_BIN = "/home/loops/.local/bin/claude";

  path = [
    pkgs.bash
    pkgs.tmux
    pkgs.git
    pkgs.python3
    pkgs.openssh
    pkgs.nettools
  ];

  script = ''
    # Make the standalone binaries under ~/.local/bin resolvable by name.
    export PATH="/home/loops/.local/bin:$PATH"
    # All launcher output (stdout and stderr) is appended to the cron log.
    python3 /srv/cc-ci/cc-ci-plan/launch-upgrader.py start >> /srv/cc-ci/.cc-ci-logs/upgrader-cron.log 2>&1
  '';
};
|
|
|
|
# Timer that fires the cc-ci-upgrade-all service once a week.
systemd.timers.cc-ci-upgrade-all = {
  description = "Weekly trigger for cc-ci-upgrade-all (Thursdays 22:00 America/New_York — Boston 10pm)";
  wantedBy = [ "timers.target" ];

  # Thursday 10pm Boston time. The timezone suffix in OnCalendar keeps the
  # schedule DST-aware (EDT → 02:00 UTC, EST → 03:00 UTC).
  timerConfig.OnCalendar = "Thu *-*-* 22:00:00 America/New_York";
  # If the box was down at the scheduled time, run once on next boot.
  timerConfig.Persistent = true;
};
|
|
}
|