terraform+nix: Hetzner orchestrator server (cpx11, nixos-infect, cc-ci-orchestrator-hetzner flake host)
Adds terraform/ to provision a Hetzner cpx11 (2 vCPU / 2 GB dedicated AMD / 40 GB NVMe) for the loops runtime, and a flake + NixOS host config to converge it — replacing the slow b1 Incus VM. Mirrors the cc-ci server terraform (same nixos-infect pin, same pattern). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
29
flake.nix
Normal file
29
flake.nix
Normal file
@ -0,0 +1,29 @@
|
||||
{
  description = "cc-ci-orchestrator — NixOS host for the cc-ci loops runtime (Builder/Adversary/Watchdog)";

  inputs = {
    # Same nixpkgs revision as the cc-ci server, so both hosts share one package set.
    nixpkgs.url = "github:NixOS/nixpkgs/50ab793786d9de88ee30ec4e4c24fb4236fc2674";

    # sops-nix at the cc-ci server's pin (buildGo125Module compatibility with nixpkgs 24.11);
    # it reuses the nixpkgs input above instead of bringing its own.
    sops-nix = {
      url = "github:Mic92/sops-nix/77c423a03b9b2b79709ea2cb63336312e78b72e2";
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = { nixpkgs, sops-nix, ... }: {
    # The Hetzner cpx11 host: nixos-infect generated hardware.nix plus the orchestrator config.
    # Provision with terraform/, then follow Stage 2 in terraform/README.md.
    nixosConfigurations.cc-ci-orchestrator-hetzner = nixpkgs.lib.nixosSystem {
      system = "x86_64-linux";
      modules = [
        sops-nix.nixosModules.sops
        ./nix/hosts/cc-ci-orchestrator-hetzner/hardware.nix
        ./nix/hosts/cc-ci-orchestrator-hetzner/configuration.nix
      ];
    };
  };
}
|
||||
126
nix/hosts/cc-ci-orchestrator-hetzner/configuration.nix
Normal file
126
nix/hosts/cc-ci-orchestrator-hetzner/configuration.nix
Normal file
@ -0,0 +1,126 @@
|
||||
# cc-ci-orchestrator-hetzner — NixOS config for the Hetzner loops runtime host.
|
||||
#
|
||||
# Purpose: run the cc-ci Builder/Adversary/Watchdog loops + orchestrator/assistant sessions
|
||||
# on a Hetzner cpx11 (2 shared AMD vCPUs / 2 GB RAM / 40 GB NVMe), replacing the slow b1 Incus VM.
|
||||
#
|
||||
# Provision with terraform/ then converge with: nixos-rebuild switch --flake .#cc-ci-orchestrator-hetzner
|
||||
# See terraform/README.md for the full Stage 2 procedure.
|
||||
{ config, pkgs, lib, ... }:
|
||||
{
|
||||
# hardware.nix is the nixos-infect generated hardware-configuration.nix (see README Stage 2a).
|
||||
|
||||
services.openssh = { enable = true; settings.PermitRootLogin = "yes"; };
|
||||
networking.useDHCP = true;
|
||||
networking.nameservers = [ "1.1.1.1" "8.8.8.8" ];
|
||||
networking.firewall = {
|
||||
enable = true;
|
||||
trustedInterfaces = [ "tailscale0" ];
|
||||
allowedTCPPorts = [ 22 ];
|
||||
};
|
||||
nix.settings.experimental-features = [ "nix-command" "flakes" ];
|
||||
system.stateVersion = "24.11";
|
||||
|
||||
# Tailscale — auth key at /etc/ts-auth-key (placed manually in Stage 2, not in git).
|
||||
services.tailscale = {
|
||||
enable = true;
|
||||
authKeyFile = "/etc/ts-auth-key";
|
||||
extraUpFlags = [ "--hostname=cc-ci-orchestrator" "--ssh" ];
|
||||
};
|
||||
|
||||
# 4 GB disk swap — claude session memory safety net (2 GB RAM is tight for 3+ sessions).
|
||||
swapDevices = [ { device = "/swapfile"; size = 4096; } ];
|
||||
|
||||
# nix-ld — lets the standalone Claude Code CLI (foreign dynamic ELF / Bun) run on NixOS.
|
||||
programs.nix-ld.enable = true;
|
||||
programs.nix-ld.libraries = with pkgs; [
|
||||
stdenv.cc.cc.lib
|
||||
zlib
|
||||
openssl
|
||||
curl
|
||||
glibc
|
||||
];
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
git tmux python3 jq curl cacert
|
||||
gnused gawk coreutils gnugrep findutils util-linux
|
||||
nettools openssh
|
||||
age sops # key management (same toolchain as cc-ci server)
|
||||
];
|
||||
|
||||
# loops user — claude sessions run as non-root (--dangerously-skip-permissions blocked for root).
|
||||
users.users.loops = {
|
||||
isNormalUser = true;
|
||||
home = "/home/loops";
|
||||
shell = pkgs.bash;
|
||||
extraGroups = [ "wheel" ];
|
||||
};
|
||||
security.sudo.wheelNeedsPassword = false;
|
||||
security.sudo.extraRules = [{
|
||||
users = [ "loops" ];
|
||||
commands = [{ command = "ALL"; options = [ "NOPASSWD" ]; }];
|
||||
}];
|
||||
|
||||
# Put ~/.local/bin on PATH — the claude-install unit checks for the CLI at
# $HOME/.local/bin/claude for the loops user.
# Uses the dedicated NixOS option (prepends "$HOME/.local/bin" during shell init) instead of
# force-overriding PATH for every user with a hard-coded /home/loops path and /usr/bin:/bin.
environment.localBinInPath = true;
|
||||
|
||||
# SSH config for the loops user — points to the cc-ci Hetzner server via tailnet.
# HostName is a placeholder; it is updated post-cutover to the Hetzner cc-ci tailnet IP.
system.activationScripts.loopsSshConfig =
  let
    # Seed config lives in the Nix store and is copied only when no config exists yet,
    # so manual customisation of /home/loops/.ssh/config is preserved.
    seedConfig = pkgs.writeText "loops-ssh-config" ''
      Host cc-ci
        HostName REPLACE_WITH_CC_CI_HETZNER_TAILNET_IP
        User root
        IdentityFile /home/loops/.ssh/cc-ci-root-ed25519
        IdentitiesOnly yes
        StrictHostKeyChecking accept-new
        ServerAliveInterval 30
    '';
  in
  {
    # The script chowns to loops:users, so the account and group must already exist.
    # Declared explicitly rather than relying on incidental activation-script ordering.
    deps = [ "users" "groups" ];
    text = ''
      mkdir -p /home/loops/.ssh && chown loops:users /home/loops/.ssh && chmod 700 /home/loops/.ssh
      # Only write if not already present (preserves manual customisation).
      if [ ! -f /home/loops/.ssh/config ]; then
        # install sets owner, group and mode in the same step as the copy.
        install -m 600 -o loops -g users ${seedConfig} /home/loops/.ssh/config
      fi
    '';
  };
|
||||
|
||||
# claude-install — fetch the standalone Claude Code CLI for the loops user if missing.
# Best-effort by design: a failed download is logged and the unit still exits 0.
systemd.services.claude-install = {
  description = "Install Claude Code CLI for loops user (idempotent)";
  wantedBy = [ "multi-user.target" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
  };
  environment = { HOME = "/home/loops"; };
  path = [ pkgs.curl pkgs.bash pkgs.coreutils pkgs.gnutar pkgs.gzip ];
  script = ''
    # Without pipefail a failed curl goes unnoticed: bash reads empty stdin, exits 0,
    # and the "install failed" message below would never be printed.
    set -o pipefail
    if [ ! -x "$HOME/.local/bin/claude" ]; then
      echo "installing Claude Code CLI for loops user..."
      curl -fsSL https://claude.ai/install.sh | bash || echo "install failed — retry on next activation"
    fi
  '';
};
|
||||
|
||||
# cc-ci-loops supervisor — defined but NOT enabled until workspace is staged.
# Enable by adding wantedBy after staging (Stage 2e) for reboot-resilience.
systemd.services.cc-ci-loops = {
  description = "cc-ci Builder/Adversary loops + watchdog (launch.sh start)";
  # wantedBy = [ "multi-user.target" ];  # uncomment after workspace is staged
  after = [ "network-online.target" "tailscaled.service" "claude-install.service" ];
  wants = [ "network-online.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    User = "loops";
    Group = "users";
    # Leading "-" makes a missing directory non-fatal, so the "not staged yet" branch
    # below is reachable instead of the unit dying with a CHDIR error first.
    WorkingDirectory = "-/srv/cc-ci";
  };
  environment = { RESUME_PHASE = "1"; HOME = "/home/loops"; };
  path = [ pkgs.bash pkgs.tmux pkgs.git pkgs.python3 pkgs.openssh pkgs.nettools ];
  script = ''
    # Explicit if/else: with `test && start || echo`, a failing launch.sh was reported
    # as "workspace not staged" and the unit still exited 0. Now a launch failure fails the unit.
    if [ -x /srv/cc-ci/cc-ci-plan/launch.sh ]; then
      /srv/cc-ci/cc-ci-plan/launch.sh start
    else
      echo "workspace not staged yet — skipping loop start"
    fi
  '';
};
|
||||
}
|
||||
21
nix/hosts/cc-ci-orchestrator-hetzner/hardware.nix
Normal file
21
nix/hosts/cc-ci-orchestrator-hetzner/hardware.nix
Normal file
@ -0,0 +1,21 @@
|
||||
# PLACEHOLDER — replace with the output of:
|
||||
# ssh root@<server_ipv4> 'cat /etc/nixos/hardware-configuration.nix'
|
||||
# after nixos-infect completes. See terraform/README.md Stage 2a.
|
||||
#
|
||||
# A typical Hetzner cpx11 nixos-infect hardware.nix looks like:
|
||||
#
|
||||
# { config, lib, pkgs, modulesPath, ... }: {
|
||||
# imports = [ (modulesPath + "/profiles/qemu-guest.nix") ];
|
||||
# boot.initrd.availableKernelModules = [ "ata_piix" "uhci_hcd" "virtio_pci" "virtio_scsi" "sd_mod" "sr_mod" ];
|
||||
# boot.initrd.kernelModules = [ ];
|
||||
# boot.kernelModules = [ "kvm-amd" ];
|
||||
# boot.extraModulePackages = [ ];
|
||||
# fileSystems."/" = { device = "/dev/sda1"; fsType = "ext4"; };
|
||||
# boot.loader.grub.enable = true;
|
||||
# boot.loader.grub.device = "/dev/sda";
|
||||
# swapDevices = [ ];
|
||||
# nixpkgs.hostPlatform = "x86_64-linux";
|
||||
# }
|
||||
#
|
||||
# Do not deploy with this placeholder — the `throw` below makes evaluation fail until it is replaced with the real hardware-configuration.nix.
|
||||
throw "Replace this placeholder with the real nixos-infect hardware-configuration.nix"
|
||||
123
terraform/README.md
Normal file
123
terraform/README.md
Normal file
@ -0,0 +1,123 @@
|
||||
# terraform — Hetzner cc-ci-orchestrator server
|
||||
|
||||
Provisions a Hetzner **cpx11** (2 shared AMD vCPUs / 2 GB RAM / 40 GB NVMe) for the cc-ci loops
|
||||
runtime (Builder + Adversary + Watchdog + Orchestrator sessions), replacing the slow b1 Incus VM.
|
||||
Uses nixos-infect to convert Debian → NixOS, then converges via the cc-ci-orchestrator flake.
|
||||
|
||||
---
|
||||
|
||||
## Stage 1 — provision the server
|
||||
|
||||
```bash
|
||||
# from /srv/cc-ci/terraform/
|
||||
source /srv/cc-ci/.testenv # loads HCLOUD_TOKEN
|
||||
export TF_VAR_ssh_public_key="$(cat /home/loops/.ssh/cc-ci-root-ed25519.pub)"
|
||||
|
||||
tofu init
|
||||
tofu plan
|
||||
tofu apply
|
||||
```
|
||||
|
||||
Note the `server_ipv4` output. nixos-infect runs on first boot — wait ~5 min, then:
|
||||
|
||||
```bash
|
||||
# confirm NixOS is up (may need to retry while infect reboots)
|
||||
ssh root@<server_ipv4> 'nixos-version'
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Stage 2 — converge to cc-ci-orchestrator-hetzner
|
||||
|
||||
### 2a. Capture hardware config
|
||||
|
||||
```bash
|
||||
ssh root@<server_ipv4> 'cat /etc/nixos/hardware-configuration.nix'
|
||||
```
|
||||
|
||||
Copy the output to `nix/hosts/cc-ci-orchestrator-hetzner/hardware.nix` in this repo, commit, push.
|
||||
|
||||
### 2b. Stage workspace on the new server
|
||||
|
||||
```bash
|
||||
ssh root@<server_ipv4>
|
||||
|
||||
# Install Tailscale auth key (from .testenv TS_AUTH_KEY)
|
||||
echo "<TS_AUTH_KEY>" > /etc/ts-auth-key && chmod 600 /etc/ts-auth-key
|
||||
|
||||
# Clone this repo as the loops user workspace
|
||||
git clone --recursive \
|
||||
https://autonomic-bot:<token>@git.autonomic.zone/recipe-maintainers/cc-ci-orchestrator.git \
|
||||
/srv/cc-ci-orch
|
||||
ln -sfn /srv/cc-ci-orch /srv/cc-ci # loops expect /srv/cc-ci
|
||||
|
||||
# Place master age key (copied from current VM .sops/master-age.txt)
|
||||
mkdir -p /srv/cc-ci/.sops
|
||||
scp loops@<old-vm>:/srv/cc-ci/.sops/master-age.txt /srv/cc-ci/.sops/master-age.txt
|
||||
chmod 600 /srv/cc-ci/.sops/master-age.txt
|
||||
```
|
||||
|
||||
### 2c. Run nixos-rebuild
|
||||
|
||||
```bash
|
||||
# on the new server
|
||||
cd /srv/cc-ci
|
||||
nixos-rebuild switch --flake .#cc-ci-orchestrator-hetzner
|
||||
```
|
||||
|
||||
### 2d. Stage credentials (not in git — placed once)
|
||||
|
||||
```bash
|
||||
# SSH key for reaching cc-ci
|
||||
mkdir -p /home/loops/.ssh && chmod 700 /home/loops/.ssh
|
||||
# scp cc-ci-root-ed25519 from current VM or copy content
|
||||
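chmod 600 /home/loops/.ssh/cc-ci-root-ed25519
# these steps run as root; the key must belong to loops or its ssh client cannot read it
chown loops:users /home/loops/.ssh /home/loops/.ssh/cc-ci-root-ed25519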
|
||||
|
||||
# .testenv (GITEA creds, etc.)
|
||||
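cp /path/to/.testenv /srv/cc-ci/.testenv && chmod 600 /srv/cc-ci/.testenv

# the workspace was cloned as root in 2b, but the loops run as the loops user — hand it over
chown -R loops:users /srv/cc-ci-orch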
|
||||
```
|
||||
|
||||
### 2e. Auth claude and start loops
|
||||
|
||||
```bash
|
||||
# as loops user on new server
|
||||
sudo -u loops /home/loops/.local/bin/claude auth login # device code — operator step
|
||||
|
||||
# start the loops
|
||||
cd /srv/cc-ci && sudo -u loops ./cc-ci-plan/launch.sh start
|
||||
```
|
||||
|
||||
### 2f. Verify
|
||||
|
||||
```bash
|
||||
tmux ls # should show cc-ci-builder, cc-ci-adv, cc-ci-watchdog
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cutover
|
||||
|
||||
Once the new server is running and the loops are verified:
|
||||
|
||||
1. Update the `Host cc-ci` entry in the current VM's `/home/loops/.ssh/config` if needed
|
||||
2. Stop the old Incus VM (or just leave it idle — it costs nothing in disk)
|
||||
|
||||
---
|
||||
|
||||
## Variables
|
||||
|
||||
| Variable | Default | Notes |
|
||||
|---|---|---|
|
||||
| `location` | `nbg1` | Nuremberg |
|
||||
| `server_type` | `cpx11` | 2 shared AMD vCPUs / 2 GB RAM. Upgrade to `cpx21` (3 vCPU / 4 GB) if sessions OOM. |
|
||||
| `image` | `debian-12` | nixos-infect base |
|
||||
| `server_name` | `cc-ci-orchestrator` | |
|
||||
| `ssh_public_key` | required | Pass via `TF_VAR_ssh_public_key` |
|
||||
|
||||
---
|
||||
|
||||
## State
|
||||
|
||||
`terraform.tfstate` and `terraform.tfstate.backup` are gitignored. Keep the state file locally or
|
||||
in a remote backend — losing it means `tofu destroy` can't find the server (use `tofu import` to
|
||||
recover, or delete directly via the Hetzner console).
|
||||
32
terraform/main.tf
Normal file
32
terraform/main.tf
Normal file
@ -0,0 +1,32 @@
|
||||
resource "hcloud_ssh_key" "cc_ci_orch" {
|
||||
name = "cc-ci-orchestrator-deploy"
|
||||
public_key = var.ssh_public_key
|
||||
|
||||
labels = {
|
||||
project = "cc-ci-orchestrator"
|
||||
managed = "terraform"
|
||||
}
|
||||
}
|
||||
|
||||
resource "hcloud_server" "cc_ci_orch" {
|
||||
name = var.server_name
|
||||
server_type = var.server_type
|
||||
image = var.image
|
||||
location = var.location
|
||||
ssh_keys = [hcloud_ssh_key.cc_ci_orch.id]
|
||||
|
||||
# Stage 1: cloud-init runs nixos-infect on first boot, converting Debian to NixOS, then reboots.
|
||||
# Wait ~5 min after apply, then SSH in and run Stage 2 per README.md.
|
||||
user_data = file("${path.module}/user-data.sh")
|
||||
|
||||
public_net {
|
||||
ipv4_enabled = true
|
||||
ipv6_enabled = false
|
||||
}
|
||||
|
||||
labels = {
|
||||
project = "cc-ci-orchestrator"
|
||||
managed = "terraform"
|
||||
stage = "infect"
|
||||
}
|
||||
}
|
||||
19
terraform/outputs.tf
Normal file
19
terraform/outputs.tf
Normal file
@ -0,0 +1,19 @@
|
||||
output "server_ipv4" {
|
||||
description = "Public IPv4 address of the cc-ci-orchestrator Hetzner server"
|
||||
value = hcloud_server.cc_ci_orch.ipv4_address
|
||||
}
|
||||
|
||||
output "server_id" {
|
||||
description = "Hetzner internal server ID"
|
||||
value = hcloud_server.cc_ci_orch.id
|
||||
}
|
||||
|
||||
output "ssh_connect" {
|
||||
description = "SSH command to connect as root (after nixos-infect)"
|
||||
value = "ssh root@${hcloud_server.cc_ci_orch.ipv4_address}"
|
||||
}
|
||||
|
||||
output "nixos_infect_log" {
|
||||
description = "Check infect progress"
|
||||
value = "ssh root@${hcloud_server.cc_ci_orch.ipv4_address} 'cat /var/log/nixos-infect.log'"
|
||||
}
|
||||
20
terraform/user-data.sh
Normal file
20
terraform/user-data.sh
Normal file
@ -0,0 +1,20 @@
|
||||
#!/usr/bin/env bash
# Stage 1 of provisioning: turn the freshly booted Debian 12 image into NixOS by running
# a pinned revision of nixos-infect.
#
# nixos-infect writes /etc/nixos/{configuration.nix,hardware-configuration.nix,networking.nix}
# with a Hetzner-appropriate bootloader (GRUB) and network setup, then reboots into NixOS.
#
# Once the machine is back, root SSH works; continue with Stage 2 in terraform/README.md.
# The full trace of the run is kept in /var/log/nixos-infect.log.

set -euo pipefail

# Pinned to the same nixos-infect revision as the cc-ci server terraform (2026-03-22).
readonly INFECT_SHA="40f62a680bb0e8f2f607d79abfaaecd99d59401c"
readonly INFECT_URL="https://raw.githubusercontent.com/elitak/nixos-infect/${INFECT_SHA}/nixos-infect"
readonly INFECT_LOG="/var/log/nixos-infect.log"

# Settings nixos-infect reads from its environment.
export NIX_CHANNEL="nixos-24.11" PROVIDER="hetzner" NIXOS_IMPORT=""

# Trace the run (-x) and mirror stdout+stderr into the log file.
curl -fsSL "${INFECT_URL}" | bash -x 2>&1 | tee "${INFECT_LOG}"
|
||||
38
terraform/variables.tf
Normal file
38
terraform/variables.tf
Normal file
@ -0,0 +1,38 @@
|
||||
variable "location" {
|
||||
description = "Hetzner datacenter (nbg1=Nuremberg, fsn1=Falkenstein, hel1=Helsinki)"
|
||||
type = string
|
||||
default = "nbg1"
|
||||
}
|
||||
|
||||
# Server size for the orchestrator. CPX and CX are Hetzner's shared-vCPU lines (AMD and Intel
# respectively); only CCX types have dedicated vCPUs. ARM (cax*) is rejected by the validation.
variable "server_type" {
  description = <<-EOT
    Hetzner server type. Must be x86 — the flake is x86_64-linux; NEVER use cax* (ARM).
    cpx11 = AMD 2 vCPU / 2 GB (default; shared vCPU, NVMe — the orchestrator loops runtime).
    cpx21 = AMD 3 vCPU / 4 GB (upgrade if claude sessions OOM under cpx11).
    cx22 = Intel 2 vCPU / 4 GB (shared vCPU, cheaper alternative with more RAM).
  EOT
  type        = string
  default     = "cpx11"

  validation {
    condition     = !startswith(var.server_type, "cax")
    error_message = "ARM server types (cax*) are not supported — the flake is x86_64-linux only."
  }
}
|
||||
|
||||
variable "image" {
|
||||
description = "Base OS image. nixos-infect supports debian-12 and ubuntu-24.04. debian-12 preferred."
|
||||
type = string
|
||||
default = "debian-12"
|
||||
}
|
||||
|
||||
variable "ssh_public_key" {
|
||||
description = "SSH public key content (the full line). Registered with Hetzner for root access post-infect. Pass via TF_VAR_ssh_public_key."
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "server_name" {
|
||||
description = "Hetzner server name and initial NixOS hostname"
|
||||
type = string
|
||||
default = "cc-ci-orchestrator"
|
||||
}
|
||||
14
terraform/versions.tf
Normal file
14
terraform/versions.tf
Normal file
@ -0,0 +1,14 @@
|
||||
terraform {
  # variables.tf calls startswith(), which only exists from Terraform 1.3 onwards
  # (every OpenTofu release satisfies this constraint).
  required_version = ">= 1.3"

  required_providers {
    hcloud = {
      source  = "hetznercloud/hcloud"
      version = "1.64.0"
    }
  }
}
|
||||
|
||||
# The hcloud provider reads HCLOUD_TOKEN from the environment automatically.
|
||||
# Never put the token value in any .tf file or .tfvars — keep it in the shell
|
||||
# environment (export HCLOUD_TOKEN=...). No `hcloud_token` variable is declared here, so TF_VAR_hcloud_token would be ignored.
|
||||
provider "hcloud" {}
|
||||
Reference in New Issue
Block a user