diff --git a/flake.nix b/flake.nix index 3c65eeb..937f997 100644 --- a/flake.nix +++ b/flake.nix @@ -39,6 +39,17 @@ ]; }; + # Hetzner Cloud host (cpx32, nbg1). Provisioned via `terraform/` + nixos-infect. + # Used in parallel with cc-ci (Incus) during transition; becomes canonical after cutover. + # See terraform/README.md for the full apply + Stage 2 (nixos-rebuild switch) workflow. + nixosConfigurations.cc-ci-hetzner = nixpkgs.lib.nixosSystem { + inherit system; + modules = [ + sops-nix.nixosModules.sops + ./nix/hosts/cc-ci-hetzner/configuration.nix + ]; + }; + devShells.${system} = { # Devshell for working on the harness/bridge locally (tools + lint toolchain). default = pkgs.mkShell { diff --git a/nix/hosts/cc-ci-hetzner/configuration.nix b/nix/hosts/cc-ci-hetzner/configuration.nix new file mode 100644 index 0000000..ef08c9a --- /dev/null +++ b/nix/hosts/cc-ci-hetzner/configuration.nix @@ -0,0 +1,68 @@ +# cc-ci on Hetzner Cloud — NixOS configuration. +# Extends the shared cc-ci modules (same services as the Incus host) with +# Hetzner-specific hardware + networking. Run in parallel with the Incus cc-ci +# host during transition; make this the canonical cc-ci after cutover (plan §7). +# +# To apply after `terraform apply` + nixos-infect: +# git clone --recursive https://git.autonomic.zone/recipe-maintainers/cc-ci.git /etc/cc-ci +# install -m600 <bootstrap-age-key-file> /var/lib/sops-nix/key.txt +# nixos-rebuild switch --flake /etc/cc-ci#cc-ci-hetzner +{ pkgs, lib, ... 
}: +{ + imports = [ + ./hardware.nix + ./networking.nix + ../../modules/packages.nix + ../../modules/secrets.nix + ../../modules/swarm.nix + ../../modules/docker-prune.nix + ../../modules/abra.nix + ../../modules/proxy.nix + ../../modules/drone.nix + ../../modules/drone-runner.nix + ../../modules/bridge.nix + ../../modules/dashboard.nix + ../../modules/backupbot.nix + ../../modules/harness.nix + ../../modules/warm-keycloak.nix + ../../modules/nightly-sweep.nix + ]; + + # Timezone (same as Incus host — see configuration.nix there for rationale). + time.timeZone = "UTC"; + environment.etc."timezone".text = "UTC\n"; + + # Tailscale — keeps the orchestrator→cc-ci access path unchanged (direct peer). + # On the Hetzner host the auth key is also seeded via /etc/ts-auth-key. + services.tailscale = { + enable = true; + authKeyFile = "/etc/ts-auth-key"; + extraUpFlags = [ "--hostname=cc-ci" ]; + }; + + # SSH — allow root login over tailscale (same as Incus host). + services.openssh = { + enable = true; + settings.PermitRootLogin = "yes"; + }; + + # Firewall — Hetzner has a public IP, so open 80+443 for Traefik. + # Tailscale interface is trusted (no port restrictions for orchestrator access). + # Plan §6: v1 keeps the sops wildcard cert; evaluate ACME-on-public-IP as follow-up. + networking.firewall = { + enable = true; + trustedInterfaces = [ "tailscale0" ]; + allowedTCPPorts = [ 22 80 443 ]; + }; + + environment.systemPackages = with pkgs; [ + curl + git + jq + openssh + ]; + + nix.settings.experimental-features = [ "nix-command" "flakes" ]; + + system.stateVersion = "24.11"; +} diff --git a/nix/hosts/cc-ci-hetzner/hardware.nix b/nix/hosts/cc-ci-hetzner/hardware.nix new file mode 100644 index 0000000..1c86506 --- /dev/null +++ b/nix/hosts/cc-ci-hetzner/hardware.nix @@ -0,0 +1,35 @@ +# Hardware configuration for cc-ci on Hetzner Cloud (cpx32: AMD 4 vCPU / 8 GB / x86_64). +# Generated by nixos-infect from a Debian 12 base image, then committed here. 
+# +# nixos-infect uses GRUB + EFI on Hetzner (not systemd-boot), with a qemu-guest profile +# because Hetzner Cloud uses KVM virtualisation. +# +# IMPORTANT: networking.nix (in this directory) contains the server's static public IP. +# When provisioning a new server via `terraform apply`, copy the fresh networking.nix +# from /etc/nixos/networking.nix on the new host and commit it here before rebuilding. +{ modulesPath, ... }: +{ + imports = [ (modulesPath + "/profiles/qemu-guest.nix") ]; + + boot.loader = { + efi.efiSysMountPoint = "/boot/efi"; + grub = { + efiSupport = true; + efiInstallAsRemovable = true; + device = "nodev"; + }; + }; + + fileSystems."/boot/efi" = { + device = "/dev/disk/by-uuid/90B1-5F80"; + fsType = "vfat"; + }; + + boot.initrd.availableKernelModules = [ "ata_piix" "uhci_hcd" "xen_blkfront" "vmw_pvscsi" ]; + boot.initrd.kernelModules = [ "nvme" ]; + + fileSystems."/" = { + device = "/dev/sda1"; + fsType = "ext4"; + }; +} diff --git a/nix/hosts/cc-ci-hetzner/networking.nix b/nix/hosts/cc-ci-hetzner/networking.nix new file mode 100644 index 0000000..8970567 --- /dev/null +++ b/nix/hosts/cc-ci-hetzner/networking.nix @@ -0,0 +1,41 @@ +# Hetzner static networking — generated by nixos-infect at provision time. +# +# This file is server-specific: the IP, gateway, and MAC address are tied to a +# particular Hetzner instance. When provisioning a new server: +# 1. After `terraform apply` + nixos-infect completes, run: +# ssh root@<server-ip> 'cat /etc/nixos/networking.nix' +# 2. Replace this file's contents with the output and commit. +# 3. Then: `nixos-rebuild switch --flake .#cc-ci-hetzner` +# +# Current instance: 168.119.126.100 (throwaway verification run 2026-05-31; +# this value will be updated when the production server is provisioned). +{ lib, ... 
}: { + networking = { + nameservers = [ + "185.12.64.1" + "185.12.64.2" + ]; + defaultGateway = "172.31.1.1"; + defaultGateway6 = { + address = ""; + interface = "eth0"; + }; + dhcpcd.enable = false; + usePredictableInterfaceNames = lib.mkForce false; + interfaces = { + eth0 = { + ipv4.addresses = [ + { address = "168.119.126.100"; prefixLength = 32; } + ]; + ipv6.addresses = [ + { address = "fe80::9000:8ff:fe03:c400"; prefixLength = 64; } + ]; + ipv4.routes = [{ address = "172.31.1.1"; prefixLength = 32; }]; + ipv6.routes = [{ address = ""; prefixLength = 128; }]; + }; + }; + }; + services.udev.extraRules = '' + ATTR{address}=="92:00:08:03:c4:00", NAME="eth0" + ''; +} diff --git a/terraform/.gitignore b/terraform/.gitignore new file mode 100644 index 0000000..1b2e69a --- /dev/null +++ b/terraform/.gitignore @@ -0,0 +1,19 @@ +# Terraform state — may contain secrets; NEVER commit +*.tfstate +*.tfstate.* +*.tfstate.backup + +# Variable files with secret values — NEVER commit +*.auto.tfvars +*.auto.tfvars.json +terraform.tfvars + +# Terraform working directory (downloaded providers, modules) +.terraform/ + +# Crash logs +crash.log +crash.*.log + +# NOTE: .terraform.lock.hcl (provider lock file) IS committed — it pins provider SHAs +# for reproducibility, analogous to flake.lock. diff --git a/terraform/.terraform.lock.hcl b/terraform/.terraform.lock.hcl new file mode 100644 index 0000000..e30bf87 --- /dev/null +++ b/terraform/.terraform.lock.hcl @@ -0,0 +1,23 @@ +# This file is maintained automatically by "tofu init". +# Manual edits may be lost in future updates. 
+ +provider "registry.opentofu.org/hetznercloud/hcloud" { + version = "1.64.0" + constraints = "1.64.0" + hashes = [ + "h1:FUkTfFrWlmv0JhsbjQvTk3zY7A2Q0LuoSs0PKEzaLpk=", + "zh:5bf7f8f429b1a8f485988d199f46295676a6cdf7d84ad11f1f4613faecfa89d5", + "zh:63b3d182474dd5afd0d5ab3f5f66228b752504436bcb2f4721bd6f1233d0f2ae", + "zh:6867da2d89d297b6760d80dde373e74df511bea72f7daccf6a944a9de4b4d4ed", + "zh:766fdcea1b03038a92414eafaa430b9ac0c57b36ce4c1573e6e291431659d528", + "zh:7f3186dfcae4028eac4f2c9c2c382b49c1fad0b63d0471b50748ee6817fbd8d2", + "zh:bb8a33b6ff9a4d3bce87628c49b08a4780e2c034762f40112058d96f5a4e52bd", + "zh:cc93751c7c90a37f180cf3e5439ed34f3154e60de5920a13d153d93954938239", + "zh:d6e2abf05a0eb8fe0544eb099960a4962db61532e7757016ccacbf0b83bcd1ae", + "zh:da9e3adedd8d33623aac4929fa8b1210f98d2931d5737c201da0dda992dd25ab", + "zh:dffc931aec4d7b0733690e115b1aabdf5c157b7d347a09a9d149ee6b7e9d8ce3", + "zh:e565dea4f28182099a271f794e3b781f069ea54976f5f05dbb79a1c2b6627459", + "zh:e79411287af28ccf6187bd418b7ea2ee217e642026392ddc8027bf3e3287fb80", + "zh:f5102d7141a04c193dffbb5cbc3f7e3588c41b87e11877d2e20d57ea5ef64123", + ] +} diff --git a/terraform/README.md b/terraform/README.md new file mode 100644 index 0000000..115cead --- /dev/null +++ b/terraform/README.md @@ -0,0 +1,100 @@ +# cc-ci Hetzner Cloud Terraform + +Provisions the cc-ci NixOS server on Hetzner Cloud (cpx32, 4 vCPU / 8 GB, x86 AMD, nbg1). +Stage 1 (Terraform): creates the server, runs nixos-infect to convert Debian 12 → NixOS. +Stage 2 (manual): clone the flake + apply the cc-ci config. 
+ +## Prerequisites (Class-A1 inputs — provide at apply time, NEVER commit) + +| Input | How to provide | +|---|---| +| `HCLOUD_TOKEN` | `export HCLOUD_TOKEN=<token>` in shell before `tofu apply` | +| SSH key pair | Generate once: `ssh-keygen -t ed25519 -f ~/.ssh/cc-ci-hetzner`; pass pubkey via `TF_VAR_ssh_public_key="$(cat ~/.ssh/cc-ci-hetzner.pub)"` | +| Bootstrap age key | Provision to `/var/lib/sops-nix/key.txt` on the server (Stage 2; see `docs/install.md`) | + +## Stage 1 — Provision server + nixos-infect + +```bash +cd terraform/ + +# Provide secrets via environment +export HCLOUD_TOKEN=<token> +export TF_VAR_ssh_public_key="$(cat ~/.ssh/cc-ci-hetzner.pub)" + +# Download providers (uses .terraform.lock.hcl — pinned, reproducible) +tofu init # or: terraform init + +# Preview +tofu plan + +# Apply — creates cpx32 server in nbg1, runs nixos-infect on first boot +tofu apply + +# Note the output IP: +# server_ipv4 = "x.x.x.x" +# ssh_connect = "ssh root@x.x.x.x" +``` + +nixos-infect runs on first boot and **reboots the server** into NixOS (~5 min total). +Wait for the reboot to complete, then verify: + +```bash +# Check NixOS is up: +ssh root@<server_ipv4> 'nixos-version' + +# Inspect infect log if needed: +ssh root@<server_ipv4> 'cat /var/log/nixos-infect.log' +``` + +After the reboot the server runs bare NixOS (infect-generated config). Proceed to Stage 2. + +## Stage 2 — Apply the cc-ci flake config + +Follows the D8 install flow documented in `docs/install.md` exactly: + +```bash +# On the Hetzner server (ssh root@<server_ipv4>): + +# 1. Clone the flake (--recursive brings cc-ci-secrets submodule) +git clone --recursive https://git.autonomic.zone/recipe-maintainers/cc-ci.git /etc/cc-ci +cd /etc/cc-ci + +# 2. Provision the bootstrap age key (the one irreducible out-of-band secret) +mkdir -p /var/lib/sops-nix +install -m 0600 /dev/stdin /var/lib/sops-nix/key.txt <<'EOF' +<paste the bootstrap age secret key here> +EOF + +# 3. Apply the cc-ci Hetzner host config +nixos-rebuild switch --flake .#cc-ci-hetzner + +# 4. 
Verify (all units green, reconcile oneshots converged) +systemctl --failed +``` + +## Variables + +| Variable | Default | Description | +|---|---|---| +| `server_type` | `cpx32` | x86 only. `cpx32`=AMD 4vCPU/8GB, `cx33`=Intel 4vCPU/8GB. Never `cax*` (ARM). | +| `location` | `nbg1` | Hetzner datacenter. | +| `image` | `debian-12` | Base image; nixos-infect converts it to NixOS. debian-12 preferred. | +| `server_name` | `cc-ci` | Hetzner server name. | +| `ssh_public_key` | (required) | Public key registered for root access. | + +Override via env: `TF_VAR_location=hel1 tofu apply`. + +## Teardown (throwaway verification run) + +```bash +tofu destroy # removes server + SSH key; billing stops immediately +``` + +## Notes + +- `.terraform.lock.hcl` is committed (pins provider SHAs — analogous to flake.lock). +- `*.tfstate`, `terraform.tfvars`, `*.auto.tfvars`, `.terraform/` are gitignored — never commit state or secrets. +- `cpx31` is retired in some Hetzner DCs; `cpx32` (equivalent AMD, 4 vCPU / 8 GB) is the default. + `cx33` (Intel, same spec) is also available. Both are x86_64 — compatible with the `x86_64-linux` flake. +- The Hetzner server has a public IPv4 — future: point `*.ci.commoninternet.net` A record directly + at it and drop the gateway/MagicDNS path (see plan §6 + `DECISIONS.md`). diff --git a/terraform/main.tf b/terraform/main.tf new file mode 100644 index 0000000..20f1b49 --- /dev/null +++ b/terraform/main.tf @@ -0,0 +1,32 @@ +resource "hcloud_ssh_key" "cc_ci" { + name = "cc-ci-deploy" + public_key = var.ssh_public_key + + labels = { + project = "cc-ci" + managed = "terraform" + } +} + +resource "hcloud_server" "cc_ci" { + name = var.server_name + server_type = var.server_type + image = var.image + location = var.location + ssh_keys = [hcloud_ssh_key.cc_ci.id] + + # Stage 1: cloud-init runs nixos-infect on first boot, converting the base image (Debian 12 by default) to NixOS, + # then reboots. See user-data.sh for the pinned infect revision. 
+ user_data = file("${path.module}/user-data.sh") + + public_net { + ipv4_enabled = true + ipv6_enabled = false + } + + labels = { + project = "cc-ci" + managed = "terraform" + stage = "infect" + } +} diff --git a/terraform/outputs.tf b/terraform/outputs.tf new file mode 100644 index 0000000..d46516d --- /dev/null +++ b/terraform/outputs.tf @@ -0,0 +1,19 @@ +output "server_ipv4" { + description = "Public IPv4 address of the cc-ci Hetzner server" + value = hcloud_server.cc_ci.ipv4_address +} + +output "server_id" { + description = "Hetzner internal server ID" + value = hcloud_server.cc_ci.id +} + +output "ssh_connect" { + description = "SSH command to connect as root" + value = "ssh root@${hcloud_server.cc_ci.ipv4_address}" +} + +output "nixos_infect_log" { + description = "Path on the server where nixos-infect logs are written" + value = "ssh root@${hcloud_server.cc_ci.ipv4_address} 'cat /var/log/nixos-infect.log'" +} diff --git a/terraform/user-data.sh b/terraform/user-data.sh new file mode 100644 index 0000000..b9dae20 --- /dev/null +++ b/terraform/user-data.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# Stage 1 — convert Debian 12 → NixOS via nixos-infect (pinned revision). +# +# nixos-infect generates /etc/nixos/{configuration.nix,hardware-configuration.nix,networking.nix} +# with Hetzner-correct bootloader (GRUB, not systemd-boot) and networking, then reboots into NixOS. +# +# After the reboot: +# - SSH as root is available (key registered with Hetzner survives infect) +# - Run Stage 2 per terraform/README.md: clone cc-ci + cc-ci-secrets, provision the bootstrap +# age key, then `nixos-rebuild switch --flake .#cc-ci-hetzner` +# +# Logs are written to /var/log/nixos-infect.log on the server for post-mortem inspection. +# The server reboots automatically at the end of infect — wait ~5 min before sshing in. +set -euo pipefail + +# Pinned nixos-infect revision (2026-03-22: "fixes errors for non efi systems"). 
+# Update deliberately; verify Hetzner still supported before bumping. +INFECT_SHA="40f62a680bb0e8f2f607d79abfaaecd99d59401c" + +export NIX_CHANNEL="nixos-24.11" +export PROVIDER="hetzner" # tells nixos-infect to use GRUB + Hetzner networking +export NIXOS_IMPORT="" # no extra imports at infect time; we apply the real flake in Stage 2 + +curl -fsSL "https://raw.githubusercontent.com/elitak/nixos-infect/${INFECT_SHA}/nixos-infect" \ + | bash -x 2>&1 | tee /var/log/nixos-infect.log diff --git a/terraform/variables.tf b/terraform/variables.tf new file mode 100644 index 0000000..3dced28 --- /dev/null +++ b/terraform/variables.tf @@ -0,0 +1,37 @@ +variable "location" { + description = "Hetzner datacenter (nbg1=Nuremberg, fsn1=Falkenstein, hel1=Helsinki, ash=Ashburn, hil=Hillsboro)" + type = string + default = "nbg1" +} + +variable "server_type" { + description = <<-EOT + Hetzner server type. Must be x86 — the flake is x86_64-linux; NEVER use cax* (ARM). + cpx32 = AMD 4 vCPU / 8 GB (default; replaces cpx31 which is retired in some DCs). + cx33 = Intel 4 vCPU / 8 GB (alternative). + EOT + type = string + default = "cpx32" + + validation { + condition = !startswith(var.server_type, "cax") + error_message = "ARM server types (cax*) are not supported — the cc-ci flake is x86_64-linux only." + } +} + +variable "image" { + description = "Base OS image. nixos-infect supports debian-12 and ubuntu-24.04. debian-12 preferred." + type = string + default = "debian-12" +} + +variable "ssh_public_key" { + description = "SSH public key content (the full line, e.g. 'ssh-ed25519 AAAA... comment'). Registered with Hetzner for root access post-infect. Pass via TF_VAR_ssh_public_key or terraform.tfvars (gitignored)." 
+ type = string +} + +variable "server_name" { + description = "Hetzner server name and initial NixOS hostname" + type = string + default = "cc-ci" +} diff --git a/terraform/versions.tf b/terraform/versions.tf new file mode 100644 index 0000000..875220b --- /dev/null +++ b/terraform/versions.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.0" + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = "1.64.0" + } + } +} + +# The hcloud provider reads HCLOUD_TOKEN from the environment automatically. +# Never put the token value in any .tf file or .tfvars — keep it in the shell +# environment (export HCLOUD_TOKEN=...). No hcloud_token variable is declared, so TF_VAR_hcloud_token has no effect. +provider "hcloud" {}