2 Commits

Author SHA1 Message Date
d03fa5021f Add renovate.json 2026-04-18 06:01:45 +00:00
d22ea96879 Move frigate into the cluster and enable GPU detector 2026-03-28 17:10:23 +11:00
20 changed files with 149 additions and 858 deletions

View File

@@ -1,54 +0,0 @@
# Static validation for the infrastructure repo: Terraform formatting and
# validation, plus `nomad job validate` for every Nomad job spec.
name: CI

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  terraform-validate:
    name: Terraform fmt + validate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: hashicorp/setup-terraform@v3

      - name: fmt check — 1-nixos-node
        run: terraform fmt -check -recursive
        working-directory: 1-nixos-node

      - name: fmt check — 2-nomad-config
        run: terraform fmt -check -recursive
        working-directory: 2-nomad-config

      # `init -backend=false` installs providers/modules without configuring
      # the state backend, so validation needs no backend credentials.
      - name: validate — 2-nomad-config (no backend)
        run: |
          terraform init -backend=false
          terraform validate
        working-directory: 2-nomad-config

  nomad-validate:
    name: Nomad job spec validate
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # No sudo: these commands write to /usr/share and /etc directly, so the
      # job container is expected to run as root.
      - name: Install Nomad CLI
        run: |
          curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
          . /etc/os-release
          echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com ${VERSION_CODENAME} main" | tee /etc/apt/sources.list.d/hashicorp.list
          apt-get update && apt-get install -y nomad

      - name: Validate all job specs
        # Process substitution and `read -d ''` below are bash features.
        shell: bash
        env:
          NOMAD_ADDR: http://jaglan-beta-m20.lan:4646
        run: |
          # Validate every spec and report all failures, instead of aborting
          # on the first invalid file. NUL-delimited paths with `read -r` keep
          # names containing spaces or backslashes intact.
          failed=0
          while IFS= read -r -d '' spec; do
            echo "==> $spec"
            nomad job validate "$spec" || failed=1
          done < <(find 2-nomad-config -name '*.nomad.hcl' -print0 | sort -z)
          exit "$failed"

View File

@@ -64,7 +64,6 @@
cni_path = "$${pkgs.cni-plugins}/bin";
};
plugin.docker.config.allow_privileged = true;
plugin.docker.config.volumes.enabled = true;
};
extraPackages = with pkgs; [
cni-plugins

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,66 +0,0 @@
# Gitea Actions runner (act_runner). Registers with the Gitea instance using a
# token from Nomad variables and runs workflow steps through the host Docker
# socket.
job "act-runner" {
  group "act-runner" {
    network {
      mode = "bridge"
    }

    # Consul Connect upstream to Gitea so the runner can register and receive jobs
    service {
      name = "act-runner"

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "code-connect"
              local_bind_port  = 3000
            }
          }
        }
      }
    }

    task "act-runner" {
      driver = "docker"

      config {
        image = "gitea/act_runner:latest"
        # The host Docker socket is mounted into the runner container; anything
        # the runner executes can therefore drive the host's Docker daemon.
        volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
      }

      env = {
        GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
        # Must match the destination of the runner-config template below.
        CONFIG_FILE = "/secrets/runner-config.yml"
      }

      # Required SOPS key:
      #   act-runner.registration_token — runner registration token from Gitea
      #   Admin → Settings → Actions → Runners → Create new runner
      template {
        data        = <<EOF
GITEA_RUNNER_REGISTRATION_TOKEN={{ with nomadVar "nomad/jobs/act-runner" }}{{ .registration_token }}{{ end }}
EOF
        destination = "secrets/runner.env"
        env         = true
      }

      # Limit which images/labels the runner will accept so it doesn't pick up
      # unrelated workloads if more runners are added later.
      # `labels` must be nested under `runner` for act_runner to apply it.
      template {
        data        = <<EOF
runner:
  labels:
    - "ubuntu-latest:docker://node:20-bookworm"
    - "ubuntu-22.04:docker://node:20-bookworm"
    - "ubuntu-24.04:docker://node:20-bookworm"
EOF
        destination = "secrets/runner-config.yml"
      }

      resources {
        cpu        = 200
        memory     = 256
        memory_max = 1024
      }
    }
  }
}

View File

@@ -1,10 +0,0 @@
# Submits the act-runner job to Nomad from its job spec file.
resource "nomad_job" "act_runner" {
  jobspec = file("act-runner.nomad.hcl")
}

# Nomad variable at nomad/jobs/act-runner holding the runner registration
# token, decrypted from the SOPS secrets file.
resource "nomad_variable" "act_runner" {
  path = "nomad/jobs/act-runner"

  items = {
    registration_token = data.sops_file.secrets.data["act-runner.registration_token"]
  }
}

View File

@@ -0,0 +1,97 @@
# CodeProject.AI server, exposed through Traefik and backed by a CSI volume
# for its downloaded modules and models.
job "codeproject-ai" {
  # Restrict placement to the N150 LattePanda nodes: they provide the Intel UHD
  # iGPU (OpenVINO) and dedicated CPU headroom. The node_class value comes from
  # the node.tf/configuration.nix templates in 1-nixos-node.
  constraint {
    attribute = "${node.class}"
    value     = "latte-panda-n150"
  }

  group "codeproject-ai" {
    count = 1

    network {
      port "http" {
        to = 32168
      }
    }

    task "codeproject-ai" {
      driver = "docker"

      config {
        image = "codeproject/ai-server:latest"
        ports = ["http"]

        # Expose the Intel iGPU render nodes to the container so CPAI can run
        # inference through OpenVINO on the N150's Intel UHD Graphics.
        # Needs hardware.graphics.enable = true in the NixOS node config
        # (added automatically when node_class = "latte-panda-n150").
        devices = [
          {
            host_path      = "/dev/dri"
            container_path = "/dev/dri"
          }
        ]
      }

      service {
        name = "codeproject-ai"
        port = "http"

        tags = [
          "traefik.enable=true",
          # User-facing UI at codeproject-ai.othrayte.one (host rule generated
          # by defaultRule), gated by the auth middleware.
          "traefik.http.routers.codeproject-ai.middlewares=auth@file",
          # Second router without the auth middleware: a bypass for external
          # Frigate access until Frigate moves into the cluster and can reach
          # the service via Consul DNS directly.
          "traefik.http.routers.codeproject-ai-token.rule=Host(`c3ll7nbevl5j4j8rcnfxnr95q48fuayz-codeproject-ai.othrayte.one`)",
        ]

        check {
          name     = "alive"
          type     = "http"
          method   = "GET"
          path     = "/v1/server/status/ping"
          port     = "http"
          interval = "10s"
          timeout  = "5s"
        }
      }

      env {
        TZ = "Australia/Melbourne"
      }

      # Persistent home for downloaded AI modules and their models. The first
      # start downloads ~1-2 GB of YOLOv5/MobileNet weights into this volume;
      # later restarts reuse the cached models.
      volume_mount {
        volume      = "unraid_appdata_codeproject_ai"
        destination = "/etc/codeproject/ai"
        read_only   = false
      }

      resources {
        # ~56% of the N150's 7200 MHz allocation: enough for concurrent object
        # detection requests without starving other jobs on the node.
        cpu = 4000

        # YOLOv5-6.2 (the default detection module) needs ~900 MB; the rest is
        # headroom for a second module (e.g. face detection) and the CPAI
        # process itself.
        memory = 3072
      }
    }

    # CSI volume consumed by the task's volume_mount above.
    volume "unraid_appdata_codeproject_ai" {
      type            = "csi"
      source          = "unraid_appdata_codeproject_ai"
      read_only       = false
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"

      mount_options {
        mount_flags = ["uid=1000", "gid=1000"]
      }
    }
  }
}

View File

@@ -0,0 +1,9 @@
# Submits the codeproject-ai job to Nomad from its job spec file.
resource "nomad_job" "codeproject_ai" {
  jobspec = file("codeproject-ai.nomad.hcl")
}

# Appdata storage for the job.
# NOTE(review): presumably this module provisions the
# "unraid_appdata_codeproject_ai" CSI volume the job mounts — confirm how the
# module maps the hyphenated name to the volume ID.
module "appdata_codeproject_ai" {
  source = "./modules/appdata"
  name   = "codeproject-ai"
}

View File

@@ -27,8 +27,6 @@ job "gitea" {
tags = [
"traefik.enable=true",
"traefik.http.routers.gitea.middlewares=auth@file",
# Token subdomain — no auth middleware — used by act_runner step containers for git checkout
"traefik.http.routers.gitea-token.rule=Host(`gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one`)",
]
check {
@@ -39,17 +37,6 @@ job "gitea" {
}
}
# Separate service for Consul Connect ingress (address_mode=alloc avoids hairpin NAT issue)
service {
name = "code-connect"
port = "http"
address_mode = "alloc"
connect {
sidecar_service {}
}
}
task "gitea" {
driver = "docker"

View File

@@ -1,89 +0,0 @@
# ntfy notification server: served through Traefik, with its database reached
# over a Consul Connect upstream to postgres.
job "ntfy" {
  group "ntfy" {
    network {
      mode = "bridge"

      port "http" {
        to = 80
      }
    }

    # Connect sidecar whose only purpose is the postgres upstream, bound to
    # localhost:5432 inside the allocation.
    service {
      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "postgres"
              local_bind_port  = 5432
            }
          }
        }
      }
    }

    service {
      name = "ntfy"
      port = "http"

      tags = [
        "traefik.enable=true",
        "traefik.http.routers.ntfy.middlewares=auth@file",
        # The token subdomain skips Authelia — ntfy's own token auth is
        # sufficient for API access.
        "traefik.http.routers.ntfy-token.rule=Host(`ntfy-2e30e5869ab6bfde4961012b48761a9b.othrayte.one`)",
      ]

      check {
        type     = "http"
        path     = "/healthz"
        interval = "10s"
        timeout  = "2s"
      }
    }

    # Users and tokens are declared in server.yml (auth-users / auth-tokens).
    # ntfy reads and applies them on every startup, so there is no poststart
    # task and no race condition.
    #
    # Bcrypt hashes are not secrets and are hardcoded below (same idea as
    # /etc/shadow — safe to commit).
    # Generate with: docker run --rm -it binwiederhier/ntfy user hash
    #            or: echo "mypassword" | docker run --rm -i binwiederhier/ntfy user hash
    #
    # Required SOPS keys:
    #   ntfy.database_pw — postgres password for the ntfy role
    task "ntfy" {
      driver = "docker"

      config {
        image   = "binwiederhier/ntfy:latest"
        command = "serve"
        ports   = ["http"]

        # Rendered by the template block below.
        volumes = [
          "local/server.yml:/etc/ntfy/server.yml",
        ]
      }

      env = {
        TZ = "Australia/Melbourne"
      }

      # NOTE(review): the password is inserted into database-url verbatim;
      # characters that are special in URLs would need percent-encoding.
      template {
        data        = <<EOF
base-url: "https://ntfy.othrayte.one"
listen-http: ":80"
database-url: "postgres://ntfy:{{ with nomadVar "nomad/jobs/ntfy" }}{{ .database_pw }}{{ end }}@localhost:5432/ntfy"
auth-default-access: "deny-all"
behind-proxy: true
enable-login: true
auth-users:
- "admin:$2a$10$rLp4qagJnsA8Es5hQlISH.WrlzwMrXE2MBaEgz7zdd2lkAVu30lMy:admin"
EOF
        destination = "local/server.yml"
      }

      resources {
        cpu        = 50
        memory     = 64
        memory_max = 128
      }
    }
  }
}

View File

@@ -1,21 +0,0 @@
# Submits the ntfy job to Nomad from its job spec file.
resource "nomad_job" "ntfy" {
  jobspec = file("ntfy.nomad.hcl")
}

# Nomad variable at nomad/jobs/ntfy carrying the database password.
resource "nomad_variable" "ntfy" {
  path = "nomad/jobs/ntfy"

  items = {
    database_pw = data.sops_file.secrets.data["ntfy.database_pw"]
  }
}

# Postgres login role for ntfy; uses the same SOPS secret as the Nomad
# variable above so both sides agree on the password.
resource "postgresql_role" "ntfy" {
  name     = "ntfy"
  login    = true
  password = data.sops_file.secrets.data["ntfy.database_pw"]
}

# Database owned by the ntfy role.
resource "postgresql_database" "ntfy" {
  name  = "ntfy"
  owner = postgresql_role.ntfy.name
}

View File

@@ -1,116 +0,0 @@
# OpenReader: served through Traefik behind the auth middleware, using postgres
# over a Consul Connect upstream and a CSI volume for its document store.
job "openreader" {
  group "openreader" {
    network {
      mode = "bridge"

      port "http" {
        to = 3003
      }
    }

    # Connect sidecar providing the postgres upstream on localhost:5432.
    service {
      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "postgres"
              local_bind_port  = 5432
            }
          }
        }
      }
    }

    # Traefik-facing service.
    service {
      name = "openreader"
      port = "http"

      tags = [
        "traefik.enable=true",
        "traefik.http.routers.openreader.middlewares=auth@file",
      ]

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "2s"
      }
    }

    # Connect-facing service for the same port.
    service {
      name = "openreader-api"
      port = "http"

      # Use the allocation IP for Connect: the sidecar can't access the host's
      # published port (hairpin/loopback NAT issue).
      address_mode = "alloc"

      connect {
        sidecar_service {}
      }

      check {
        type     = "http"
        path     = "/"
        interval = "10s"
        timeout  = "2s"
      }
    }

    task "openreader" {
      driver = "docker"

      config {
        image = "ghcr.io/richardr1126/openreader:v2.1.2"
        ports = ["http"]
      }

      env = {
        TZ = "Australia/Melbourne"

        # Blob storage is the embedded SeaweedFS (data lives in
        # /app/docstore/seaweedfs). Port 8333 is not exposed; browser
        # uploads/downloads fall back through the app API.
        USE_EMBEDDED_WEED_MINI = "true"
        S3_ENDPOINT            = "http://localhost:8333"
        S3_FORCE_PATH_STYLE    = "true"

        # Auth is intentionally disabled (no BASE_URL / AUTH_SECRET set);
        # access is controlled by the Authelia middleware on the Traefik
        # router above.

        # To enable server-side library import from an Unraid share, add a
        # second CSI volume mount for the share
        # (e.g. unraid_media_books → /app/docstore/library:ro) and set:
        # IMPORT_LIBRARY_DIR = "/app/docstore/library"
      }

      # Injects POSTGRES_URL as an environment variable, with the password
      # read from the nomad/jobs/openreader variable.
      template {
        data        = <<EOF
POSTGRES_URL=postgresql://openreader:{{ with nomadVar "nomad/jobs/openreader" }}{{ .database_pw }}{{ end }}@localhost:5432/openreader
EOF
        destination = "secrets/openreader.env"
        env         = true
      }

      volume_mount {
        volume      = "unraid_appdata_openreader"
        destination = "/app/docstore"
        read_only   = false
      }

      resources {
        cpu        = 200
        memory     = 750
        memory_max = 1024
      }
    }

    # CSI volume consumed by the task's volume_mount above.
    volume "unraid_appdata_openreader" {
      type            = "csi"
      source          = "unraid_appdata_openreader"
      read_only       = false
      access_mode     = "single-node-writer"
      attachment_mode = "file-system"

      mount_options {
        mount_flags = ["uid=1000", "gid=1000"]
      }
    }
  }
}

View File

@@ -1,26 +0,0 @@
# Submits the openreader job to Nomad from its job spec file.
resource "nomad_job" "openreader" {
  jobspec = file("openreader.nomad.hcl")
}

# Nomad variable at nomad/jobs/openreader carrying the database password.
resource "nomad_variable" "openreader" {
  path = "nomad/jobs/openreader"

  items = {
    database_pw = data.sops_file.secrets.data["openreader.database_pw"]
  }
}

# Postgres login role for openreader; shares the SOPS secret with the Nomad
# variable above.
resource "postgresql_role" "openreader" {
  name     = "openreader"
  login    = true
  password = data.sops_file.secrets.data["openreader.database_pw"]
}

# Database owned by the openreader role.
resource "postgresql_database" "openreader" {
  name  = "openreader"
  owner = postgresql_role.openreader.name
}

# Appdata storage for the job, via the shared appdata module.
module "appdata_openreader" {
  source = "./modules/appdata"
  name   = "openreader"
}

View File

@@ -1,67 +0,0 @@
# Renovate as a periodic batch job: runs against the Gitea repository through a
# Consul Connect upstream and opens dependency-update PRs.
job "renovate" {
  type = "batch"

  periodic {
    cron             = "0 4 * * *" # Daily at 4am
    prohibit_overlap = true
  }

  group "renovate" {
    network {
      mode = "bridge"
    }

    # Connect sidecar with an upstream to Gitea (service: code-connect),
    # bound to localhost:3000.
    service {
      name = "renovate"

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "code-connect"
              local_bind_port  = 3000
            }
          }
        }
      }
    }

    task "renovate" {
      driver = "docker"

      config {
        image = "renovate/renovate:latest"
      }

      env = {
        RENOVATE_PLATFORM = "gitea"
        # Points at the Connect upstream's local bind port declared above.
        RENOVATE_ENDPOINT     = "http://localhost:3000"
        RENOVATE_GIT_URL      = "endpoint"
        RENOVATE_REPOSITORIES = "othrayte/infra"
        RENOVATE_GIT_AUTHOR   = "Renovate Bot <renovate@othrayte.one>"
        LOG_LEVEL             = "debug"
      }

      # Required SOPS keys:
      #   renovate.gitea_token  — PAT for the renovate bot account in Gitea.
      #     Create a dedicated 'renovate' user in Gitea with these token scopes:
      #     repo (read+write), user (read), issue (read+write), organization (read)
      #   renovate.github_token — read-only GitHub PAT (any account) for
      #     fetching changelogs and avoiding github.com API rate limits
      template {
        data        = <<EOF
RENOVATE_TOKEN={{ with nomadVar "nomad/jobs/renovate" }}{{ .gitea_token }}{{ end }}
RENOVATE_GITHUB_COM_TOKEN={{ with nomadVar "nomad/jobs/renovate" }}{{ .github_token }}{{ end }}
EOF
        destination = "secrets/renovate.env"
        env         = true
      }

      resources {
        cpu        = 500
        memory     = 512
        memory_max = 1024
      }
    }
  }
}

View File

@@ -1,11 +0,0 @@
# Submits the renovate job to Nomad from its job spec file.
resource "nomad_job" "renovate" {
  jobspec = file("renovate.nomad.hcl")
}

# Nomad variable at nomad/jobs/renovate holding both tokens, decrypted from
# the SOPS secrets file.
resource "nomad_variable" "renovate" {
  path = "nomad/jobs/renovate"

  items = {
    gitea_token  = data.sops_file.secrets.data["renovate.gitea_token"]
    github_token = data.sops_file.secrets.data["renovate.github_token"]
  }
}

View File

@@ -46,19 +46,6 @@
"frigate": {
"rtsp_password": "ENC[AES256_GCM,data:8vq06/IkNOUgpHmf,iv:lj8buuIC0ub0YOUiOiaN6tokkIT2/+bBwFNz2QXmCd4=,tag:EMm/bIHdJSAtjYAlrNOCMw==,type:str]"
},
"openreader": {
"database_pw": "ENC[AES256_GCM,data:2Ey9Ypb2Ked/LP/ApJhCqhKWuzognxVK7ku60nERp7I=,iv:KdLFD+fuNpYmPEU5G96SvFcQeZB0XlnOh/6uf7OfFqI=,tag:h7DQlqx5fxhiHuWyFd7svQ==,type:str]"
},
"ntfy": {
"database_pw": "ENC[AES256_GCM,data:79c2KFs3tcbet1dSGnkSDlAeKLCZrh4aMYLXTROM8w==,iv:eZ4limyjl++nsvHUzPKy82hfLZEOc+XQYpO6Czo/8os=,tag:iX9SiEACQ5IM8f1jhZh5Qw==,type:str]"
},
"renovate": {
"gitea_token": "ENC[AES256_GCM,data:/J3CDMgWZLe20oQ+ENKBMi8fs/+jgsARV7xihMq0OLmRk8C8ae/IXg==,iv:e7WYOanSOCZ/LhN6SKrH0VrR3xLPTTppOKpGpSl+oAc=,tag:XBAilRdK3jL7WtM+92Fsmg==,type:str]",
"github_token": "ENC[AES256_GCM,data:omZpdsTV1aFgQ9PjIApITEyIRKk6Z8QyvD2Kp5tJnBWzFCm4v2lRAg==,iv:cKL7z+CSChzF9eZEcske2lbmx9KV6CrWw0tn7rmP/10=,tag:gon3Sc1d3ntNSbWwenHuOw==,type:str]"
},
"act-runner": {
"registration_token": "ENC[AES256_GCM,data:RnDvcNh69lLlL/ms+sMPKhhc+ECtc5hUHSkAQZv8e77iTD/QPd356Q==,iv:sl2Aua8rTe6cKYQAUC7O4UyHajGy1LgG/ZNLTVP4SyE=,tag:JjdaQqZ4PaWjfoiVmBl6lQ==,type:str]"
},
"sops": {
"age": [
{
@@ -66,8 +53,8 @@
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSByUWM4ZDVVbGFrUGdMRHBX\nUFBmU3Nlc0RBSzhFK0tHNHpkQXUvUVdiZUZJCmpRN1lFdENpWW0rcThjVlVQNUl6\nWnlLU0RnQ3FZby81Ly8xTFBrek9nMncKLS0tIFQ4UTRNOC9CRmx4OFJWem1wckZz\nUDFTSzdWZldFK3FqcTNWTWRyNDhHQ2MKS811mR5xn7qiC/aVgPFYJ5c6Q3zxRfcr\nHcvxUvB01vNJKZpRg92vvKPkV6lQO3DXCT98OdfwiymlEOvYxg71Pg==\n-----END AGE ENCRYPTED FILE-----\n"
}
],
"lastmodified": "2026-04-18T07:41:42Z",
"mac": "ENC[AES256_GCM,data:+HhhsiZXok4BZI05tG3p9veZaj51kELSQlWFYMSInv7bGfEadmOrJqCxaGrFcNkMmgVPx80jWQFrILfVLW5MUvEsHAhD4Vza2TSWeUq1HuL9DbMxsK2G9Y1fbthd12r/++dDcXxVnTUf/rCD70in/+g/zRObocAnUcFEcIqx1JE=,iv:pS+aj+47J4bYZYGlMVniQVTlLt4jtCLUT7oROJLUkZo=,tag:+lznxDhs2C3bcz5quxfHjA==,type:str]",
"lastmodified": "2026-03-28T02:45:25Z",
"mac": "ENC[AES256_GCM,data:LOzPiZlrS5Rqcsub1jfQ2yfKfTzySA19RjA566MdEY3/h1NmWrodfysPU1bZ9ClOALTFmv6JfmBVxrJ3Mq8T3YBT1qoVjkg7aRO8x6WCMRt11Lba2/SwjLDNFkE/EItvMu10RhzxIYLzZTf+X3zHiLVuSQ4aS9vn9IGTa8yWQ0k=,iv:x7GSxq7G6JpVtHi0nGHM8FDx9gDZI4bR+jJQremxKpc=,tag:DsjEhCOt+XPs2WVznxvYCg==,type:str]",
"encrypted_regex": "^(.*)$",
"version": "3.10.2"
}

View File

@@ -2,34 +2,23 @@
# Manual edits may be lost in future updates.
provider "registry.terraform.io/dmacvicar/libvirt" {
version = "0.9.7"
constraints = "0.9.7"
version = "0.7.6"
constraints = "0.7.6"
hashes = [
"h1:2UgC5zSaQlQ6YkFCgNqejAi29ILgLdECAZyTEf/5jGc=",
"h1:2v7BBcq3D+4X4z3rEyVG7RsO+6mBowOsdqgDMc7hUdo=",
"h1:3Ipfiie71ruKDALSpPZecG3+wLleuqDiUmVDx1raOcc=",
"h1:Cm89jub5fHuqba/fsNn/wztI9g9F72s2DHV2qGlLdZ0=",
"h1:IFXvQT9JqeiXlAbQRQlNHpZ6LHV1ZTYpAOUUajen8Fw=",
"h1:KQ6ygrhp6zZobRovn6T57ZCPJcBNStJ0LR+EHY3Dklo=",
"h1:PVdYUKN4ZrUzDVWrydJExt4Ry6xboAiXl2+lK7RGz44=",
"h1:Q5ZOIKPi8iASnC7HFDualeA08S6tg9r4V37WBW0YbUI=",
"h1:SD92jZbvN4HfRzvds0/Y+imu39y0c5NuwtPuIj64zPU=",
"h1:W0pqgeoCVxzdr/2aUMIG+TfLYZSfAYv84IPoUm/UYSk=",
"h1:aiOjaz5bsvczvEfc78PWZCsPDvQmwVMzIebVAzIuhGc=",
"h1:osjWHVgSitvr0ycgZQEVMVX9ly33lq5bMiMKAUh4JU0=",
"h1:vdMblF3sJkstxm2zXE1z/0Hije42plcpynyDmqqjuD4=",
"zh:0892a2581f460333a8c140cc53110a1675657200cbd2a49d9f4667bc238d0098",
"zh:17c06d5ca81bda6ff8bdcab401a45b8345bbcb7e6964af5f4aa3099880d9cb8e",
"zh:2a01ce583bae2fd412a49860ab13e2736255241f5710c5427d6fa274b8a11cf8",
"zh:2d333b5e5a8eaf9269d0844c08563a86b7941175c656b17bb36e0bdfaf974bd2",
"zh:3e0bc0f1852fc32bbebd506fef074123a49452cffb737de5795c1c3f8e6a4802",
"zh:4ca9364bd4f5f7183bb3aca41d0d99b70beb7a9e01b94cc940123882e96b7001",
"zh:a7eb62a7e5ac71ee72abe7a6d183dce88968969eecef710bbe2fb5da0c84ca23",
"zh:c5c6f41a7165da93eaffda938c3ed11999a6ffbf635897e38026e622d438e6ad",
"zh:d3b595c75d2237ab39b690550dd9d6fe21b8a9a716e7ff94348e988575aad5a1",
"zh:d5572d18c21f055f71783b2dbe998895007f4e0d6aab8540f11f6a012f3b32c2",
"zh:e78950fd984805cddad8432f2283451059445baab8014c3b165038ff0887328e",
"zh:e8157504e9324892fb9860912cf765b9d333d3f6ca9bc24faf411ce729b3cdd2",
"zh:f7f7d41de689de068891a608a2b1b4f312cc41171aeea783f193285ca15968ca",
"h1:mmbm4vTyC/DCGO4Ed/vbp5AKvy1gmVn/94fzB9VmR08=",
"zh:0bde54f6f658b20b620b875daf106b5b25b1bae4d15408d6c5f06d58360e254d",
"zh:0c97c6930015918b8a34b6d7a2b0c3d17a649c226fcd1874fcba5bbbc0f35972",
"zh:1bdd7aa0011c5f024a09a124836ee9bc8e71b05a6ece810c61824275fd3f695f",
"zh:2b0cc7c794e4caf395d84ffff0b380d17e4b3219a4696264271bfe5059450efe",
"zh:2f8633f7fe07f76c188836ed6f93321ec5fbf5c004bc7699e1741d9b21ed5f37",
"zh:5bf47eed286ce55ed10a5cf657de49a34ab21cc8677c56fef3aab69cdde41a27",
"zh:7dca790fc5fd1d42bc4bc7170be003a7093602026d0f95c8aab84ad551fdf2a4",
"zh:80476b68bc84e3d661d1390025f83879b88f9cdc836de9751af09bd5716089cb",
"zh:82f3e2f3f50176cd6041c8ba36e295cbda1b289ef52ab75b5eceb0f921f64f7b",
"zh:a179b165f3b9bb9a67ebbbf9d73157ded33f02d476b2f58906389dca03b653c9",
"zh:acae54a5d0616f22b3180ddd8e8aad39af664e604394fdacf1f7b337bca2d5b4",
"zh:da4406a2428a9a7e98272c032cb93431c3919253af2fe9934b532d26c0deab09",
"zh:f63dbd8e579ab5268d01ffab4503b8a8e736b70d1a04e4f271559ba8dd133dcd",
"zh:f85c1d9e51a94ecde137435c9d6b0fb7be590437ea8a725334d1577eebbc550c",
]
}

View File

@@ -2,7 +2,7 @@ terraform {
required_providers {
libvirt = {
source = "dmacvicar/libvirt"
version = "0.9.7"
version = "0.7.6"
}
}
}

View File

@@ -1,305 +0,0 @@
# CI/CD Plan
## Overview
Three distinct problems, tackled in phases:
1. **Does the config parse/validate without errors?** (static, no credentials)
2. **Does the new Docker image actually exist and start?** (pre-merge, needs Docker)
3. **Does the running service stay healthy through a deployment?** (post-merge, needs Nomad canary)
The goal is: Renovate opens a PR → CI runs checks → you review → merge → canary starts automatically → you promote (or it auto-reverts).
---
## Phase 1 — Static Validation (proves the runner works)
No secrets needed. Runs on every PR.
### Infrastructure required
- `act_runner` Nomad job (see below) with a Gitea runner token
- `.gitea/workflows/ci.yml` in this repo
### Checks
| Check | Command | Notes |
| --------------------- | ----------------------------------------------------- | ------------------------------------------------------------------- |
| HCL formatting | `terraform fmt -check -recursive` | Fails on whitespace/style drift |
| Terraform syntax | `terraform init -backend=false && terraform validate` | Catches wrong resource types, missing required args, bad references |
| Nomad job spec syntax | `nomad job validate <file>` | Catches Nomad-specific issues; needs `NOMAD_ADDR` + read token |
`terraform init -backend=false && terraform validate` is the most valuable check: it catches ~90% of real mistakes with zero secret exposure (`-backend=false` is an option of `terraform init`, not of `terraform validate`). The Nomad validate step requires a low-privilege read token — worth adding once the runner is trusted.
### Workflow sketch
```yaml
# .gitea/workflows/ci.yml
on: [pull_request]
jobs:
validate:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: hashicorp/setup-terraform@v3
- name: fmt check
run: terraform fmt -check -recursive
working-directory: 2-nomad-config
- name: init + validate (no backend)
run: |
terraform init -backend=false
terraform validate
working-directory: 2-nomad-config
- name: fmt check (nixos-node)
run: terraform fmt -check -recursive
working-directory: 1-nixos-node
nomad-validate:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Nomad CLI
run: |
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt-get update && sudo apt-get install -y nomad
- name: validate all job specs
env:
NOMAD_ADDR: ${{ secrets.NOMAD_ADDR }}
NOMAD_TOKEN: ${{ secrets.NOMAD_TOKEN }} # read-only policy sufficient
run: |
find 2-nomad-config -name '*.nomad.hcl' | while read f; do
echo "==> $f"
nomad job validate "$f"
done
```
### act_runner Nomad job
```hcl
# act-runner.nomad.hcl
job "act-runner" {
group "act-runner" {
network {
mode = "bridge"
}
# Connect upstream to Gitea
service {
name = "act-runner"
connect {
sidecar_service {
proxy {
upstreams {
destination_name = "code-connect"
local_bind_port = 3000
}
}
}
}
}
task "act-runner" {
driver = "docker"
config {
image = "gitea/act_runner:latest"
volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
}
env = {
GITEA_INSTANCE_URL = "http://localhost:3000"
}
template {
data = <<EOF
GITEA_RUNNER_REGISTRATION_TOKEN={{ with nomadVar "nomad/jobs/act-runner" }}{{ .registration_token }}{{ end }}
EOF
destination = "secrets/runner.env"
env = true
}
resources {
cpu = 200
memory = 256
memory_max = 512
}
}
}
}
```
**Security note**: mounting `/var/run/docker.sock` gives the runner root-equivalent access to the host. Acceptable for a home server. Alternative: use `docker:dind` sidecar or Nomad's `exec` driver — more complex, lower risk.
---
## Phase 2 — Docker Image Validation (pre-merge)
Runs on PRs that touch `.nomad.hcl` files. Catches: tag typos, deleted images, registry outages.
Requires the `act_runner` to have Docker access (same socket mount as above).
```yaml
image-pull:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Pull changed images
run: |
# Extract image tags added or changed vs main
git fetch origin main
git diff origin/main...HEAD -- '*.nomad.hcl' \
| grep '^\+\s*image\s*=' \
| grep -oP '"[^"]+:[^"]+"' \
| tr -d '"' \
| sort -u \
| while read image; do
echo "==> Pulling $image"
docker pull "$image"
done
```
This intentionally only tests _changed_ images — no value in pulling everything on every PR.
---
## Phase 3 — Nomad Canary Deployments (post-merge gate)
Makes "merge" mean "start canary" rather than "go live". The old allocation keeps running until you promote.
### Which jobs get canaries
Most jobs already have Consul health checks — these can use `health_check = "checks"` for automatic revert gating.
| Job | Health check | Shared writable volume | Canary safe? |
| ---------- | ------------- | ----------------------- | --------------------------------------------------------------------------------- |
| ntfy | ✅ `/healthz` | no | ✅ yes |
| gitea | ✅ `/` | ✅ `single-node-writer` | ⚠️ volume blocks 2nd alloc from mounting — needs `max_parallel=1` rolling instead |
| jellyfin | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| immich | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| sonarr | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| prowlarr | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| deluge | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| glance | ✅ | no | ✅ yes |
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
| openreader | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
| unifi | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
| renovate | batch job | n/a | n/a — no deployment model |
| postgres | (data layer) | ✅ | ❌ never canary — single-writer DB |
### Canary stanza (stateless jobs with no volume conflict)
```hcl
update {
canary = 1
auto_promote = false
auto_revert = true
health_check = "checks"
healthy_deadline = "5m"
min_healthy_time = "30s"
}
```
### Rolling stanza (jobs with single-node-writer volumes)
```hcl
update {
max_parallel = 1
auto_revert = true
health_check = "checks"
healthy_deadline = "5m"
min_healthy_time = "30s"
}
```
Rolling with `max_parallel=1` still gives auto-revert but doesn't attempt to run two allocations simultaneously — the old one stops before the new one mounts the volume.
---
## Phase 4 — Automated terraform apply + Deployment Promotion
Full CD: merge triggers apply, which creates the canary, CI then watches it and promotes or reverts.
### Flow
```
PR merged to main
Gitea Actions (on: push, branches: [main])
- terraform init
- terraform apply -auto-approve
Nomad canary starts (old allocation still live)
CI polls `nomad deployment list` for the new deployment ID
CI waits for canary allocation to reach "healthy" in Consul
│ healthy within deadline
CI runs: nomad deployment promote <id>
│ or unhealthy → nomad deployment fail <id> (auto_revert fires)
ntfy notification: "deployment promoted" or "deployment reverted"
```
### Secrets required for full CD
| Secret | Used by | Risk level |
| ---------------------- | ----------------------------------- | ---------------------------------- |
| `NOMAD_ADDR` | validate + apply + promote | Low (internal LAN addr) |
| `NOMAD_TOKEN` | terraform apply (write) + promote | **High** — grants full infra write |
| `CLOUDFLARE_API_TOKEN` | terraform apply | **High** — DNS write |
| `SOPS_AGE_KEY` | terraform apply (decrypt secrets) | **High** — decrypts all secrets |
| `PG_PASSWORD` | terraform apply (postgres provider) | High |
Full CD requires all of these in Gitea Actions secrets. This is acceptable for a self-hosted, non-public Gitea instance where you control runner access — but it's the trust boundary to be deliberate about. A reasonable middle ground: **Phase 1-3 are fully automated; Phase 4 (apply + promote) runs automatically but requires a manual re-trigger or approval step** (Gitea supports required reviewers on environments).
### Promote/revert script sketch
```bash
# In CI, after terraform apply completes. JOB must hold the Nomad job ID to watch.
# JOB is handed to jq via --arg: inside the single-quoted filter the shell
# would never expand "$JOB", so the select() would match nothing.
DEPLOY_ID=$(nomad deployment list -json | jq -r --arg job "$JOB" '[.[] | select(.JobID == $job and .Status == "running")] | first | .ID // empty')
if [ -z "$DEPLOY_ID" ]; then
  echo "No running deployment found for job $JOB" >&2
  exit 1
fi
echo "Watching deployment $DEPLOY_ID..."
for i in $(seq 1 30); do
  # Fetch the deployment once per poll so all fields come from one snapshot.
  DEPLOY_JSON=$(nomad deployment status -json "$DEPLOY_ID")
  STATUS=$(jq -r '.Status' <<<"$DEPLOY_JSON")
  HEALTHY=$(jq -r '[.TaskGroups[].HealthyAllocs | tostring] | join(",")' <<<"$DEPLOY_JSON")
  echo "[$i] status=$STATUS healthy=$HEALTHY"
  if [ "$STATUS" = "successful" ]; then exit 0; fi
  if [ "$STATUS" = "failed" ]; then exit 1; fi
  # Check if the canaries are healthy enough to promote: at least one task
  # group must want canaries, and each such group must have as many healthy
  # allocations as desired canaries. Comparing per group yields a single
  # true/false rather than one line per pair of groups.
  CANARY_HEALTHY=$(jq -r '[.TaskGroups[] | select(.DesiredCanaries > 0)] | length > 0 and all(.[]; .HealthyAllocs >= .DesiredCanaries)' <<<"$DEPLOY_JSON")
  if [ "$CANARY_HEALTHY" = "true" ]; then
    nomad deployment promote "$DEPLOY_ID"
    exit 0
  fi
  sleep 10
done
# Deadline exceeded: mark the deployment failed so auto_revert can fire.
nomad deployment fail "$DEPLOY_ID"
exit 1
```
---
## Implementation Order
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
- [x] **Phase 1b**: Add `terraform fmt` + `terraform init -backend=false && terraform validate` workflow — no secrets needed
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
- [ ] **Phase 2**: Add image pull validation step to the workflow
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, etc.)
- [ ] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes

View File

@@ -1,15 +1,3 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"extends": ["config:recommended"],
"customManagers": [
{
"description": "Update Docker image tags in Nomad job files",
"customType": "regex",
"fileMatch": ["\\.nomad\\.hcl$"],
"matchStrings": [
"image\\s*=\\s*\"(?<depName>[^:\"]+):(?<currentValue>[^\"]+)\""
],
"datasourceTemplate": "docker"
}
]
"$schema": "https://docs.renovatebot.com/renovate-schema.json"
}