Compare commits
11 Commits
d03fa5021f
...
renovate/m
| Author | SHA1 | Date | |
|---|---|---|---|
| 8b30257b65 | |||
| 1b73fda11f | |||
| 96ddfa6ec5 | |||
| 6c0b1c9281 | |||
| 4f2934411b | |||
| 54dbe0c667 | |||
| a30e60b557 | |||
| 2536e855e5 | |||
| dbe11dc8fa | |||
| b27f3e58ca | |||
| 5b3f2cf8f4 |
54
.gitea/workflows/ci.yml
Normal file
54
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,54 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
jobs:
|
||||
terraform-validate:
|
||||
name: Terraform fmt + validate
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: fmt check — 1-nixos-node
|
||||
run: terraform fmt -check -recursive
|
||||
working-directory: 1-nixos-node
|
||||
|
||||
- name: fmt check — 2-nomad-config
|
||||
run: terraform fmt -check -recursive
|
||||
working-directory: 2-nomad-config
|
||||
|
||||
- name: validate — 2-nomad-config (no backend)
|
||||
run: |
|
||||
terraform init -backend=false
|
||||
terraform validate
|
||||
working-directory: 2-nomad-config
|
||||
|
||||
nomad-validate:
|
||||
name: Nomad job spec validate
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Install Nomad CLI
|
||||
run: |
|
||||
curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
|
||||
. /etc/os-release
|
||||
echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com ${VERSION_CODENAME} main" | tee /etc/apt/sources.list.d/hashicorp.list
|
||||
apt-get update && apt-get install -y nomad
|
||||
|
||||
- name: Validate all job specs
|
||||
env:
|
||||
NOMAD_ADDR: http://jaglan-beta-m20.lan:4646
|
||||
run: |
|
||||
find 2-nomad-config -name '*.nomad.hcl' | while IFS= read -r f; do
|
||||
echo "==> $f"
|
||||
nomad job validate "$f"
|
||||
done
|
||||
@@ -49,6 +49,9 @@
|
||||
preferred_address_family = "ipv4";
|
||||
%{if cpu_total_compute != null ~}
|
||||
cpu_total_compute = ${cpu_total_compute};
|
||||
%{endif ~}
|
||||
%{if node_class != null ~}
|
||||
node_class = "${node_class}";
|
||||
%{endif ~}
|
||||
host_volume = {
|
||||
%{ for volume in host_volumes ~}
|
||||
@@ -61,6 +64,7 @@
|
||||
cni_path = "$${pkgs.cni-plugins}/bin";
|
||||
};
|
||||
plugin.docker.config.allow_privileged = true;
|
||||
plugin.docker.config.volumes.enabled = true;
|
||||
};
|
||||
extraPackages = with pkgs; [
|
||||
cni-plugins
|
||||
@@ -116,6 +120,17 @@
|
||||
# Ensure Docker daemon is available (Nomad enableDocker only configures Nomad, does not guarantee docker service)
|
||||
virtualisation.docker.enable = true;
|
||||
|
||||
%{if node_class == "latte-panda-n150" ~}
|
||||
# Enable Intel iGPU (N150 UHD Graphics) for OpenVINO / VA-API workloads running in Docker
|
||||
hardware.graphics = {
|
||||
enable = true;
|
||||
extraPackages = with pkgs; [
|
||||
intel-media-driver # VA-API (iHD)
|
||||
intel-compute-runtime # OpenCL / oneAPI
|
||||
];
|
||||
};
|
||||
|
||||
%{endif ~}
|
||||
# Proper systemd service definition for macvlan network creation
|
||||
systemd.services.docker-macvlan-network = {
|
||||
description = "Ensure macvlan Docker network exists";
|
||||
|
||||
@@ -21,6 +21,7 @@ variable "nodes" {
|
||||
bind_interface = string
|
||||
bootstrap = optional(bool, false) # Optional field for bootstrap nodes
|
||||
cpu_total_compute = optional(number, null) # Optional field for CPU total compute
|
||||
node_class = optional(string, null) # Optional Nomad node_class for scheduling constraints
|
||||
host_volumes = list(string)
|
||||
}))
|
||||
}
|
||||
@@ -32,6 +33,7 @@ locals {
|
||||
bind_interface = v.bind_interface
|
||||
bootstrap = v.bootstrap
|
||||
cpu_total_compute = v.cpu_total_compute
|
||||
node_class = v.node_class
|
||||
host_volumes = v.host_volumes
|
||||
})
|
||||
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -6,7 +6,7 @@ job "csi-smb" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "mcr.microsoft.com/k8s/csi/smb-csi:v1.7.0"
|
||||
image = "mcr.microsoft.com/k8s/csi/smb-csi:v1.17.0"
|
||||
args = [
|
||||
"--v=5",
|
||||
"--nodeid=${attr.unique.hostname}",
|
||||
|
||||
@@ -157,11 +157,6 @@ http:
|
||||
service: unraid
|
||||
middlewares:
|
||||
- auth
|
||||
frigate:
|
||||
rule: "Host(`frigate.othrayte.one`)"
|
||||
service: frigate
|
||||
middlewares:
|
||||
- auth
|
||||
kopia:
|
||||
rule: "Host(`kopia.othrayte.one`)"
|
||||
service: kopia
|
||||
@@ -199,10 +194,6 @@ http:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://betelgeuse-seven-unraid.lan:80"
|
||||
frigate:
|
||||
loadBalancer:
|
||||
servers:
|
||||
- url: "http://betelgeuse-seven-unraid.lan:5000"
|
||||
kopia:
|
||||
loadBalancer:
|
||||
servers:
|
||||
|
||||
66
2-nomad-config/act-runner.nomad.hcl
Normal file
66
2-nomad-config/act-runner.nomad.hcl
Normal file
@@ -0,0 +1,66 @@
|
||||
job "act-runner" {
|
||||
group "act-runner" {
|
||||
network {
|
||||
mode = "bridge"
|
||||
}
|
||||
|
||||
# Consul Connect upstream to Gitea so the runner can register and receive jobs
|
||||
service {
|
||||
name = "act-runner"
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "code-connect"
|
||||
local_bind_port = 3000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
task "act-runner" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "gitea/act_runner:latest"
|
||||
volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
|
||||
}
|
||||
|
||||
env = {
|
||||
GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
|
||||
CONFIG_FILE = "/secrets/runner-config.yml"
|
||||
}
|
||||
|
||||
# Required SOPS key:
|
||||
# act-runner.registration_token — runner registration token from Gitea
|
||||
# Admin → Settings → Actions → Runners → Create new runner
|
||||
template {
|
||||
data = <<EOF
|
||||
GITEA_RUNNER_REGISTRATION_TOKEN={{ with nomadVar "nomad/jobs/act-runner" }}{{ .registration_token }}{{ end }}
|
||||
EOF
|
||||
destination = "secrets/runner.env"
|
||||
env = true
|
||||
}
|
||||
|
||||
# Limit which images/labels the runner will accept so it doesn't pick up
|
||||
# unrelated workloads if more runners are added later.
|
||||
template {
|
||||
data = <<EOF
|
||||
runner:
|
||||
labels:
|
||||
- "ubuntu-latest:docker://node:20-bookworm"
|
||||
- "ubuntu-22.04:docker://node:20-bookworm"
|
||||
- "ubuntu-24.04:docker://node:20-bookworm"
|
||||
EOF
|
||||
destination = "secrets/runner-config.yml"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 256
|
||||
memory_max = 1024
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
10
2-nomad-config/act-runner.tf
Normal file
10
2-nomad-config/act-runner.tf
Normal file
@@ -0,0 +1,10 @@
|
||||
resource "nomad_job" "act_runner" {
|
||||
jobspec = file("act-runner.nomad.hcl")
|
||||
}
|
||||
|
||||
resource "nomad_variable" "act_runner" {
|
||||
path = "nomad/jobs/act-runner"
|
||||
items = {
|
||||
registration_token = data.sops_file.secrets.data["act-runner.registration_token"]
|
||||
}
|
||||
}
|
||||
216
2-nomad-config/frigate.nomad.hcl
Normal file
216
2-nomad-config/frigate.nomad.hcl
Normal file
@@ -0,0 +1,216 @@
|
||||
job "frigate" {
|
||||
# Pin to N150 LattePanda nodes - Intel UHD iGPU for OpenVINO-accelerated detection.
|
||||
# hardware.graphics (intel-compute-runtime) is deployed to these nodes via configuration.nix.
|
||||
constraint {
|
||||
attribute = "${node.class}"
|
||||
value = "latte-panda-n150"
|
||||
}
|
||||
|
||||
group "frigate" {
|
||||
count = 1
|
||||
|
||||
network {
|
||||
port "http" {
|
||||
to = 5000
|
||||
}
|
||||
}
|
||||
|
||||
# Prestart: restore Frigate's SQLite DB from the Litestream file replica on the CIFS share.
|
||||
# Runs to completion before the frigate task starts. Safe on first boot (-if-replica-exists
|
||||
# is a no-op when no replica exists yet).
|
||||
task "litestream-restore" {
|
||||
lifecycle {
|
||||
hook = "prestart"
|
||||
sidecar = false
|
||||
}
|
||||
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "litestream/litestream:0.5.9"
|
||||
command = "restore"
|
||||
args = ["-if-replica-exists", "-config", "/local/litestream.yml", "/alloc/data/frigate.db"]
|
||||
}
|
||||
|
||||
# Litestream config: replicate to /config/frigate.db.litestream/ on the CIFS share.
|
||||
# Litestream writes its own segment format - no SQLite advisory locking involved.
|
||||
# Frigate must be configured with database.path: /alloc/data/frigate.db in config.yml.
|
||||
template {
|
||||
data = <<EOH
|
||||
dbs:
|
||||
- path: /alloc/data/frigate.db
|
||||
replicas:
|
||||
- url: file:///config/frigate.db.litestream
|
||||
EOH
|
||||
destination = "local/litestream.yml"
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
volume = "unraid_appdata_frigate"
|
||||
destination = "/config"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 100
|
||||
memory = 64
|
||||
memory_max = 256
|
||||
}
|
||||
}
|
||||
|
||||
# Sidecar: continuously stream WAL changes from /alloc/data/frigate.db to the CIFS replica.
|
||||
# Runs alongside frigate for the lifetime of the allocation.
|
||||
task "litestream-replicate" {
|
||||
lifecycle {
|
||||
hook = "poststart"
|
||||
sidecar = true
|
||||
}
|
||||
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "litestream/litestream:0.5.9"
|
||||
command = "replicate"
|
||||
args = ["-config", "/local/litestream.yml"]
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOH
|
||||
dbs:
|
||||
- path: /alloc/data/frigate.db
|
||||
replicas:
|
||||
- url: file:///config/frigate.db.litestream
|
||||
EOH
|
||||
destination = "local/litestream.yml"
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
volume = "unraid_appdata_frigate"
|
||||
destination = "/config"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 100
|
||||
memory = 64
|
||||
memory_max = 256
|
||||
}
|
||||
}
|
||||
|
||||
task "frigate" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "ghcr.io/blakeblackshear/frigate:0.17.1"
|
||||
ports = ["http"]
|
||||
privileged = true
|
||||
|
||||
# Shared memory for inter-process frame buffers (frigate forks detector processes).
|
||||
shm_size = 268435456 # 256 MiB
|
||||
|
||||
# Large tmpfs for decoded frame cache - avoids wearing out any storage.
|
||||
mounts = [
|
||||
{
|
||||
type = "tmpfs"
|
||||
target = "/tmp/cache"
|
||||
readonly = false
|
||||
tmpfs_options = {
|
||||
size = 1000000000 # 10^9 bytes = 1 GB (about 0.93 GiB)
|
||||
}
|
||||
}
|
||||
]
|
||||
|
||||
# Intel iGPU render node - Frigate's bundled OpenVINO runtime auto-detects
|
||||
# GPU device and uses it for object detection without any extra env vars.
|
||||
# Requires hardware.graphics.enable = true on the NixOS node (N150 nodes).
|
||||
devices = [
|
||||
{
|
||||
host_path = "/dev/dri/renderD128"
|
||||
container_path = "/dev/dri/renderD128"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# RTSP password injected from Nomad variables (sourced from sops secrets).
|
||||
# Reference in config.yml as: {FRIGATE_RTSP_PASSWORD}
|
||||
template {
|
||||
data = <<EOH
|
||||
FRIGATE_RTSP_PASSWORD="{{ with nomadVar "nomad/jobs/frigate" }}{{ .rtsp_password }}{{ end }}"
|
||||
EOH
|
||||
destination = "secrets/frigate.env"
|
||||
env = true
|
||||
}
|
||||
|
||||
service {
|
||||
name = "frigate"
|
||||
port = "http"
|
||||
|
||||
tags = [
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.frigate.middlewares=auth@file",
|
||||
"traefik.http.routers.frigate-token.rule=Host(`n7gdph5cuh7bd1cakbq8s099rvrv3qhs-frigate.othrayte.one`)",
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/api/version"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "5s"
|
||||
}
|
||||
}
|
||||
|
||||
env {
|
||||
TZ = "Australia/Melbourne"
|
||||
}
|
||||
|
||||
# config.yml lives here (read from CIFS). SQLite DB is at /alloc/data/frigate.db
|
||||
# (local NVMe, managed by Litestream). Requires in config.yml:
|
||||
# database:
|
||||
# path: /alloc/data/frigate.db
|
||||
volume_mount {
|
||||
volume = "unraid_appdata_frigate"
|
||||
destination = "/config"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
# Recordings, clips, and exports.
|
||||
volume_mount {
|
||||
volume = "unraid_media_frigate"
|
||||
destination = "/media/frigate"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
resources {
|
||||
# GPU handles inference; CPU manages stream ingestion, motion detection, and recording.
|
||||
cpu = 2000
|
||||
memory = 2048
|
||||
}
|
||||
}
|
||||
|
||||
volume "unraid_appdata_frigate" {
|
||||
type = "csi"
|
||||
read_only = false
|
||||
source = "unraid_appdata_frigate"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
||||
mount_options {
|
||||
mount_flags = ["nobrl", "uid=0", "gid=0"]
|
||||
}
|
||||
}
|
||||
|
||||
volume "unraid_media_frigate" {
|
||||
type = "csi"
|
||||
read_only = false
|
||||
source = "unraid_media_frigate"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
||||
mount_options {
|
||||
mount_flags = ["nobrl", "uid=0", "gid=0"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
23
2-nomad-config/frigate.tf
Normal file
23
2-nomad-config/frigate.tf
Normal file
@@ -0,0 +1,23 @@
|
||||
|
||||
resource "nomad_job" "frigate" {
|
||||
jobspec = file("frigate.nomad.hcl")
|
||||
}
|
||||
|
||||
resource "nomad_variable" "frigate" {
|
||||
path = "nomad/jobs/frigate"
|
||||
items = {
|
||||
rtsp_password = data.sops_file.secrets.data["frigate.rtsp_password"]
|
||||
}
|
||||
}
|
||||
|
||||
module "appdata_frigate" {
|
||||
source = "./modules/appdata"
|
||||
name = "frigate"
|
||||
}
|
||||
|
||||
module "unraid_smb_frigate_media" {
|
||||
source = "./modules/unraid_smb"
|
||||
name = "frigate"
|
||||
share = "media"
|
||||
subDir = "frigate"
|
||||
}
|
||||
@@ -27,6 +27,8 @@ job "gitea" {
|
||||
tags = [
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.gitea.middlewares=auth@file",
|
||||
# Token subdomain — no auth middleware — used by act_runner step containers for git checkout
|
||||
"traefik.http.routers.gitea-token.rule=Host(`gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one`)",
|
||||
]
|
||||
|
||||
check {
|
||||
@@ -37,6 +39,17 @@ job "gitea" {
|
||||
}
|
||||
}
|
||||
|
||||
# Separate service for Consul Connect ingress (address_mode=alloc avoids hairpin NAT issue)
|
||||
service {
|
||||
name = "code-connect"
|
||||
port = "http"
|
||||
address_mode = "alloc"
|
||||
|
||||
connect {
|
||||
sidecar_service {}
|
||||
}
|
||||
}
|
||||
|
||||
task "gitea" {
|
||||
driver = "docker"
|
||||
|
||||
|
||||
89
2-nomad-config/ntfy.nomad.hcl
Normal file
89
2-nomad-config/ntfy.nomad.hcl
Normal file
@@ -0,0 +1,89 @@
|
||||
job "ntfy" {
|
||||
group "ntfy" {
|
||||
network {
|
||||
mode = "bridge"
|
||||
port "http" {
|
||||
to = 80
|
||||
}
|
||||
}
|
||||
|
||||
# Consul Connect sidecar with upstream to postgres
|
||||
service {
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "postgres"
|
||||
local_bind_port = 5432
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
service {
|
||||
name = "ntfy"
|
||||
port = "http"
|
||||
|
||||
tags = [
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.ntfy.middlewares=auth@file",
|
||||
# Token subdomain bypasses Authelia — ntfy's own token auth is sufficient for API access
|
||||
"traefik.http.routers.ntfy-token.rule=Host(`ntfy-2e30e5869ab6bfde4961012b48761a9b.othrayte.one`)",
|
||||
]
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/healthz"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
}
|
||||
|
||||
# Users and tokens are provisioned declaratively via auth-users / auth-tokens in server.yml.
|
||||
# ntfy reads and applies them on every startup — no poststart task, no race conditions.
|
||||
#
|
||||
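# Bcrypt hashes are not plaintext secrets and are hardcoded below. Unlike /etc/shadow (which is root-only), a committed hash can be attacked offline, so only commit hashes of strong, unique passwords.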
|
||||
# Generate with: docker run --rm -it binwiederhier/ntfy user hash
|
||||
# or: echo "mypassword" | docker run --rm -i binwiederhier/ntfy user hash
|
||||
# Required SOPS keys:
|
||||
# ntfy.database_pw — postgres password for the ntfy role
|
||||
task "ntfy" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "binwiederhier/ntfy:latest"
|
||||
ports = ["http"]
|
||||
command = "serve"
|
||||
volumes = [
|
||||
"local/server.yml:/etc/ntfy/server.yml",
|
||||
]
|
||||
}
|
||||
|
||||
env = {
|
||||
TZ = "Australia/Melbourne"
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOF
|
||||
base-url: "https://ntfy.othrayte.one"
|
||||
listen-http: ":80"
|
||||
database-url: "postgres://ntfy:{{ with nomadVar "nomad/jobs/ntfy" }}{{ .database_pw }}{{ end }}@localhost:5432/ntfy"
|
||||
auth-default-access: "deny-all"
|
||||
behind-proxy: true
|
||||
enable-login: true
|
||||
auth-users:
|
||||
- "admin:$2a$10$rLp4qagJnsA8Es5hQlISH.WrlzwMrXE2MBaEgz7zdd2lkAVu30lMy:admin"
|
||||
EOF
|
||||
destination = "local/server.yml"
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 50
|
||||
memory = 64
|
||||
memory_max = 128
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
21
2-nomad-config/ntfy.tf
Normal file
21
2-nomad-config/ntfy.tf
Normal file
@@ -0,0 +1,21 @@
|
||||
resource "nomad_job" "ntfy" {
|
||||
jobspec = file("ntfy.nomad.hcl")
|
||||
}
|
||||
|
||||
resource "nomad_variable" "ntfy" {
|
||||
path = "nomad/jobs/ntfy"
|
||||
items = {
|
||||
database_pw = data.sops_file.secrets.data["ntfy.database_pw"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "postgresql_role" "ntfy" {
|
||||
name = "ntfy"
|
||||
password = data.sops_file.secrets.data["ntfy.database_pw"]
|
||||
login = true
|
||||
}
|
||||
|
||||
resource "postgresql_database" "ntfy" {
|
||||
name = "ntfy"
|
||||
owner = postgresql_role.ntfy.name
|
||||
}
|
||||
116
2-nomad-config/openreader.nomad.hcl
Normal file
116
2-nomad-config/openreader.nomad.hcl
Normal file
@@ -0,0 +1,116 @@
|
||||
job "openreader" {
|
||||
group "openreader" {
|
||||
network {
|
||||
mode = "bridge"
|
||||
port "http" {
|
||||
to = 3003
|
||||
}
|
||||
}
|
||||
|
||||
# Consul Connect sidecar with upstream to postgres
|
||||
service {
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "postgres"
|
||||
local_bind_port = 5432
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
service {
|
||||
name = "openreader"
|
||||
port = "http"
|
||||
|
||||
tags = [
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.openreader.middlewares=auth@file",
|
||||
]
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
}
|
||||
|
||||
service {
|
||||
name = "openreader-api"
|
||||
port = "http"
|
||||
address_mode = "alloc" # Use allocation IP for Connect as the sidecar can't access the host's published port (hairpin/loopback NAT issue)
|
||||
|
||||
connect {
|
||||
sidecar_service {}
|
||||
}
|
||||
|
||||
check {
|
||||
type = "http"
|
||||
path = "/"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
}
|
||||
|
||||
task "openreader" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "ghcr.io/richardr1126/openreader:v2.1.2"
|
||||
ports = ["http"]
|
||||
}
|
||||
|
||||
env = {
|
||||
TZ = "Australia/Melbourne"
|
||||
|
||||
# Use embedded SeaweedFS for blob storage (data lives in /app/docstore/seaweedfs).
|
||||
# Port 8333 is not exposed; browser uploads/downloads fall back through the app API.
|
||||
USE_EMBEDDED_WEED_MINI = "true"
|
||||
S3_ENDPOINT = "http://localhost:8333"
|
||||
S3_FORCE_PATH_STYLE = "true"
|
||||
|
||||
# Auth is intentionally disabled (no BASE_URL / AUTH_SECRET set).
|
||||
# Access is controlled by the Authelia middleware on the Traefik router above.
|
||||
|
||||
# To enable server-side library import from an Unraid share, add a second CSI volume
|
||||
# mount for the share (e.g. unraid_media_books → /app/docstore/library:ro) and set:
|
||||
# IMPORT_LIBRARY_DIR = "/app/docstore/library"
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOF
|
||||
POSTGRES_URL=postgresql://openreader:{{ with nomadVar "nomad/jobs/openreader" }}{{ .database_pw }}{{ end }}@localhost:5432/openreader
|
||||
EOF
|
||||
destination = "secrets/openreader.env"
|
||||
env = true
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
volume = "unraid_appdata_openreader"
|
||||
destination = "/app/docstore"
|
||||
read_only = false
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 750
|
||||
memory_max = 1024
|
||||
}
|
||||
}
|
||||
|
||||
volume "unraid_appdata_openreader" {
|
||||
type = "csi"
|
||||
read_only = false
|
||||
source = "unraid_appdata_openreader"
|
||||
access_mode = "single-node-writer"
|
||||
attachment_mode = "file-system"
|
||||
|
||||
mount_options {
|
||||
mount_flags = ["uid=1000", "gid=1000"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
26
2-nomad-config/openreader.tf
Normal file
26
2-nomad-config/openreader.tf
Normal file
@@ -0,0 +1,26 @@
|
||||
resource "nomad_job" "openreader" {
|
||||
jobspec = file("openreader.nomad.hcl")
|
||||
}
|
||||
|
||||
resource "nomad_variable" "openreader" {
|
||||
path = "nomad/jobs/openreader"
|
||||
items = {
|
||||
database_pw = data.sops_file.secrets.data["openreader.database_pw"]
|
||||
}
|
||||
}
|
||||
|
||||
resource "postgresql_role" "openreader" {
|
||||
name = "openreader"
|
||||
password = data.sops_file.secrets.data["openreader.database_pw"]
|
||||
login = true
|
||||
}
|
||||
|
||||
resource "postgresql_database" "openreader" {
|
||||
name = "openreader"
|
||||
owner = postgresql_role.openreader.name
|
||||
}
|
||||
|
||||
module "appdata_openreader" {
|
||||
source = "./modules/appdata"
|
||||
name = "openreader"
|
||||
}
|
||||
67
2-nomad-config/renovate.nomad.hcl
Normal file
67
2-nomad-config/renovate.nomad.hcl
Normal file
@@ -0,0 +1,67 @@
|
||||
job "renovate" {
|
||||
type = "batch"
|
||||
|
||||
periodic {
|
||||
cron = "0 4 * * *" # Daily at 04:00 in the periodic block's time zone (no time_zone is set here; Nomad's documented default is UTC)
|
||||
prohibit_overlap = true
|
||||
}
|
||||
|
||||
group "renovate" {
|
||||
network {
|
||||
mode = "bridge"
|
||||
}
|
||||
|
||||
# Consul Connect sidecar with upstream to Gitea (service: code-connect, port 3000)
|
||||
service {
|
||||
name = "renovate"
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "code-connect"
|
||||
local_bind_port = 3000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
task "renovate" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "renovate/renovate:latest"
|
||||
}
|
||||
|
||||
env = {
|
||||
RENOVATE_PLATFORM = "gitea"
|
||||
RENOVATE_ENDPOINT = "http://localhost:3000"
|
||||
RENOVATE_GIT_URL = "endpoint"
|
||||
RENOVATE_REPOSITORIES = "othrayte/infra"
|
||||
RENOVATE_GIT_AUTHOR = "Renovate Bot <renovate@othrayte.one>"
|
||||
LOG_LEVEL = "debug"
|
||||
}
|
||||
|
||||
# Required SOPS key:
|
||||
# renovate.gitea_token — PAT for the renovate bot account in Gitea
|
||||
# Create a dedicated 'renovate' user in Gitea with these token scopes:
|
||||
# repo (read+write), user (read), issue (read+write), organization (read)
|
||||
# renovate.github_token — read-only GitHub PAT (any account) for
|
||||
# fetching changelogs and avoiding github.com API rate limits
|
||||
template {
|
||||
data = <<EOF
|
||||
RENOVATE_TOKEN={{ with nomadVar "nomad/jobs/renovate" }}{{ .gitea_token }}{{ end }}
|
||||
RENOVATE_GITHUB_COM_TOKEN={{ with nomadVar "nomad/jobs/renovate" }}{{ .github_token }}{{ end }}
|
||||
EOF
|
||||
destination = "secrets/renovate.env"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 500
|
||||
memory = 512
|
||||
memory_max = 1024
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
11
2-nomad-config/renovate.tf
Normal file
11
2-nomad-config/renovate.tf
Normal file
@@ -0,0 +1,11 @@
|
||||
resource "nomad_job" "renovate" {
|
||||
jobspec = file("renovate.nomad.hcl")
|
||||
}
|
||||
|
||||
resource "nomad_variable" "renovate" {
|
||||
path = "nomad/jobs/renovate"
|
||||
items = {
|
||||
gitea_token = data.sops_file.secrets.data["renovate.gitea_token"]
|
||||
github_token = data.sops_file.secrets.data["renovate.github_token"]
|
||||
}
|
||||
}
|
||||
@@ -43,6 +43,22 @@
|
||||
"prowlarr": {
|
||||
"database_pw": "ENC[AES256_GCM,data:FkW5LPoyn8bh0UfWcFq3og==,iv:SFq4Xsdz3FfCDyPjIaAmz5nsC/SPdFrR03GCr3KE/nw=,tag:PVYj7hSWDnfeE7igSXGBSA==,type:str]"
|
||||
},
|
||||
"frigate": {
|
||||
"rtsp_password": "ENC[AES256_GCM,data:8vq06/IkNOUgpHmf,iv:lj8buuIC0ub0YOUiOiaN6tokkIT2/+bBwFNz2QXmCd4=,tag:EMm/bIHdJSAtjYAlrNOCMw==,type:str]"
|
||||
},
|
||||
"openreader": {
|
||||
"database_pw": "ENC[AES256_GCM,data:2Ey9Ypb2Ked/LP/ApJhCqhKWuzognxVK7ku60nERp7I=,iv:KdLFD+fuNpYmPEU5G96SvFcQeZB0XlnOh/6uf7OfFqI=,tag:h7DQlqx5fxhiHuWyFd7svQ==,type:str]"
|
||||
},
|
||||
"ntfy": {
|
||||
"database_pw": "ENC[AES256_GCM,data:79c2KFs3tcbet1dSGnkSDlAeKLCZrh4aMYLXTROM8w==,iv:eZ4limyjl++nsvHUzPKy82hfLZEOc+XQYpO6Czo/8os=,tag:iX9SiEACQ5IM8f1jhZh5Qw==,type:str]"
|
||||
},
|
||||
"renovate": {
|
||||
"gitea_token": "ENC[AES256_GCM,data:/J3CDMgWZLe20oQ+ENKBMi8fs/+jgsARV7xihMq0OLmRk8C8ae/IXg==,iv:e7WYOanSOCZ/LhN6SKrH0VrR3xLPTTppOKpGpSl+oAc=,tag:XBAilRdK3jL7WtM+92Fsmg==,type:str]",
|
||||
"github_token": "ENC[AES256_GCM,data:omZpdsTV1aFgQ9PjIApITEyIRKk6Z8QyvD2Kp5tJnBWzFCm4v2lRAg==,iv:cKL7z+CSChzF9eZEcske2lbmx9KV6CrWw0tn7rmP/10=,tag:gon3Sc1d3ntNSbWwenHuOw==,type:str]"
|
||||
},
|
||||
"act-runner": {
|
||||
"registration_token": "ENC[AES256_GCM,data:RnDvcNh69lLlL/ms+sMPKhhc+ECtc5hUHSkAQZv8e77iTD/QPd356Q==,iv:sl2Aua8rTe6cKYQAUC7O4UyHajGy1LgG/ZNLTVP4SyE=,tag:JjdaQqZ4PaWjfoiVmBl6lQ==,type:str]"
|
||||
},
|
||||
"sops": {
|
||||
"age": [
|
||||
{
|
||||
@@ -50,8 +66,8 @@
|
||||
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSByUWM4ZDVVbGFrUGdMRHBX\nUFBmU3Nlc0RBSzhFK0tHNHpkQXUvUVdiZUZJCmpRN1lFdENpWW0rcThjVlVQNUl6\nWnlLU0RnQ3FZby81Ly8xTFBrek9nMncKLS0tIFQ4UTRNOC9CRmx4OFJWem1wckZz\nUDFTSzdWZldFK3FqcTNWTWRyNDhHQ2MKS811mR5xn7qiC/aVgPFYJ5c6Q3zxRfcr\nHcvxUvB01vNJKZpRg92vvKPkV6lQO3DXCT98OdfwiymlEOvYxg71Pg==\n-----END AGE ENCRYPTED FILE-----\n"
|
||||
}
|
||||
],
|
||||
"lastmodified": "2025-11-18T12:09:57Z",
|
||||
"mac": "ENC[AES256_GCM,data:zchzaSPjJVbUBzXTrRPGAtQE6xzqxMjTCXRABvwEvgNqLO7i2C/d04hI+pwOuLWyZXiUfqHlGt798ZnP5+MJi249FJmK08l8dAXxKSc+KSc4EfDoF+jWiwmoDEf5SypyHL9RPTeI7zoSJ0IxhtP3zZcD/7Q2PPG7cLDBKHdTJFU=,iv:+UiyEfMtAObZ/vvs2ZaIup9OxxP0uqSmo+h45PrymLk=,tag:f/D8417EGjuOfSdzZtir4Q==,type:str]",
|
||||
"lastmodified": "2026-04-18T07:41:42Z",
|
||||
"mac": "ENC[AES256_GCM,data:+HhhsiZXok4BZI05tG3p9veZaj51kELSQlWFYMSInv7bGfEadmOrJqCxaGrFcNkMmgVPx80jWQFrILfVLW5MUvEsHAhD4Vza2TSWeUq1HuL9DbMxsK2G9Y1fbthd12r/++dDcXxVnTUf/rCD70in/+g/zRObocAnUcFEcIqx1JE=,iv:pS+aj+47J4bYZYGlMVniQVTlLt4jtCLUT7oROJLUkZo=,tag:+lznxDhs2C3bcz5quxfHjA==,type:str]",
|
||||
"encrypted_regex": "^(.*)$",
|
||||
"version": "3.10.2"
|
||||
}
|
||||
|
||||
305
cicd-plan.md
Normal file
305
cicd-plan.md
Normal file
@@ -0,0 +1,305 @@
|
||||
# CI/CD Plan
|
||||
|
||||
## Overview
|
||||
|
||||
Three distinct problems, tackled in phases:
|
||||
|
||||
1. **Does the config parse/validate without errors?** (static, no credentials)
|
||||
2. **Does the new Docker image actually exist and start?** (pre-merge, needs Docker)
|
||||
3. **Does the running service stay healthy through a deployment?** (post-merge, needs Nomad canary)
|
||||
|
||||
The goal is: Renovate opens a PR → CI runs checks → you review → merge → canary starts automatically → you promote (or it auto-reverts).
|
||||
|
||||
---
|
||||
|
||||
## Phase 1 — Static Validation (proves the runner works)
|
||||
|
||||
No secrets needed. Runs on every PR.
|
||||
|
||||
### Infrastructure required
|
||||
|
||||
- `act_runner` Nomad job (see below) with a Gitea runner token
|
||||
- `.gitea/workflows/ci.yml` in this repo
|
||||
|
||||
### Checks
|
||||
|
||||
| Check | Command | Notes |
|
||||
| --------------------- | ----------------------------------------------------- | ------------------------------------------------------------------- |
|
||||
| HCL formatting | `terraform fmt -check -recursive` | Fails on whitespace/style drift |
|
||||
| Terraform syntax | `terraform init -backend=false && terraform validate` | Catches wrong resource types, missing required args, bad references |
|
||||
| Nomad job spec syntax | `nomad job validate <file>` | Catches Nomad-specific issues; needs `NOMAD_ADDR` + read token |
|
||||
|
||||
`terraform init -backend=false` followed by `terraform validate` is the most valuable check: it catches ~90% of real mistakes with zero secret exposure (`-backend=false` is an `init` flag, not a `validate` flag). The Nomad validate step requires a low-privilege read token — worth adding once the runner is trusted.
|
||||
|
||||
### Workflow sketch
|
||||
|
||||
```yaml
|
||||
# .gitea/workflows/ci.yml
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: hashicorp/setup-terraform@v3
|
||||
|
||||
- name: fmt check
|
||||
run: terraform fmt -check -recursive
|
||||
working-directory: 2-nomad-config
|
||||
|
||||
- name: init + validate (no backend)
|
||||
run: |
|
||||
terraform init -backend=false
|
||||
terraform validate
|
||||
working-directory: 2-nomad-config
|
||||
|
||||
- name: fmt check (nixos-node)
|
||||
run: terraform fmt -check -recursive
|
||||
working-directory: 1-nixos-node
|
||||
|
||||
nomad-validate:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Nomad CLI
|
||||
run: |
|
||||
curl -fsSL https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
|
||||
sudo apt-get update && sudo apt-get install -y nomad
|
||||
- name: validate all job specs
|
||||
env:
|
||||
NOMAD_ADDR: ${{ secrets.NOMAD_ADDR }}
|
||||
NOMAD_TOKEN: ${{ secrets.NOMAD_TOKEN }} # read-only policy sufficient
|
||||
run: |
|
||||
find 2-nomad-config -name '*.nomad.hcl' | while read f; do
|
||||
echo "==> $f"
|
||||
nomad job validate "$f"
|
||||
done
|
||||
```
|
||||
|
||||
### act_runner Nomad job
|
||||
|
||||
```hcl
|
||||
# act-runner.nomad.hcl
|
||||
job "act-runner" {
|
||||
group "act-runner" {
|
||||
network {
|
||||
mode = "bridge"
|
||||
}
|
||||
|
||||
# Connect upstream to Gitea
|
||||
service {
|
||||
name = "act-runner"
|
||||
connect {
|
||||
sidecar_service {
|
||||
proxy {
|
||||
upstreams {
|
||||
destination_name = "code-connect"
|
||||
local_bind_port = 3000
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
task "act-runner" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "gitea/act_runner:latest"
|
||||
volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
|
||||
}
|
||||
|
||||
env = {
|
||||
GITEA_INSTANCE_URL = "http://localhost:3000"
|
||||
}
|
||||
|
||||
template {
|
||||
data = <<EOF
|
||||
GITEA_RUNNER_REGISTRATION_TOKEN={{ with nomadVar "nomad/jobs/act-runner" }}{{ .registration_token }}{{ end }}
|
||||
EOF
|
||||
destination = "secrets/runner.env"
|
||||
env = true
|
||||
}
|
||||
|
||||
resources {
|
||||
cpu = 200
|
||||
memory = 256
|
||||
memory_max = 512
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Security note**: mounting `/var/run/docker.sock` gives the runner root-equivalent access to the host. Acceptable for a home server. Alternative: use `docker:dind` sidecar or Nomad's `exec` driver — more complex, lower risk.
|
||||
|
||||
---
|
||||
|
||||
## Phase 2 — Docker Image Validation (pre-merge)
|
||||
|
||||
Runs on PRs that touch `.nomad.hcl` files. Catches: tag typos, deleted images, registry outages.
|
||||
|
||||
Requires the `act_runner` to have Docker access (same socket mount as above).
|
||||
|
||||
```yaml
|
||||
image-pull:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
  with:
    # Full history is required: the `git diff origin/main...HEAD` in the next step
    # needs the merge base, which the default depth-1 checkout does not contain.
    fetch-depth: 0
|
||||
- name: Pull changed images
|
||||
run: |
|
||||
# Extract image tags added or changed vs main
|
||||
git fetch origin main
|
||||
git diff origin/main...HEAD -- '*.nomad.hcl' \
|
||||
| grep '^\+\s*image\s*=' \
|
||||
| grep -oP '"[^"]+:[^"]+"' \
|
||||
| tr -d '"' \
|
||||
| sort -u \
|
||||
| while read image; do
|
||||
echo "==> Pulling $image"
|
||||
docker pull "$image"
|
||||
done
|
||||
```
|
||||
|
||||
This intentionally only tests _changed_ images — no value in pulling everything on every PR.
|
||||
|
||||
---
|
||||
|
||||
## Phase 3 — Nomad Canary Deployments (post-merge gate)
|
||||
|
||||
Makes "merge" mean "start canary" rather than "go live". The old allocation keeps running until you promote.
|
||||
|
||||
### Which jobs get canaries
|
||||
|
||||
Most jobs already have Consul health checks — these can use `health_check = "checks"` for automatic revert gating.
|
||||
|
||||
| Job | Health check | Shared writable volume | Canary safe? |
|
||||
| ---------- | ------------- | ----------------------- | --------------------------------------------------------------------------------- |
|
||||
| ntfy | ✅ `/healthz` | no | ✅ yes |
|
||||
| gitea | ✅ `/` | ✅ `single-node-writer` | ⚠️ volume blocks 2nd alloc from mounting — needs `max_parallel=1` rolling instead |
|
||||
| jellyfin | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| immich | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| sonarr | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| prowlarr | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| deluge | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| glance | ✅ | no | ✅ yes |
|
||||
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
|
||||
| openreader | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||
| unifi | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
|
||||
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
|
||||
| renovate | batch job | n/a | n/a — no deployment model |
|
||||
| postgres | (data layer) | ✅ | ❌ never canary — single-writer DB |
|
||||
|
||||
### Canary stanza (stateless jobs with no volume conflict)
|
||||
|
||||
```hcl
|
||||
update {
|
||||
canary = 1
|
||||
auto_promote = false
|
||||
auto_revert = true
|
||||
health_check = "checks"
|
||||
healthy_deadline = "5m"
|
||||
min_healthy_time = "30s"
|
||||
}
|
||||
```
|
||||
|
||||
### Rolling stanza (jobs with single-node-writer volumes)
|
||||
|
||||
```hcl
|
||||
update {
|
||||
max_parallel = 1
|
||||
auto_revert = true
|
||||
health_check = "checks"
|
||||
healthy_deadline = "5m"
|
||||
min_healthy_time = "30s"
|
||||
}
|
||||
```
|
||||
|
||||
Rolling with `max_parallel=1` still gives auto-revert but doesn't attempt to run two allocations simultaneously — the old one stops before the new one mounts the volume.
|
||||
|
||||
---
|
||||
|
||||
## Phase 4 — Automated terraform apply + Deployment Promotion
|
||||
|
||||
Full CD: merge triggers apply, which creates the canary, CI then watches it and promotes or reverts.
|
||||
|
||||
### Flow
|
||||
|
||||
```
|
||||
PR merged to main
|
||||
│
|
||||
▼
|
||||
Gitea Actions (on: push, branches: [main])
|
||||
- terraform init
|
||||
- terraform apply -auto-approve
|
||||
│
|
||||
▼
|
||||
Nomad canary starts (old allocation still live)
|
||||
│
|
||||
▼
|
||||
CI polls `nomad deployment list` for the new deployment ID
|
||||
CI waits for canary allocation to reach "healthy" in Consul
|
||||
│ healthy within deadline
|
||||
▼
|
||||
CI runs: nomad deployment promote <id>
|
||||
│ or unhealthy → nomad deployment fail <id> (auto_revert fires)
|
||||
▼
|
||||
ntfy notification: "deployment promoted" or "deployment reverted"
|
||||
```
|
||||
|
||||
### Secrets required for full CD
|
||||
|
||||
| Secret | Used by | Risk level |
|
||||
| ---------------------- | ----------------------------------- | ---------------------------------- |
|
||||
| `NOMAD_ADDR` | validate + apply + promote | Low (internal LAN addr) |
|
||||
| `NOMAD_TOKEN` | terraform apply (write) + promote | **High** — grants full infra write |
|
||||
| `CLOUDFLARE_API_TOKEN` | terraform apply | **High** — DNS write |
|
||||
| `SOPS_AGE_KEY` | terraform apply (decrypt secrets) | **High** — decrypts all secrets |
|
||||
| `PG_PASSWORD` | terraform apply (postgres provider) | High |
|
||||
|
||||
Full CD requires all of these in Gitea Actions secrets. This is acceptable for a self-hosted, non-public Gitea instance where you control runner access — but it's the trust boundary to be deliberate about. A reasonable middle ground: **Phases 1–3 run fully automatically; Phase 4 (apply + promote) is scripted end to end but only starts after a manual trigger or approval step** (Gitea is said to support required reviewers on environments — verify this against your Gitea version, since the Gitea Actions documentation lists `jobs.<job_id>.environment` as unsupported workflow syntax).
|
||||
|
||||
### Promote/revert script sketch
|
||||
|
||||
```bash
|
||||
# In CI, after terraform apply completes:
|
||||
# Pass $JOB in with --arg: the jq program is single-quoted, so the shell never expands "$JOB" inside it.
DEPLOY_ID=$(nomad deployment list -json | jq -r --arg job "$JOB" '[.[] | select(.JobID == $job and .Status == "running")] | first | .ID')
|
||||
echo "Watching deployment $DEPLOY_ID..."
|
||||
|
||||
for i in $(seq 1 30); do
|
||||
STATUS=$(nomad deployment status -json "$DEPLOY_ID" | jq -r '.Status')
|
||||
HEALTHY=$(nomad deployment status -json "$DEPLOY_ID" | jq -r '.TaskGroups[].HealthyAllocs')
|
||||
echo "[$i] status=$STATUS healthy=$HEALTHY"
|
||||
if [ "$STATUS" = "successful" ]; then exit 0; fi
|
||||
if [ "$STATUS" = "failed" ]; then exit 1; fi
|
||||
# Check if canary is healthy enough to promote
|
||||
# "true" only when the deployment has at least one canary group and every such group is healthy.
# A rolling (canary-less) deployment yields "false", so the script never tries to promote it.
CANARY_HEALTHY=$(nomad deployment status -json "$DEPLOY_ID" | jq -r '[.TaskGroups[] | select(.DesiredCanaries > 0) | .HealthyAllocs >= .DesiredCanaries] | length > 0 and all')
|
||||
if [ "$CANARY_HEALTHY" = "true" ]; then
|
||||
nomad deployment promote "$DEPLOY_ID"
|
||||
exit 0
|
||||
fi
|
||||
sleep 10
|
||||
done
|
||||
nomad deployment fail "$DEPLOY_ID"
|
||||
exit 1
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Implementation Order
|
||||
|
||||
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
|
||||
- [x] **Phase 1b**: Add `terraform fmt` + `terraform init -backend=false` + `terraform validate` workflow — no secrets needed
|
||||
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
|
||||
- [ ] **Phase 2**: Add image pull validation step to the workflow
|
||||
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
|
||||
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, etc.)
|
||||
- [ ] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
|
||||
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
|
||||
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
|
||||
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes
|
||||
15
renovate.json
Normal file
15
renovate.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
|
||||
"extends": ["config:recommended"],
|
||||
"customManagers": [
|
||||
{
|
||||
"description": "Update Docker image tags in Nomad job files",
|
||||
"customType": "regex",
|
||||
"fileMatch": ["\\.nomad\\.hcl$"],
|
||||
"matchStrings": [
|
||||
"image\\s*=\\s*\"(?<depName>[^:\"]+):(?<currentValue>[^\"]+)\""
|
||||
],
|
||||
"datasourceTemplate": "docker"
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user