Compare commits
4 Commits
8bd15702c8
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| e695485353 | |||
| 8e32d00d90 | |||
| 1b73fda11f | |||
| 96ddfa6ec5 |
90
.gitea/workflows/ci.yml
Normal file
90
.gitea/workflows/ci.yml
Normal file
@@ -0,0 +1,90 @@
|
||||
# CI for the infrastructure repo:
#   * terraform-validate — `terraform fmt -check` on both stacks, plus
#     `terraform validate` on 2-nomad-config
#   * nomad-validate     — `nomad job validate` on every *.nomad.hcl file
#   * image-pull         — on pull requests, pull any image references the PR adds
name: CI

on:
  pull_request:
  push:
    branches:
      - main

jobs:
  terraform-validate:
    name: Terraform fmt + validate
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - uses: hashicorp/setup-terraform@v3

      - name: fmt check — 1-nixos-node
        run: terraform fmt -check -recursive
        working-directory: 1-nixos-node

      - name: fmt check — 2-nomad-config
        run: terraform fmt -check -recursive
        working-directory: 2-nomad-config

      # `init -backend=false` installs providers without configuring the
      # state backend, so validation runs without backend credentials.
      - name: validate — 2-nomad-config (no backend)
        run: |
          terraform init -backend=false
          terraform validate
        working-directory: 2-nomad-config

  nomad-validate:
    name: Nomad job spec validate
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      # Adds the HashiCorp apt repository for the runner image's release
      # codename (read from /etc/os-release) and installs the nomad package.
      - name: Install Nomad CLI
        run: |
          curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
          . /etc/os-release
          echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com ${VERSION_CODENAME} main" | tee /etc/apt/sources.list.d/hashicorp.list
          apt-get update && apt-get install -y nomad

      - name: Validate all job specs
        env:
          NOMAD_ADDR: http://jaglan-beta-m20.lan:4646
        run: |
          # Validate every spec and report all failures in one run instead of
          # stopping at the first one. The loop reads from process
          # substitution (not a pipe) so FAILED is set in this shell rather
          # than in a subshell; NUL-delimited names keep odd paths intact.
          FAILED=0
          while IFS= read -r -d '' spec; do
            echo "==> $spec"
            if ! nomad job validate "$spec"; then
              echo "ERROR: Validation failed for $spec"
              FAILED=1
            fi
          done < <(find 2-nomad-config -name '*.nomad.hcl' -print0)

          exit $FAILED

  image-pull:
    name: Docker image pull validation
    runs-on: ubuntu-latest
    # Only run on pull requests. The step below exits early when the PR adds
    # no `image = "repo:tag"` line to any *.nomad.hcl file.
    if: github.event_name == 'pull_request'

    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so the PR can be diffed against its base branch.
          fetch-depth: 0

      - name: Pull changed images
        env:
          # Passed through the environment instead of being expanded inside
          # the script text, so the ref name is never parsed as shell code.
          BASE_REF: ${{ github.base_ref }}
        run: |
          git fetch origin "$BASE_REF"
          # Collect image references from lines the PR adds. `|| true` keeps
          # the step alive when grep matches nothing.
          IMAGES=$(git diff "origin/${BASE_REF}...HEAD" -- '*.nomad.hcl' \
            | grep '^+\s*image\s*=' \
            | grep -oP '"[^"]+:[^"]+"' \
            | tr -d '"' \
            | sort -u || true)

          if [ -z "$IMAGES" ]; then
            echo "No image changes detected, skipping pull."
            exit 0
          fi

          # Try every image so one bad reference does not hide the others.
          FAILED=0
          while IFS= read -r image; do
            echo "==> Pulling $image"
            if ! docker pull "$image"; then
              echo "ERROR: Failed to pull $image"
              FAILED=1
            fi
          done <<< "$IMAGES"

          exit $FAILED
|
||||
@@ -64,6 +64,7 @@
|
||||
cni_path = "$${pkgs.cni-plugins}/bin";
|
||||
};
|
||||
plugin.docker.config.allow_privileged = true;
|
||||
plugin.docker.config.volumes.enabled = true;
|
||||
};
|
||||
extraPackages = with pkgs; [
|
||||
cni-plugins
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -58,8 +58,10 @@ job "pgadmin" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -7,6 +7,10 @@ job "postgres" {
|
||||
connect {
|
||||
sidecar_service {}
|
||||
}
|
||||
|
||||
# Note: TCP checks are not valid for Connect-enabled services (runs through
|
||||
# Envoy sidecar). Postgres is a single-writer DB that we never canary, so
|
||||
# observable health via Consul is lower priority than other services.
|
||||
}
|
||||
|
||||
task "postgres" {
|
||||
|
||||
@@ -33,8 +33,10 @@ job "authelia" {
|
||||
}
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/health"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -35,7 +35,8 @@ job "traefik" {
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "tcp"
|
||||
type = "http"
|
||||
path = "/ping"
|
||||
port = "api"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
@@ -85,6 +86,9 @@ api:
|
||||
dashboard: true
|
||||
insecure: true
|
||||
|
||||
ping:
|
||||
entryPoint: traefik
|
||||
|
||||
providers:
|
||||
file:
|
||||
directory: "/etc/traefik/configs/"
|
||||
|
||||
52
2-nomad-config/act-runner.nomad.hcl
Normal file
52
2-nomad-config/act-runner.nomad.hcl
Normal file
@@ -0,0 +1,52 @@
|
||||
# Runs the Gitea act_runner container under the Docker driver.
job "act-runner" {
  group "act-runner" {
    network {
      mode = "bridge"
    }

    # Consul Connect upstream to Gitea so the runner can register and receive jobs
    service {
      name = "act-runner"

      connect {
        sidecar_service {
          proxy {
            # Binds the "code-connect" service to local port 3000.
            upstreams {
              destination_name = "code-connect"
              local_bind_port  = 3000
            }
          }
        }
      }
    }

    task "act-runner" {
      driver = "docker"

      config {
        # NOTE(review): floating tag — consider pinning a release so that
        # deployments are reproducible.
        image = "gitea/act_runner:latest"
        # Bind-mounts the host's Docker socket into the task container.
        volumes = ["/var/run/docker.sock:/var/run/docker.sock"]
      }

      env {
        GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
      }

      # Required SOPS key:
      #   act-runner.registration_token — runner registration token from Gitea
      #   Admin → Settings → Actions → Runners → Create new runner
      #
      # The token is read from the Nomad variable at nomad/jobs/act-runner,
      # rendered to secrets/runner.env, and exported to the task (env = true).
      template {
        destination = "secrets/runner.env"
        env         = true
        data        = <<EOF
GITEA_RUNNER_REGISTRATION_TOKEN={{ with nomadVar "nomad/jobs/act-runner" }}{{ .registration_token }}{{ end }}
EOF
      }

      resources {
        cpu        = 200
        memory     = 256
        memory_max = 1024
      }
    }
  }
}
|
||||
10
2-nomad-config/act-runner.tf
Normal file
10
2-nomad-config/act-runner.tf
Normal file
@@ -0,0 +1,10 @@
|
||||
# Submits the act-runner job to Nomad from the jobspec file that sits next to
# this configuration.
resource "nomad_job" "act_runner" {
  # path.module anchors the lookup to this module's directory, so the file is
  # found regardless of the working directory Terraform is invoked from.
  jobspec = file("${path.module}/act-runner.nomad.hcl")
}

# Nomad variable holding the runner registration token, taken from the
# SOPS-managed secrets under the key "act-runner.registration_token".
resource "nomad_variable" "act_runner" {
  path = "nomad/jobs/act-runner"
  items = {
    registration_token = data.sops_file.secrets.data["act-runner.registration_token"]
  }
}
|
||||
@@ -54,8 +54,10 @@ EOH
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
@@ -78,8 +80,10 @@ EOH
|
||||
}
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -27,11 +27,15 @@ job "gitea" {
|
||||
tags = [
|
||||
"traefik.enable=true",
|
||||
"traefik.http.routers.gitea.middlewares=auth@file",
|
||||
# Token subdomain — no auth middleware — used by act_runner step containers for git checkout
|
||||
"traefik.http.routers.gitea-token.rule=Host(`gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one`)",
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
path = "/api/healthz"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ job "glance" {
|
||||
driver = "docker"
|
||||
|
||||
config {
|
||||
image = "glanceapp/glance:latest"
|
||||
image = "glanceapp/glance:v0.7.8"
|
||||
ports = ["http"]
|
||||
volumes = [
|
||||
"local/glance.yml:/app/config/glance.yml",
|
||||
@@ -30,7 +30,8 @@ job "glance" {
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "tcp"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
|
||||
@@ -28,10 +28,11 @@ job "jellyfin" {
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "tcp"
|
||||
type = "http"
|
||||
path = "/health"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
timeout = "5s"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -33,8 +33,10 @@ job "ntfy" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/healthz"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -31,8 +31,10 @@ job "openreader" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
@@ -48,8 +50,10 @@ job "openreader" {
|
||||
}
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -34,8 +34,9 @@ job "prowlarr" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
path = "/ping"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
@@ -51,8 +52,9 @@ job "prowlarr" {
|
||||
}
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
path = "/ping"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -56,6 +56,9 @@
|
||||
"gitea_token": "ENC[AES256_GCM,data:/J3CDMgWZLe20oQ+ENKBMi8fs/+jgsARV7xihMq0OLmRk8C8ae/IXg==,iv:e7WYOanSOCZ/LhN6SKrH0VrR3xLPTTppOKpGpSl+oAc=,tag:XBAilRdK3jL7WtM+92Fsmg==,type:str]",
|
||||
"github_token": "ENC[AES256_GCM,data:omZpdsTV1aFgQ9PjIApITEyIRKk6Z8QyvD2Kp5tJnBWzFCm4v2lRAg==,iv:cKL7z+CSChzF9eZEcske2lbmx9KV6CrWw0tn7rmP/10=,tag:gon3Sc1d3ntNSbWwenHuOw==,type:str]"
|
||||
},
|
||||
"act-runner": {
|
||||
"registration_token": "ENC[AES256_GCM,data:RnDvcNh69lLlL/ms+sMPKhhc+ECtc5hUHSkAQZv8e77iTD/QPd356Q==,iv:sl2Aua8rTe6cKYQAUC7O4UyHajGy1LgG/ZNLTVP4SyE=,tag:JjdaQqZ4PaWjfoiVmBl6lQ==,type:str]"
|
||||
},
|
||||
"sops": {
|
||||
"age": [
|
||||
{
|
||||
@@ -63,8 +66,8 @@
|
||||
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSByUWM4ZDVVbGFrUGdMRHBX\nUFBmU3Nlc0RBSzhFK0tHNHpkQXUvUVdiZUZJCmpRN1lFdENpWW0rcThjVlVQNUl6\nWnlLU0RnQ3FZby81Ly8xTFBrek9nMncKLS0tIFQ4UTRNOC9CRmx4OFJWem1wckZz\nUDFTSzdWZldFK3FqcTNWTWRyNDhHQ2MKS811mR5xn7qiC/aVgPFYJ5c6Q3zxRfcr\nHcvxUvB01vNJKZpRg92vvKPkV6lQO3DXCT98OdfwiymlEOvYxg71Pg==\n-----END AGE ENCRYPTED FILE-----\n"
|
||||
}
|
||||
],
|
||||
"lastmodified": "2026-04-18T06:30:49Z",
|
||||
"mac": "ENC[AES256_GCM,data:ZqT+lJxFOxbRaDkex8URHRRoNSoHVkB9tbMCDVWoln0otMUBFDnxa1Fqwzl77G+JxD/I7W5QX5qUx+oSoDxhyCvC97tjBfTZ+nlqTos25wLddSKwOfbvRNS7oZrzMt5AepgauApucNDjjUWtZB55mTV497PzESLBrZeI/4zpCU0=,iv:AVvlyJLyLJup2PtLt8NzZO+uCbuQKmUV0S2swwl6nME=,tag:HxywCeG6NQotrsN7ovDfrw==,type:str]",
|
||||
"lastmodified": "2026-04-18T07:41:42Z",
|
||||
"mac": "ENC[AES256_GCM,data:+HhhsiZXok4BZI05tG3p9veZaj51kELSQlWFYMSInv7bGfEadmOrJqCxaGrFcNkMmgVPx80jWQFrILfVLW5MUvEsHAhD4Vza2TSWeUq1HuL9DbMxsK2G9Y1fbthd12r/++dDcXxVnTUf/rCD70in/+g/zRObocAnUcFEcIqx1JE=,iv:pS+aj+47J4bYZYGlMVniQVTlLt4jtCLUT7oROJLUkZo=,tag:+lznxDhs2C3bcz5quxfHjA==,type:str]",
|
||||
"encrypted_regex": "^(.*)$",
|
||||
"version": "3.10.2"
|
||||
}
|
||||
|
||||
@@ -37,8 +37,9 @@ job "sonarr" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
path = "/ping"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
@@ -54,8 +55,9 @@ job "sonarr" {
|
||||
}
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
path = "/ping"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -16,8 +16,10 @@ job "transfer" {
|
||||
]
|
||||
|
||||
check {
|
||||
name = "alive"
|
||||
type = "http"
|
||||
path = "/"
|
||||
port = "http"
|
||||
interval = "10s"
|
||||
timeout = "2s"
|
||||
}
|
||||
|
||||
@@ -21,6 +21,26 @@ job "unifi-network" {
|
||||
UNIFI_STDOUT = "true"
|
||||
}
|
||||
|
||||
# Register in Consul so Traefik and health checks can find it.
|
||||
# address_mode=driver uses the macvlan IP (192.168.1.50) rather than the host IP.
|
||||
service {
|
||||
name = "unifi-network"
|
||||
port = 8443
|
||||
address_mode = "driver"
|
||||
|
||||
# TCP/HTTP checks from the Consul agent can't reach the macvlan IP (host↔macvlan
|
||||
# isolation). Use a script check instead — it runs inside the container via
|
||||
# docker exec and connects to localhost:8443 directly.
|
||||
check {
|
||||
name = "alive"
|
||||
type = "script"
|
||||
command = "/usr/bin/curl"
|
||||
args = ["-sk", "--max-time", "5", "-o", "/dev/null", "https://localhost:8443"]
|
||||
interval = "30s"
|
||||
timeout = "10s"
|
||||
}
|
||||
}
|
||||
|
||||
volume_mount {
|
||||
volume = "unraid_appdata_unifi_network"
|
||||
destination = "/unifi" # Expected root directory (contains data, log, cert subdirs)
|
||||
|
||||
16
cicd-plan.md
16
cicd-plan.md
@@ -188,8 +188,8 @@ Most jobs already have Consul health checks — these can use `health_check = "c
|
||||
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||
| glance | ✅ | no | ✅ yes |
|
||||
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
|
||||
| openreader | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||
| unifi | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||
| openreader | ✅ `/` | ✅ `single-node-writer` | ⚠️ rolling |
|
||||
| unifi | ✅ script | ✅ `single-node-writer` | ⚠️ rolling |
|
||||
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
|
||||
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
|
||||
| renovate | batch job | n/a | n/a — no deployment model |
|
||||
@@ -293,13 +293,13 @@ exit 1
|
||||
|
||||
## Implementation Order
|
||||
|
||||
- [ ] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
|
||||
- [ ] **Phase 1b**: Add `terraform fmt` + `terraform validate -backend=false` workflow — no secrets needed
|
||||
- [ ] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
|
||||
- [ ] **Phase 2**: Add image pull validation step to the workflow
|
||||
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
|
||||
- [x] **Phase 1b**: Add `terraform fmt` + `terraform init -backend=false` / `terraform validate` workflow — no secrets needed
|
||||
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
|
||||
- [x] **Phase 2**: Add image pull validation step to the workflow
|
||||
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
|
||||
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, etc.)
|
||||
- [ ] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
|
||||
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, prowlarr, deluge, gitea, immich, transfer, frigate, openreader, unifi, authelia, traefik)
|
||||
- [x] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
|
||||
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
|
||||
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
|
||||
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes
|
||||
|
||||
Reference in New Issue
Block a user