2 Commits

Author SHA1 Message Date
8bd15702c8 Update mcr.microsoft.com/k8s/csi/smb-csi Docker tag to v1.17.0
All checks were successful
CI / Terraform fmt + validate (pull_request) Successful in 21s
2026-04-19 04:01:39 +00:00
a13f2cef25 Add Gitea act-runner and test actions for the repo
All checks were successful
CI / Terraform fmt + validate (push) Successful in 34s
2026-04-18 18:12:39 +10:00
18 changed files with 34 additions and 133 deletions

View File

@@ -29,62 +29,3 @@ jobs:
terraform init -backend=false
terraform validate
working-directory: 2-nomad-config
nomad-validate:
name: Nomad job spec validate
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install Nomad CLI
run: |
curl -fsSL https://apt.releases.hashicorp.com/gpg | gpg --dearmor -o /usr/share/keyrings/hashicorp.gpg
. /etc/os-release
echo "deb [signed-by=/usr/share/keyrings/hashicorp.gpg] https://apt.releases.hashicorp.com ${VERSION_CODENAME} main" | tee /etc/apt/sources.list.d/hashicorp.list
apt-get update && apt-get install -y nomad
- name: Validate all job specs
env:
NOMAD_ADDR: http://jaglan-beta-m20.lan:4646
run: |
find 2-nomad-config -name '*.nomad.hcl' | while read f; do
echo "==> $f"
nomad job validate "$f"
done
image-pull:
name: Docker image pull validation
runs-on: ubuntu-latest
# Only run on PRs that touch nomad job specs
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Pull changed images
run: |
git fetch origin ${{ github.base_ref }}
IMAGES=$(git diff origin/${{ github.base_ref }}...HEAD -- '*.nomad.hcl' \
| grep '^+\s*image\s*=' \
| grep -oP '"[^"]+:[^"]+"' \
| tr -d '"' \
| sort -u || true)
if [ -z "$IMAGES" ]; then
echo "No image changes detected, skipping pull."
exit 0
fi
FAILED=0
while IFS= read -r image; do
echo "==> Pulling $image"
if ! docker pull "$image"; then
echo "ERROR: Failed to pull $image"
FAILED=1
fi
done <<< "$IMAGES"
exit $FAILED

View File

@@ -6,7 +6,7 @@ job "csi-smb" {
driver = "docker"
config {
image = "mcr.microsoft.com/k8s/csi/smb-csi:v1.7.0"
image = "mcr.microsoft.com/k8s/csi/smb-csi:v1.17.0"
args = [
"--v=5",
"--nodeid=${attr.unique.hostname}",

View File

@@ -58,10 +58,8 @@ job "pgadmin" {
]
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -7,10 +7,6 @@ job "postgres" {
connect {
sidecar_service {}
}
# Note: TCP checks are not valid for Connect-enabled services (runs through
# Envoy sidecar). Postgres is a single-writer DB that we never canary, so
# observable health via Consul is lower priority than other services.
}
task "postgres" {

View File

@@ -33,10 +33,8 @@ job "authelia" {
}
check {
name = "alive"
type = "http"
path = "/health"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -35,8 +35,7 @@ job "traefik" {
check {
name = "alive"
type = "http"
path = "/ping"
type = "tcp"
port = "api"
interval = "10s"
timeout = "2s"
@@ -86,9 +85,6 @@ api:
dashboard: true
insecure: true
ping:
entryPoint: traefik
providers:
file:
directory: "/etc/traefik/configs/"

View File

@@ -28,7 +28,8 @@ job "act-runner" {
}
env = {
GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
GITEA_INSTANCE_URL = "http://localhost:3000"
CONFIG_FILE = "/secrets/runner-config.yml"
}
# Required SOPS key:
@@ -42,6 +43,19 @@ EOF
env = true
}
# Limit which images/labels the runner will accept so it doesn't pick up
# unrelated workloads if more runners are added later.
template {
data = <<EOF
runner:
labels:
- "ubuntu-latest:docker://node:20-bookworm"
- "ubuntu-22.04:docker://node:20-bookworm"
- "ubuntu-24.04:docker://node:20-bookworm"
EOF
destination = "secrets/runner-config.yml"
}
resources {
cpu = 200
memory = 256

View File

@@ -54,10 +54,8 @@ EOH
]
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}
@@ -80,10 +78,8 @@ EOH
}
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -27,15 +27,11 @@ job "gitea" {
tags = [
"traefik.enable=true",
"traefik.http.routers.gitea.middlewares=auth@file",
# Token subdomain — no auth middleware — used by act_runner step containers for git checkout
"traefik.http.routers.gitea-token.rule=Host(`gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one`)",
]
check {
name = "alive"
type = "http"
path = "/api/healthz"
port = "http"
path = "/"
interval = "10s"
timeout = "2s"
}

View File

@@ -12,7 +12,7 @@ job "glance" {
driver = "docker"
config {
image = "glanceapp/glance:v0.7.8"
image = "glanceapp/glance:latest"
ports = ["http"]
volumes = [
"local/glance.yml:/app/config/glance.yml",
@@ -30,8 +30,7 @@ job "glance" {
check {
name = "alive"
type = "http"
path = "/"
type = "tcp"
port = "http"
interval = "10s"
timeout = "2s"

View File

@@ -28,11 +28,10 @@ job "jellyfin" {
check {
name = "alive"
type = "http"
path = "/health"
type = "tcp"
port = "http"
interval = "10s"
timeout = "5s"
timeout = "2s"
}
}

View File

@@ -33,10 +33,8 @@ job "ntfy" {
]
check {
name = "alive"
type = "http"
path = "/healthz"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -31,10 +31,8 @@ job "openreader" {
]
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}
@@ -50,10 +48,8 @@ job "openreader" {
}
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -34,9 +34,8 @@ job "prowlarr" {
]
check {
name = "alive"
type = "http"
path = "/ping"
path = "/"
interval = "10s"
timeout = "2s"
}
@@ -52,9 +51,8 @@ job "prowlarr" {
}
check {
name = "alive"
type = "http"
path = "/ping"
path = "/"
interval = "10s"
timeout = "2s"
}

View File

@@ -37,9 +37,8 @@ job "sonarr" {
]
check {
name = "alive"
type = "http"
path = "/ping"
path = "/"
interval = "10s"
timeout = "2s"
}
@@ -55,9 +54,8 @@ job "sonarr" {
}
check {
name = "alive"
type = "http"
path = "/ping"
path = "/"
interval = "10s"
timeout = "2s"
}

View File

@@ -16,10 +16,8 @@ job "transfer" {
]
check {
name = "alive"
type = "http"
path = "/"
port = "http"
interval = "10s"
timeout = "2s"
}

View File

@@ -21,26 +21,6 @@ job "unifi-network" {
UNIFI_STDOUT = "true"
}
# Register in Consul so Traefik and health checks can find it.
# address_mode=driver uses the macvlan IP (192.168.1.50) rather than the host IP.
service {
name = "unifi-network"
port = 8443
address_mode = "driver"
# TCP/HTTP checks from the Consul agent can't reach the macvlan IP (host↔macvlan
# isolation). Use a script check instead — it runs inside the container via
# docker exec and connects to localhost:8443 directly.
check {
name = "alive"
type = "script"
command = "/usr/bin/curl"
args = ["-sk", "--max-time", "5", "-o", "/dev/null", "https://localhost:8443"]
interval = "30s"
timeout = "10s"
}
}
volume_mount {
volume = "unraid_appdata_unifi_network"
destination = "/unifi" # Expected root directory (contains data, log, cert subdirs)

View File

@@ -188,8 +188,8 @@ Most jobs already have Consul health checks — these can use `health_check = "c
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
| glance | ✅ | no | ✅ yes |
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
| openreader | `/` | ✅ `single-node-writer` | ⚠️ rolling |
| unifi | ✅ script | ✅ `single-node-writer` | ⚠️ rolling |
| openreader | | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
| unifi | | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
| renovate | batch job | n/a | n/a — no deployment model |
@@ -293,13 +293,13 @@ exit 1
## Implementation Order
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
- [x] **Phase 1b**: Add `terraform fmt` + `terraform validate -backend=false` workflow — no secrets needed
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
- [x] **Phase 2**: Add image pull validation step to the workflow
- [ ] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
- [ ] **Phase 1b**: Add `terraform fmt` + `terraform validate -backend=false` workflow — no secrets needed
- [ ] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
- [ ] **Phase 2**: Add image pull validation step to the workflow
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, prowlarr, deluge, gitea, immich, transfer, frigate, openreader, unifi, authelia, traefik)
- [x] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, etc.)
- [ ] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes