Compare commits
1 Commits
main
...
d0744c1658
| Author | SHA1 | Date | |
|---|---|---|---|
| d0744c1658 |
@@ -52,39 +52,3 @@ jobs:
|
|||||||
echo "==> $f"
|
echo "==> $f"
|
||||||
nomad job validate "$f"
|
nomad job validate "$f"
|
||||||
done
|
done
|
||||||
|
|
||||||
image-pull:
|
|
||||||
name: Docker image pull validation
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
# Only run on PRs that touch nomad job specs
|
|
||||||
if: github.event_name == 'pull_request'
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Pull changed images
|
|
||||||
run: |
|
|
||||||
git fetch origin ${{ github.base_ref }}
|
|
||||||
IMAGES=$(git diff origin/${{ github.base_ref }}...HEAD -- '*.nomad.hcl' \
|
|
||||||
| grep '^+\s*image\s*=' \
|
|
||||||
| grep -oP '"[^"]+:[^"]+"' \
|
|
||||||
| tr -d '"' \
|
|
||||||
| sort -u || true)
|
|
||||||
|
|
||||||
if [ -z "$IMAGES" ]; then
|
|
||||||
echo "No image changes detected, skipping pull."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
FAILED=0
|
|
||||||
while IFS= read -r image; do
|
|
||||||
echo "==> Pulling $image"
|
|
||||||
if ! docker pull "$image"; then
|
|
||||||
echo "ERROR: Failed to pull $image"
|
|
||||||
FAILED=1
|
|
||||||
fi
|
|
||||||
done <<< "$IMAGES"
|
|
||||||
|
|
||||||
exit $FAILED
|
|
||||||
|
|||||||
@@ -58,10 +58,8 @@ job "pgadmin" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ job "postgres" {
|
|||||||
connect {
|
connect {
|
||||||
sidecar_service {}
|
sidecar_service {}
|
||||||
}
|
}
|
||||||
|
|
||||||
# Note: TCP checks are not valid for Connect-enabled services (runs through
|
|
||||||
# Envoy sidecar). Postgres is a single-writer DB that we never canary, so
|
|
||||||
# observable health via Consul is lower priority than other services.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
task "postgres" {
|
task "postgres" {
|
||||||
|
|||||||
@@ -33,10 +33,8 @@ job "authelia" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/health"
|
path = "/health"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -35,8 +35,7 @@ job "traefik" {
|
|||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
name = "alive"
|
||||||
type = "http"
|
type = "tcp"
|
||||||
path = "/ping"
|
|
||||||
port = "api"
|
port = "api"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
@@ -86,9 +85,6 @@ api:
|
|||||||
dashboard: true
|
dashboard: true
|
||||||
insecure: true
|
insecure: true
|
||||||
|
|
||||||
ping:
|
|
||||||
entryPoint: traefik
|
|
||||||
|
|
||||||
providers:
|
providers:
|
||||||
file:
|
file:
|
||||||
directory: "/etc/traefik/configs/"
|
directory: "/etc/traefik/configs/"
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ job "act-runner" {
|
|||||||
|
|
||||||
env = {
|
env = {
|
||||||
GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
|
GITEA_INSTANCE_URL = "https://gitea-1ef0bea6b75a4fd3e9393a9f7f7e4b02.othrayte.one"
|
||||||
|
CONFIG_FILE = "/secrets/runner-config.yml"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Required SOPS key:
|
# Required SOPS key:
|
||||||
@@ -42,6 +43,19 @@ EOF
|
|||||||
env = true
|
env = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Limit which images/labels the runner will accept so it doesn't pick up
|
||||||
|
# unrelated workloads if more runners are added later.
|
||||||
|
template {
|
||||||
|
data = <<EOF
|
||||||
|
runner:
|
||||||
|
labels:
|
||||||
|
- "ubuntu-latest:docker://node:20-bookworm"
|
||||||
|
- "ubuntu-22.04:docker://node:20-bookworm"
|
||||||
|
- "ubuntu-24.04:docker://node:20-bookworm"
|
||||||
|
EOF
|
||||||
|
destination = "secrets/runner-config.yml"
|
||||||
|
}
|
||||||
|
|
||||||
resources {
|
resources {
|
||||||
cpu = 200
|
cpu = 200
|
||||||
memory = 256
|
memory = 256
|
||||||
|
|||||||
@@ -54,10 +54,8 @@ EOH
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
@@ -80,10 +78,8 @@ EOH
|
|||||||
}
|
}
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,10 +32,8 @@ job "gitea" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/api/healthz"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ job "glance" {
|
|||||||
driver = "docker"
|
driver = "docker"
|
||||||
|
|
||||||
config {
|
config {
|
||||||
image = "glanceapp/glance:v0.7.8"
|
image = "glanceapp/glance:latest"
|
||||||
ports = ["http"]
|
ports = ["http"]
|
||||||
volumes = [
|
volumes = [
|
||||||
"local/glance.yml:/app/config/glance.yml",
|
"local/glance.yml:/app/config/glance.yml",
|
||||||
@@ -30,8 +30,7 @@ job "glance" {
|
|||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
name = "alive"
|
||||||
type = "http"
|
type = "tcp"
|
||||||
path = "/"
|
|
||||||
port = "http"
|
port = "http"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
|
|||||||
@@ -28,11 +28,10 @@ job "jellyfin" {
|
|||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
name = "alive"
|
||||||
type = "http"
|
type = "tcp"
|
||||||
path = "/health"
|
|
||||||
port = "http"
|
port = "http"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "5s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -33,10 +33,8 @@ job "ntfy" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/healthz"
|
path = "/healthz"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,10 +31,8 @@ job "openreader" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
@@ -50,10 +48,8 @@ job "openreader" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
@@ -63,7 +59,7 @@ job "openreader" {
|
|||||||
driver = "docker"
|
driver = "docker"
|
||||||
|
|
||||||
config {
|
config {
|
||||||
image = "ghcr.io/richardr1126/openreader:v2.1.2"
|
image = "ghcr.io/richardr1126/openreader:v2.2.0"
|
||||||
ports = ["http"]
|
ports = ["http"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -34,9 +34,8 @@ job "prowlarr" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/ping"
|
path = "/"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
@@ -52,9 +51,8 @@ job "prowlarr" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/ping"
|
path = "/"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,9 +37,8 @@ job "sonarr" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/ping"
|
path = "/"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
@@ -55,9 +54,8 @@ job "sonarr" {
|
|||||||
}
|
}
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/ping"
|
path = "/"
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,10 +16,8 @@ job "transfer" {
|
|||||||
]
|
]
|
||||||
|
|
||||||
check {
|
check {
|
||||||
name = "alive"
|
|
||||||
type = "http"
|
type = "http"
|
||||||
path = "/"
|
path = "/"
|
||||||
port = "http"
|
|
||||||
interval = "10s"
|
interval = "10s"
|
||||||
timeout = "2s"
|
timeout = "2s"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,26 +21,6 @@ job "unifi-network" {
|
|||||||
UNIFI_STDOUT = "true"
|
UNIFI_STDOUT = "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Register in Consul so Traefik and health checks can find it.
|
|
||||||
# address_mode=driver uses the macvlan IP (192.168.1.50) rather than the host IP.
|
|
||||||
service {
|
|
||||||
name = "unifi-network"
|
|
||||||
port = 8443
|
|
||||||
address_mode = "driver"
|
|
||||||
|
|
||||||
# TCP/HTTP checks from the Consul agent can't reach the macvlan IP (host↔macvlan
|
|
||||||
# isolation). Use a script check instead — it runs inside the container via
|
|
||||||
# docker exec and connects to localhost:8443 directly.
|
|
||||||
check {
|
|
||||||
name = "alive"
|
|
||||||
type = "script"
|
|
||||||
command = "/usr/bin/curl"
|
|
||||||
args = ["-sk", "--max-time", "5", "-o", "/dev/null", "https://localhost:8443"]
|
|
||||||
interval = "30s"
|
|
||||||
timeout = "10s"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
volume_mount {
|
volume_mount {
|
||||||
volume = "unraid_appdata_unifi_network"
|
volume = "unraid_appdata_unifi_network"
|
||||||
destination = "/unifi" # Expected root directory (contains data, log, cert subdirs)
|
destination = "/unifi" # Expected root directory (contains data, log, cert subdirs)
|
||||||
|
|||||||
10
cicd-plan.md
10
cicd-plan.md
@@ -188,8 +188,8 @@ Most jobs already have Consul health checks — these can use `health_check = "c
|
|||||||
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
| frigate | ✅ | ✅ `single-node-writer` | ⚠️ same — rolling |
|
||||||
| glance | ✅ | no | ✅ yes |
|
| glance | ✅ | no | ✅ yes |
|
||||||
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
|
| transfer | ✅ | ✅ `single-node-writer` | ⚠️ rolling |
|
||||||
| openreader | ✅ `/` | ✅ `single-node-writer` | ⚠️ rolling |
|
| openreader | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||||
| unifi | ✅ script | ✅ `single-node-writer` | ⚠️ rolling |
|
| unifi | ❌ | ✅ `single-node-writer` | ⚠️ add check first, then rolling |
|
||||||
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
|
| traefik | (ingress) | ✅ | ⚠️ rolling — downtime risk, promote quickly |
|
||||||
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
|
| authelia | (ingress) | ✅ | ✅ stateless config, canary fine |
|
||||||
| renovate | batch job | n/a | n/a — no deployment model |
|
| renovate | batch job | n/a | n/a — no deployment model |
|
||||||
@@ -296,10 +296,10 @@ exit 1
|
|||||||
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
|
- [x] **Phase 1a**: Create `act-runner.nomad.hcl` + Terraform wrapper, register runner token in Gitea, get a hello-world workflow green
|
||||||
- [x] **Phase 1b**: Add `terraform fmt` + `terraform validate -backend=false` workflow — no secrets needed
|
- [x] **Phase 1b**: Add `terraform fmt` + `terraform validate -backend=false` workflow — no secrets needed
|
||||||
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
|
- [x] **Phase 1c**: Add Nomad validate step — add `NOMAD_ADDR` + read-only `NOMAD_TOKEN` to Gitea secrets
|
||||||
- [x] **Phase 2**: Add image pull validation step to the workflow
|
- [ ] **Phase 2**: Add image pull validation step to the workflow
|
||||||
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
|
- [ ] **Phase 3a**: Add `update` stanzas to ntfy and glance (simplest, no volume conflict)
|
||||||
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, prowlarr, deluge, gitea, immich, transfer, frigate, openreader, unifi, authelia, traefik)
|
- [ ] **Phase 3b**: Add rolling `update` stanzas to remaining service jobs (jellyfin, sonarr, etc.)
|
||||||
- [x] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
|
- [ ] **Phase 3c**: Add health checks to openreader and unifi before adding update stanzas
|
||||||
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
|
- [ ] **Phase 4a**: Add on-push workflow that runs `terraform apply -auto-approve` using full credential set
|
||||||
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
|
- [ ] **Phase 4b**: Add deployment promotion/revert polling script
|
||||||
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes
|
- [ ] **Phase 4c**: Wire ntfy notifications for promote/revert outcomes
|
||||||
|
|||||||
Reference in New Issue
Block a user