Move frigate into the cluster and enable GPU detector

This commit is contained in:
2026-03-28 17:10:23 +11:00
parent 68cf58ead5
commit d22ea96879
10 changed files with 386 additions and 31 deletions

View File

@@ -157,11 +157,6 @@ http:
service: unraid
middlewares:
- auth
frigate:
rule: "Host(`frigate.othrayte.one`)"
service: frigate
middlewares:
- auth
kopia:
rule: "Host(`kopia.othrayte.one`)"
service: kopia
@@ -199,10 +194,6 @@ http:
loadBalancer:
servers:
- url: "http://betelgeuse-seven-unraid.lan:80"
frigate:
loadBalancer:
servers:
- url: "http://betelgeuse-seven-unraid.lan:5000"
kopia:
loadBalancer:
servers:

View File

@@ -0,0 +1,97 @@
job "codeproject-ai" {
# Pin to N150 LattePanda nodes for their Intel UHD iGPU (OpenVINO) and dedicated CPU headroom.
# The node_class is set via the node.tf/configuration.nix templates in 1-nixos-node.
constraint {
attribute = "${node.class}"
value = "latte-panda-n150"
}
group "codeproject-ai" {
count = 1
network {
port "http" {
to = 32168
}
}
task "codeproject-ai" {
driver = "docker"
config {
image = "codeproject/ai-server:latest"
ports = ["http"]
# Pass the Intel iGPU render nodes so CPAI can accelerate inference
# via OpenVINO on the N150's Intel UHD Graphics.
# Requires hardware.graphics.enable = true in the NixOS node config
# (added automatically when node_class = "latte-panda-n150").
devices = [
{
host_path = "/dev/dri"
container_path = "/dev/dri"
}
]
}
service {
name = "codeproject-ai"
port = "http"
tags = [
"traefik.enable=true",
# Auth-gated user-facing UI at codeproject-ai.othrayte.one (generated by defaultRule).
"traefik.http.routers.codeproject-ai.middlewares=auth@file",
# No-auth bypass for external Frigate access until Frigate moves into the cluster
# and can reach the service via Consul DNS directly.
"traefik.http.routers.codeproject-ai-token.rule=Host(`c3ll7nbevl5j4j8rcnfxnr95q48fuayz-codeproject-ai.othrayte.one`)",
]
check {
name = "alive"
type = "http"
path = "/v1/server/status/ping"
method = "GET"
port = "http"
interval = "10s"
timeout = "5s"
}
}
env {
TZ = "Australia/Melbourne"
}
# Persistent storage for downloaded AI modules and their models.
# On first start CPAI will download ~1-2 GB of YOLOv5/MobileNet weights
# into this volume; subsequent restarts reuse the cached models.
volume_mount {
volume = "unraid_appdata_codeproject_ai"
destination = "/etc/codeproject/ai"
read_only = false
}
resources {
# ~56% of the N150's 7200 MHz allocation - enough for concurrent
# object detection requests without starving other jobs on the node.
cpu = 4000
# YOLOv5-6.2 (default detection module) needs ~900 MB;
# leave headroom for a second module (e.g. face detection) and the
# CPAI process itself.
memory = 3072
}
}
volume "unraid_appdata_codeproject_ai" {
type = "csi"
read_only = false
source = "unraid_appdata_codeproject_ai"
access_mode = "single-node-writer"
attachment_mode = "file-system"
mount_options {
mount_flags = ["uid=1000", "gid=1000"]
}
}
}
}

View File

@@ -0,0 +1,9 @@
resource "nomad_job" "codeproject_ai" {
jobspec = file("codeproject-ai.nomad.hcl")
}
module "appdata_codeproject_ai" {
source = "./modules/appdata"
name = "codeproject-ai"
}

View File

@@ -0,0 +1,216 @@
job "frigate" {
# Pin to N150 LattePanda nodes - Intel UHD iGPU for OpenVINO-accelerated detection.
# hardware.graphics (intel-compute-runtime) is deployed to these nodes via configuration.nix.
constraint {
attribute = "${node.class}"
value = "latte-panda-n150"
}
group "frigate" {
count = 1
network {
port "http" {
to = 5000
}
}
# Prestart: restore Frigate's SQLite DB from the Litestream file replica on the CIFS share.
# Runs to completion before the frigate task starts. Safe on first boot (-if-replica-exists
# is a no-op when no replica exists yet).
task "litestream-restore" {
lifecycle {
hook = "prestart"
sidecar = false
}
driver = "docker"
config {
image = "litestream/litestream:0.5.9"
command = "restore"
args = ["-if-replica-exists", "-config", "/local/litestream.yml", "/alloc/data/frigate.db"]
}
# Litestream config: replicate to /config/frigate.db.litestream/ on the CIFS share.
# Litestream writes its own segment format - no SQLite advisory locking involved.
# Frigate must be configured with database.path: /alloc/data/frigate.db in config.yml.
template {
data = <<EOH
dbs:
- path: /alloc/data/frigate.db
replicas:
- url: file:///config/frigate.db.litestream
EOH
destination = "local/litestream.yml"
}
volume_mount {
volume = "unraid_appdata_frigate"
destination = "/config"
read_only = false
}
resources {
cpu = 100
memory = 64
memory_max = 256
}
}
# Sidecar: continuously stream WAL changes from /alloc/data/frigate.db to the CIFS replica.
# Runs alongside frigate for the lifetime of the allocation.
task "litestream-replicate" {
lifecycle {
hook = "poststart"
sidecar = true
}
driver = "docker"
config {
image = "litestream/litestream:0.5"
command = "replicate"
args = ["-config", "/local/litestream.yml"]
}
template {
data = <<EOH
dbs:
- path: /alloc/data/frigate.db
replicas:
- url: file:///config/frigate.db.litestream
EOH
destination = "local/litestream.yml"
}
volume_mount {
volume = "unraid_appdata_frigate"
destination = "/config"
read_only = false
}
resources {
cpu = 100
memory = 64
memory_max = 256
}
}
task "frigate" {
driver = "docker"
config {
image = "ghcr.io/blakeblackshear/frigate:0.17.1"
ports = ["http"]
privileged = true
# Shared memory for inter-process frame buffers (frigate forks detector processes).
shm_size = 268435456 # 256 MiB
# Large tmpfs for decoded frame cache - avoids wearing out any storage.
mounts = [
{
type = "tmpfs"
target = "/tmp/cache"
readonly = false
tmpfs_options = {
size = 1000000000 # 1 GiB in bytes
}
}
]
# Intel iGPU render node - Frigate's bundled OpenVINO runtime auto-detects
# GPU device and uses it for object detection without any extra env vars.
# Requires hardware.graphics.enable = true on the NixOS node (N150 nodes).
devices = [
{
host_path = "/dev/dri/renderD128"
container_path = "/dev/dri/renderD128"
}
]
}
# RTSP password injected from Nomad variables (sourced from sops secrets).
# Reference in config.yml as: {FRIGATE_RTSP_PASSWORD}
template {
data = <<EOH
FRIGATE_RTSP_PASSWORD="{{ with nomadVar "nomad/jobs/frigate" }}{{ .rtsp_password }}{{ end }}"
EOH
destination = "secrets/frigate.env"
env = true
}
service {
name = "frigate"
port = "http"
tags = [
"traefik.enable=true",
"traefik.http.routers.frigate.middlewares=auth@file",
"traefik.http.routers.frigate-token.rule=Host(`n7gdph5cuh7bd1cakbq8s099rvrv3qhs-frigate.othrayte.one`)",
]
check {
name = "alive"
type = "http"
path = "/api/version"
port = "http"
interval = "10s"
timeout = "5s"
}
}
env {
TZ = "Australia/Melbourne"
}
# config.yml lives here (read from CIFS). SQLite DB is at /alloc/data/frigate.db
# (local NVMe, managed by Litestream). Requires in config.yml:
# database:
# path: /alloc/data/frigate.db
volume_mount {
volume = "unraid_appdata_frigate"
destination = "/config"
read_only = false
}
# Recordings, clips, and exports.
volume_mount {
volume = "unraid_media_frigate"
destination = "/media/frigate"
read_only = false
}
resources {
# GPU handles inference; CPU manages stream ingestion, motion detection, and recording.
cpu = 2000
memory = 2048
}
}
volume "unraid_appdata_frigate" {
type = "csi"
read_only = false
source = "unraid_appdata_frigate"
access_mode = "single-node-writer"
attachment_mode = "file-system"
mount_options {
mount_flags = ["nobrl", "uid=0", "gid=0"]
}
}
volume "unraid_media_frigate" {
type = "csi"
read_only = false
source = "unraid_media_frigate"
access_mode = "single-node-writer"
attachment_mode = "file-system"
mount_options {
mount_flags = ["nobrl", "uid=0", "gid=0"]
}
}
}
}

23
2-nomad-config/frigate.tf Normal file
View File

@@ -0,0 +1,23 @@
resource "nomad_job" "frigate" {
jobspec = file("frigate.nomad.hcl")
}
resource "nomad_variable" "frigate" {
path = "nomad/jobs/frigate"
items = {
rtsp_password = data.sops_file.secrets.data["frigate.rtsp_password"]
}
}
module "appdata_frigate" {
source = "./modules/appdata"
name = "frigate"
}
module "unraid_smb_frigate_media" {
source = "./modules/unraid_smb"
name = "frigate"
share = "media"
subDir = "frigate"
}

View File

@@ -43,6 +43,9 @@
"prowlarr": {
"database_pw": "ENC[AES256_GCM,data:FkW5LPoyn8bh0UfWcFq3og==,iv:SFq4Xsdz3FfCDyPjIaAmz5nsC/SPdFrR03GCr3KE/nw=,tag:PVYj7hSWDnfeE7igSXGBSA==,type:str]"
},
"frigate": {
"rtsp_password": "ENC[AES256_GCM,data:8vq06/IkNOUgpHmf,iv:lj8buuIC0ub0YOUiOiaN6tokkIT2/+bBwFNz2QXmCd4=,tag:EMm/bIHdJSAtjYAlrNOCMw==,type:str]"
},
"sops": {
"age": [
{
@@ -50,8 +53,8 @@
"enc": "-----BEGIN AGE ENCRYPTED FILE-----\nYWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSByUWM4ZDVVbGFrUGdMRHBX\nUFBmU3Nlc0RBSzhFK0tHNHpkQXUvUVdiZUZJCmpRN1lFdENpWW0rcThjVlVQNUl6\nWnlLU0RnQ3FZby81Ly8xTFBrek9nMncKLS0tIFQ4UTRNOC9CRmx4OFJWem1wckZz\nUDFTSzdWZldFK3FqcTNWTWRyNDhHQ2MKS811mR5xn7qiC/aVgPFYJ5c6Q3zxRfcr\nHcvxUvB01vNJKZpRg92vvKPkV6lQO3DXCT98OdfwiymlEOvYxg71Pg==\n-----END AGE ENCRYPTED FILE-----\n"
}
],
"lastmodified": "2025-11-18T12:09:57Z",
"mac": "ENC[AES256_GCM,data:zchzaSPjJVbUBzXTrRPGAtQE6xzqxMjTCXRABvwEvgNqLO7i2C/d04hI+pwOuLWyZXiUfqHlGt798ZnP5+MJi249FJmK08l8dAXxKSc+KSc4EfDoF+jWiwmoDEf5SypyHL9RPTeI7zoSJ0IxhtP3zZcD/7Q2PPG7cLDBKHdTJFU=,iv:+UiyEfMtAObZ/vvs2ZaIup9OxxP0uqSmo+h45PrymLk=,tag:f/D8417EGjuOfSdzZtir4Q==,type:str]",
"lastmodified": "2026-03-28T02:45:25Z",
"mac": "ENC[AES256_GCM,data:LOzPiZlrS5Rqcsub1jfQ2yfKfTzySA19RjA566MdEY3/h1NmWrodfysPU1bZ9ClOALTFmv6JfmBVxrJ3Mq8T3YBT1qoVjkg7aRO8x6WCMRt11Lba2/SwjLDNFkE/EItvMu10RhzxIYLzZTf+X3zHiLVuSQ4aS9vn9IGTa8yWQ0k=,iv:x7GSxq7G6JpVtHi0nGHM8FDx9gDZI4bR+jJQremxKpc=,tag:DsjEhCOt+XPs2WVznxvYCg==,type:str]",
"encrypted_regex": "^(.*)$",
"version": "3.10.2"
}