This commit is contained in:
Joakim Hellsén 2026-04-27 20:43:26 +02:00
commit a7a5b5c8ea
Signed by: Joakim Hellsén
SSH key fingerprint: SHA256:/9h/CsExpFp+PRhsfA0xznFx2CGfTT5R/kpuFfUgEQk
43 changed files with 5531 additions and 9 deletions

View file

@ -0,0 +1 @@
"""Control-plane models and runtime helpers for hosted deployments."""

223
control_plane/admin.py Normal file
View file

@ -0,0 +1,223 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from django.contrib import admin
from django.contrib import messages
from django.db.models import Count
from django.db.models import F
from control_plane.models import Deployment
from control_plane.models import HostedSite
from control_plane.models import RuntimeService
from control_plane.models import RuntimeServiceKind
from control_plane.models import Tenant
from control_plane.tasks import provision_test_runtime_services
# Base-class aliases for the admin classes defined below.
# Static type checkers see generic bases such as ``ModelAdmin[Tenant]``, so each
# admin class is checked against its concrete model. At runtime the plain,
# unsubscripted Django classes are used instead.
# NOTE(review): presumably the subscripted form is avoided at runtime because it
# depends on typing stubs being active — confirm.
if TYPE_CHECKING:
    from django.db.models import QuerySet
    from django.http import HttpRequest

    RuntimeServiceInlineBase = admin.StackedInline[RuntimeService]
    TenantAdminBase = admin.ModelAdmin[Tenant]
    HostedSiteAdminBase = admin.ModelAdmin[HostedSite]
    DeploymentAdminBase = admin.ModelAdmin[Deployment]
    RuntimeServiceAdminBase = admin.ModelAdmin[RuntimeService]
else:
    RuntimeServiceInlineBase = admin.StackedInline
    TenantAdminBase = admin.ModelAdmin
    HostedSiteAdminBase = admin.ModelAdmin
    DeploymentAdminBase = admin.ModelAdmin
    RuntimeServiceAdminBase = admin.ModelAdmin
class RuntimeServiceInline(RuntimeServiceInlineBase):
    """Stacked inline for editing a deployment's runtime services in place."""

    model = RuntimeService
    # At most one inline form per runtime service kind.
    max_num = len(RuntimeServiceKind)
    # No blank extra forms are rendered by default.
    extra = 0
    show_change_link = True
@admin.register(Tenant)
class TenantAdmin(TenantAdminBase):
    """Admin changelist for tenants, used when setting up smoke data."""

    ordering = ("slug",)
    search_fields = ("slug", "display_name")
    list_display = ("slug", "display_name")
@admin.register(HostedSite)
class HostedSiteAdmin(HostedSiteAdminBase):
    """Admin for hosted sites so deployment test graphs can be built by hand."""

    list_display = ("slug", "display_name", "tenant_slug", "service_port")
    list_filter = ("tenant",)
    list_select_related = ("tenant",)
    autocomplete_fields = ("tenant",)
    ordering = ("tenant__slug", "slug")
    search_fields = (
        "slug",
        "display_name",
        "tenant__slug",
        "tenant__display_name",
        "wsgi_module",
    )

    def get_queryset(self, request: HttpRequest) -> QuerySet[HostedSite]:
        """Join the tenant and annotate its slug for the changelist.

        Returns:
            Hosted site queryset with the tenant joined and a
            ``tenant_slug_value`` annotation on every row.
        """
        base_queryset = super().get_queryset(request)
        with_tenant = base_queryset.select_related("tenant")
        return with_tenant.annotate(tenant_slug_value=F("tenant__slug"))

    @admin.display(ordering="tenant__slug", description="Tenant")
    def tenant_slug(self, hosted_site: HostedSite) -> str:
        """Return the annotated tenant slug for display and sorting."""
        # The annotation is stored in the instance dict by get_queryset().
        annotated_slug = hosted_site.__dict__["tenant_slug_value"]
        return str(annotated_slug)
@admin.register(Deployment)
class DeploymentAdmin(DeploymentAdminBase):
    """Expose deployments and queue test container provisioning.

    The changelist queryset is annotated with the tenant slug, site slug, and
    a runtime-service count; the display methods read those annotations from
    each instance's attribute dict.
    """

    list_display = (
        "id",
        "status",
        "tenant_slug",
        "site_slug",
        "idempotency_key",
        "guest_port",
        "runtime_service_total",
    )
    list_filter = ("status",)
    search_fields = (
        "=id",
        "idempotency_key",
        "firecracker_vm_id",
        "hosted_site__slug",
        "hosted_site__tenant__slug",
    )
    ordering = ("hosted_site__tenant__slug", "hosted_site__slug", "-created_at")
    autocomplete_fields = ("hosted_site",)
    list_select_related = ("hosted_site__tenant",)
    inlines = (RuntimeServiceInline,)
    actions = ("create_test_containers",)

    def get_queryset(self, request: HttpRequest) -> QuerySet[Deployment]:
        """Load related hosted site and tenant rows for admin rendering.

        Returns:
            Deployment queryset with hosted site and tenant joined, plus the
            ``tenant_slug_value``, ``site_slug_value``, and
            ``runtime_service_total_value`` annotations.
        """
        return (
            super()
            .get_queryset(request)
            .select_related("hosted_site__tenant")
            .annotate(
                tenant_slug_value=F("hosted_site__tenant__slug"),
                site_slug_value=F("hosted_site__slug"),
                runtime_service_total_value=Count("runtime_services", distinct=True),
            )
        )

    @admin.display(ordering="hosted_site__tenant__slug", description="Tenant")
    def tenant_slug(self, deployment: Deployment) -> str:
        """Return tenant slug for changelist display and sorting."""
        return str(vars(deployment)["tenant_slug_value"])

    @admin.display(ordering="hosted_site__slug", description="Site")
    def site_slug(self, deployment: Deployment) -> str:
        """Return hosted site slug for changelist display and sorting."""
        return str(vars(deployment)["site_slug_value"])

    # Sort on the annotation added in get_queryset() so this column is
    # sortable like the other computed columns.
    @admin.display(ordering="runtime_service_total_value", description="Runtime services")
    def runtime_service_total(self, deployment: Deployment) -> int:
        """Return total runtime services currently linked to a deployment."""
        return int(vars(deployment)["runtime_service_total_value"])

    @admin.action(description="Queue test container provisioning")
    def create_test_containers(
        self,
        request: HttpRequest,
        queryset: QuerySet[Deployment],
    ) -> None:
        """Queue one provisioning task per selected deployment.

        Each selected deployment id is passed to
        ``provision_test_runtime_services.delay`` as a string, and a success
        message reporting the number of queued deployments is shown.
        """
        deployment_ids = [str(deployment_id) for deployment_id in queryset.values_list("id", flat=True)]
        for deployment_id in deployment_ids:
            provision_test_runtime_services.delay(deployment_id)
        self.message_user(
            request,
            (
                f"Queued test container provisioning for {len(deployment_ids)} deployments. "
                "Run a Celery worker to execute queued jobs."
            ),
            level=messages.SUCCESS,
        )
@admin.register(RuntimeService)
class RuntimeServiceAdmin(RuntimeServiceAdminBase):
    """Admin for the runtime service containers attached to deployments."""

    list_display = (
        "container_name",
        "kind",
        "status",
        "tenant_slug",
        "site_slug",
        "internal_port",
    )
    list_filter = ("kind", "status")
    list_select_related = ("deployment__hosted_site__tenant",)
    autocomplete_fields = ("deployment",)
    ordering = (
        "deployment__hosted_site__tenant__slug",
        "deployment__hosted_site__slug",
        "kind",
    )
    search_fields = (
        "container_name",
        "network_name",
        "hostname",
        "deployment__idempotency_key",
        "deployment__hosted_site__slug",
        "deployment__hosted_site__tenant__slug",
    )

    def get_queryset(self, request: HttpRequest) -> QuerySet[RuntimeService]:
        """Join deployment context and annotate slugs for the changelist.

        Returns:
            Runtime service queryset with deployment, site, and tenant joined
            and ``tenant_slug_value`` / ``site_slug_value`` annotations.
        """
        base_queryset = super().get_queryset(request)
        joined = base_queryset.select_related("deployment__hosted_site__tenant")
        return joined.annotate(
            tenant_slug_value=F("deployment__hosted_site__tenant__slug"),
            site_slug_value=F("deployment__hosted_site__slug"),
        )

    @admin.display(ordering="deployment__hosted_site__tenant__slug", description="Tenant")
    def tenant_slug(self, runtime_service: RuntimeService) -> str:
        """Return the annotated tenant slug for display and sorting."""
        annotated_slug = runtime_service.__dict__["tenant_slug_value"]
        return str(annotated_slug)

    @admin.display(ordering="deployment__hosted_site__slug", description="Site")
    def site_slug(self, runtime_service: RuntimeService) -> str:
        """Return the annotated hosted site slug for display and sorting."""
        annotated_slug = runtime_service.__dict__["site_slug_value"]
        return str(annotated_slug)

8
control_plane/apps.py Normal file
View file

@ -0,0 +1,8 @@
from django.apps import AppConfig
class ControlPlaneConfig(AppConfig):
    """Django application configuration for the ``control_plane`` app."""

    verbose_name = "Tussilago Control Plane"
    name = "control_plane"

View file

@ -0,0 +1,13 @@
# Image for the local Django test runtime.
FROM docker.io/library/python:3.14-slim

# Do not write .pyc files, keep stdout/stderr unbuffered, and silence pip's
# self-update check.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install Django, the gunicorn server, and the PostgreSQL and Redis client libraries.
RUN python -m pip install --no-cache-dir \
    "django>=6.0.4" \
    "gunicorn>=23.0.0" \
    "psycopg[binary]>=3.2.9" \
    "redis>=6.0.0"

# NOTE(review): presumably the generated project is mounted at this path at run time — confirm.
WORKDIR /srv/test-app

View file

@ -0,0 +1,171 @@
from __future__ import annotations
import logging
import os
import shlex
import subprocess # noqa: S404
from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Mapping
from collections.abc import Sequence
from pathlib import Path
logger = logging.getLogger("tussilago.control_plane.host_commands")

# Names of parent-process environment variables that are copied into the
# environment of host-side child processes by default.
DEFAULT_INHERITED_ENV_KEYS: frozenset[str] = frozenset(
    (
        "HOME",
        "LANG",
        "LC_ALL",
        "LC_CTYPE",
        "LOGNAME",
        "PATH",
        "SSL_CERT_DIR",
        "SSL_CERT_FILE",
        "TMPDIR",
        "USER",
        "UV_CACHE_DIR",
        "VIRTUAL_ENV",
        "XDG_CACHE_HOME",
        "XDG_RUNTIME_DIR",
    ),
)
@dataclass(frozen=True, slots=True)
class HostCommandResult:
"""Capture output from a completed host-side command."""
args: tuple[str, ...]
returncode: int
stdout: str
stderr: str
class HostCommandError(RuntimeError):
    """Signal that a host-side command failed or timed out."""

    def __init__(
        self,
        message: str,
        *,
        args: Sequence[str],
        returncode: int | None,
        stdout: str,
        stderr: str,
    ) -> None:
        """Keep the command context next to the error message.

        Args:
            message: Human-readable summary passed to ``RuntimeError``.
            args: Command argument vector; stored as a tuple.
            returncode: Exit status, or ``None`` when none is available.
            stdout: Captured standard output.
            stderr: Captured standard error.
        """
        super().__init__(message)
        # ``self.args`` already holds the exception's own arguments, so the
        # command line lives under a separate attribute name.
        self.command_args = (*args,)
        self.returncode = returncode
        self.stdout, self.stderr = stdout, stderr
def build_host_command_env(
    *,
    env_overrides: Mapping[str, str] | None = None,
    allowed_env_keys: frozenset[str] | None = None,
    inherited_env_keys: frozenset[str] = DEFAULT_INHERITED_ENV_KEYS,
) -> dict[str, str]:
    """Assemble a filtered environment for host-side child processes.

    Only variables named in ``inherited_env_keys`` are copied from the current
    process. Overrides are then layered on top, but only when every override
    key appears in ``allowed_env_keys``.

    Returns:
        Environment dictionary to hand to the subprocess call.

    Raises:
        ValueError: If overrides are given without an allowlist, or contain a
            key that is not allowlisted.
    """
    environment: dict[str, str] = {}
    for name, value in os.environ.items():
        if name in inherited_env_keys:
            environment[name] = value

    if env_overrides is None:
        return environment

    if allowed_env_keys is None:
        msg = "allowed_env_keys is required when env_overrides are provided"
        raise ValueError(msg)

    rejected = sorted(key for key in env_overrides if key not in allowed_env_keys)
    if rejected:
        msg = f"env_overrides contains disallowed keys: {', '.join(rejected)}"
        raise ValueError(msg)

    environment.update(env_overrides)
    return environment
def run_host_command(
*,
command: Sequence[str],
cwd: Path | None = None,
env_overrides: Mapping[str, str] | None = None,
allowed_env_keys: frozenset[str] | None = None,
timeout_seconds: float = 60.0,
) -> HostCommandResult:
"""Run a host-side command with explicit environment and timeout controls.
Returns:
A result object containing the command, return code, and captured output.
Raises:
ValueError: If the command is empty or env overrides are not allowlisted.
HostCommandError: If the command fails or times out.
"""
normalized_command = tuple(command)
if not normalized_command:
msg = "command must not be empty"
raise ValueError(msg)
if any(not argument for argument in normalized_command):
msg = "command arguments must be non-empty strings"
raise ValueError(msg)
resolved_env = build_host_command_env(
env_overrides=env_overrides,
allowed_env_keys=allowed_env_keys,
)
logger.debug(
"Running host command executable=%s argc=%s (cwd=%s)",
shlex.quote(normalized_command[0]),
len(normalized_command),
cwd,
)
try:
completed = subprocess.run( # noqa: S603
normalized_command,
check=True,
capture_output=True,
text=True,
cwd=cwd,
env=resolved_env,
timeout=timeout_seconds,
)
except subprocess.CalledProcessError as error:
msg_0 = "Host command failed."
raise HostCommandError(
msg_0,
args=tuple(str(argument) for argument in error.cmd),
returncode=error.returncode,
stdout=error.stdout or "",
stderr=error.stderr or "",
) from error
except subprocess.TimeoutExpired as error:
msg_0 = "Host command timed out."
raise HostCommandError(
msg_0,
args=normalized_command,
returncode=None,
stdout=str(error.stdout) or "",
stderr=str(error.stderr) or "",
) from error
return HostCommandResult(
args=normalized_command,
returncode=completed.returncode,
stdout=completed.stdout,
stderr=completed.stderr,
)

View file

@ -0,0 +1,212 @@
from __future__ import annotations
import hashlib
import secrets
import socket
import time
from dataclasses import dataclass
from celery import chain
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from control_plane.host_commands import HostCommandError
from control_plane.local_test_runtime import build_test_django_local_url
from control_plane.models import Deployment
from control_plane.models import DeploymentStatus
from control_plane.models import HostedSite
from control_plane.models import Tenant
from control_plane.observability import capture_test_deployment_diagnostics
from control_plane.tasks import mark_deployment_booting
from control_plane.tasks import mark_deployment_provisioning
from control_plane.tasks import provision_test_django_runtime
from control_plane.tasks import provision_test_runtime_services
from control_plane.tasks import run_test_django_runtime_provisioning
@dataclass(frozen=True, slots=True)
class CreatedTestDeployment:
"""Bundle control-plane rows created for one local test deployment."""
tenant: Tenant
hosted_site: HostedSite
deployment: Deployment
@property
def sentinel_url(self) -> str:
"""Return published local sentinel URL for this deployment."""
return build_test_django_local_url(self.deployment)
def create_test_deployment() -> CreatedTestDeployment:
    """Insert a randomly named tenant, hosted site, and deployment.

    All three rows are written inside one transaction. The hosted site's
    service port and the deployment's guest port share a single port number
    obtained from ``_find_free_port()``.

    Returns:
        Bundle holding the new tenant, hosted site, and deployment rows.
    """
    tenant_token = secrets.token_hex(4)
    site_token = secrets.token_hex(4)
    tenant_slug = f"tenant-{tenant_token}"
    site_slug = f"site-{site_token}"
    idempotency_key = f"test-deploy-{secrets.token_hex(8)}"
    guest_port = _find_free_port()

    # No real source archive exists here; the digest is derived from the identifiers.
    digest_input = f"{tenant_slug}:{site_slug}:{idempotency_key}"
    source_sha256 = hashlib.sha256(digest_input.encode()).hexdigest()

    with transaction.atomic():
        tenant = Tenant.objects.create(
            slug=tenant_slug,
            display_name=f"Test Tenant {tenant_token.upper()}",
        )
        hosted_site = HostedSite.objects.create(
            tenant=tenant,
            slug=site_slug,
            display_name=f"Test Site {site_token.upper()}",
            wsgi_module="tenant_site.wsgi:application",
            service_port=guest_port,
        )
        deployment = Deployment.objects.create(
            hosted_site=hosted_site,
            idempotency_key=idempotency_key,
            source_sha256=source_sha256,
            guest_port=guest_port,
        )

    created = CreatedTestDeployment(
        tenant=tenant,
        hosted_site=hosted_site,
        deployment=deployment,
    )
    return created
def queue_test_deployment_provisioning(deployment_id: str) -> str:
    """Submit the full provisioning task chain for one deployment.

    The broker configuration is checked first, then four immutable task
    signatures are chained in order and sent for asynchronous execution.

    Returns:
        Celery task id for the queued orchestration chain.
    """
    _ensure_async_broker_configuration()
    steps = (
        mark_deployment_provisioning.si(deployment_id),
        provision_test_runtime_services.si(deployment_id),
        mark_deployment_booting.si(deployment_id),
        provision_test_django_runtime.si(deployment_id),
    )
    async_result = chain(*steps).apply_async()
    return str(async_result.id)
def wait_for_test_deployment(
    deployment_id: str,
    *,
    timeout_seconds: float,
    poll_interval_seconds: float,
) -> Deployment:
    """Poll a deployment row until it is running, failed, or the wait expires.

    The row is re-read on every iteration. The deadline is checked after the
    status checks, so at least one read always happens.

    Returns:
        Deployment row in running state.

    Raises:
        RuntimeError: If deployment reaches failed state.
        TimeoutError: If deployment does not finish before timeout.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
        current_status = deployment.status
        if current_status == DeploymentStatus.RUNNING.value:
            return deployment
        if current_status == DeploymentStatus.FAILED.value:
            raise RuntimeError(deployment.last_error or "Local test deployment failed.")
        if time.monotonic() < deadline:
            time.sleep(poll_interval_seconds)
            continue
        msg = (
            "Timed out waiting for local test deployment "
            f"{deployment.id} to become ready. Current status: {deployment.status}."
        )
        raise TimeoutError(msg)
def provision_test_deployment(deployment_id: str) -> Deployment:
    """Run full local test deployment provisioning inline in the current process.

    The four provisioning steps run sequentially in this process. On failure
    the deployment is marked failed, a diagnostics snapshot is attempted, and
    the error is propagated.

    Returns:
        Deployment row after provisioning completes.

    Raises:
        RuntimeError: If runtime provisioning fails.
        TimeoutError: If the Django sentinel endpoint never becomes ready.
        ValueError: If runtime configuration is invalid.
    """
    try:
        mark_deployment_provisioning.run(deployment_id)
        provision_test_runtime_services.run(deployment_id)
        mark_deployment_booting.run(deployment_id)
        run_test_django_runtime_provisioning(deployment_id)
    # HostCommandError subclasses RuntimeError, so this clause must come first
    # for the captured stderr/stdout to be folded into the message.
    except HostCommandError as error:
        message = _build_host_command_failure_message(error)
        _mark_inline_deployment_failed(deployment_id, message=message)
        _capture_test_deployment_diagnostics_snapshot(deployment_id)
        raise RuntimeError(message) from error
    except (RuntimeError, TimeoutError, ValueError) as error:
        _mark_inline_deployment_failed(deployment_id, message=str(error))
        _capture_test_deployment_diagnostics_snapshot(deployment_id)
        # Re-raise unchanged so callers see the original exception type.
        raise
    # Re-read the row so the caller gets the state written during provisioning.
    return Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
def _ensure_async_broker_configuration() -> None:
    """Reject broker settings that cannot deliver tasks to a separate worker.

    Raises:
        RuntimeError: If ``settings.CELERY_BROKER_URL`` is empty or ``memory://``.
    """
    broker_url = settings.CELERY_BROKER_URL
    if broker_url and broker_url != "memory://":
        return
    if not broker_url:
        msg = "Async queueing requires TUSSILAGO_CELERY_BROKER_URL to be set to a real broker URL."
    else:
        msg = (
            "Async queueing cannot use memory:// because the worker cannot consume tasks from another process. "
            "Set TUSSILAGO_CELERY_BROKER_URL to a real broker such as Redis or RabbitMQ."
        )
    raise RuntimeError(msg)
def _mark_inline_deployment_failed(deployment_id: str, *, message: str) -> None:
    """Record a failure on the deployment row unless it is already failed.

    A row that is already in the failed state is left untouched, so its
    existing error message and finish time are kept.
    """
    deployment = Deployment.objects.get(pk=deployment_id)
    already_failed = deployment.status == DeploymentStatus.FAILED.value
    if not already_failed:
        deployment.status = DeploymentStatus.FAILED.value
        deployment.last_error = message
        deployment.finished_at = timezone.now()
        deployment.save(update_fields=["status", "last_error", "finished_at", "updated_at"])
def _build_host_command_failure_message(error: HostCommandError) -> str:
lines = [str(error)]
if error.stderr.strip():
lines.append(error.stderr.strip())
elif error.stdout.strip():
lines.append(error.stdout.strip())
return "\n".join(lines)
def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None:
    """Try to capture diagnostics for a deployment, ignoring expected failures.

    This is best-effort: ``OSError``, ``ValueError``, and a missing deployment
    row are swallowed so they do not mask the failure being reported.
    """
    ignorable = (OSError, ValueError, Deployment.DoesNotExist)
    try:
        capture_test_deployment_diagnostics(deployment_id)
    except ignorable:
        return
def _find_free_port() -> int:
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
probe.bind(("127.0.0.1", 0))
probe.listen(1)
return int(probe.getsockname()[1])

View file

@ -0,0 +1,297 @@
from __future__ import annotations
from pathlib import Path
from textwrap import dedent
from typing import TYPE_CHECKING
from django.conf import settings
from control_plane.models import RuntimeServiceKind
from control_plane.models import _build_limited_identifier
if TYPE_CHECKING:
from collections.abc import Iterable
from control_plane.models import Deployment
from control_plane.models import RuntimeService
# Port and image settings for the local Django test runtime.
TEST_DJANGO_CONTAINER_PORT = 8000
TEST_DJANGO_IMAGE_REFERENCE = "localhost/tussilago-test-django:latest"
TEST_DJANGO_WORKDIR = "/srv/test-app"

# In-container directories holding service secrets, and the password file in each.
TEST_POSTGRES_AUTH_DIR = "/run/postgres-auth"
TEST_REDIS_AUTH_DIR = "/run/redis-auth"
TEST_POSTGRES_PASSWORD_FILE = TEST_POSTGRES_AUTH_DIR + "/password"
TEST_REDIS_PASSWORD_FILE = TEST_REDIS_AUTH_DIR + "/password"
def build_test_django_project_root(deployment: Deployment) -> Path:
    """Return the directory that holds one deployment's generated Django app."""
    deployments_root = Path(settings.DATA_DIR) / "test-deployments"
    return deployments_root.joinpath(str(deployment.id), "django-app")
def build_test_django_image_reference() -> str:
    """Return the image reference of the reusable local Django runtime."""
    image_reference = TEST_DJANGO_IMAGE_REFERENCE
    return image_reference
def build_test_django_containerfile_path() -> Path:
    """Return the Containerfile path, resolved relative to this module's directory."""
    module_directory = Path(__file__).resolve().parent
    return module_directory.joinpath("container_assets", "test_django", "Containerfile")
def build_test_django_container_context_path() -> Path:
    """Return the build context: the directory containing the Containerfile."""
    containerfile = build_test_django_containerfile_path()
    return containerfile.parent
def build_test_django_local_url(deployment: Deployment) -> str:
    """Return the loopback sentinel URL on the deployment's guest port."""
    port = deployment.guest_port
    return f"http://127.0.0.1:{port}/sentinel/"
def build_test_django_container_names(deployment: Deployment) -> tuple[str, str]:
    """Return the container names for the server and the migrate step.

    Both names share the tenant slug, site slug, and the first 12 hex
    characters of the deployment id; only the prefix differs.

    Returns:
        ``(server_name, migrate_name)``, each limited to 128 characters.
    """
    deployment_suffix = deployment.id.hex[:12]
    hosted_site = deployment.hosted_site
    shared_parts = {
        "tenant_slug": hosted_site.tenant.slug,
        "site_slug": hosted_site.slug,
        "suffix": deployment_suffix,
        "max_length": 128,
    }
    server_name = _build_limited_identifier(prefix="django", **shared_parts)
    migrate_name = _build_limited_identifier(prefix="django-migrate", **shared_parts)
    return (server_name, migrate_name)
def build_test_django_container_labels(deployment: Deployment) -> tuple[tuple[str, str], ...]:
    """Return ``(label, value)`` pairs identifying a deployment's Django container."""
    hosted_site = deployment.hosted_site
    labels = {
        "tussilago.deployment-id": str(deployment.id),
        "tussilago.tenant-slug": hosted_site.tenant.slug,
        "tussilago.site-slug": hosted_site.slug,
        "tussilago.role": "django",
    }
    return tuple(labels.items())
def build_test_django_environment(
    deployment: Deployment,
    runtime_services: Iterable[RuntimeService],
) -> tuple[tuple[str, str], ...]:
    """Return container environment variables for the generated Django test app.

    Args:
        deployment: Deployment whose id and slugs are exported to the container.
        runtime_services: Services for the deployment; any iterable, including
            a one-shot iterator.

    Returns:
        Ordered ``(name, value)`` pairs for the container environment.

    Raises:
        ValueError: If PostgreSQL or Redis runtime services are missing, or
            the PostgreSQL service has no database name or username.
    """
    # Materialize once: the services are scanned twice below, and a one-shot
    # iterator would be partly consumed by the first lookup.
    services = tuple(runtime_services)
    postgres_service = _get_runtime_service(services, RuntimeServiceKind.POSTGRESQL.value)
    redis_service = _get_runtime_service(services, RuntimeServiceKind.REDIS.value)
    if not postgres_service.connection_database or not postgres_service.connection_username:
        msg = "PostgreSQL runtime service is missing connection credentials."
        raise ValueError(msg)
    return (
        ("DJANGO_SECRET_KEY", f"test-deployment-{deployment.id.hex}"),
        ("DJANGO_SETTINGS_MODULE", "tenant_site.settings"),
        ("PYTHONPATH", TEST_DJANGO_WORKDIR),
        ("TEST_TENANT_SLUG", deployment.hosted_site.tenant.slug),
        ("TEST_SITE_SLUG", deployment.hosted_site.slug),
        ("TEST_POSTGRES_HOST", "127.0.0.1"),
        ("TEST_POSTGRES_PORT", str(postgres_service.internal_port)),
        ("TEST_POSTGRES_DATABASE", postgres_service.connection_database),
        ("TEST_POSTGRES_USERNAME", postgres_service.connection_username),
        # Passwords are passed as file paths, not as values.
        ("TEST_POSTGRES_PASSWORD_FILE", TEST_POSTGRES_PASSWORD_FILE),
        ("TEST_REDIS_HOST", "127.0.0.1"),
        ("TEST_REDIS_PORT", str(redis_service.internal_port)),
        ("TEST_REDIS_PASSWORD_FILE", TEST_REDIS_PASSWORD_FILE),
    )
def build_test_django_secret_mounts(
    runtime_services: Iterable[RuntimeService],
) -> tuple[tuple[Path, str], ...]:
    """Return host-to-container secret mounts for generated Django test apps.

    Args:
        runtime_services: Services for the deployment; any iterable, including
            a one-shot iterator.

    Returns:
        ``(host_directory, container_directory)`` pairs, PostgreSQL first.

    Raises:
        ValueError: If the PostgreSQL or Redis runtime service is missing.
    """
    # Materialize once so both lookups see every service, even when the caller
    # passes a one-shot iterator.
    services = tuple(runtime_services)
    postgres_service = _get_runtime_service(services, RuntimeServiceKind.POSTGRESQL.value)
    redis_service = _get_runtime_service(services, RuntimeServiceKind.REDIS.value)
    return (
        (_runtime_service_secret_directory(postgres_service), TEST_POSTGRES_AUTH_DIR),
        (_runtime_service_secret_directory(redis_service), TEST_REDIS_AUTH_DIR),
    )
def write_test_django_project(
    deployment: Deployment,
    runtime_services: Iterable[RuntimeService],
) -> Path:
    """Generate the Django project files for one deployment on disk.

    The environment is built first purely as validation; its result is
    discarded, but it raises before any file is written when the services are
    incomplete.

    Returns:
        Root directory containing the generated Django project.
    """
    build_test_django_environment(deployment, runtime_services)
    project_root = build_test_django_project_root(deployment)
    package_root = project_root / "tenant_site"
    package_root.mkdir(parents=True, exist_ok=True)

    def write(target: Path, contents: str) -> None:
        target.write_text(contents, encoding="utf-8")

    write(project_root / "manage.py", _manage_py_contents())
    write(package_root / "__init__.py", "")
    write(package_root / "settings.py", _settings_contents())
    write(package_root / "urls.py", _urls_contents())
    write(package_root / "wsgi.py", _wsgi_contents())
    return project_root
def _get_runtime_service(
runtime_services: Iterable[RuntimeService],
kind: str,
) -> RuntimeService:
for runtime_service in runtime_services:
if runtime_service.kind == kind:
return runtime_service
msg = f"Missing runtime service kind: {kind}"
raise ValueError(msg)
def _runtime_service_secret_directory(runtime_service: RuntimeService) -> Path:
    """Return the host directory holding one runtime service's secrets."""
    services_root = Path(settings.DATA_DIR) / "runtime-services"
    return services_root.joinpath(
        str(runtime_service.deployment_id),
        runtime_service.kind,
        "secrets",
    )
def _manage_py_contents() -> str:
return dedent(
"""
#!/usr/bin/env python
import os
import sys
def main() -> None:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings")
from django.core.management import execute_from_command_line
execute_from_command_line(sys.argv)
if __name__ == "__main__":
main()
""",
).lstrip()
def _settings_contents() -> str:
return dedent(
"""
import os
from pathlib import Path
BASE_DIR = Path(__file__).resolve().parent.parent
SECRET_KEY = os.environ["DJANGO_SECRET_KEY"]
DEBUG = False
ALLOWED_HOSTS = ["127.0.0.1", "localhost"]
ROOT_URLCONF = "tenant_site.urls"
WSGI_APPLICATION = "tenant_site.wsgi.application"
INSTALLED_APPS = [
"django.contrib.contenttypes",
]
MIDDLEWARE = []
TIME_ZONE = "UTC"
USE_TZ = True
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
TEST_TENANT_SLUG = os.environ["TEST_TENANT_SLUG"]
TEST_SITE_SLUG = os.environ["TEST_SITE_SLUG"]
TEST_REDIS_HOST = os.environ["TEST_REDIS_HOST"]
TEST_REDIS_PORT = int(os.environ["TEST_REDIS_PORT"])
def _read_secret(env_key: str) -> str:
return Path(os.environ[env_key]).read_text(encoding="utf-8").strip()
DATABASES = {
"default": {
"ENGINE": "django.db.backends.postgresql",
"NAME": os.environ["TEST_POSTGRES_DATABASE"],
"USER": os.environ["TEST_POSTGRES_USERNAME"],
"PASSWORD": _read_secret("TEST_POSTGRES_PASSWORD_FILE"),
"HOST": os.environ["TEST_POSTGRES_HOST"],
"PORT": int(os.environ["TEST_POSTGRES_PORT"]),
},
}
TEST_REDIS_PASSWORD = _read_secret("TEST_REDIS_PASSWORD_FILE")
""",
).lstrip()
def _urls_contents() -> str:
return dedent(
"""
import redis
from django.conf import settings
from django.db import connection
from django.http import JsonResponse
from django.urls import path
def sentinel_view(request):
with connection.cursor() as cursor:
cursor.execute("SELECT 1")
postgres_value = int(cursor.fetchone()[0])
redis_key = f"sentinel:{settings.TEST_TENANT_SLUG}:{settings.TEST_SITE_SLUG}"
redis_client = redis.Redis(
host=settings.TEST_REDIS_HOST,
port=settings.TEST_REDIS_PORT,
password=settings.TEST_REDIS_PASSWORD,
decode_responses=True,
socket_timeout=1,
)
redis_client.set(redis_key, settings.TEST_SITE_SLUG, ex=60)
redis_value = redis_client.get(redis_key)
return JsonResponse(
{
"status": "ok",
"postgres": postgres_value,
"redis": redis_value,
"tenant": settings.TEST_TENANT_SLUG,
"site": settings.TEST_SITE_SLUG,
},
)
urlpatterns = [
path("sentinel/", sentinel_view),
]
""",
).lstrip()
def _wsgi_contents() -> str:
return dedent(
"""
import os
from django.core.wsgi import get_wsgi_application
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings")
application = get_wsgi_application()
""",
).lstrip()

View file

@ -0,0 +1 @@
"""Django management command package for control-plane workflows."""

View file

@ -0,0 +1 @@
"""Management commands for local control-plane operations."""

View file

@ -0,0 +1,57 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from control_plane.local_test_deployment import create_test_deployment
from control_plane.local_test_deployment import provision_test_deployment
from control_plane.local_test_deployment import queue_test_deployment_provisioning
if TYPE_CHECKING:
from argparse import ArgumentParser
class Command(BaseCommand):
    """Create a randomized local test deployment, then provision or queue it."""

    help = "Create a randomized tenant and provision a local test deployment inline by default."

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Add the ``--no-wait`` flag that switches to asynchronous queueing."""
        parser.add_argument(
            "--no-wait",
            action="store_true",
            help="Queue provisioning asynchronously and return immediately without running it inline.",
        )

    def handle(self, *_args: str, **options: bool | float) -> None:
        """Create the rows, print their identifiers, then provision.

        Output is written as ``key=value`` lines. Without ``--no-wait`` the
        provisioning runs inline; with it, a Celery chain is queued and the
        task id is printed instead.

        Raises:
            CommandError: If the deployment fails or never becomes ready.
        """
        emit = self.stdout.write
        created = create_test_deployment()
        emit(f"tenant_slug={created.tenant.slug}")
        emit(f"site_slug={created.hosted_site.slug}")
        emit(f"deployment_id={created.deployment.id}")
        emit(f"sentinel_url={created.sentinel_url}")

        if not options["no_wait"]:
            emit("execution_mode=inline")
            try:
                deployment = provision_test_deployment(str(created.deployment.id))
            except (RuntimeError, TimeoutError, ValueError) as error:
                raise CommandError(str(error)) from error
            emit(f"status={deployment.status}")
            return

        try:
            task_id = queue_test_deployment_provisioning(str(created.deployment.id))
        except RuntimeError as error:
            raise CommandError(str(error)) from error
        emit(f"celery_task_id={task_id}")
        emit("status=queued")

View file

@ -0,0 +1,289 @@
# Generated by Django 6.0.4 on 2026-04-27 12:21
import uuid
import auto_prefetch
import django.core.validators
import django.db.models.deletion
import django.db.models.manager
from django.db import migrations
from django.db import models
class Migration(migrations.Migration):
    # Initial schema for the control_plane app: Tenant, HostedSite, Deployment,
    # and RuntimeService, followed by their indexes and uniqueness constraints.
    initial = True
    dependencies = []
    operations = [
        # HostedSite is created first without its tenant foreign key; that
        # field is added by the AddField operation after Tenant is created.
        migrations.CreateModel(
            name="HostedSite",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("slug", models.SlugField(max_length=64)),
                ("display_name", models.CharField(max_length=255)),
                ("working_directory", models.CharField(default=".", max_length=255)),
                ("wsgi_module", models.CharField(max_length=255)),
                (
                    "service_port",
                    # Validators restrict the port to 1024-65535.
                    models.PositiveIntegerField(
                        default=8000,
                        validators=[
                            django.core.validators.MinValueValidator(1024),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
            ],
            options={
                "ordering": ("tenant__slug", "slug"),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        # Deployment: one provisioning attempt for a hosted site.
        migrations.CreateModel(
            name="Deployment",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("idempotency_key", models.CharField(max_length=64, unique=True)),
                ("source_sha256", models.CharField(max_length=64)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("queued", "Queued"),
                            ("provisioning", "Provisioning"),
                            ("booting", "Booting"),
                            ("running", "Running"),
                            ("failed", "Failed"),
                            ("stopped", "Stopped"),
                            ("destroying", "Destroying"),
                            ("destroyed", "Destroyed"),
                        ],
                        default="queued",
                        max_length=32,
                    ),
                ),
                (
                    "guest_ipv4",
                    models.GenericIPAddressField(
                        blank=True,
                        null=True,
                        protocol="IPv4",
                    ),
                ),
                (
                    "guest_port",
                    # Validators restrict the port to 1024-65535.
                    models.PositiveIntegerField(
                        default=8000,
                        validators=[
                            django.core.validators.MinValueValidator(1024),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
                (
                    "firecracker_vm_id",
                    # Nullable and unique: rows without a VM id store NULL.
                    models.CharField(blank=True, max_length=64, null=True, unique=True),
                ),
                ("last_error", models.TextField(blank=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "hosted_site",
                    auto_prefetch.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="deployments",
                        to="control_plane.hostedsite",
                    ),
                ),
            ],
            options={
                "ordering": ("-created_at",),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        # RuntimeService: a backing service (PostgreSQL or Redis) tied to a deployment.
        migrations.CreateModel(
            name="RuntimeService",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "kind",
                    models.CharField(
                        choices=[("postgresql", "PostgreSQL"), ("redis", "Redis")],
                        max_length=32,
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("queued", "Queued"),
                            ("provisioning", "Provisioning"),
                            ("ready", "Ready"),
                            ("failed", "Failed"),
                            ("destroying", "Destroying"),
                            ("destroyed", "Destroyed"),
                        ],
                        default="queued",
                        max_length=32,
                    ),
                ),
                ("container_name", models.CharField(max_length=128, unique=True)),
                ("network_name", models.CharField(max_length=128)),
                ("hostname", models.CharField(max_length=128)),
                ("image_reference", models.CharField(max_length=255)),
                (
                    "internal_port",
                    # Unlike the site and guest ports, any port from 1 is allowed.
                    models.PositiveIntegerField(
                        validators=[
                            django.core.validators.MinValueValidator(1),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
                ("connection_username", models.CharField(blank=True, max_length=63)),
                ("connection_database", models.CharField(blank=True, max_length=63)),
                ("connection_secret_ref", models.CharField(max_length=255)),
                (
                    "deployment",
                    auto_prefetch.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="runtime_services",
                        to="control_plane.deployment",
                    ),
                ),
            ],
            options={
                "ordering": ("deployment__created_at", "kind"),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        # Tenant: top-level owner of hosted sites.
        migrations.CreateModel(
            name="Tenant",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("slug", models.SlugField(max_length=64, unique=True)),
                ("display_name", models.CharField(max_length=255)),
            ],
            options={
                "ordering": ("slug",),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
                # NOTE(review): slug is already unique=True; this extra index
                # may be redundant — confirm against the model definition.
                "indexes": [models.Index(fields=["slug"], name="tenant_slug_idx")],
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        # Attach HostedSite to Tenant now that both tables exist.
        migrations.AddField(
            model_name="hostedsite",
            name="tenant",
            field=auto_prefetch.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="hosted_sites",
                to="control_plane.tenant",
            ),
        ),
        migrations.AddIndex(
            model_name="deployment",
            index=models.Index(
                fields=["hosted_site", "status"],
                name="deploy_site_status_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="deployment",
            index=models.Index(
                fields=["status", "created_at"],
                name="deploy_status_created_idx",
            ),
        ),
        # NOTE(review): the unique constraint on (deployment, kind) below covers
        # the same columns as this index — confirm both are wanted.
        migrations.AddIndex(
            model_name="runtimeservice",
            index=models.Index(
                fields=["deployment", "kind"],
                name="service_deploy_kind_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="runtimeservice",
            index=models.Index(
                fields=["kind", "status"],
                name="service_kind_status_idx",
            ),
        ),
        # At most one runtime service of each kind per deployment.
        migrations.AddConstraint(
            model_name="runtimeservice",
            constraint=models.UniqueConstraint(
                fields=("deployment", "kind"),
                name="runtime_service_unique_deployment_kind",
            ),
        ),
        # NOTE(review): the unique constraint on (tenant, slug) below covers the
        # same columns as this index — confirm both are wanted.
        migrations.AddIndex(
            model_name="hostedsite",
            index=models.Index(fields=["tenant", "slug"], name="site_tenant_slug_idx"),
        ),
        # Site slugs are unique within a tenant, not globally.
        migrations.AddConstraint(
            model_name="hostedsite",
            constraint=models.UniqueConstraint(
                fields=("tenant", "slug"),
                name="hosted_site_unique_tenant_slug",
            ),
        ),
    ]

View file

@ -0,0 +1 @@
"""Migration package for control-plane models."""

340
control_plane/models.py Normal file
View file

@ -0,0 +1,340 @@
from __future__ import annotations
import uuid
from dataclasses import dataclass
import auto_prefetch
from auto_prefetch import ForeignKey
from auto_prefetch import Manager
from django.core.validators import MaxValueValidator
from django.core.validators import MinValueValidator
from django.db import models
from django.db import transaction
from control_plane.runtime_plans import DjangoApplicationLaunchConfig
from control_plane.runtime_plans import build_django_server_command
class TimestampedModel(auto_prefetch.Model):
    """Provide created and updated timestamps for control-plane records.

    Abstract base class: concrete models that inherit from it get both
    timestamp columns without a separate table being created for this class.
    """

    # Populated automatically when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # Refreshed automatically each time the instance is saved.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta(auto_prefetch.Model.Meta):
        # Inherit the auto_prefetch Meta options while keeping this model abstract.
        abstract = True
class DeploymentStatus(models.TextChoices):
    """Track deployment lifecycle state inside control plane.

    Each member is declared as ``stored value, human-readable label``.
    """

    # Initial state: the default for Deployment.status.
    QUEUED = "queued", "Queued"
    PROVISIONING = "provisioning", "Provisioning"
    BOOTING = "booting", "Booting"
    RUNNING = "running", "Running"
    FAILED = "failed", "Failed"
    STOPPED = "stopped", "Stopped"
    DESTROYING = "destroying", "Destroying"
    DESTROYED = "destroyed", "Destroyed"
class RuntimeServiceKind(models.TextChoices):
    """Enumerate deployment-scoped backing services.

    Each member is declared as ``stored value, human-readable label``. Every
    kind listed here also has an entry in RUNTIME_SERVICE_SEED_SPECS.
    """

    POSTGRESQL = "postgresql", "PostgreSQL"
    REDIS = "redis", "Redis"
class RuntimeServiceStatus(models.TextChoices):
    """Track lifecycle state for a deployment-scoped service.

    Each member is declared as ``stored value, human-readable label``.
    """

    # Initial state: the default for RuntimeService.status.
    QUEUED = "queued", "Queued"
    PROVISIONING = "provisioning", "Provisioning"
    READY = "ready", "Ready"
    FAILED = "failed", "Failed"
    DESTROYING = "destroying", "Destroying"
    DESTROYED = "destroyed", "Destroyed"
@dataclass(frozen=True, slots=True)
class RuntimeServiceSeedSpec:
    """Describe default values for admin-seeded test runtime services.

    Instances are immutable; Deployment.ensure_test_runtime_services copies
    each attribute onto the RuntimeService row it creates.
    """

    # Hostname stored on the seeded RuntimeService.
    hostname: str
    # Container image reference stored on the seeded RuntimeService.
    image_reference: str
    # Port stored as RuntimeService.internal_port.
    internal_port: int
# Default seed values per runtime service kind. Deployment.ensure_test_runtime_services
# iterates this mapping and creates one RuntimeService for every kind that the
# deployment does not have yet.
RUNTIME_SERVICE_SEED_SPECS: dict[RuntimeServiceKind, RuntimeServiceSeedSpec] = {
    RuntimeServiceKind.POSTGRESQL: RuntimeServiceSeedSpec(
        hostname="postgres.internal",
        image_reference="docker.io/library/postgres:17-alpine",
        internal_port=5432,
    ),
    RuntimeServiceKind.REDIS: RuntimeServiceSeedSpec(
        hostname="redis.internal",
        image_reference="docker.io/library/redis:7.4-alpine",
        internal_port=6379,
    ),
}
def _build_limited_identifier(
    *,
    prefix: str,
    tenant_slug: str,
    site_slug: str,
    suffix: str,
    max_length: int,
) -> str:
    """Build a bounded identifier while preserving deployment uniqueness.

    The preferred form is ``prefix-tenant_slug-site_slug-suffix``. When that is
    too long, both slugs are truncated so the prefix and suffix survive intact.
    When there is not even room for one character of each slug, the slugs are
    dropped and ``prefix-suffix`` (cut to ``max_length``) is returned.

    Args:
        prefix: Static prefix to identify the type of resource (e.g. "net" or
            "postgres").
        tenant_slug: Hosted site tenant slug to include in the name for uniqueness.
        site_slug: Hosted site slug to include in the name for uniqueness.
        suffix: Unique suffix to ensure no collisions across deployments of the same site.
        max_length: Maximum length for the resulting identifier.

    Returns:
        A string that combines the prefix, tenant slug, site slug, and suffix,
        truncated as needed so that it never exceeds max_length.
    """
    candidate = f"{prefix}-{tenant_slug}-{site_slug}-{suffix}"
    if len(candidate) <= max_length:
        return candidate

    # Characters left for both slugs after reserving the prefix, the suffix
    # and the three "-" separators of the four-part form.
    remaining_length = max_length - len(prefix) - len(suffix) - 3
    if remaining_length < 2:
        # Fewer than one character per slug: forcing a minimum of one each
        # would overflow max_length, so fall back to the two-part form.
        return f"{prefix}-{suffix}"[:max_length]

    tenant_budget = remaining_length // 2
    # The site slug receives the odd leftover character, if any.
    site_budget = remaining_length - tenant_budget
    return "-".join(
        (
            prefix,
            tenant_slug[:tenant_budget],
            site_slug[:site_budget],
            suffix,
        ),
    )
def _build_limited_connection_name(*, site_slug: str, suffix: str, max_length: int = 63) -> str:
    """Build a bounded ``site_slug-suffix`` name that stays unique per deployment.

    Args:
        site_slug: Hosted site slug that makes the name recognisable.
        suffix: Per-deployment suffix; kept whole whenever it fits.
        max_length: Upper bound on the result length (63 by default).

    Returns:
        ``site_slug-suffix`` when it fits. Otherwise the slug is shortened to
        make room for the suffix, or, when the suffix alone fills the budget,
        the suffix cut down to max_length.
    """
    full_name = "-".join((site_slug, suffix))
    if len(full_name) <= max_length:
        return full_name

    # Room left for the slug once the suffix and one separator are reserved.
    slug_room = max_length - len(suffix) - 1
    if slug_room <= 0:
        return suffix[:max_length]
    return site_slug[:slug_room] + "-" + suffix
class Tenant(TimestampedModel):
    """Represent a tenant that owns hosted applications and deployments."""

    # Random UUID primary key, not editable through forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Globally unique short identifier; used by HostedSite.__str__ and in
    # generated runtime service names.
    slug = models.SlugField(max_length=64, unique=True)
    display_name = models.CharField(max_length=255)

    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("slug",)
        # NOTE(review): slug is already unique=True, which normally implies an
        # index; this explicit index may be redundant — confirm before removing.
        indexes = [models.Index(fields=("slug",), name="tenant_slug_idx")]

    def __str__(self) -> str:
        """Return the human-readable tenant name."""
        return self.display_name
class HostedSite(TimestampedModel):
    """Describe a deployable Django site owned by a tenant."""

    # Random UUID primary key, not editable through forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Owning tenant; deleting the tenant deletes its sites.
    tenant = ForeignKey(Tenant, on_delete=models.CASCADE, related_name="hosted_sites")
    # Unique per tenant (see the constraint below), not globally.
    slug = models.SlugField(max_length=64)
    display_name = models.CharField(max_length=255)
    working_directory = models.CharField(max_length=255, default=".")
    # Passed to Gunicorn as the application module by Deployment.build_django_launch_command.
    wsgi_module = models.CharField(max_length=255)
    # Validators restrict the port to the 1024-65535 range.
    service_port = models.PositiveIntegerField(
        default=8000,
        validators=[MinValueValidator(1024), MaxValueValidator(65535)],
    )

    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("tenant__slug", "slug")
        constraints = [
            models.UniqueConstraint(
                fields=("tenant", "slug"),
                name="hosted_site_unique_tenant_slug",
            ),
        ]
        # NOTE(review): this index covers the same columns as the unique
        # constraint above and may be redundant — confirm before removing.
        indexes = [
            models.Index(fields=("tenant", "slug"), name="site_tenant_slug_idx"),
        ]

    def __str__(self) -> str:
        """Return ``tenant-slug/site-slug``; reads the related tenant."""
        return f"{self.tenant.slug}/{self.slug}"
class Deployment(TimestampedModel):
    """Track a single deployable runtime instance for a hosted site."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    hosted_site = ForeignKey(HostedSite, on_delete=models.CASCADE, related_name="deployments")
    idempotency_key = models.CharField(max_length=64, unique=True)
    source_sha256 = models.CharField(max_length=64)
    status = models.CharField(
        max_length=32,
        choices=DeploymentStatus,
        default=DeploymentStatus.QUEUED,
    )
    guest_ipv4 = models.GenericIPAddressField(protocol="IPv4", blank=True, null=True)
    guest_port = models.PositiveIntegerField(
        default=8000,
        validators=[MinValueValidator(1024), MaxValueValidator(65535)],
    )
    firecracker_vm_id = models.CharField(max_length=64, blank=True, null=True, unique=True)
    last_error = models.TextField(blank=True)
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)

    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("-created_at",)
        indexes = [
            models.Index(fields=("hosted_site", "status"), name="deploy_site_status_idx"),
            models.Index(fields=("status", "created_at"), name="deploy_status_created_idx"),
        ]

    def __str__(self) -> str:
        """Return the hosted site label followed by the current status."""
        return f"{self.hosted_site} [{self.status}]"

    def build_django_launch_command(self) -> tuple[str, ...]:
        """Build the Gunicorn launch command for this deployment's Django app.

        The server binds to all interfaces on ``guest_port`` and serves the
        hosted site's WSGI module.

        Returns:
            Tuple of command arguments ready for subprocess execution inside a guest VM.
        """
        return build_django_server_command(
            DjangoApplicationLaunchConfig(
                wsgi_module=self.hosted_site.wsgi_module,
                bind_host="0.0.0.0",  # noqa: S104
                port=self.guest_port,
            ),
        )

    def ensure_test_runtime_services(self) -> tuple[RuntimeService, ...]:
        """Create a test runtime service for every seed kind this deployment lacks.

        Kinds that already have a RuntimeService row are left untouched. The
        lookup and the bulk insert run inside a single transaction.

        Returns:
            Newly created runtime service records (empty when nothing was missing).
        """
        site = self.hosted_site
        tenant_slug = site.tenant.slug
        site_slug = site.slug
        # First 12 hex digits of the deployment UUID keep generated names unique.
        deployment_suffix = self.id.hex[:12]
        network_name = _build_limited_identifier(
            prefix="net",
            tenant_slug=tenant_slug,
            site_slug=site_slug,
            suffix=deployment_suffix,
            max_length=128,
        )
        connection_name = _build_limited_connection_name(
            site_slug=site_slug,
            suffix=deployment_suffix,
        )

        def build_service(kind: RuntimeServiceKind, seed_spec: RuntimeServiceSeedSpec) -> RuntimeService:
            """Assemble one unsaved RuntimeService from its seed defaults."""
            # Only PostgreSQL gets a username and database name; other kinds store "".
            postgres_only_name = connection_name if kind == RuntimeServiceKind.POSTGRESQL else ""
            return RuntimeService(
                deployment=self,
                kind=kind.value,
                status=RuntimeServiceStatus.QUEUED.value,
                container_name=_build_limited_identifier(
                    prefix=kind.value,
                    tenant_slug=tenant_slug,
                    site_slug=site_slug,
                    suffix=deployment_suffix,
                    max_length=128,
                ),
                network_name=network_name,
                hostname=seed_spec.hostname,
                image_reference=seed_spec.image_reference,
                internal_port=seed_spec.internal_port,
                connection_username=postgres_only_name,
                connection_database=postgres_only_name,
                connection_secret_ref=(f"secret://{kind.value}/{tenant_slug}/{site_slug}/{deployment_suffix}"),
            )

        with transaction.atomic():
            existing_kinds = set(
                RuntimeService.objects.filter(deployment=self).values_list("kind", flat=True),
            )
            missing_services = [
                build_service(kind, seed_spec)
                for kind, seed_spec in RUNTIME_SERVICE_SEED_SPECS.items()
                if kind.value not in existing_kinds
            ]
            if missing_services:
                RuntimeService.objects.bulk_create(missing_services)
        return tuple(missing_services)
class RuntimeService(TimestampedModel):
    """Track a dedicated PostgreSQL or Redis service for one deployment."""

    # Random UUID primary key, not editable through forms.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Owning deployment; deleting the deployment deletes its services.
    deployment = ForeignKey(Deployment, on_delete=models.CASCADE, related_name="runtime_services")
    # At most one service of each kind per deployment (see the constraint below).
    kind = models.CharField(max_length=32, choices=RuntimeServiceKind)
    status = models.CharField(
        max_length=32,
        choices=RuntimeServiceStatus,
        default=RuntimeServiceStatus.QUEUED,
    )
    # Globally unique container name.
    container_name = models.CharField(max_length=128, unique=True)
    network_name = models.CharField(max_length=128)
    hostname = models.CharField(max_length=128)
    image_reference = models.CharField(max_length=255)
    # Validators restrict the port to the 1-65535 range.
    internal_port = models.PositiveIntegerField(
        validators=[MinValueValidator(1), MaxValueValidator(65535)],
    )
    # Blank for kinds without a username/database; the seeding code only
    # fills these two fields for PostgreSQL.
    connection_username = models.CharField(max_length=63, blank=True)
    connection_database = models.CharField(max_length=63, blank=True)
    # Reference string for the service secret (seeded as a "secret://..." value),
    # not the secret itself.
    connection_secret_ref = models.CharField(max_length=255)

    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("deployment__created_at", "kind")
        constraints = [
            models.UniqueConstraint(
                fields=("deployment", "kind"),
                name="runtime_service_unique_deployment_kind",
            ),
        ]
        # NOTE(review): the first index covers the same columns as the unique
        # constraint above and may be redundant — confirm before removing.
        indexes = [
            models.Index(fields=("deployment", "kind"), name="service_deploy_kind_idx"),
            models.Index(fields=("kind", "status"), name="service_kind_status_idx"),
        ]

    def __str__(self) -> str:
        """Return ``deployment-id:kind`` without loading the related deployment."""
        return f"{self.deployment_id}:{self.kind}"

View file

@ -0,0 +1,254 @@
from __future__ import annotations
import json
from typing import TYPE_CHECKING
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlopen
from django.utils import timezone
from control_plane.host_commands import HostCommandError
from control_plane.host_commands import run_host_command
from control_plane.local_test_runtime import build_test_django_container_names
from control_plane.local_test_runtime import build_test_django_local_url
from control_plane.local_test_runtime import build_test_django_project_root
from control_plane.models import Deployment
from control_plane.models import DeploymentStatus
if TYPE_CHECKING:
from collections.abc import Iterable
from pathlib import Path
from control_plane.models import RuntimeService
# Number of trailing log lines requested from `podman logs --tail` per container.
MAX_DIAGNOSTIC_LOG_LINES = 200
# Default timeout, in seconds, for one sentinel HTTP probe.
DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS = 2.0
# Recursive aliases describing values that can be serialised by json.dumps.
type JsonPrimitive = bool | int | float | str | None
type JsonValue = JsonPrimitive | list[JsonValue] | dict[str, JsonValue]
def build_test_deployment_diagnostics_root(deployment: Deployment) -> Path:
    """Return the directory that holds persisted diagnostics for a deployment.

    The directory is named ``diagnostics`` and sits next to the deployment's
    generated test Django project root.
    """
    project_root = build_test_django_project_root(deployment)
    return project_root.parent.joinpath("diagnostics")
def build_test_deployment_diagnostics_snapshot_path(deployment: Deployment) -> Path:
    """Return the path of ``snapshot.json`` inside the deployment's diagnostics root."""
    diagnostics_root = build_test_deployment_diagnostics_root(deployment)
    return diagnostics_root.joinpath("snapshot.json")
def capture_test_deployment_diagnostics(deployment_id: str) -> None:
    """Capture current pod, container, and log state for one deployment.

    Loads the deployment with its hosted site, tenant and runtime services,
    then overwrites the deployment's JSON snapshot file with fresh diagnostics.
    The parent directory is created when missing.
    """
    deployments = Deployment.objects.select_related("hosted_site__tenant").prefetch_related("runtime_services")
    deployment = deployments.get(pk=deployment_id)

    target_path = build_test_deployment_diagnostics_snapshot_path(deployment)
    target_path.parent.mkdir(parents=True, exist_ok=True)

    serialized_snapshot = json.dumps(_build_diagnostics_snapshot(deployment), indent=2)
    target_path.write_text(serialized_snapshot, encoding="utf-8")
def load_test_deployment_diagnostics(deployment: Deployment) -> dict[str, JsonValue] | None:
    """Load the latest persisted diagnostics snapshot for one deployment.

    The file is read directly instead of being checked with ``exists()`` first,
    so a snapshot removed between the check and the read cannot raise.

    Returns:
        Parsed diagnostics payload, or None when no snapshot has been captured yet.
        When the snapshot exists but cannot be decoded or parsed, or is not a
        JSON object, a payload with a ``capture_error`` message and
        ``captured_at`` set to None is returned instead.
    """
    snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment)
    try:
        payload = json.loads(snapshot_path.read_text(encoding="utf-8"))
    except (FileNotFoundError, NotADirectoryError):
        # Same outcome the previous exists() check produced for a missing file.
        return None
    except (UnicodeDecodeError, json.JSONDecodeError) as error:
        # A corrupt or half-written snapshot is reported, not raised.
        return {
            "capture_error": f"Unable to parse diagnostics snapshot: {error}",
            "captured_at": None,
        }
    if not isinstance(payload, dict):
        return {
            "capture_error": "Diagnostics snapshot is not a JSON object.",
            "captured_at": None,
        }
    return payload
def probe_test_deployment_health(
    deployment: Deployment,
    *,
    timeout_seconds: float = DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS,
) -> dict[str, JsonValue]:
    """Probe the generated deployment sentinel endpoint and return structured status.

    No request is made unless the deployment is RUNNING or BOOTING; the result
    then keeps its initial "not-running" state.

    Args:
        deployment: Deployment whose sentinel URL is probed.
        timeout_seconds: Timeout passed to the HTTP request.

    Returns:
        JSON-serializable probe state describing current sentinel reachability and payload.
        ``http_status`` holds the response status whenever the server answered,
        including error responses and responses whose body could not be parsed.
    """
    sentinel_url = build_test_django_local_url(deployment)
    result: dict[str, JsonValue] = {
        "checked_at": timezone.now().isoformat(),
        "deployment_id": str(deployment.id),
        "deployment_status": deployment.status,
        "sentinel_url": sentinel_url,
        "ok": False,
        "status": "not-running",
        "label": "Not Running",
        "payload": None,
        "error": "",
        "http_status": None,
    }
    if deployment.status not in {DeploymentStatus.RUNNING.value, DeploymentStatus.BOOTING.value}:
        return result
    try:
        with urlopen(sentinel_url, timeout=timeout_seconds) as response:  # noqa: S310
            # Record the status before parsing so a bad body still reports it.
            result["http_status"] = int(getattr(response, "status", 200))
            payload = json.loads(response.read().decode("utf-8"))
    except HTTPError as error:
        # The server answered with an error status. Keep the existing
        # "unreachable" classification but expose the HTTP status code.
        result["status"] = "unreachable"
        result["label"] = "Unreachable"
        result["error"] = str(error)
        result["http_status"] = error.code
        return result
    except (URLError, OSError, UnicodeDecodeError, json.JSONDecodeError) as error:
        result["status"] = "unreachable"
        result["label"] = "Unreachable"
        result["error"] = str(error)
        return result

    if isinstance(payload, dict):
        result["payload"] = payload
        if payload.get("status") == "ok":
            result["ok"] = True
            result["status"] = "healthy"
            result["label"] = "Healthy"
        else:
            result["status"] = "unexpected-payload"
            result["label"] = "Unexpected"
    else:
        # Non-object JSON is wrapped so the payload field stays a JSON object.
        result["payload"] = {"value": str(payload)}
        result["status"] = "unexpected-payload"
        result["label"] = "Unexpected"
    return result
def _build_diagnostics_snapshot(deployment: Deployment) -> dict[str, JsonValue]:
    """Assemble the JSON-serialisable diagnostics payload for one deployment.

    Combines control-plane fields of the deployment with pod and container
    state collected from the host. The pod name is taken from the network name
    of the first runtime service (ordered by kind), or is empty when there are
    no runtime services.
    """
    services = _ordered_runtime_services(deployment.runtime_services.all())
    django_container_name, _ = build_test_django_container_names(deployment)
    pod_name = services[0].network_name if services else ""
    hosted_site = deployment.hosted_site

    snapshot: dict[str, JsonValue] = {
        "captured_at": timezone.now().isoformat(),
        "deployment_id": str(deployment.id),
        "deployment_status": deployment.status,
        "tenant_slug": hosted_site.tenant.slug,
        "site_slug": hosted_site.slug,
        "guest_port": deployment.guest_port,
        "sentinel_url": build_test_django_local_url(deployment),
        "last_error": deployment.last_error,
    }
    # Host inspection runs in the same order as the keys appear: pod, django, services.
    snapshot["pod"] = _collect_pod_diagnostics(pod_name)
    snapshot["django"] = _collect_container_diagnostics(
        container_name=django_container_name,
        control_plane_status=deployment.status,
        label="django",
    )
    service_reports = []
    for service in services:
        service_reports.append(
            _collect_container_diagnostics(
                container_name=service.container_name,
                control_plane_status=service.status,
                label=service.kind,
            ),
        )
    snapshot["runtime_services"] = service_reports
    return snapshot
def _ordered_runtime_services(runtime_services: Iterable[RuntimeService]) -> tuple[RuntimeService, ...]:
    """Return the given runtime services as a tuple sorted by their ``kind``."""
    ordered = list(runtime_services)
    ordered.sort(key=lambda service: service.kind)
    return tuple(ordered)
def _collect_pod_diagnostics(pod_name: str) -> dict[str, JsonValue]:
    """Report the Podman state of a pod as a ``name``/``status``/``error`` mapping.

    An empty pod name, or a failing ``podman pod inspect``, is reported with
    status "missing" and an explanatory error. An empty inspect output is
    reported as "unknown".
    """

    def report(name: str, status: str, error: str) -> dict[str, JsonValue]:
        """Build the result mapping with a fixed key order."""
        return {"name": name, "status": status, "error": error}

    if not pod_name:
        return report("", "missing", "No runtime services are linked to this deployment yet.")

    inspect_command = ("podman", "pod", "inspect", "--format", "{{.State}}", pod_name)
    try:
        result = run_host_command(command=inspect_command, timeout_seconds=20.0)
    except HostCommandError as error:
        return report(pod_name, "missing", _format_host_command_error(error))
    return report(pod_name, result.stdout.strip() or "unknown", "")
def _collect_container_diagnostics(
    *,
    container_name: str,
    control_plane_status: str,
    label: str,
) -> dict[str, JsonValue]:
    """Collect Podman status and recent logs for one container.

    Args:
        container_name: Podman container to inspect.
        control_plane_status: Status recorded by the control plane, echoed back unchanged.
        label: Short label identifying the container in the snapshot.

    Returns:
        Mapping with the label, names, both statuses, the logs, and any
        inspect or log error text.
    """
    # Inspect first, then read logs, so the host commands run in a fixed order.
    status_and_error = _inspect_container_status(container_name)
    logs_and_error = _read_container_logs(container_name)
    report: dict[str, JsonValue] = {
        "label": label,
        "container_name": container_name,
        "control_plane_status": control_plane_status,
        "container_status": status_and_error[0],
        "logs": logs_and_error[0],
        "inspect_error": status_and_error[1],
        "log_error": logs_and_error[1],
    }
    return report
def _inspect_container_status(container_name: str) -> tuple[str, str]:
    """Return ``(status, error)`` for one container from ``podman inspect``.

    The status is the health status when the container reports one, otherwise
    its plain state. A failing command yields ``("missing", <error text>)``;
    empty output yields ``("unknown", "")``.
    """
    status_template = "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}"
    inspect_command = ("podman", "inspect", "--format", status_template, container_name)
    try:
        result = run_host_command(command=inspect_command, timeout_seconds=20.0)
    except HostCommandError as error:
        return ("missing", _format_host_command_error(error))
    reported_status = result.stdout.strip()
    return (reported_status if reported_status else "unknown", "")
def _read_container_logs(container_name: str) -> tuple[str, str]:
    """Return ``(logs, error)`` holding the container's most recent log lines.

    At most MAX_DIAGNOSTIC_LOG_LINES trailing lines are requested. Standard
    output is preferred; standard error is used when stdout is empty. A failing
    command yields ``("", <error text>)``.
    """
    logs_command = ("podman", "logs", "--tail", str(MAX_DIAGNOSTIC_LOG_LINES), container_name)
    try:
        result = run_host_command(command=logs_command, timeout_seconds=20.0)
    except HostCommandError as error:
        return ("", _format_host_command_error(error))
    return (result.stdout.strip() or result.stderr.strip(), "")
def _format_host_command_error(error: HostCommandError) -> str:
    """Return the most useful text for a failed host command.

    Preference order: stripped stderr, then stripped stdout, then ``str(error)``.
    """
    return error.stderr.strip() or error.stdout.strip() or str(error)

View file

@ -0,0 +1,366 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from collections.abc import Sequence
from pathlib import Path
@dataclass(frozen=True, slots=True)
class PostgresContainerConfig:
    """Input required to build a Podman command for a tenant PostgreSQL service.

    Consumed by build_postgres_container_command; instances are immutable.
    """

    # Podman container name (--name).
    container_name: str
    # Network and hostname are only used when pod_name is None.
    network_name: str
    hostname: str
    # Passed to the container as POSTGRES_USER / POSTGRES_DB and used by the health check.
    username: str
    database_name: str
    # Host directory mounted read-write at /var/lib/postgresql/data.
    data_directory: Path
    # Host file mounted read-only at /run/secrets/postgres-password.
    password_file: Path
    # When set, the container joins this pod instead of a named network.
    pod_name: str | None = None
    image_reference: str = "docker.io/library/postgres:17-alpine"
    # Memory limit in MiB (--memory) and CPU limit (--cpus).
    memory_limit_mib: int = 512
    cpu_limit: float = 1.0
@dataclass(frozen=True, slots=True)
class RedisContainerConfig:
    """Input required to build a Podman command for a tenant Redis service.

    Consumed by build_redis_container_command; instances are immutable.
    """

    # Podman container name (--name).
    container_name: str
    # Network and hostname are only used when pod_name is None.
    network_name: str
    hostname: str
    # Host directory mounted read-write at /data.
    data_directory: Path
    # Host file mounted read-only at /run/secrets/redis-password.
    password_file: Path
    # When set, the container joins this pod instead of a named network.
    pod_name: str | None = None
    image_reference: str = "docker.io/library/redis:7.4-alpine"
    # Memory limit in MiB (--memory) and CPU limit (--cpus).
    memory_limit_mib: int = 256
    cpu_limit: float = 0.5
@dataclass(frozen=True, slots=True)
class DjangoApplicationLaunchConfig:
    """Input required to build a uv-driven Gunicorn command for a Django app.

    Consumed by build_django_server_command; instances are immutable.
    """

    # WSGI application passed to Gunicorn as its final argument.
    wsgi_module: str
    # Combined into Gunicorn's --bind value as "bind_host:port".
    port: int = 8000
    bind_host: str = "127.0.0.1"
    # Gunicorn --workers value.
    workers: int = 2
    # Run "<python> -m gunicorn" directly instead of going through "uv run".
    # Mutually exclusive with uv_project_path.
    python_executable: Path | None = None
    # Passed to "uv run --project". Mutually exclusive with python_executable.
    uv_project_path: Path | None = None
@dataclass(frozen=True, slots=True)
class DjangoContainerImageBuildConfig:
    """Input required to build the reusable local Django test image.

    Consumed by build_django_container_image_command; instances are immutable.
    """

    # Tag applied to the built image (--tag).
    image_reference: str
    # Containerfile passed to "podman build --file".
    containerfile_path: Path
    # Build context directory, passed as the final "podman build" argument.
    context_directory: Path
@dataclass(frozen=True, slots=True)
class DjangoContainerRuntimeConfig:
    """Input required to run a local Django test container with Podman.

    Consumed by build_django_container_run_command; instances are immutable.
    """

    # Podman container name (--name).
    container_name: str
    # Network and hostname are only used when pod_name is None.
    network_name: str
    hostname: str
    image_reference: str
    # Host directory mounted read-only at working_directory.
    application_directory: Path
    # When set, the container joins this pod instead of a named network.
    pod_name: str | None = None
    # Published as 127.0.0.1:host_port -> guest_port, and only when pod_name is None.
    host_port: int | None = None
    guest_port: int = 8000
    # Container working directory (--workdir) and application mount target.
    working_directory: str = "/srv/test-app"
    # (key, value) pairs emitted as --env KEY=VALUE.
    environment: tuple[tuple[str, str], ...] = ()
    # (host path, container path) pairs mounted read-only.
    secret_mounts: tuple[tuple[Path, str], ...] = ()
    # (key, value) pairs emitted as --label KEY=VALUE.
    labels: tuple[tuple[str, str], ...] = ()
    # Memory limit in MiB (--memory) and CPU limit (--cpus).
    memory_limit_mib: int = 256
    cpu_limit: float = 1.0
def build_postgres_container_command(
    config: PostgresContainerConfig,
) -> tuple[str, ...]:
    """Build a hardened Podman command for a deployment-scoped PostgreSQL service.

    The container runs detached, replaces any container of the same name, drops
    all capabilities except the few added back below, uses a read-only root
    filesystem with tmpfs scratch space, and reads its password from a mounted
    secret file. It joins ``config.pod_name`` when that is set, otherwise the
    named network with the configured hostname.

    Args:
        config: Container settings: names, credentials, paths and limits.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.
    """
    import shlex  # local import keeps this block self-contained

    command = [
        "podman",
        "run",
        "--detach",
        "--replace",
        "--name",
        config.container_name,
    ]
    if config.pod_name is None:
        command.extend(("--network", config.network_name, "--hostname", config.hostname))
    else:
        command.extend(("--pod", config.pod_name))

    # A plain-string health command is interpreted by a shell inside the
    # container, so the interpolated names are quoted. Slug-like names
    # (letters, digits, "-", "_") come out of shlex.quote unchanged.
    health_command = f"pg_isready -U {shlex.quote(config.username)} -d {shlex.quote(config.database_name)}"
    command.extend(
        [
            "--cap-drop=all",
            "--cap-add=CHOWN",
            "--cap-add=FOWNER",
            "--cap-add=SETUID",
            "--cap-add=SETGID",
            "--cap-add=DAC_OVERRIDE",
            "--security-opt=no-new-privileges",
            "--pids-limit=256",
            "--memory",
            f"{config.memory_limit_mib}m",
            "--cpus",
            str(config.cpu_limit),
            "--read-only",
            "--tmpfs",
            "/tmp:rw,nosuid,nodev,noexec,size=64m",  # noqa: S108
            "--tmpfs",
            "/var/run/postgresql:rw,nosuid,nodev,noexec,size=16m",
            "--volume",
            f"{config.data_directory}:/var/lib/postgresql/data:Z,rw",
            "--volume",
            f"{config.password_file}:/run/secrets/postgres-password:Z,ro",
            "--env",
            f"POSTGRES_USER={config.username}",
            "--env",
            f"POSTGRES_DB={config.database_name}",
            "--env",
            "POSTGRES_PASSWORD_FILE=/run/secrets/postgres-password",
            "--health-cmd",
            health_command,
            "--health-interval",
            "10s",
            "--health-retries",
            "5",
            config.image_reference,
            "postgres",
            "-c",
            "listen_addresses=*",
            "-c",
            "password_encryption=scram-sha-256",
        ],
    )
    return tuple(command)
def build_redis_container_command(config: RedisContainerConfig) -> tuple[str, ...]:
    """Build a hardened Podman command for a deployment-scoped Redis service.

    The container runs detached, replaces any container of the same name, drops
    all capabilities, uses a read-only root filesystem, and starts redis-server
    with the password read from a mounted secret file. It joins
    ``config.pod_name`` when that is set, otherwise the named network with the
    configured hostname.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.
    """
    if config.pod_name is not None:
        attachment: tuple[str, ...] = ("--pod", config.pod_name)
    else:
        attachment = ("--network", config.network_name, "--hostname", config.hostname)

    # Both commands read the password inside the container from the mounted secret.
    health_check = "sh -eu -c 'redis-cli --no-auth-warning -a \"$(cat /run/secrets/redis-password)\" ping'"
    server_script = 'redis_password=$(cat /run/secrets/redis-password) && exec redis-server --appendonly yes --protected-mode yes --requirepass "${redis_password}"'

    return (
        "podman",
        "run",
        "--detach",
        "--replace",
        "--name",
        config.container_name,
        *attachment,
        "--cap-drop=all",
        "--security-opt=no-new-privileges",
        "--pids-limit=128",
        "--memory",
        f"{config.memory_limit_mib}m",
        "--cpus",
        str(config.cpu_limit),
        "--read-only",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,noexec,size=32m",  # noqa: S108
        "--volume",
        f"{config.data_directory}:/data:Z,rw",
        "--volume",
        f"{config.password_file}:/run/secrets/redis-password:Z,ro",
        "--health-cmd",
        health_check,
        "--health-interval",
        "10s",
        "--health-retries",
        "5",
        config.image_reference,
        "sh",
        "-eu",
        "-c",
        server_script,
    )
def build_django_server_command(
    config: DjangoApplicationLaunchConfig,
) -> tuple[str, ...]:
    """Build the Gunicorn command line for a hosted Django deployment.

    Gunicorn is started through ``<python> -m gunicorn`` when a Python
    executable is configured, otherwise through ``uv run`` (with ``--project``
    when a uv project path is configured).

    Returns:
        Tuple of command arguments ready for subprocess execution.

    Raises:
        ValueError: If both direct-python and uv-project execution modes are requested.
    """
    interpreter = config.python_executable
    project_path = config.uv_project_path
    if interpreter is not None and project_path is not None:
        msg = "python_executable and uv_project_path are mutually exclusive"
        raise ValueError(msg)

    if interpreter is not None:
        launcher: tuple[str, ...] = (str(interpreter), "-m", "gunicorn")
    elif project_path is not None:
        launcher = ("uv", "run", "--project", str(project_path), "gunicorn")
    else:
        launcher = ("uv", "run", "gunicorn")

    return (
        *launcher,
        "--bind",
        f"{config.bind_host}:{config.port}",
        "--workers",
        str(config.workers),
        # "-" sends access and error logs to the process's standard streams.
        "--access-logfile",
        "-",
        "--error-logfile",
        "-",
        "--capture-output",
        "--graceful-timeout",
        "30",
        "--timeout",
        "60",
        config.wsgi_module,
    )
def build_django_container_image_command(
    config: DjangoContainerImageBuildConfig,
) -> tuple[str, ...]:
    """Build the ``podman build`` command for the reusable Django test image.

    The image is tagged with ``config.image_reference`` and built from the
    configured Containerfile and context directory.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.
    """
    arguments = ["podman", "build", "--pull=missing"]
    arguments += ["--tag", config.image_reference]
    arguments += ["--file", str(config.containerfile_path)]
    # The build context is the final positional argument.
    arguments.append(str(config.context_directory))
    return tuple(arguments)
def build_django_container_run_command(
    config: DjangoContainerRuntimeConfig,
    *,
    command: Sequence[str],
    detach: bool,
    remove: bool = False,
) -> tuple[str, ...]:
    """Build a hardened Podman command for a local Django test container.

    Args:
        config: Container settings: names, image, mounts, environment and limits.
        command: Command executed inside the container; appended after the image.
        detach: Run detached, replacing any container with the same name.
        remove: Add ``--rm`` to the Podman invocation.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.

    Raises:
        ValueError: If the command sequence is empty.
    """
    if not command:
        msg = "command must not be empty"
        raise ValueError(msg)

    standalone = config.pod_name is None

    arguments: list[str] = ["podman", "run"]
    if detach:
        arguments += ["--detach", "--replace"]
    if remove:
        arguments += ["--rm"]
    arguments += ["--name", config.container_name]
    if standalone:
        arguments += ["--network", config.network_name, "--hostname", config.hostname]
    else:
        arguments += ["--pod", config.pod_name]

    arguments += [
        "--workdir",
        config.working_directory,
        "--cap-drop=all",
        "--security-opt=no-new-privileges",
        "--pids-limit=256",
        "--memory",
        f"{config.memory_limit_mib}m",
        "--cpus",
        str(config.cpu_limit),
        "--read-only",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,noexec,size=64m",  # noqa: S108
        "--tmpfs",
        "/run:rw,nosuid,nodev,noexec,size=16m",
        "--volume",
        f"{config.application_directory}:{config.working_directory}:Z,ro",
    ]

    # The port is published on loopback only, and never for a container in a pod.
    if standalone and config.host_port is not None:
        arguments += ["--publish", f"127.0.0.1:{config.host_port}:{config.guest_port}"]

    arguments += [
        part
        for mount_source, mount_target in config.secret_mounts
        for part in ("--volume", f"{mount_source}:{mount_target}:Z,ro")
    ]
    arguments += [part for key, value in config.environment for part in ("--env", f"{key}={value}")]
    arguments += [part for key, value in config.labels for part in ("--label", f"{key}={value}")]

    return (*arguments, config.image_reference, *command)
def build_django_migrate_command(
    uv_project_path: Path | None = None,
    *,
    python_executable: Path | None = None,
) -> tuple[str, ...]:
    """Build the ``manage.py migrate --noinput`` command for a hosted Django deployment.

    The command runs through the given Python executable when one is supplied,
    otherwise through ``uv run`` (with ``--project`` when a uv project path is
    supplied).

    Returns:
        Tuple of command arguments ready for subprocess execution.

    Raises:
        ValueError: If direct-python and uv-project execution modes are mixed.
    """
    migrate_arguments = ("manage.py", "migrate", "--noinput")

    if python_executable is None:
        project_arguments = () if uv_project_path is None else ("--project", str(uv_project_path))
        return ("uv", "run", *project_arguments, "python", *migrate_arguments)

    if uv_project_path is not None:
        msg = "python_executable and uv_project_path are mutually exclusive"
        raise ValueError(msg)
    return (str(python_executable), *migrate_arguments)

656
control_plane/tasks.py Normal file
View file

@ -0,0 +1,656 @@
from __future__ import annotations
import json
import logging
import secrets
import time
from pathlib import Path
from typing import TYPE_CHECKING
from typing import NoReturn
from urllib.request import urlopen
from celery import shared_task
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from control_plane.host_commands import HostCommandError
from control_plane.host_commands import run_host_command
from control_plane.local_test_runtime import TEST_DJANGO_CONTAINER_PORT
from control_plane.local_test_runtime import TEST_DJANGO_WORKDIR
from control_plane.local_test_runtime import build_test_django_container_context_path
from control_plane.local_test_runtime import build_test_django_container_labels
from control_plane.local_test_runtime import build_test_django_container_names
from control_plane.local_test_runtime import build_test_django_containerfile_path
from control_plane.local_test_runtime import build_test_django_environment
from control_plane.local_test_runtime import build_test_django_image_reference
from control_plane.local_test_runtime import build_test_django_local_url
from control_plane.local_test_runtime import build_test_django_secret_mounts
from control_plane.local_test_runtime import write_test_django_project
from control_plane.models import Deployment
from control_plane.models import DeploymentStatus
from control_plane.models import RuntimeService
from control_plane.models import RuntimeServiceKind
from control_plane.models import RuntimeServiceStatus
from control_plane.observability import capture_test_deployment_diagnostics
from control_plane.runtime_plans import DjangoApplicationLaunchConfig
from control_plane.runtime_plans import DjangoContainerImageBuildConfig
from control_plane.runtime_plans import DjangoContainerRuntimeConfig
from control_plane.runtime_plans import PostgresContainerConfig
from control_plane.runtime_plans import RedisContainerConfig
from control_plane.runtime_plans import build_django_container_image_command
from control_plane.runtime_plans import build_django_container_run_command
from control_plane.runtime_plans import build_django_migrate_command
from control_plane.runtime_plans import build_django_server_command
from control_plane.runtime_plans import build_postgres_container_command
from control_plane.runtime_plans import build_redis_container_command
if TYPE_CHECKING:
from celery.app.task import Task
type BoundControlPlaneTask = Task[..., str]
# Module logger with an explicit name rather than __name__.
logger = logging.getLogger("tussilago.control_plane.tasks")
# Default wait, in seconds, for the Django sentinel endpoint to report healthy.
DEFAULT_HTTP_READY_TIMEOUT_SECONDS = 45.0
# Default wait, in seconds, for a runtime service container to report healthy.
DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS = 45.0
# Deployment status values grouped as terminal (stored string values, not enum members).
TERMINAL_DEPLOYMENT_STATES: frozenset[str] = frozenset(
    {
        DeploymentStatus.DESTROYED.value,
        DeploymentStatus.FAILED.value,
    },
)
# Runtime service status values grouped as terminal (stored string values).
# NOTE(review): unlike the deployment set, this one includes DESTROYING and
# omits FAILED — confirm that the difference is intentional.
TERMINAL_RUNTIME_SERVICE_STATES: frozenset[str] = frozenset(
    {
        RuntimeServiceStatus.DESTROYING.value,
        RuntimeServiceStatus.DESTROYED.value,
    },
)
def _runtime_service_root(runtime_service: RuntimeService) -> Path:
    """Return the directory holding one runtime service's local test artifacts.

    Layout: ``<DATA_DIR>/runtime-services/<deployment id>/<service kind>``.
    """
    data_root = Path(settings.DATA_DIR)
    deployment_directory = data_root / "runtime-services" / str(runtime_service.deployment_id)
    return deployment_directory / runtime_service.kind
def _mark_deployment_failed(*, deployment_id: str, message: str) -> None:
    """Record a deployment as FAILED together with the error that caused it.

    The row is locked with ``select_for_update`` inside a transaction. Status,
    last error and finish time are written in a single save that also refreshes
    ``updated_at``.
    """
    changed_fields = ["status", "last_error", "finished_at", "updated_at"]
    with transaction.atomic():
        locked_deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        locked_deployment.status = DeploymentStatus.FAILED.value
        locked_deployment.last_error = message
        locked_deployment.finished_at = timezone.now()
        locked_deployment.save(update_fields=changed_fields)
def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None:
    """Capture a diagnostics snapshot without letting failures break the deployment flow.

    OSError, ValueError and a missing deployment are logged with a traceback
    and swallowed; any other exception propagates.
    """
    try:
        capture_test_deployment_diagnostics(deployment_id)
    except (OSError, ValueError, Deployment.DoesNotExist) as error:
        # Choose the log message by exception type, in the order OSError, ValueError, missing row.
        if isinstance(error, OSError):
            log_message = "Failed to write diagnostics snapshot deployment_id=%s"
        elif isinstance(error, ValueError):
            log_message = "Invalid diagnostics snapshot state deployment_id=%s"
        else:
            log_message = "Diagnostics snapshot skipped for missing deployment_id=%s"
        logger.exception(log_message, deployment_id)
def _ensure_test_django_image_exists(image_reference: str) -> None:
    """Build the reusable Django test image when Podman does not have it locally.

    ``podman image exists`` exiting with return code 1 is treated as "image
    missing" and triggers the build; any other failure is re-raised.

    Raises:
        HostCommandError: If Podman image inspection or build fails.
    """
    exists_command = ("podman", "image", "exists", image_reference)
    try:
        run_host_command(command=exists_command)
    except HostCommandError as error:
        image_is_missing = error.returncode == 1
        if not image_is_missing:
            raise
        build_config = DjangoContainerImageBuildConfig(
            image_reference=image_reference,
            containerfile_path=build_test_django_containerfile_path(),
            context_directory=build_test_django_container_context_path(),
        )
        # Image builds get a much longer timeout than the existence check.
        run_host_command(
            command=build_django_container_image_command(build_config),
            timeout_seconds=300.0,
        )
def _read_container_logs(container_name: str) -> str:
    """Return a container's logs for failure reporting, or "" when unavailable.

    Standard output is preferred; standard error is used when stdout is empty.
    A failing ``podman logs`` call yields an empty string instead of raising.
    """
    logs_command = ("podman", "logs", container_name)
    try:
        result = run_host_command(command=logs_command)
    except HostCommandError:
        return ""
    else:
        captured_stdout = result.stdout.strip()
        return captured_stdout if captured_stdout else result.stderr.strip()
def _read_container_status(container_name: str) -> str:
    """Return the container's current Podman status string.

    The health status is reported when the container has one, otherwise its
    plain state. Errors from the host command propagate to the caller.
    """
    status_template = "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}"
    inspect_command = ("podman", "inspect", "--format", status_template, container_name)
    inspect_result = run_host_command(command=inspect_command)
    return inspect_result.stdout.strip()
def _wait_for_container_ready(
    runtime_service: RuntimeService,
    *,
    timeout_seconds: float = DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS,
) -> None:
    """Block until Podman reports one runtime service container as healthy.

    Raises:
        RuntimeError: If the runtime service exits or becomes unhealthy before it is ready.
        TimeoutError: If the runtime service does not become ready before timeout.
    """
    container_name = runtime_service.container_name
    failed_states = {"exited", "dead", "stopped", "unhealthy"}
    deadline = time.monotonic() + timeout_seconds
    while time.monotonic() < deadline:
        status = _read_container_status(container_name)
        if status == "healthy":
            return
        if status not in failed_states:
            # Still starting up: poll again in a second.
            time.sleep(1.0)
            continue
        # The container failed for good; attach its logs (when any) to the error.
        logs = _read_container_logs(container_name)
        headline = f"Runtime service {runtime_service.kind} failed to become ready: {status}."
        raise RuntimeError(f"{headline}\n{logs}" if logs else headline)
    msg = f"Timed out waiting for runtime service {runtime_service.kind} to become healthy."
    raise TimeoutError(msg)
def _wait_for_http_ready(
    url: str,
    *,
    timeout_seconds: float = DEFAULT_HTTP_READY_TIMEOUT_SECONDS,
) -> dict[str, str | int]:
    """Poll a sentinel endpoint until its JSON payload reports ``"status": "ok"``.

    Transient failures are retried once per second until the deadline:
    connection errors, protocol-level ``http.client`` errors, undecodable
    bodies and invalid JSON. A payload that is valid JSON but unhealthy (or
    not a JSON object at all) is also retried, and is remembered so the final
    timeout error explains what was last seen.

    Returns:
        Parsed JSON response from the sentinel endpoint.

    Raises:
        TimeoutError: If the endpoint does not become healthy before timeout.
            The last observed failure is attached as the exception cause.
    """
    # Imported locally so this block does not rely on a module-level import being present.
    from http.client import HTTPException  # noqa: PLC0415

    deadline = time.monotonic() + timeout_seconds
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        try:
            with urlopen(url, timeout=2) as response:  # noqa: S310
                payload = json.loads(response.read().decode("utf-8"))
        except (OSError, HTTPException, UnicodeDecodeError, json.JSONDecodeError) as error:
            # A server that is still booting can reset connections, send a broken
            # status line, or return a truncated or garbled body; all are retried.
            last_error = error
        else:
            # Guard against non-object JSON (a list, string, ...) before calling ``.get``.
            if isinstance(payload, dict) and payload.get("status") == "ok":
                return payload
            last_error = ValueError(f"Sentinel endpoint reported an unhealthy payload: {payload!r}")
        time.sleep(1.0)
    msg = f"Timed out waiting for healthy Django sentinel endpoint at {url}"
    raise TimeoutError(msg) from last_error
def _build_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Load every runtime service row of one deployment, ordered by kind."""
    related_services = RuntimeService.objects.select_related("deployment__hosted_site__tenant")
    return tuple(related_services.filter(deployment=deployment).order_by("kind"))
def _get_ready_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Return the deployment's runtime services once every one of them is ready.

    Raises:
        ValueError: If the deployment has no runtime services or any of them is not ready.
    """
    runtime_services = _build_django_runtime_services(deployment)
    ready_status = RuntimeServiceStatus.READY.value
    every_service_ready = all(service.status == ready_status for service in runtime_services)
    # ``all`` is vacuously true for an empty tuple, so emptiness is checked explicitly.
    if runtime_services and every_service_ready:
        return runtime_services
    msg = "All runtime services must be ready before provisioning the Django test runtime."
    raise ValueError(msg)
def _build_django_runtime_configs(
    deployment: Deployment,
    runtime_services: tuple[RuntimeService, ...],
    *,
    project_root: Path,
) -> tuple[str, DjangoContainerRuntimeConfig, DjangoContainerRuntimeConfig]:
    """Assemble the Podman runtime configs for the migrate run and the long-lived server.

    Both containers share image, environment, secret mounts, labels and pod;
    only the server config carries port settings.

    Returns:
        Image reference plus migrate and server Podman runtime configs.
    """
    image_reference = build_test_django_image_reference()
    shared_environment = build_test_django_environment(deployment, runtime_services)
    shared_secret_mounts = build_test_django_secret_mounts(runtime_services)
    shared_labels = build_test_django_container_labels(deployment)
    container_names = build_test_django_container_names(deployment)
    server_container_name, migrate_container_name = container_names
    # The first runtime service's network name doubles as the pod name for both containers.
    pod_network = runtime_services[0].network_name

    migrate_config = DjangoContainerRuntimeConfig(
        hostname="django-migrate.internal",
        container_name=migrate_container_name,
        image_reference=image_reference,
        network_name=pod_network,
        pod_name=pod_network,
        application_directory=project_root,
        working_directory=TEST_DJANGO_WORKDIR,
        environment=shared_environment,
        secret_mounts=shared_secret_mounts,
        labels=shared_labels,
    )
    server_config = DjangoContainerRuntimeConfig(
        hostname="django.internal",
        container_name=server_container_name,
        image_reference=image_reference,
        network_name=pod_network,
        pod_name=pod_network,
        host_port=deployment.guest_port,
        guest_port=TEST_DJANGO_CONTAINER_PORT,
        application_directory=project_root,
        working_directory=TEST_DJANGO_WORKDIR,
        environment=shared_environment,
        secret_mounts=shared_secret_mounts,
        labels=shared_labels,
    )
    return image_reference, migrate_config, server_config
def _launch_django_runtime(
    deployment: Deployment,
    *,
    image_reference: str,
    migrate_config: DjangoContainerRuntimeConfig,
    server_config: DjangoContainerRuntimeConfig,
) -> dict[str, str | int]:
    """Ensure the image exists, run migrations, start the server container, and await the sentinel.

    Returns:
        Parsed JSON sentinel payload from the running Django test app.
    """
    container_python = Path("/usr/local/bin/python")
    _ensure_test_django_image_exists(image_reference)

    # Migrations run in the foreground in a throwaway container.
    migrate_run_command = build_django_container_run_command(
        migrate_config,
        command=build_django_migrate_command(python_executable=container_python),
        detach=False,
        remove=True,
    )
    run_host_command(command=migrate_run_command, timeout_seconds=120.0)

    # The application server is started detached.
    launch_config = DjangoApplicationLaunchConfig(
        wsgi_module=deployment.hosted_site.wsgi_module,
        bind_host="0.0.0.0",  # noqa: S104
        port=TEST_DJANGO_CONTAINER_PORT,
        workers=1,
        python_executable=container_python,
    )
    server_run_command = build_django_container_run_command(
        server_config,
        command=build_django_server_command(launch_config),
        detach=True,
    )
    run_host_command(command=server_run_command, timeout_seconds=120.0)

    return _wait_for_http_ready(build_test_django_local_url(deployment))
def _retry_or_fail_django_runtime(
    self: BoundControlPlaneTask,
    *,
    deployment: Deployment,
    error: HostCommandError | TimeoutError,
) -> NoReturn:
    """Schedule another attempt for a transient failure, or record the failure once retries run out."""
    attempts_so_far = getattr(self.request, "retries", 0)
    logger.warning(
        "Django runtime provisioning retry deployment_id=%s retries=%s error=%s",
        deployment.id,
        attempts_so_far,
        error,
    )
    if attempts_so_far < self.max_retries:
        # Exponential backoff (2s, 4s, 8s, ...) capped at five minutes.
        backoff_seconds = min(300, 2 ** (attempts_so_far + 1))
        raise self.retry(exc=error, countdown=backoff_seconds) from error

    # Retries exhausted: persist the failure together with any server logs, then re-raise.
    server_container_name, _ = build_test_django_container_names(deployment)
    logs = _read_container_logs(server_container_name)
    failure_message = f"{error}\n{logs}" if logs else str(error)
    _mark_deployment_failed(deployment_id=str(deployment.id), message=failure_message)
    _capture_test_deployment_diagnostics_snapshot(str(deployment.id))
    logger.error("Django runtime provisioning failed deployment_id=%s", deployment.id)
    raise error
def run_test_django_runtime_provisioning(deployment_id: str) -> str:
    """Provision the generated Django runtime for one deployment, inline and synchronously.

    Returns:
        Final deployment status for the processed deployment.
    """
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    already_settled = (
        deployment.status in TERMINAL_DEPLOYMENT_STATES or deployment.status == DeploymentStatus.RUNNING.value
    )
    if already_settled:
        # Nothing to do for terminal or already-running deployments.
        return deployment.status

    runtime_services = _get_ready_django_runtime_services(deployment)
    image_reference, migrate_config, server_config = _build_django_runtime_configs(
        deployment,
        runtime_services,
        project_root=write_test_django_project(deployment, runtime_services),
    )
    sentinel_payload = _launch_django_runtime(
        deployment,
        image_reference=image_reference,
        migrate_config=migrate_config,
        server_config=server_config,
    )

    with transaction.atomic():
        # Re-read under a row lock: the status may have changed while containers were starting.
        locked_deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        if locked_deployment.status in TERMINAL_DEPLOYMENT_STATES:
            return locked_deployment.status
        locked_deployment.status = DeploymentStatus.RUNNING.value
        locked_deployment.last_error = ""
        locked_deployment.started_at = timezone.now()
        locked_deployment.finished_at = None
        locked_deployment.save(
            update_fields=["status", "last_error", "started_at", "finished_at", "updated_at"],
        )

    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    hosted_site = locked_deployment.hosted_site
    logger.info(
        "Django runtime ready deployment_id=%s tenant_slug=%s site_slug=%s postgres=%s redis=%s",
        deployment_id,
        hosted_site.tenant.slug,
        hosted_site.slug,
        sentinel_payload.get("postgres"),
        sentinel_payload.get("redis"),
    )
    return DeploymentStatus.RUNNING.value
def _ensure_secret_file(password_file: Path) -> None:
    """Write a reusable password file for a test container if one does not already exist.

    The file is created exclusively (``O_CREAT | O_EXCL``) with mode ``0o600``
    in a single step. This closes two gaps of a check-then-write sequence:
    the secret is never readable by other users between creation and
    ``chmod``, and two concurrent callers cannot both write a password.

    An existing path is left untouched, whatever its content.
    """
    import os  # noqa: PLC0415  # local import keeps this block self-contained

    password_file.parent.mkdir(parents=True, exist_ok=True)
    try:
        descriptor = os.open(password_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        # Someone (possibly a concurrent caller) already created the secret; reuse it.
        return
    with os.fdopen(descriptor, "w", encoding="utf-8") as handle:
        handle.write(f"{secrets.token_urlsafe(24)}\n")
    # The creation mode is filtered by the umask; pin the exact final mode as before.
    password_file.chmod(0o600)
def _ensure_podman_pod(*, pod_name: str, host_port: int) -> None:
    """Create the named Podman pod, publishing the Django port on localhost, unless it exists.

    Raises:
        HostCommandError: If Podman pod inspection or creation fails.
    """
    exists_command = ("podman", "pod", "exists", pod_name)
    try:
        run_host_command(command=exists_command)
    except HostCommandError as error:
        # Exit code 1 is treated as "pod missing"; every other failure is re-raised untouched.
        if error.returncode != 1:
            raise
        publish_spec = f"127.0.0.1:{host_port}:{TEST_DJANGO_CONTAINER_PORT}"
        create_command = (
            "podman",
            "pod",
            "create",
            "--replace",
            "--name",
            pod_name,
            "--publish",
            publish_spec,
        )
        run_host_command(command=create_command)
def _build_runtime_service_command(
runtime_service: RuntimeService,
*,
data_directory: Path,
password_file: Path,
) -> tuple[str, ...]:
"""Build a Podman command for one runtime service kind.
Returns:
Podman command arguments for the runtime service.
Raises:
ValueError: If the runtime service kind or configuration is unsupported.
"""
if runtime_service.kind == RuntimeServiceKind.POSTGRESQL.value:
if not runtime_service.connection_username or not runtime_service.connection_database:
msg = "PostgreSQL runtime service requires connection credentials."
raise ValueError(msg)
return build_postgres_container_command(
PostgresContainerConfig(
container_name=runtime_service.container_name,
network_name=runtime_service.network_name,
hostname=runtime_service.hostname,
username=runtime_service.connection_username,
database_name=runtime_service.connection_database,
data_directory=data_directory,
password_file=password_file,
pod_name=runtime_service.network_name,
image_reference=runtime_service.image_reference,
),
)
if runtime_service.kind == RuntimeServiceKind.REDIS.value:
return build_redis_container_command(
RedisContainerConfig(
container_name=runtime_service.container_name,
network_name=runtime_service.network_name,
hostname=runtime_service.hostname,
data_directory=data_directory,
password_file=password_file,
pod_name=runtime_service.network_name,
image_reference=runtime_service.image_reference,
),
)
msg = f"Unsupported runtime service kind: {runtime_service.kind}"
raise ValueError(msg)
def _provision_runtime_service_container(runtime_service: RuntimeService) -> None:
    """Prepare on-disk state and the pod, start one runtime service container, and wait for it."""
    service_root = _runtime_service_root(runtime_service)
    data_directory = service_root / "data"
    password_file = service_root / "secrets" / "password"

    # Filesystem prerequisites: the data directory and a reusable generated password.
    data_directory.mkdir(parents=True, exist_ok=True)
    _ensure_secret_file(password_file)

    # The pod name is the service's network name; its published port is the deployment's guest port.
    _ensure_podman_pod(
        pod_name=runtime_service.network_name,
        host_port=runtime_service.deployment.guest_port,
    )

    start_command = _build_runtime_service_command(
        runtime_service,
        data_directory=data_directory,
        password_file=password_file,
    )
    run_host_command(command=start_command)
    _wait_for_container_ready(runtime_service)
@shared_task(
    bind=True,
    autoretry_for=(HostCommandError, TimeoutError),
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def provision_test_runtime_services(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Seed and provision runtime service test containers for one deployment.

    Services that are already ready, or in a terminal state, are skipped; each
    remaining service is marked provisioning, started, and marked ready. On
    failure the service is marked failed and the exception is re-raised.

    Returns:
        The deployment status when the deployment is already terminal,
        otherwise the ready runtime service status.

    Raises:
        HostCommandError: If Podman commands fail while provisioning backing services.
        RuntimeError: If a backing container exits or becomes unhealthy during startup.
        TimeoutError: If a backing container never becomes healthy.
        ValueError: If runtime service configuration is invalid.
    """
    del self
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    if deployment.status in TERMINAL_DEPLOYMENT_STATES:
        return deployment.status
    deployment.ensure_test_runtime_services()
    # Same query the Django runtime step uses, so both steps see services in the same order.
    runtime_services = _build_django_runtime_services(deployment)
    pending_runtime_services = tuple(
        runtime_service
        for runtime_service in runtime_services
        if runtime_service.status not in TERMINAL_RUNTIME_SERVICE_STATES
        and runtime_service.status != RuntimeServiceStatus.READY.value
    )
    if not pending_runtime_services:
        return RuntimeServiceStatus.READY.value
    for runtime_service in pending_runtime_services:
        runtime_service.status = RuntimeServiceStatus.PROVISIONING.value
        runtime_service.save(update_fields=["status", "updated_at"])
        try:
            _provision_runtime_service_container(runtime_service)
        # Parenthesised tuple: the bare ``except A, B, C:`` form only parses on Python 3.14+.
        except (HostCommandError, RuntimeError, TimeoutError):
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            _capture_test_deployment_diagnostics_snapshot(deployment_id)
            logger.exception(
                "Runtime service provisioning failed deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise
        except ValueError:
            # Invalid configuration: recorded as failed without a diagnostics snapshot.
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            logger.exception(
                "Runtime service configuration invalid deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise
        runtime_service.status = RuntimeServiceStatus.READY.value
        runtime_service.save(update_fields=["status", "updated_at"])
    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    return RuntimeServiceStatus.READY.value
# NOTE(review): no ``autoretry_for`` is configured and the task never calls ``self.retry``,
# so the backoff/retry options below may currently be inert - confirm the intent.
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_provisioning(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to provisioning unless it is terminal or already provisioning.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    target_status = DeploymentStatus.PROVISIONING.value
    with transaction.atomic():
        # The row lock serialises concurrent transition attempts for the same deployment.
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        needs_transition = (
            deployment.status not in TERMINAL_DEPLOYMENT_STATES and deployment.status != target_status
        )
        if needs_transition:
            deployment.status = target_status
            deployment.last_error = ""
            deployment.save(update_fields=["status", "last_error", "updated_at"])
        return deployment.status
# NOTE(review): no ``autoretry_for`` is configured and the task never calls ``self.retry``,
# so the backoff/retry options below may currently be inert - confirm the intent.
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_booting(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to booting unless it is terminal or already booting.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    target_status = DeploymentStatus.BOOTING.value
    with transaction.atomic():
        # The row lock serialises concurrent transition attempts for the same deployment.
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        needs_transition = (
            deployment.status not in TERMINAL_DEPLOYMENT_STATES and deployment.status != target_status
        )
        if needs_transition:
            deployment.status = target_status
            deployment.save(update_fields=["status", "updated_at"])
        return deployment.status
@shared_task(bind=True, max_retries=5)
def provision_test_django_runtime(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Run Django runtime provisioning as a task, mapping failures to fail-fast or retry handling.

    Returns:
        Final deployment status for the processed deployment.

    Raises:
        ValueError: If required backing services are not ready.
    """
    try:
        final_status = run_test_django_runtime_provisioning(deployment_id)
    except ValueError as error:
        # Configuration problems are not retried: record the failure and propagate.
        _mark_deployment_failed(deployment_id=deployment_id, message=str(error))
        logger.exception("Django runtime configuration invalid deployment_id=%s", deployment_id)
        raise
    except (HostCommandError, TimeoutError) as error:
        # Host and timeout failures go through the retry-or-fail helper, which always raises.
        failed_deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
        _retry_or_fail_django_runtime(self, deployment=failed_deployment, error=error)
    else:
        return final_status

View file

@ -0,0 +1,35 @@
{% load static %}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description"
content="Tussilago local deployment dashboard with runtime status, sentinel health, and captured container logs.">
<meta name="keywords"
content="Tussilago, deployment dashboard, podman, django, runtime logs">
<title>
{% block title %}Tussilago Deployments{% endblock %}
</title>
<link rel="stylesheet" href="{% static 'control_plane/dashboard.css' %}">
</head>
<body>
<div class="page-shell">
<header class="masthead">
<div>
<p class="eyebrow">Tussilago Local Runtime</p>
<h1>
<a href="{% url 'control_plane:deployment-dashboard' %}">Deployment Dashboard</a>
</h1>
</div>
<nav class="top-nav">
<a href="{% url 'control_plane:deployment-dashboard' %}">Deployments</a>
<a href="{% url 'admin:index' %}">Admin</a>
</nav>
</header>
<main>
{% block content %}{% endblock %}
</main>
</div>
</body>
</html>

View file

@ -0,0 +1,110 @@
{% extends 'control_plane/base.html' %}
{% block title %}Deployments · Tussilago{% endblock %}
{% block content %}
<section class="hero-panel">
<div>
<p class="eyebrow">Control Plane Overview</p>
<h2>See what is alive, what failed, and what to inspect next.</h2>
<p class="hero-copy">
Recent deployments, backing-service states, direct sentinel links, and fast paths into detailed logs.
</p>
</div>
<div class="hero-metrics">
<article class="metric-card">
<span class="metric-label">Recent Deployments</span>
<strong>{{ deployment_total }}</strong>
</article>
<article class="metric-card accent-good">
<span class="metric-label">Running Now</span>
<strong>{{ running_total }}</strong>
</article>
</div>
</section>
<section class="summary-grid">
{% for summary in status_summaries %}
<article class="summary-card status-{{ summary.status }}">
<span class="status-chip status-{{ summary.status }}">{{ summary.label }}</span>
<strong>{{ summary.total }}</strong>
</article>
{% empty %}
<article class="summary-card empty-state">
<strong>0</strong>
<span>No deployments yet.</span>
</article>
{% endfor %}
</section>
<section class="deployment-grid">
{% for card in deployment_cards %}
<article class="deployment-card">
<header class="card-header">
<div>
<p class="card-kicker">{{ card.deployment.hosted_site.tenant.slug }}</p>
<h3>{{ card.deployment.hosted_site.slug }}</h3>
<p class="card-meta">{{ card.deployment.id }}</p>
</div>
<span class="status-chip status-{{ card.deployment.status }}">{{ card.deployment.get_status_display }}</span>
</header>
<dl class="facts-grid compact-grid">
<div>
<dt>Created</dt>
<dd>
{{ card.deployment.created_at|date:'Y-m-d H:i:s' }}
</dd>
</div>
<div>
<dt>Sentinel</dt>
<dd>
{{ card.sentinel_url }}
</dd>
</div>
<div>
<dt>Runtime Ready</dt>
<dd>
{{ card.runtime_ready_total }}/{{ card.runtime_services|length }}
</dd>
</div>
<div>
<dt>Runtime Failed</dt>
<dd>
{{ card.runtime_failed_total }}
</dd>
</div>
</dl>
<div class="service-pill-row">
{% for runtime_service in card.runtime_services %}
<span class="service-pill status-{{ runtime_service.status }}">
{{ runtime_service.kind }} · {{ runtime_service.status }}
</span>
{% empty %}
<span class="service-pill muted-pill">No runtime services yet</span>
{% endfor %}
</div>
{% if card.deployment.last_error %}
<section class="error-panel">
<h4>Last Error</h4>
<pre>{{ card.deployment.last_error }}</pre>
</section>
{% endif %}
<div class="action-row">
<a class="button-link"
href="{% url 'control_plane:deployment-detail' card.deployment.id %}">Inspect deployment</a>
<a class="button-link subtle"
href="{{ card.sentinel_url }}"
target="_blank"
rel="noreferrer">Open sentinel</a>
<a class="button-link subtle"
href="{% url 'admin:control_plane_deployment_change' card.deployment.id %}">Admin row</a>
</div>
</article>
{% empty %}
<article class="deployment-card empty-state wide-card">
<h3>No deployments captured yet</h3>
<p>
Run <code>uv run python manage.py create_test_deployment</code> to populate this dashboard.
</p>
</article>
{% endfor %}
</section>
{% endblock content %}

View file

@ -0,0 +1,236 @@
{% extends 'control_plane/base.html' %}
{% block title %}{{ deployment.hosted_site.slug }} · Tussilago{% endblock %}
{% block content %}
<section class="hero-panel detail-hero">
<div>
<p class="eyebrow">Deployment Detail</p>
<h2>{{ deployment.hosted_site.tenant.slug }}/{{ deployment.hosted_site.slug }}</h2>
<p class="hero-copy">Deployment {{ deployment.id }} on localhost port {{ deployment.guest_port }}.</p>
</div>
<div class="hero-metrics">
<article class="metric-card">
<span class="metric-label">Control Plane</span>
<strong class="status-chip status-{{ deployment.status }}">{{ deployment.get_status_display }}</strong>
</article>
<article class="metric-card accent-good">
<span class="metric-label">Sentinel</span>
<strong><a href="{{ sentinel_url }}" target="_blank" rel="noreferrer">Open</a></strong>
</article>
</div>
</section>
<section class="panel-grid two-up">
<article class="panel-card"
data-health-panel
data-health-endpoint="{% url 'control_plane:deployment-health' deployment.id %}">
<div class="panel-header">
<div>
<p class="eyebrow">Live Health</p>
<h3>Sentinel probe</h3>
</div>
<button class="button-link subtle" type="button" data-health-refresh>Refresh</button>
</div>
<div class="health-strip">
<span class="status-chip health-{{ health_probe.status }}"
data-health-badge>{{ health_probe.label }}</span>
<span class="muted-copy" data-health-stamp>{{ health_probe.checked_at }}</span>
</div>
<p class="muted-copy" data-health-detail>
{% if health_probe.error %}
{{ health_probe.error }}
{% elif health_probe.ok %}
Sentinel responded with healthy payload.
{% else %}
Waiting for a healthy sentinel response.
{% endif %}
</p>
<pre class="log-output compact-log" data-health-json>{% if health_probe.payload %}{{ health_probe.payload }}{% elif health_probe.error %}{{ health_probe.error }}{% else %}No payload yet.{% endif %}</pre>
</article>
<article class="panel-card">
<div class="panel-header">
<div>
<p class="eyebrow">Facts</p>
<h3>Deployment metadata</h3>
</div>
</div>
<dl class="facts-grid">
<div>
<dt>Sentinel URL</dt>
<dd>
{{ sentinel_url }}
</dd>
</div>
<div>
<dt>Idempotency Key</dt>
<dd>
{{ deployment.idempotency_key }}
</dd>
</div>
<div>
<dt>Created</dt>
<dd>
{{ deployment.created_at|date:'Y-m-d H:i:s' }}
</dd>
</div>
<div>
<dt>Started</dt>
<dd>
{{ deployment.started_at|default:'-' }}
</dd>
</div>
<div>
<dt>Finished</dt>
<dd>
{{ deployment.finished_at|default:'-' }}
</dd>
</div>
<div>
<dt>Admin</dt>
<dd>
<a href="{% url 'admin:control_plane_deployment_change' deployment.id %}">Open admin change form</a>
</dd>
</div>
</dl>
{% if deployment.last_error %}
<section class="error-panel top-gap">
<h4>Last Error</h4>
<pre>{{ deployment.last_error }}</pre>
</section>
{% endif %}
</article>
</section>
<section class="panel-grid two-up">
<article class="panel-card">
<div class="panel-header">
<div>
<p class="eyebrow">Runtime Services</p>
<h3>Database and cache state</h3>
</div>
</div>
<div class="service-grid">
{% for runtime_service in runtime_services %}
<article class="service-card">
<span class="service-pill status-{{ runtime_service.status }}">{{ runtime_service.kind }}</span>
<h4>{{ runtime_service.container_name }}</h4>
<p class="muted-copy">{{ runtime_service.hostname }}:{{ runtime_service.internal_port }}</p>
<p class="muted-copy">Control plane status: {{ runtime_service.status }}</p>
</article>
{% empty %}
<p class="muted-copy">No runtime services recorded yet.</p>
{% endfor %}
</div>
</article>
<article class="panel-card">
<div class="panel-header">
<div>
<p class="eyebrow">Diagnostics Snapshot</p>
<h3>Persisted pod state and logs</h3>
</div>
</div>
{% if diagnostics %}
<dl class="facts-grid compact-grid">
<div>
<dt>Captured At</dt>
<dd>
{{ diagnostics.captured_at|default:'-' }}
</dd>
</div>
<div>
<dt>Pod</dt>
<dd>
{{ diagnostics.pod.name|default:'-' }}
</dd>
</div>
<div>
<dt>Pod Status</dt>
<dd>
{{ diagnostics.pod.status|default:'unknown' }}
</dd>
</div>
<div>
<dt>Snapshot Error</dt>
<dd>
{{ diagnostics.capture_error|default:'-' }}
</dd>
</div>
</dl>
{% if diagnostics.pod.error %}<p class="muted-copy top-gap">{{ diagnostics.pod.error }}</p>{% endif %}
{% else %}
<p class="muted-copy">No diagnostics snapshot has been captured yet.</p>
{% endif %}
</article>
</section>
<section class="panel-grid log-grid">
{% if diagnostics %}
<article class="panel-card wide-card">
<div class="panel-header">
<div>
<p class="eyebrow">Django Container</p>
<h3>{{ diagnostics.django.container_name }}</h3>
</div>
<span class="status-chip health-{{ diagnostics.django.container_status|default:'missing' }}">{{ diagnostics.django.container_status|default:'missing' }}</span>
</div>
{% if diagnostics.django.inspect_error %}<p class="muted-copy">{{ diagnostics.django.inspect_error }}</p>{% endif %}
<pre class="log-output">{{ diagnostics.django.logs|default:'No Django logs captured yet.' }}</pre>
</article>
{% for runtime_service in diagnostics.runtime_services %}
<article class="panel-card">
<div class="panel-header">
<div>
<p class="eyebrow">{{ runtime_service.label }}</p>
<h3>{{ runtime_service.container_name }}</h3>
</div>
<span class="status-chip health-{{ runtime_service.container_status|default:'missing' }}">{{ runtime_service.container_status|default:'missing' }}</span>
</div>
{% if runtime_service.inspect_error %}<p class="muted-copy">{{ runtime_service.inspect_error }}</p>{% endif %}
{% if runtime_service.log_error %}<p class="muted-copy">{{ runtime_service.log_error }}</p>{% endif %}
<pre class="log-output">{{ runtime_service.logs|default:'No logs captured yet.' }}</pre>
</article>
{% endfor %}
{% endif %}
</section>
<script>
document.addEventListener("DOMContentLoaded", () => {
  // Live health panel: re-probes the deployment health endpoint on demand and every 8 seconds.
  const panel = document.querySelector("[data-health-panel]");
  if (!panel) {
    return;
  }
  const endpoint = panel.dataset.healthEndpoint;
  const badge = panel.querySelector("[data-health-badge]");
  const stamp = panel.querySelector("[data-health-stamp]");
  const detail = panel.querySelector("[data-health-detail]");
  const jsonTarget = panel.querySelector("[data-health-json]");
  const refreshButton = panel.querySelector("[data-health-refresh]");

  // Mirror one probe result into the badge, timestamp, detail line and raw payload box.
  const renderPayload = (payload) => {
    badge.textContent = payload.label;
    badge.className = `status-chip health-${payload.status}`;
    stamp.textContent = payload.checked_at;
    detail.textContent = payload.error || (payload.ok ? "Sentinel responded with healthy payload." : "Waiting for a healthy sentinel response.");
    if (payload.payload) {
      jsonTarget.textContent = JSON.stringify(payload.payload, null, 2);
    } else if (payload.error) {
      jsonTarget.textContent = payload.error;
    } else {
      jsonTarget.textContent = "No payload yet.";
    }
  };

  // Fetch the latest probe. Network errors, non-2xx responses and non-JSON bodies are
  // reported in the detail line instead of surfacing as unhandled promise rejections
  // while the panel keeps showing stale data.
  const refreshHealth = async () => {
    try {
      const response = await fetch(endpoint, {headers: {"X-Requested-With": "fetch"}});
      if (!response.ok) {
        throw new Error(`Health endpoint responded with HTTP ${response.status}`);
      }
      renderPayload(await response.json());
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      detail.textContent = `Health refresh failed: ${reason}`;
    }
  };

  refreshButton.addEventListener("click", () => {
    void refreshHealth();
  });
  window.setInterval(() => {
    void refreshHealth();
  }, 8000);
});
</script>
{% endblock content %}

15
control_plane/urls.py Normal file
View file

@ -0,0 +1,15 @@
from django.urls import path
from control_plane.views import DeploymentDashboardHomeView
from control_plane.views import DeploymentDashboardView
from control_plane.views import DeploymentDetailView
from control_plane.views import DeploymentHealthView
# URL namespace; templates reverse these routes as ``control_plane:<name>``.
app_name = "control_plane"
urlpatterns = [
    # The site root and /deployments/ are both served by dashboard views.
    path("", DeploymentDashboardHomeView.as_view(), name="deployment-home"),
    path("deployments/", DeploymentDashboardView.as_view(), name="deployment-dashboard"),
    # The ``uuid`` converter hands ``deployment_id`` to the views as a ``uuid.UUID`` instance.
    path("deployments/<uuid:deployment_id>/", DeploymentDetailView.as_view(), name="deployment-detail"),
    path("deployments/<uuid:deployment_id>/health/", DeploymentHealthView.as_view(), name="deployment-health"),
]

163
control_plane/views.py Normal file
View file

@ -0,0 +1,163 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import TYPE_CHECKING
from typing import Any
from uuid import UUID
from django.db.models import Count
from django.db.models import Prefetch
from django.http import JsonResponse
from django.shortcuts import get_object_or_404
from django.views.generic import TemplateView
from django.views.generic import View
from control_plane.local_test_runtime import build_test_django_local_url
from control_plane.models import Deployment
from control_plane.models import DeploymentStatus
from control_plane.models import RuntimeService
from control_plane.observability import JsonValue
from control_plane.observability import load_test_deployment_diagnostics
from control_plane.observability import probe_test_deployment_health
if TYPE_CHECKING:
from django.db.models import QuerySet
from django.http import HttpRequest
# Annotation used for the ``**kwargs`` that the views' ``get_context_data`` methods accept.
type RouteKwarg = str | UUID
# Value types stored in the dashboard template context.
type DashboardContextValue = int | tuple[DeploymentCard, ...] | tuple[DeploymentStatusSummary, ...]
# Value types stored in the deployment detail template context.
type DetailContextValue = (
    DashboardContextValue | Deployment | tuple[RuntimeService, ...] | str | dict[str, JsonValue] | None
)
@dataclass(frozen=True, slots=True)
class DeploymentStatusSummary:
    """Aggregate deployments by lifecycle state for dashboard cards."""

    # Raw status value as stored on the deployment row.
    status: str
    # Human-readable label resolved for that status.
    label: str
    # Number of deployments currently in that status.
    total: int
@dataclass(frozen=True, slots=True)
class DeploymentCard:
"""Small view model used by the dashboard templates."""
deployment: Deployment
sentinel_url: str
runtime_services: tuple[RuntimeService, ...]
@property
def runtime_ready_total(self) -> int:
"""Return total runtime services currently marked ready."""
return sum(runtime_service.status == "ready" for runtime_service in self.runtime_services)
@property
def runtime_failed_total(self) -> int:
"""Return total runtime services currently marked failed."""
return sum(runtime_service.status == "failed" for runtime_service in self.runtime_services)
def _deployment_queryset() -> QuerySet[Deployment]:
    """Return deployments with site and tenant joined and runtime services prefetched by kind."""
    services_by_kind = Prefetch("runtime_services", queryset=RuntimeService.objects.order_by("kind"))
    deployments = Deployment.objects.select_related("hosted_site__tenant")
    return deployments.prefetch_related(services_by_kind)
class DeploymentDashboardView(TemplateView):
    """Dashboard listing recent test deployments with links to diagnostics and sentinel probes."""

    template_name = "control_plane/deployment_dashboard.html"

    def get_context_data(self, **kwargs: RouteKwarg) -> dict[str, DashboardContextValue]:
        """Assemble deployment cards and summary counters for the dashboard template.

        Returns:
            Template context containing deployment cards and summary counters.
        """
        context = super().get_context_data(**kwargs)
        # Only the 24 newest deployments are listed; both totals below cover that window.
        recent_deployments = tuple(_deployment_queryset().order_by("-created_at")[:24])
        running_status = DeploymentStatus.RUNNING.value
        context["deployment_cards"] = tuple(map(_build_deployment_card, recent_deployments))
        # Status summaries, by contrast, are counted over all deployments.
        context["status_summaries"] = _build_status_summaries()
        context["running_total"] = sum(
            1 for deployment in recent_deployments if deployment.status == running_status
        )
        context["deployment_total"] = len(recent_deployments)
        return context
class DeploymentDetailView(TemplateView):
    """Detail page for one deployment: persisted diagnostics, logs, and live health state."""

    template_name = "control_plane/deployment_detail.html"

    def get_context_data(self, **kwargs: RouteKwarg) -> dict[str, DetailContextValue]:
        """Assemble deployment metadata, stored diagnostics, and an initial health probe.

        Returns:
            Template context containing deployment metadata, diagnostics, and health state.
        """
        context: dict[str, Any] = super().get_context_data(**kwargs)
        # An unknown deployment id results in a 404 response.
        deployment: Deployment = get_object_or_404(_deployment_queryset(), pk=self.kwargs["deployment_id"])
        context["deployment"] = deployment
        context["runtime_services"] = tuple(deployment.runtime_services.all())
        context["sentinel_url"] = build_test_django_local_url(deployment)
        context["diagnostics"] = load_test_deployment_diagnostics(deployment)
        # The probe runs during page render; the page script refreshes it afterwards.
        context["health_probe"] = probe_test_deployment_health(deployment)
        return context
class DeploymentHealthView(View):
    """Return live sentinel health JSON for one deployment."""

    def get(self, request: HttpRequest, deployment_id: UUID) -> JsonResponse:
        """Return JSON probe state for one deployment sentinel endpoint.

        ``deployment_id`` arrives as a ``UUID`` because the route uses the
        ``<uuid:deployment_id>`` path converter. An unknown id results in a
        404 response.
        """
        del request
        deployment = get_object_or_404(_deployment_queryset(), pk=deployment_id)
        return JsonResponse(probe_test_deployment_health(deployment))
class DeploymentDashboardHomeView(DeploymentDashboardView):
    """Alias the dashboard at the site root for local testing convenience."""

    # Same template as the parent dashboard view.
    template_name = "control_plane/deployment_dashboard.html"
def _build_deployment_card(deployment: Deployment) -> DeploymentCard:
    """Wrap one deployment in the card view model consumed by the dashboard template."""
    sentinel_url = build_test_django_local_url(deployment)
    runtime_services = tuple(deployment.runtime_services.all())
    return DeploymentCard(
        deployment=deployment,
        sentinel_url=sentinel_url,
        runtime_services=runtime_services,
    )
def _build_status_summaries() -> tuple[DeploymentStatusSummary, ...]:
    """Count all deployments per status and return one summary per status, ordered by status."""
    status_counts = Deployment.objects.values("status").annotate(total=Count("id")).order_by("status")
    summaries = []
    for row in status_counts:
        status = row["status"]
        summaries.append(
            DeploymentStatusSummary(
                status=status,
                label=_resolve_status_label(status),
                total=int(row["total"]),
            ),
        )
    return tuple(summaries)
def _resolve_status_label(status: str) -> str:
    """Return the label of the matching ``DeploymentStatus`` choice, or a title-cased fallback."""
    matching_label = next(
        (choice.label for choice in DeploymentStatus if choice.value == status),
        None,
    )
    if matching_label is not None:
        return matching_label
    # Unknown status value: derive something readable, e.g. "some-state" -> "Some State".
    return status.replace("-", " ").title()