WIP
This commit is contained in:
parent
e70a0584c9
commit
a7a5b5c8ea
43 changed files with 5531 additions and 9 deletions
1
control_plane/__init__.py
Normal file
1
control_plane/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Control-plane models and runtime helpers for hosted deployments."""
|
||||
223
control_plane/admin.py
Normal file
223
control_plane/admin.py
Normal file
|
|
@ -0,0 +1,223 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from django.contrib import admin
|
||||
from django.contrib import messages
|
||||
from django.db.models import Count
|
||||
from django.db.models import F
|
||||
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import HostedSite
|
||||
from control_plane.models import RuntimeService
|
||||
from control_plane.models import RuntimeServiceKind
|
||||
from control_plane.models import Tenant
|
||||
from control_plane.tasks import provision_test_runtime_services
|
||||
|
||||
# Static type checkers see the admin base classes parameterised by their model;
# at runtime (the ``else`` branch) the plain, unsubscripted Django classes are used.
if TYPE_CHECKING:
    from django.db.models import QuerySet
    from django.http import HttpRequest

    RuntimeServiceInlineBase = admin.StackedInline[RuntimeService]
    TenantAdminBase = admin.ModelAdmin[Tenant]
    HostedSiteAdminBase = admin.ModelAdmin[HostedSite]
    DeploymentAdminBase = admin.ModelAdmin[Deployment]
    RuntimeServiceAdminBase = admin.ModelAdmin[RuntimeService]
else:
    # Runtime bases: same classes without the generic subscript.
    RuntimeServiceInlineBase = admin.StackedInline
    TenantAdminBase = admin.ModelAdmin
    HostedSiteAdminBase = admin.ModelAdmin
    DeploymentAdminBase = admin.ModelAdmin
    RuntimeServiceAdminBase = admin.ModelAdmin
|
||||
|
||||
|
||||
class RuntimeServiceInline(RuntimeServiceInlineBase):
    """Stacked inline for editing runtime services on their parent admin page.

    No blank extra forms are rendered, and the number of inline rows is capped at
    the number of members of ``RuntimeServiceKind``.
    """

    model = RuntimeService
    show_change_link = True
    max_num = len(RuntimeServiceKind)
    extra = 0
|
||||
|
||||
|
||||
@admin.register(Tenant)
class TenantAdmin(TenantAdminBase):
    """Admin for tenants, listed, searched, and ordered by slug/display name."""

    ordering = ("slug",)
    search_fields = ("slug", "display_name")
    list_display = ("slug", "display_name")
|
||||
|
||||
|
||||
@admin.register(HostedSite)
class HostedSiteAdmin(HostedSiteAdminBase):
    """Admin for hosted sites, listed together with their tenant's slug."""

    autocomplete_fields = ("tenant",)
    list_select_related = ("tenant",)
    list_filter = ("tenant",)
    ordering = ("tenant__slug", "slug")
    list_display = ("slug", "display_name", "tenant_slug", "service_port")
    search_fields = (
        "slug",
        "display_name",
        "tenant__slug",
        "tenant__display_name",
        "wsgi_module",
    )

    def get_queryset(self, request: HttpRequest) -> QuerySet[HostedSite]:
        """Join the tenant and expose its slug as ``tenant_slug_value``.

        Returns:
            Hosted site queryset with the tenant selected and annotated.
        """
        base_queryset = super().get_queryset(request)
        with_tenant = base_queryset.select_related("tenant")
        return with_tenant.annotate(tenant_slug_value=F("tenant__slug"))

    @admin.display(ordering="tenant__slug", description="Tenant")
    def tenant_slug(self, hosted_site: HostedSite) -> str:
        """Return the annotated tenant slug for the changelist column."""
        # The value is read from the instance dict, where get_queryset's
        # annotation stores it.
        annotated_slug = vars(hosted_site)["tenant_slug_value"]
        return str(annotated_slug)
|
||||
|
||||
|
||||
@admin.register(Deployment)
class DeploymentAdmin(DeploymentAdminBase):
    """Admin for deployments, including an action that queues test provisioning."""

    actions = ("create_test_containers",)
    inlines = (RuntimeServiceInline,)
    autocomplete_fields = ("hosted_site",)
    list_select_related = ("hosted_site__tenant",)
    list_filter = ("status",)
    ordering = ("hosted_site__tenant__slug", "hosted_site__slug", "-created_at")
    list_display = (
        "id",
        "status",
        "tenant_slug",
        "site_slug",
        "idempotency_key",
        "guest_port",
        "runtime_service_total",
    )
    search_fields = (
        "=id",
        "idempotency_key",
        "firecracker_vm_id",
        "hosted_site__slug",
        "hosted_site__tenant__slug",
    )

    def get_queryset(self, request: HttpRequest) -> QuerySet[Deployment]:
        """Join site and tenant rows and annotate the values shown in the list.

        Returns:
            Deployment queryset annotated with tenant slug, site slug, and the
            distinct count of related runtime services.
        """
        joined = super().get_queryset(request).select_related("hosted_site__tenant")
        return joined.annotate(
            tenant_slug_value=F("hosted_site__tenant__slug"),
            site_slug_value=F("hosted_site__slug"),
            runtime_service_total_value=Count("runtime_services", distinct=True),
        )

    @admin.display(ordering="hosted_site__tenant__slug", description="Tenant")
    def tenant_slug(self, deployment: Deployment) -> str:
        """Return the annotated tenant slug for the changelist column."""
        annotated_slug = vars(deployment)["tenant_slug_value"]
        return str(annotated_slug)

    @admin.display(ordering="hosted_site__slug", description="Site")
    def site_slug(self, deployment: Deployment) -> str:
        """Return the annotated hosted site slug for the changelist column."""
        annotated_slug = vars(deployment)["site_slug_value"]
        return str(annotated_slug)

    @admin.display(description="Runtime services")
    def runtime_service_total(self, deployment: Deployment) -> int:
        """Return the annotated number of runtime services on the deployment."""
        annotated_total = vars(deployment)["runtime_service_total_value"]
        return int(annotated_total)

    @admin.action(description="Queue test container provisioning")
    def create_test_containers(
        self,
        request: HttpRequest,
        queryset: QuerySet[Deployment],
    ) -> None:
        """Queue one provisioning task per selected deployment and report the count."""
        queued_ids = list(map(str, queryset.values_list("id", flat=True)))
        for queued_id in queued_ids:
            provision_test_runtime_services.delay(queued_id)

        summary = (
            f"Queued test container provisioning for {len(queued_ids)} deployments. "
            "Run a Celery worker to execute queued jobs."
        )
        self.message_user(request, summary, level=messages.SUCCESS)
|
||||
|
||||
|
||||
@admin.register(RuntimeService)
class RuntimeServiceAdmin(RuntimeServiceAdminBase):
    """Admin for runtime services, listed with their tenant and site slugs."""

    autocomplete_fields = ("deployment",)
    list_select_related = ("deployment__hosted_site__tenant",)
    list_filter = ("kind", "status")
    list_display = (
        "container_name",
        "kind",
        "status",
        "tenant_slug",
        "site_slug",
        "internal_port",
    )
    ordering = (
        "deployment__hosted_site__tenant__slug",
        "deployment__hosted_site__slug",
        "kind",
    )
    search_fields = (
        "container_name",
        "network_name",
        "hostname",
        "deployment__idempotency_key",
        "deployment__hosted_site__slug",
        "deployment__hosted_site__tenant__slug",
    )

    def get_queryset(self, request: HttpRequest) -> QuerySet[RuntimeService]:
        """Join deployment, site, and tenant rows and annotate both slugs.

        Returns:
            Runtime service queryset annotated with tenant and site slug values.
        """
        joined = super().get_queryset(request).select_related("deployment__hosted_site__tenant")
        return joined.annotate(
            tenant_slug_value=F("deployment__hosted_site__tenant__slug"),
            site_slug_value=F("deployment__hosted_site__slug"),
        )

    @admin.display(ordering="deployment__hosted_site__tenant__slug", description="Tenant")
    def tenant_slug(self, runtime_service: RuntimeService) -> str:
        """Return the annotated tenant slug for the changelist column."""
        annotated_slug = vars(runtime_service)["tenant_slug_value"]
        return str(annotated_slug)

    @admin.display(ordering="deployment__hosted_site__slug", description="Site")
    def site_slug(self, runtime_service: RuntimeService) -> str:
        """Return the annotated hosted site slug for the changelist column."""
        annotated_slug = vars(runtime_service)["site_slug_value"]
        return str(annotated_slug)
|
||||
8
control_plane/apps.py
Normal file
8
control_plane/apps.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from django.apps import AppConfig
|
||||
|
||||
|
||||
class ControlPlaneConfig(AppConfig):
    """Django application configuration for the ``control_plane`` app."""

    verbose_name = "Tussilago Control Plane"
    name = "control_plane"
|
||||
13
control_plane/container_assets/test_django/Containerfile
Normal file
13
control_plane/container_assets/test_django/Containerfile
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Image definition for the local Django test runtime.
FROM docker.io/library/python:3.14-slim

# Skip .pyc files, keep stdout/stderr unbuffered, and silence pip's version check.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Python packages installed into the image: Django, gunicorn, psycopg, and redis.
RUN python -m pip install --no-cache-dir \
    "django>=6.0.4" \
    "gunicorn>=23.0.0" \
    "psycopg[binary]>=3.2.9" \
    "redis>=6.0.0"

# Working directory inside the container.
WORKDIR /srv/test-app
|
||||
171
control_plane/host_commands.py
Normal file
171
control_plane/host_commands.py
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
import subprocess # noqa: S404
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
# Module logger under the fixed "tussilago.control_plane.host_commands" name.
logger = logging.getLogger("tussilago.control_plane.host_commands")

# Default allowlist of parent-process environment variables that
# build_host_command_env() copies into a child environment; any other variable
# in os.environ is dropped unless supplied through explicit overrides.
DEFAULT_INHERITED_ENV_KEYS: frozenset[str] = frozenset(
    {
        "HOME",
        "LANG",
        "LC_ALL",
        "LC_CTYPE",
        "LOGNAME",
        "PATH",
        "SSL_CERT_DIR",
        "SSL_CERT_FILE",
        "TMPDIR",
        "USER",
        "UV_CACHE_DIR",
        "VIRTUAL_ENV",
        "XDG_CACHE_HOME",
        "XDG_RUNTIME_DIR",
    },
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class HostCommandResult:
|
||||
"""Capture output from a completed host-side command."""
|
||||
|
||||
args: tuple[str, ...]
|
||||
returncode: int
|
||||
stdout: str
|
||||
stderr: str
|
||||
|
||||
|
||||
class HostCommandError(RuntimeError):
    """Raised when a host-side command fails or times out.

    Attributes:
        command_args: Argument vector of the command, stored as a tuple.
        returncode: Process exit status, or ``None`` when none is available
            (for example after a timeout).
        stdout: Captured standard output text.
        stderr: Captured standard error text.
    """

    def __init__(
        self,
        message: str,
        *,
        args: Sequence[str],
        returncode: int | None,
        stdout: str,
        stderr: str,
    ) -> None:
        """Store captured command context for later error reporting."""
        super().__init__(message)
        # ``args`` is already taken by BaseException, so the command is kept
        # under a separate attribute name.
        self.command_args = tuple(args)
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr

    def __reduce__(self) -> tuple[object, ...]:
        """Describe how to rebuild this error for ``pickle`` and ``copy``.

        The default exception reduction calls ``cls(*self.args)``, which fails
        here because the constructor requires keyword-only arguments. Routing
        through ``_rebuild`` lets the error cross process boundaries intact.
        """
        message = self.args[0] if self.args else ""
        state = (message, self.command_args, self.returncode, self.stdout, self.stderr)
        return (type(self)._rebuild, state)

    @classmethod
    def _rebuild(
        cls,
        message: str,
        command_args: Sequence[str],
        returncode: int | None,
        stdout: str,
        stderr: str,
    ) -> HostCommandError:
        """Recreate an instance from the positional state built by ``__reduce__``."""
        return cls(
            message,
            args=command_args,
            returncode=returncode,
            stdout=stdout,
            stderr=stderr,
        )
|
||||
|
||||
|
||||
def build_host_command_env(
    *,
    env_overrides: Mapping[str, str] | None = None,
    allowed_env_keys: frozenset[str] | None = None,
    inherited_env_keys: frozenset[str] = DEFAULT_INHERITED_ENV_KEYS,
) -> dict[str, str]:
    """Assemble the environment handed to a host-side child process.

    Only variables named in ``inherited_env_keys`` are copied from the current
    process. Explicit overrides are then applied on top, provided every
    override key appears in ``allowed_env_keys``.

    Returns:
        The filtered environment, including any accepted overrides.

    Raises:
        ValueError: If overrides are given without an allowlist, or an override
            key is not in the allowlist.
    """
    child_env: dict[str, str] = {}
    for name, value in os.environ.items():
        if name in inherited_env_keys:
            child_env[name] = value

    if env_overrides is None:
        return child_env

    if allowed_env_keys is None:
        msg = "allowed_env_keys is required when env_overrides are provided"
        raise ValueError(msg)

    rejected = sorted(name for name in env_overrides if name not in allowed_env_keys)
    if rejected:
        msg = f"env_overrides contains disallowed keys: {', '.join(rejected)}"
        raise ValueError(msg)

    for name, value in env_overrides.items():
        child_env[name] = value
    return child_env
|
||||
|
||||
|
||||
def run_host_command(
    *,
    command: Sequence[str],
    cwd: Path | None = None,
    env_overrides: Mapping[str, str] | None = None,
    allowed_env_keys: frozenset[str] | None = None,
    timeout_seconds: float = 60.0,
) -> HostCommandResult:
    """Run a host-side command with explicit environment and timeout controls.

    The command is executed without a shell, with output captured as text and
    an environment built by ``build_host_command_env``.

    Returns:
        A result object containing the command, return code, and captured output.

    Raises:
        ValueError: If the command is empty, contains an empty argument, or env
            overrides are not allowlisted.
        HostCommandError: If the command exits non-zero or times out. On
            timeout ``returncode`` is ``None`` and the output fields hold
            whatever was captured before the deadline (possibly nothing).
    """
    normalized_command = tuple(command)
    if not normalized_command:
        msg = "command must not be empty"
        raise ValueError(msg)

    if any(not argument for argument in normalized_command):
        msg = "command arguments must be non-empty strings"
        raise ValueError(msg)

    resolved_env = build_host_command_env(
        env_overrides=env_overrides,
        allowed_env_keys=allowed_env_keys,
    )

    # Only the executable and the argument count are logged, not the arguments.
    logger.debug(
        "Running host command executable=%s argc=%s (cwd=%s)",
        shlex.quote(normalized_command[0]),
        len(normalized_command),
        cwd,
    )

    try:
        completed = subprocess.run(  # noqa: S603
            normalized_command,
            check=True,
            capture_output=True,
            text=True,
            cwd=cwd,
            env=resolved_env,
            timeout=timeout_seconds,
        )
    except subprocess.CalledProcessError as error:
        msg_0 = "Host command failed."
        raise HostCommandError(
            msg_0,
            args=tuple(str(argument) for argument in error.cmd),
            returncode=error.returncode,
            stdout=_coerce_captured_output(error.stdout),
            stderr=_coerce_captured_output(error.stderr),
        ) from error
    except subprocess.TimeoutExpired as error:
        msg_0 = "Host command timed out."
        # TimeoutExpired carries bytes (or None) even when text=True, so the
        # partial output must be decoded rather than passed through str().
        raise HostCommandError(
            msg_0,
            args=normalized_command,
            returncode=None,
            stdout=_coerce_captured_output(error.stdout),
            stderr=_coerce_captured_output(error.stderr),
        ) from error

    return HostCommandResult(
        args=normalized_command,
        returncode=completed.returncode,
        stdout=completed.stdout,
        stderr=completed.stderr,
    )


def _coerce_captured_output(captured: str | bytes | None) -> str:
    """Normalize captured process output to text; ``None`` becomes ``""``."""
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        # Partial output may end mid-character, so never fail on decoding.
        return captured.decode(errors="replace")
    return captured
|
||||
212
control_plane/local_test_deployment.py
Normal file
212
control_plane/local_test_deployment.py
Normal file
|
|
@ -0,0 +1,212 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import secrets
|
||||
import socket
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
from celery import chain
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from control_plane.host_commands import HostCommandError
|
||||
from control_plane.local_test_runtime import build_test_django_local_url
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import DeploymentStatus
|
||||
from control_plane.models import HostedSite
|
||||
from control_plane.models import Tenant
|
||||
from control_plane.observability import capture_test_deployment_diagnostics
|
||||
from control_plane.tasks import mark_deployment_booting
|
||||
from control_plane.tasks import mark_deployment_provisioning
|
||||
from control_plane.tasks import provision_test_django_runtime
|
||||
from control_plane.tasks import provision_test_runtime_services
|
||||
from control_plane.tasks import run_test_django_runtime_provisioning
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class CreatedTestDeployment:
    """Immutable group of the rows created for one local test deployment."""

    # Field order is part of the constructor signature; do not reorder.
    tenant: Tenant
    hosted_site: HostedSite
    deployment: Deployment

    @property
    def sentinel_url(self) -> str:
        """Return the local sentinel URL derived from the deployment row."""
        deployment_row = self.deployment
        return build_test_django_local_url(deployment_row)
|
||||
|
||||
|
||||
def create_test_deployment() -> CreatedTestDeployment:
    """Insert a tenant, hosted site, and deployment with random identifiers.

    Slugs and the idempotency key come from ``secrets``; the port comes from
    ``_find_free_port`` and is used for both the site and the deployment. All
    three rows are written inside a single transaction.

    Returns:
        The created tenant, hosted site, and deployment rows.
    """
    tenant_suffix = secrets.token_hex(4)
    site_suffix = secrets.token_hex(4)
    tenant_slug = "tenant-" + tenant_suffix
    site_slug = "site-" + site_suffix
    idempotency_key = "test-deploy-" + secrets.token_hex(8)
    port = _find_free_port()

    # Placeholder digest derived from the generated identifiers.
    digest_input = ":".join((tenant_slug, site_slug, idempotency_key))
    source_sha256 = hashlib.sha256(digest_input.encode()).hexdigest()

    with transaction.atomic():
        tenant = Tenant.objects.create(
            slug=tenant_slug,
            display_name=f"Test Tenant {tenant_suffix.upper()}",
        )
        hosted_site = HostedSite.objects.create(
            tenant=tenant,
            slug=site_slug,
            display_name=f"Test Site {site_suffix.upper()}",
            wsgi_module="tenant_site.wsgi:application",
            service_port=port,
        )
        deployment = Deployment.objects.create(
            hosted_site=hosted_site,
            idempotency_key=idempotency_key,
            source_sha256=source_sha256,
            guest_port=port,
        )

    return CreatedTestDeployment(tenant=tenant, hosted_site=hosted_site, deployment=deployment)
|
||||
|
||||
|
||||
def queue_test_deployment_provisioning(deployment_id: str) -> str:
    """Submit the local test provisioning steps as one Celery chain.

    The broker configuration is checked first. Each step is added as an
    immutable signature that receives only ``deployment_id``.

    Returns:
        Id of the result object returned when the chain is applied.
    """
    _ensure_async_broker_configuration()
    pipeline_steps = (
        mark_deployment_provisioning,
        provision_test_runtime_services,
        mark_deployment_booting,
        provision_test_django_runtime,
    )
    workflow = chain(*(step.si(deployment_id) for step in pipeline_steps))
    async_result = workflow.apply_async()
    return str(async_result.id)
|
||||
|
||||
|
||||
def wait_for_test_deployment(
    deployment_id: str,
    *,
    timeout_seconds: float,
    poll_interval_seconds: float,
) -> Deployment:
    """Wait until a queued local test deployment becomes running or fails.

    The deployment row is re-read on every poll. The sleep between polls never
    extends past the deadline, so the call does not overrun ``timeout_seconds``
    by a poll interval.

    Returns:
        Deployment row in running state.

    Raises:
        RuntimeError: If deployment reaches failed state.
        TimeoutError: If deployment does not finish before timeout.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
        if deployment.status == DeploymentStatus.RUNNING.value:
            return deployment
        if deployment.status == DeploymentStatus.FAILED.value:
            failure_message = deployment.last_error or "Local test deployment failed."
            raise RuntimeError(failure_message)

        remaining_seconds = deadline - time.monotonic()
        if remaining_seconds <= 0:
            msg = (
                "Timed out waiting for local test deployment "
                f"{deployment.id} to become ready. Current status: {deployment.status}."
            )
            raise TimeoutError(msg)

        # Clamp the final sleep so the last poll happens at the deadline.
        time.sleep(min(poll_interval_seconds, remaining_seconds))
|
||||
|
||||
|
||||
def provision_test_deployment(deployment_id: str) -> Deployment:
    """Provision a local test deployment synchronously in this process.

    The steps are executed in order. On failure the deployment is marked
    failed, a diagnostics capture is attempted, and the error is propagated.

    Returns:
        Deployment row, reloaded after all steps succeed.

    Raises:
        RuntimeError: If runtime provisioning fails; host command failures are
            re-raised as ``RuntimeError`` carrying the captured output.
        TimeoutError: If the Django sentinel endpoint never becomes ready.
        ValueError: If runtime configuration is invalid.
    """

    def _record_failure(message: str) -> None:
        _mark_inline_deployment_failed(deployment_id, message=message)
        _capture_test_deployment_diagnostics_snapshot(deployment_id)

    try:
        mark_deployment_provisioning.run(deployment_id)
        provision_test_runtime_services.run(deployment_id)
        mark_deployment_booting.run(deployment_id)
        run_test_django_runtime_provisioning(deployment_id)
    except HostCommandError as error:
        # Must precede the RuntimeError handler: HostCommandError subclasses it.
        detailed_message = _build_host_command_failure_message(error)
        _record_failure(detailed_message)
        raise RuntimeError(detailed_message) from error
    except (RuntimeError, TimeoutError, ValueError) as error:
        _record_failure(str(error))
        raise

    deployments = Deployment.objects.select_related("hosted_site__tenant")
    return deployments.get(pk=deployment_id)
|
||||
|
||||
|
||||
def _ensure_async_broker_configuration() -> None:
    """Reject an unset or in-memory Celery broker before queueing work.

    Raises:
        RuntimeError: If ``settings.CELERY_BROKER_URL`` is empty or ``memory://``.
    """
    broker_url = settings.CELERY_BROKER_URL
    if broker_url and broker_url != "memory://":
        return

    if not broker_url:
        msg = "Async queueing requires TUSSILAGO_CELERY_BROKER_URL to be set to a real broker URL."
    else:
        msg = (
            "Async queueing cannot use memory:// because the worker cannot consume tasks from another process. "
            "Set TUSSILAGO_CELERY_BROKER_URL to a real broker such as Redis or RabbitMQ."
        )
    raise RuntimeError(msg)
|
||||
|
||||
|
||||
def _mark_inline_deployment_failed(deployment_id: str, *, message: str) -> None:
    """Persist a failed status and error message unless already failed."""
    deployment = Deployment.objects.get(pk=deployment_id)
    already_failed = deployment.status == DeploymentStatus.FAILED.value
    if not already_failed:
        deployment.finished_at = timezone.now()
        deployment.last_error = message
        deployment.status = DeploymentStatus.FAILED.value
        deployment.save(update_fields=["status", "last_error", "finished_at", "updated_at"])
|
||||
|
||||
|
||||
def _build_host_command_failure_message(error: HostCommandError) -> str:
    """Combine the error text with stripped stderr, or stdout if stderr is blank."""
    headline = str(error)
    detail = error.stderr.strip() or error.stdout.strip()
    if not detail:
        return headline
    return headline + "\n" + detail
|
||||
|
||||
|
||||
def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None:
    """Capture diagnostics on a best-effort basis.

    ``OSError``, ``ValueError``, and ``Deployment.DoesNotExist`` raised by the
    capture are deliberately ignored.
    """
    ignorable_errors = (OSError, ValueError, Deployment.DoesNotExist)
    try:
        capture_test_deployment_diagnostics(deployment_id)
    except ignorable_errors:
        return
|
||||
|
||||
|
||||
def _find_free_port() -> int:
    """Return a loopback TCP port number chosen by the operating system.

    A socket is bound to port 0 on 127.0.0.1, its assigned port is read, and
    the socket is closed again before returning.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.bind(("127.0.0.1", 0))
        probe.listen(1)
        bound_address = probe.getsockname()
        return int(bound_address[1])
|
||||
297
control_plane/local_test_runtime.py
Normal file
297
control_plane/local_test_runtime.py
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from textwrap import dedent
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from django.conf import settings
|
||||
|
||||
from control_plane.models import RuntimeServiceKind
|
||||
from control_plane.models import _build_limited_identifier
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import RuntimeService
|
||||
|
||||
|
||||
# NOTE(review): presumably the port the Django server listens on inside the
# container; not referenced in the visible part of this module — confirm.
TEST_DJANGO_CONTAINER_PORT = 8000
# Image reference returned by build_test_django_image_reference().
TEST_DJANGO_IMAGE_REFERENCE = "localhost/tussilago-test-django:latest"
# Exported to the container as PYTHONPATH by build_test_django_environment().
TEST_DJANGO_WORKDIR = "/srv/test-app"
# Container-side mount targets returned by build_test_django_secret_mounts().
TEST_POSTGRES_AUTH_DIR = "/run/postgres-auth"
TEST_REDIS_AUTH_DIR = "/run/redis-auth"
# Password file paths passed to the container via *_PASSWORD_FILE variables.
TEST_POSTGRES_PASSWORD_FILE = f"{TEST_POSTGRES_AUTH_DIR}/password"
TEST_REDIS_PASSWORD_FILE = f"{TEST_REDIS_AUTH_DIR}/password"
|
||||
|
||||
|
||||
def build_test_django_project_root(deployment: Deployment) -> Path:
    """Return the per-deployment directory for the generated Django test app.

    The path is ``<DATA_DIR>/test-deployments/<deployment id>/django-app``.
    """
    data_dir = Path(settings.DATA_DIR)
    return data_dir.joinpath("test-deployments", str(deployment.id), "django-app")
|
||||
|
||||
|
||||
def build_test_django_image_reference() -> str:
    """Return the fixed image reference of the shared local Django runtime."""
    image_reference = TEST_DJANGO_IMAGE_REFERENCE
    return image_reference
|
||||
|
||||
|
||||
def build_test_django_containerfile_path() -> Path:
    """Return the path of the Containerfile stored next to this module.

    The file lives at ``container_assets/test_django/Containerfile`` relative
    to the resolved directory of this source file.
    """
    module_dir = Path(__file__).resolve().parent
    return module_dir.joinpath("container_assets", "test_django", "Containerfile")
|
||||
|
||||
|
||||
def build_test_django_container_context_path() -> Path:
    """Return the directory that contains the test Django Containerfile."""
    containerfile = build_test_django_containerfile_path()
    return containerfile.parent
|
||||
|
||||
|
||||
def build_test_django_local_url(deployment: Deployment) -> str:
    """Return the loopback ``/sentinel/`` URL on the deployment's guest port."""
    port = deployment.guest_port
    return f"http://127.0.0.1:{port}/sentinel/"
|
||||
|
||||
|
||||
def build_test_django_container_names(deployment: Deployment) -> tuple[str, str]:
    """Return the server and migrate container names for a deployment.

    Both names combine a role prefix, the tenant and site slugs, and the first
    twelve hex characters of the deployment id, limited to 128 characters.

    Returns:
        ``(server_name, migrate_name)``.
    """
    hosted_site = deployment.hosted_site
    short_id = deployment.id.hex[:12]

    def _name_for(prefix: str) -> str:
        return _build_limited_identifier(
            prefix=prefix,
            tenant_slug=hosted_site.tenant.slug,
            site_slug=hosted_site.slug,
            suffix=short_id,
            max_length=128,
        )

    server_name = _name_for("django")
    migrate_name = _name_for("django-migrate")
    return (server_name, migrate_name)
|
||||
|
||||
|
||||
def build_test_django_container_labels(deployment: Deployment) -> tuple[tuple[str, str], ...]:
    """Return the ``tussilago.*`` label pairs attached to Django containers.

    The labels carry the deployment id, tenant slug, site slug, and the fixed
    role ``django``.
    """
    hosted_site = deployment.hosted_site
    deployment_label = ("tussilago.deployment-id", str(deployment.id))
    tenant_label = ("tussilago.tenant-slug", hosted_site.tenant.slug)
    site_label = ("tussilago.site-slug", hosted_site.slug)
    role_label = ("tussilago.role", "django")
    return (deployment_label, tenant_label, site_label, role_label)
|
||||
|
||||
|
||||
def build_test_django_environment(
    deployment: Deployment,
    runtime_services: Iterable[RuntimeService],
) -> tuple[tuple[str, str], ...]:
    """Return container environment variables for the generated Django test app.

    Args:
        deployment: Deployment whose id and tenant/site slugs are exported.
        runtime_services: Services to search for the PostgreSQL and Redis
            entries. Any iterable is accepted, including one-shot iterators.

    Returns:
        Ordered ``(name, value)`` pairs for the container environment.
        Passwords are passed as file paths, not as values.

    Raises:
        ValueError: If the PostgreSQL or Redis runtime service is missing, or
            the PostgreSQL service lacks a database name or username.
    """
    # Materialize once: the services are scanned twice below, and a generator
    # would already be exhausted by the time the Redis lookup runs.
    services = tuple(runtime_services)
    postgres_service = _get_runtime_service(services, RuntimeServiceKind.POSTGRESQL.value)
    redis_service = _get_runtime_service(services, RuntimeServiceKind.REDIS.value)
    if not postgres_service.connection_database or not postgres_service.connection_username:
        msg = "PostgreSQL runtime service is missing connection credentials."
        raise ValueError(msg)

    return (
        ("DJANGO_SECRET_KEY", f"test-deployment-{deployment.id.hex}"),
        ("DJANGO_SETTINGS_MODULE", "tenant_site.settings"),
        ("PYTHONPATH", TEST_DJANGO_WORKDIR),
        ("TEST_TENANT_SLUG", deployment.hosted_site.tenant.slug),
        ("TEST_SITE_SLUG", deployment.hosted_site.slug),
        ("TEST_POSTGRES_HOST", "127.0.0.1"),
        ("TEST_POSTGRES_PORT", str(postgres_service.internal_port)),
        ("TEST_POSTGRES_DATABASE", postgres_service.connection_database),
        ("TEST_POSTGRES_USERNAME", postgres_service.connection_username),
        ("TEST_POSTGRES_PASSWORD_FILE", TEST_POSTGRES_PASSWORD_FILE),
        ("TEST_REDIS_HOST", "127.0.0.1"),
        ("TEST_REDIS_PORT", str(redis_service.internal_port)),
        ("TEST_REDIS_PASSWORD_FILE", TEST_REDIS_PASSWORD_FILE),
    )
|
||||
|
||||
|
||||
def build_test_django_secret_mounts(
    runtime_services: Iterable[RuntimeService],
) -> tuple[tuple[Path, str], ...]:
    """Return host-to-container secret mounts for generated Django test apps.

    Args:
        runtime_services: Services to search for the PostgreSQL and Redis
            entries. Any iterable is accepted, including one-shot iterators.

    Returns:
        ``(host_directory, container_directory)`` pairs, PostgreSQL first.

    Raises:
        ValueError: If the PostgreSQL or Redis runtime service is missing.
    """
    # Materialize once so the second lookup does not see an exhausted iterator.
    services = tuple(runtime_services)
    postgres_service = _get_runtime_service(services, RuntimeServiceKind.POSTGRESQL.value)
    redis_service = _get_runtime_service(services, RuntimeServiceKind.REDIS.value)
    return (
        (_runtime_service_secret_directory(postgres_service), TEST_POSTGRES_AUTH_DIR),
        (_runtime_service_secret_directory(redis_service), TEST_REDIS_AUTH_DIR),
    )
|
||||
|
||||
|
||||
def write_test_django_project(
    deployment: Deployment,
    runtime_services: Iterable[RuntimeService],
) -> Path:
    """Generate the Django project files for one deployment on disk.

    The environment is built first purely for its validation; its result is
    discarded. The ``tenant_site`` package directory is then created and the
    five project files are written as UTF-8.

    Returns:
        Root directory containing the generated Django project.
    """
    build_test_django_environment(deployment, runtime_services)

    project_root = build_test_django_project_root(deployment)
    package_root = project_root / "tenant_site"
    package_root.mkdir(parents=True, exist_ok=True)

    generated_files = (
        (project_root / "manage.py", _manage_py_contents()),
        (package_root / "__init__.py", ""),
        (package_root / "settings.py", _settings_contents()),
        (package_root / "urls.py", _urls_contents()),
        (package_root / "wsgi.py", _wsgi_contents()),
    )
    for target_path, contents in generated_files:
        target_path.write_text(contents, encoding="utf-8")
    return project_root
|
||||
|
||||
|
||||
def _get_runtime_service(
    runtime_services: Iterable[RuntimeService],
    kind: str,
) -> RuntimeService:
    """Return the first runtime service whose ``kind`` equals ``kind``.

    Raises:
        ValueError: If no service of the requested kind is present.
    """
    matching = (candidate for candidate in runtime_services if candidate.kind == kind)
    found = next(matching, None)
    if found is None:
        msg = f"Missing runtime service kind: {kind}"
        raise ValueError(msg)
    return found
|
||||
|
||||
|
||||
def _runtime_service_secret_directory(runtime_service: RuntimeService) -> Path:
    """Return the host secrets directory path for one runtime service.

    The path is ``<DATA_DIR>/runtime-services/<deployment id>/<kind>/secrets``.
    """
    data_dir = Path(settings.DATA_DIR)
    return data_dir.joinpath(
        "runtime-services",
        str(runtime_service.deployment_id),
        runtime_service.kind,
        "secrets",
    )
|
||||
|
||||
|
||||
def _manage_py_contents() -> str:
    """Return the source text written to the generated project's ``manage.py``."""
    # dedent() removes the common indentation; lstrip() drops the leading
    # newline so the shebang is the first line of the file.
    return dedent(
        """
        #!/usr/bin/env python
        import os
        import sys


        def main() -> None:
            os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings")
            from django.core.management import execute_from_command_line

            execute_from_command_line(sys.argv)


        if __name__ == "__main__":
            main()
        """,
    ).lstrip()
|
||||
|
||||
|
||||
def _settings_contents() -> str:
    """Return the source text written to ``tenant_site/settings.py``.

    The generated settings read their configuration from environment variables
    and read the PostgreSQL and Redis passwords from the files named by the
    ``*_PASSWORD_FILE`` variables.
    """
    return dedent(
        """
        import os
        from pathlib import Path


        BASE_DIR = Path(__file__).resolve().parent.parent
        SECRET_KEY = os.environ["DJANGO_SECRET_KEY"]
        DEBUG = False
        ALLOWED_HOSTS = ["127.0.0.1", "localhost"]
        ROOT_URLCONF = "tenant_site.urls"
        WSGI_APPLICATION = "tenant_site.wsgi.application"
        INSTALLED_APPS = [
            "django.contrib.contenttypes",
        ]
        MIDDLEWARE = []
        TIME_ZONE = "UTC"
        USE_TZ = True
        DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
        TEST_TENANT_SLUG = os.environ["TEST_TENANT_SLUG"]
        TEST_SITE_SLUG = os.environ["TEST_SITE_SLUG"]
        TEST_REDIS_HOST = os.environ["TEST_REDIS_HOST"]
        TEST_REDIS_PORT = int(os.environ["TEST_REDIS_PORT"])


        def _read_secret(env_key: str) -> str:
            return Path(os.environ[env_key]).read_text(encoding="utf-8").strip()


        DATABASES = {
            "default": {
                "ENGINE": "django.db.backends.postgresql",
                "NAME": os.environ["TEST_POSTGRES_DATABASE"],
                "USER": os.environ["TEST_POSTGRES_USERNAME"],
                "PASSWORD": _read_secret("TEST_POSTGRES_PASSWORD_FILE"),
                "HOST": os.environ["TEST_POSTGRES_HOST"],
                "PORT": int(os.environ["TEST_POSTGRES_PORT"]),
            },
        }
        TEST_REDIS_PASSWORD = _read_secret("TEST_REDIS_PASSWORD_FILE")
        """,
    ).lstrip()
|
||||
|
||||
|
||||
def _urls_contents() -> str:
    """Return the source text written to ``tenant_site/urls.py``.

    The generated module defines a single ``sentinel/`` route whose view runs
    ``SELECT 1`` against the database, writes and reads back one Redis key, and
    returns the results as JSON.
    """
    return dedent(
        """
        import redis

        from django.conf import settings
        from django.db import connection
        from django.http import JsonResponse
        from django.urls import path


        def sentinel_view(request):
            with connection.cursor() as cursor:
                cursor.execute("SELECT 1")
                postgres_value = int(cursor.fetchone()[0])

            redis_key = f"sentinel:{settings.TEST_TENANT_SLUG}:{settings.TEST_SITE_SLUG}"
            redis_client = redis.Redis(
                host=settings.TEST_REDIS_HOST,
                port=settings.TEST_REDIS_PORT,
                password=settings.TEST_REDIS_PASSWORD,
                decode_responses=True,
                socket_timeout=1,
            )
            redis_client.set(redis_key, settings.TEST_SITE_SLUG, ex=60)
            redis_value = redis_client.get(redis_key)
            return JsonResponse(
                {
                    "status": "ok",
                    "postgres": postgres_value,
                    "redis": redis_value,
                    "tenant": settings.TEST_TENANT_SLUG,
                    "site": settings.TEST_SITE_SLUG,
                },
            )


        urlpatterns = [
            path("sentinel/", sentinel_view),
        ]
        """,
    ).lstrip()
|
||||
|
||||
|
||||
def _wsgi_contents() -> str:
    """Render the source of the generated project's ``tenant_site/wsgi.py``.

    Returns:
        The WSGI entry-point module text, which defaults
        ``DJANGO_SETTINGS_MODULE`` to ``tenant_site.settings`` and exposes
        ``application``.
    """
    template = """
        import os

        from django.core.wsgi import get_wsgi_application


        os.environ.setdefault("DJANGO_SETTINGS_MODULE", "tenant_site.settings")

        application = get_wsgi_application()
        """
    rendered = dedent(template)
    return rendered.lstrip()
|
||||
1
control_plane/management/__init__.py
Normal file
1
control_plane/management/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Django management command package for control-plane workflows."""
|
||||
1
control_plane/management/commands/__init__.py
Normal file
1
control_plane/management/commands/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Management commands for local control-plane operations."""
|
||||
57
control_plane/management/commands/create_test_deployment.py
Normal file
57
control_plane/management/commands/create_test_deployment.py
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.core.management.base import CommandError
|
||||
|
||||
from control_plane.local_test_deployment import create_test_deployment
|
||||
from control_plane.local_test_deployment import provision_test_deployment
|
||||
from control_plane.local_test_deployment import queue_test_deployment_provisioning
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from argparse import ArgumentParser
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that creates a throwaway local test deployment.

    By default the deployment is provisioned in-process; ``--no-wait`` hands
    provisioning to the task queue instead and returns right away.
    """

    help = "Create a randomized tenant and provision a local test deployment inline by default."

    def add_arguments(self, parser: ArgumentParser) -> None:
        """Declare the ``--no-wait`` switch on the command's argument parser."""
        parser.add_argument(
            "--no-wait",
            action="store_true",
            help="Queue provisioning asynchronously and return immediately without running it inline.",
        )

    def handle(self, *_args: str, **options: bool | float) -> None:
        """Create the deployment records, then provision them inline or via the queue.

        Progress is reported on stdout as ``key=value`` lines.

        Raises:
            CommandError: If queueing fails, or inline provisioning raises
                ``RuntimeError``, ``TimeoutError``, or ``ValueError``.
        """
        emit = self.stdout.write

        created = create_test_deployment()
        emit(f"tenant_slug={created.tenant.slug}")
        emit(f"site_slug={created.hosted_site.slug}")
        emit(f"deployment_id={created.deployment.id}")
        emit(f"sentinel_url={created.sentinel_url}")

        if not options["no_wait"]:
            # Default path: run provisioning in this process and report the final status.
            emit("execution_mode=inline")
            try:
                deployment = provision_test_deployment(str(created.deployment.id))
            except (RuntimeError, TimeoutError, ValueError) as error:
                raise CommandError(str(error)) from error

            emit(f"status={deployment.status}")
            return

        # --no-wait: enqueue the work and return without waiting for the result.
        try:
            task_id = queue_test_deployment_provisioning(str(created.deployment.id))
        except RuntimeError as error:
            raise CommandError(str(error)) from error

        emit(f"celery_task_id={task_id}")
        emit("status=queued")
|
||||
289
control_plane/migrations/0001_initial.py
Normal file
289
control_plane/migrations/0001_initial.py
Normal file
|
|
@ -0,0 +1,289 @@
|
|||
# Generated by Django 6.0.4 on 2026-04-27 12:21
|
||||
|
||||
import uuid
|
||||
|
||||
import auto_prefetch
|
||||
import django.core.validators
|
||||
import django.db.models.deletion
|
||||
import django.db.models.manager
|
||||
from django.db import migrations
|
||||
from django.db import models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Create the initial control-plane schema.

    Creates the HostedSite, Deployment, RuntimeService, and Tenant models, then
    attaches HostedSite to Tenant and adds the secondary indexes and uniqueness
    constraints. This file is generated by Django; the operation order and the
    literal values below are part of the recorded schema history.
    """

    initial = True

    dependencies = []

    operations = [
        # HostedSite is created without its tenant foreign key; the key is
        # added by the AddField operation further down, once Tenant exists.
        migrations.CreateModel(
            name="HostedSite",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("slug", models.SlugField(max_length=64)),
                ("display_name", models.CharField(max_length=255)),
                ("working_directory", models.CharField(default=".", max_length=255)),
                ("wsgi_module", models.CharField(max_length=255)),
                (
                    "service_port",
                    models.PositiveIntegerField(
                        default=8000,
                        validators=[
                            django.core.validators.MinValueValidator(1024),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
            ],
            options={
                "ordering": ("tenant__slug", "slug"),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        migrations.CreateModel(
            name="Deployment",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("idempotency_key", models.CharField(max_length=64, unique=True)),
                ("source_sha256", models.CharField(max_length=64)),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("queued", "Queued"),
                            ("provisioning", "Provisioning"),
                            ("booting", "Booting"),
                            ("running", "Running"),
                            ("failed", "Failed"),
                            ("stopped", "Stopped"),
                            ("destroying", "Destroying"),
                            ("destroyed", "Destroyed"),
                        ],
                        default="queued",
                        max_length=32,
                    ),
                ),
                (
                    "guest_ipv4",
                    models.GenericIPAddressField(
                        blank=True,
                        null=True,
                        protocol="IPv4",
                    ),
                ),
                (
                    "guest_port",
                    models.PositiveIntegerField(
                        default=8000,
                        validators=[
                            django.core.validators.MinValueValidator(1024),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
                (
                    "firecracker_vm_id",
                    models.CharField(blank=True, max_length=64, null=True, unique=True),
                ),
                ("last_error", models.TextField(blank=True)),
                ("started_at", models.DateTimeField(blank=True, null=True)),
                ("finished_at", models.DateTimeField(blank=True, null=True)),
                (
                    "hosted_site",
                    auto_prefetch.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="deployments",
                        to="control_plane.hostedsite",
                    ),
                ),
            ],
            options={
                "ordering": ("-created_at",),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        migrations.CreateModel(
            name="RuntimeService",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                (
                    "kind",
                    models.CharField(
                        choices=[("postgresql", "PostgreSQL"), ("redis", "Redis")],
                        max_length=32,
                    ),
                ),
                (
                    "status",
                    models.CharField(
                        choices=[
                            ("queued", "Queued"),
                            ("provisioning", "Provisioning"),
                            ("ready", "Ready"),
                            ("failed", "Failed"),
                            ("destroying", "Destroying"),
                            ("destroyed", "Destroyed"),
                        ],
                        default="queued",
                        max_length=32,
                    ),
                ),
                ("container_name", models.CharField(max_length=128, unique=True)),
                ("network_name", models.CharField(max_length=128)),
                ("hostname", models.CharField(max_length=128)),
                ("image_reference", models.CharField(max_length=255)),
                (
                    "internal_port",
                    models.PositiveIntegerField(
                        validators=[
                            django.core.validators.MinValueValidator(1),
                            django.core.validators.MaxValueValidator(65535),
                        ],
                    ),
                ),
                ("connection_username", models.CharField(blank=True, max_length=63)),
                ("connection_database", models.CharField(blank=True, max_length=63)),
                ("connection_secret_ref", models.CharField(max_length=255)),
                (
                    "deployment",
                    auto_prefetch.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name="runtime_services",
                        to="control_plane.deployment",
                    ),
                ),
            ],
            options={
                "ordering": ("deployment__created_at", "kind"),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        migrations.CreateModel(
            name="Tenant",
            fields=[
                ("created_at", models.DateTimeField(auto_now_add=True)),
                ("updated_at", models.DateTimeField(auto_now=True)),
                (
                    "id",
                    models.UUIDField(
                        default=uuid.uuid4,
                        editable=False,
                        primary_key=True,
                        serialize=False,
                    ),
                ),
                ("slug", models.SlugField(max_length=64, unique=True)),
                ("display_name", models.CharField(max_length=255)),
            ],
            options={
                "ordering": ("slug",),
                "abstract": False,
                "base_manager_name": "prefetch_manager",
                # NOTE(review): slug is already unique=True above; confirm this
                # extra single-column index is intentional.
                "indexes": [models.Index(fields=["slug"], name="tenant_slug_idx")],
            },
            managers=[
                ("objects", django.db.models.manager.Manager()),
                ("prefetch_manager", django.db.models.manager.Manager()),
            ],
        ),
        # Tenant now exists, so the deferred HostedSite -> Tenant key can be added.
        migrations.AddField(
            model_name="hostedsite",
            name="tenant",
            field=auto_prefetch.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name="hosted_sites",
                to="control_plane.tenant",
            ),
        ),
        migrations.AddIndex(
            model_name="deployment",
            index=models.Index(
                fields=["hosted_site", "status"],
                name="deploy_site_status_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="deployment",
            index=models.Index(
                fields=["status", "created_at"],
                name="deploy_status_created_idx",
            ),
        ),
        # NOTE(review): this index covers the same columns as the
        # runtime_service_unique_deployment_kind constraint added below.
        migrations.AddIndex(
            model_name="runtimeservice",
            index=models.Index(
                fields=["deployment", "kind"],
                name="service_deploy_kind_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="runtimeservice",
            index=models.Index(
                fields=["kind", "status"],
                name="service_kind_status_idx",
            ),
        ),
        # At most one service of each kind per deployment.
        migrations.AddConstraint(
            model_name="runtimeservice",
            constraint=models.UniqueConstraint(
                fields=("deployment", "kind"),
                name="runtime_service_unique_deployment_kind",
            ),
        ),
        # NOTE(review): this index covers the same columns as the
        # hosted_site_unique_tenant_slug constraint added below.
        migrations.AddIndex(
            model_name="hostedsite",
            index=models.Index(fields=["tenant", "slug"], name="site_tenant_slug_idx"),
        ),
        # Site slugs are unique within a tenant, not globally.
        migrations.AddConstraint(
            model_name="hostedsite",
            constraint=models.UniqueConstraint(
                fields=("tenant", "slug"),
                name="hosted_site_unique_tenant_slug",
            ),
        ),
    ]
|
||||
1
control_plane/migrations/__init__.py
Normal file
1
control_plane/migrations/__init__.py
Normal file
|
|
@ -0,0 +1 @@
|
|||
"""Migration package for control-plane models."""
|
||||
340
control_plane/models.py
Normal file
340
control_plane/models.py
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
|
||||
import auto_prefetch
|
||||
from auto_prefetch import ForeignKey
|
||||
from auto_prefetch import Manager
|
||||
from django.core.validators import MaxValueValidator
|
||||
from django.core.validators import MinValueValidator
|
||||
from django.db import models
|
||||
from django.db import transaction
|
||||
|
||||
from control_plane.runtime_plans import DjangoApplicationLaunchConfig
|
||||
from control_plane.runtime_plans import build_django_server_command
|
||||
|
||||
|
||||
class TimestampedModel(auto_prefetch.Model):
    """Provide created and updated timestamps for control-plane records."""

    # auto_now_add: stamped once, when the row is first created.
    created_at = models.DateTimeField(auto_now_add=True)
    # auto_now: re-stamped each time the instance is saved.
    updated_at = models.DateTimeField(auto_now=True)

    class Meta(auto_prefetch.Model.Meta):
        # Abstract base: subclasses inherit the fields; no table of its own.
        abstract = True
|
||||
|
||||
|
||||
class DeploymentStatus(models.TextChoices):
    """Track deployment lifecycle state inside control plane."""

    # Each member is (value stored in the database, human-readable label).
    # The stored values are recorded in the initial migration's choices list.
    QUEUED = "queued", "Queued"
    PROVISIONING = "provisioning", "Provisioning"
    BOOTING = "booting", "Booting"
    RUNNING = "running", "Running"
    FAILED = "failed", "Failed"
    STOPPED = "stopped", "Stopped"
    DESTROYING = "destroying", "Destroying"
    DESTROYED = "destroyed", "Destroyed"
|
||||
|
||||
|
||||
class RuntimeServiceKind(models.TextChoices):
    """Enumerate deployment-scoped backing services."""

    # Each member is (stored value, display label). The stored value is also
    # used as the container-name prefix and inside the secret reference built
    # by Deployment.ensure_test_runtime_services.
    POSTGRESQL = "postgresql", "PostgreSQL"
    REDIS = "redis", "Redis"
|
||||
|
||||
|
||||
class RuntimeServiceStatus(models.TextChoices):
    """Track lifecycle state for a deployment-scoped service."""

    # Each member is (value stored in the database, human-readable label).
    QUEUED = "queued", "Queued"
    PROVISIONING = "provisioning", "Provisioning"
    READY = "ready", "Ready"
    FAILED = "failed", "Failed"
    DESTROYING = "destroying", "Destroying"
    DESTROYED = "destroyed", "Destroyed"
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class RuntimeServiceSeedSpec:
    """Describe default values for admin-seeded test runtime services."""

    # Copied onto RuntimeService.hostname when a service record is seeded.
    hostname: str
    # Copied onto RuntimeService.image_reference.
    image_reference: str
    # Copied onto RuntimeService.internal_port.
    internal_port: int
|
||||
|
||||
|
||||
# Seed values per service kind. Deployment.ensure_test_runtime_services
# iterates this mapping, so every kind listed here gets a RuntimeService row.
RUNTIME_SERVICE_SEED_SPECS: dict[RuntimeServiceKind, RuntimeServiceSeedSpec] = {
    RuntimeServiceKind.POSTGRESQL: RuntimeServiceSeedSpec(
        hostname="postgres.internal",
        image_reference="docker.io/library/postgres:17-alpine",
        internal_port=5432,
    ),
    RuntimeServiceKind.REDIS: RuntimeServiceSeedSpec(
        hostname="redis.internal",
        image_reference="docker.io/library/redis:7.4-alpine",
        internal_port=6379,
    ),
}
|
||||
|
||||
|
||||
def _build_limited_identifier(
    *,
    prefix: str,
    tenant_slug: str,
    site_slug: str,
    suffix: str,
    max_length: int,
) -> str:
    """Build a bounded identifier while preserving deployment uniqueness.

    The full form is ``prefix-tenant_slug-site_slug-suffix``. When that is too
    long, the two slugs are truncated to share whatever room is left; when
    there is not even room for one character of each slug, the slugs are
    dropped and ``prefix-suffix`` (cut to ``max_length``) is returned.

    Args:
        prefix: Static prefix to identify the type of resource (e.g. "net" or
            "postgres").
        tenant_slug: Hosted site tenant slug to include in the name for uniqueness.
        site_slug: Hosted site slug to include in the name for uniqueness.
        suffix: Unique suffix to ensure no collisions across deployments of the same site.
        max_length: Maximum length for the resulting identifier.

    Returns:
        A string that combines the prefix, tenant slug, site slug, and suffix,
        truncated as needed so it never exceeds max_length.
    """
    candidate = f"{prefix}-{tenant_slug}-{site_slug}-{suffix}"
    if len(candidate) <= max_length:
        return candidate

    # Characters left for the two slugs once prefix, suffix, and the three
    # separating hyphens are accounted for.
    slug_budget = max_length - len(prefix) - len(suffix) - 3
    if slug_budget < 2:
        # Each slug needs at least one character. With less room than that,
        # keeping the slugs would overshoot max_length, so drop them.
        return f"{prefix}-{suffix}"[:max_length]

    # The tenant gets the smaller half when the budget is odd.
    tenant_budget = slug_budget // 2
    site_budget = slug_budget - tenant_budget
    return "-".join(
        (
            prefix,
            tenant_slug[:tenant_budget],
            site_slug[:site_budget],
            suffix,
        ),
    )
|
||||
|
||||
|
||||
def _build_limited_connection_name(*, site_slug: str, suffix: str, max_length: int = 63) -> str:
    """Join a site slug and a deployment suffix into a length-capped name.

    Args:
        site_slug: Hosted site slug placed in front of the suffix.
        suffix: Per-deployment suffix; kept whole whenever it fits.
        max_length: Upper bound on the result length (63 by default).

    Returns:
        ``site_slug-suffix`` when it fits; otherwise the slug is shortened so
        the result is exactly max_length long. If the suffix alone leaves no
        room for the slug, only the suffix (cut to max_length) is returned.
    """
    full_name = f"{site_slug}-{suffix}"
    if len(full_name) <= max_length:
        return full_name

    # Room left for the slug after reserving the suffix and one hyphen.
    slug_room = max_length - len(suffix) - 1
    if slug_room <= 0:
        return suffix[:max_length]

    return f"{site_slug[:slug_room]}-{suffix}"
|
||||
|
||||
|
||||
class Tenant(TimestampedModel):
    """Represent a tenant that owns hosted applications and deployments."""

    # Primary key defaults to a random uuid.uuid4() value and is not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Unique across all tenants.
    slug = models.SlugField(max_length=64, unique=True)
    display_name = models.CharField(max_length=255)

    # Manager class imported from auto_prefetch.
    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("slug",)
        # NOTE(review): slug is already unique=True, which normally gives it an
        # index; confirm tenant_slug_idx is still needed (removing it requires
        # a new migration).
        indexes = [models.Index(fields=("slug",), name="tenant_slug_idx")]

    def __str__(self) -> str:
        # Shown wherever Django renders the object as text.
        return self.display_name
|
||||
|
||||
|
||||
class HostedSite(TimestampedModel):
    """Describe a deployable Django site owned by a tenant."""

    # Primary key defaults to a random uuid.uuid4() value and is not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Deleting a tenant cascades to its sites; reverse accessor is tenant.hosted_sites.
    tenant = ForeignKey(Tenant, on_delete=models.CASCADE, related_name="hosted_sites")
    # Unique per tenant only (see the constraint in Meta), not globally.
    slug = models.SlugField(max_length=64)
    display_name = models.CharField(max_length=255)
    working_directory = models.CharField(max_length=255, default=".")
    # Passed to DjangoApplicationLaunchConfig by Deployment.build_django_launch_command.
    wsgi_module = models.CharField(max_length=255)
    # Validators restrict the port to the 1024-65535 range.
    service_port = models.PositiveIntegerField(
        default=8000,
        validators=[MinValueValidator(1024), MaxValueValidator(65535)],
    )

    # Manager class imported from auto_prefetch.
    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("tenant__slug", "slug")
        constraints = [
            models.UniqueConstraint(
                fields=("tenant", "slug"),
                name="hosted_site_unique_tenant_slug",
            ),
        ]
        # NOTE(review): this index covers the same columns as the unique
        # constraint above; confirm it is not redundant before keeping it.
        indexes = [
            models.Index(fields=("tenant", "slug"), name="site_tenant_slug_idx"),
        ]

    def __str__(self) -> str:
        # Reads self.tenant, so this may trigger a query if the tenant is not loaded.
        return f"{self.tenant.slug}/{self.slug}"
|
||||
|
||||
|
||||
class Deployment(TimestampedModel):
    """One runtime instance of a hosted site, with its lifecycle state and guest address."""

    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    hosted_site = ForeignKey(HostedSite, on_delete=models.CASCADE, related_name="deployments")
    idempotency_key = models.CharField(max_length=64, unique=True)
    source_sha256 = models.CharField(max_length=64)
    status = models.CharField(
        max_length=32,
        choices=DeploymentStatus,
        default=DeploymentStatus.QUEUED,
    )
    guest_ipv4 = models.GenericIPAddressField(protocol="IPv4", blank=True, null=True)
    guest_port = models.PositiveIntegerField(
        default=8000,
        validators=[MinValueValidator(1024), MaxValueValidator(65535)],
    )
    firecracker_vm_id = models.CharField(max_length=64, blank=True, null=True, unique=True)
    last_error = models.TextField(blank=True)
    started_at = models.DateTimeField(blank=True, null=True)
    finished_at = models.DateTimeField(blank=True, null=True)

    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("-created_at",)
        indexes = [
            models.Index(fields=("hosted_site", "status"), name="deploy_site_status_idx"),
            models.Index(fields=("status", "created_at"), name="deploy_status_created_idx"),
        ]

    def __str__(self) -> str:
        return f"{self.hosted_site} [{self.status}]"

    def build_django_launch_command(self) -> tuple[str, ...]:
        """Assemble the server command line for this deployment's Django app.

        The site's WSGI module and this deployment's guest port are passed to
        ``build_django_server_command``; the server binds to all interfaces.

        Returns:
            The command as a tuple of argument strings.
        """
        return build_django_server_command(
            DjangoApplicationLaunchConfig(
                wsgi_module=self.hosted_site.wsgi_module,
                bind_host="0.0.0.0",  # noqa: S104
                port=self.guest_port,
            ),
        )

    def ensure_test_runtime_services(self) -> tuple[RuntimeService, ...]:
        """Seed a RuntimeService row for every supported kind this deployment lacks.

        Kinds that already have a row are left untouched. The lookup and the
        insert run inside a single transaction.

        Returns:
            The records created by this call (empty when nothing was missing).
        """
        site = self.hosted_site
        tenant_slug = site.tenant.slug
        site_slug = site.slug
        # The first 12 hex digits of the deployment UUID keep names distinct
        # across deployments of the same site.
        suffix = self.id.hex[:12]

        def limited_name(prefix: str) -> str:
            return _build_limited_identifier(
                prefix=prefix,
                tenant_slug=tenant_slug,
                site_slug=site_slug,
                suffix=suffix,
                max_length=128,
            )

        network_name = limited_name("net")
        connection_name = _build_limited_connection_name(site_slug=site_slug, suffix=suffix)

        def seed_service(kind: RuntimeServiceKind, seed_spec: RuntimeServiceSeedSpec) -> RuntimeService:
            # Only PostgreSQL gets a username/database; other kinds leave both blank.
            credential_name = connection_name if kind == RuntimeServiceKind.POSTGRESQL else ""
            return RuntimeService(
                deployment=self,
                kind=kind.value,
                status=RuntimeServiceStatus.QUEUED.value,
                container_name=limited_name(kind.value),
                network_name=network_name,
                hostname=seed_spec.hostname,
                image_reference=seed_spec.image_reference,
                internal_port=seed_spec.internal_port,
                connection_username=credential_name,
                connection_database=credential_name,
                connection_secret_ref=f"secret://{kind.value}/{tenant_slug}/{site_slug}/{suffix}",
            )

        with transaction.atomic():
            present_kinds = set(
                RuntimeService.objects.filter(deployment=self).values_list("kind", flat=True),
            )
            pending = [
                seed_service(kind, seed_spec)
                for kind, seed_spec in RUNTIME_SERVICE_SEED_SPECS.items()
                if kind.value not in present_kinds
            ]
            if pending:
                RuntimeService.objects.bulk_create(pending)

        return tuple(pending)
|
||||
|
||||
|
||||
class RuntimeService(TimestampedModel):
    """Track a dedicated PostgreSQL or Redis service for one deployment."""

    # Primary key defaults to a random uuid.uuid4() value and is not editable.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
    # Deleting a deployment cascades to its services; reverse accessor is
    # deployment.runtime_services.
    deployment = ForeignKey(Deployment, on_delete=models.CASCADE, related_name="runtime_services")
    kind = models.CharField(max_length=32, choices=RuntimeServiceKind)
    status = models.CharField(
        max_length=32,
        choices=RuntimeServiceStatus,
        default=RuntimeServiceStatus.QUEUED,
    )
    # Unique across all services, not just within one deployment.
    container_name = models.CharField(max_length=128, unique=True)
    network_name = models.CharField(max_length=128)
    hostname = models.CharField(max_length=128)
    image_reference = models.CharField(max_length=255)
    # Validators restrict the port to the 1-65535 range.
    internal_port = models.PositiveIntegerField(
        validators=[MinValueValidator(1), MaxValueValidator(65535)],
    )
    # Both may be blank; Deployment.ensure_test_runtime_services fills them
    # only for PostgreSQL services.
    connection_username = models.CharField(max_length=63, blank=True)
    connection_database = models.CharField(max_length=63, blank=True)
    # Seeded as a "secret://..." reference string rather than the secret value.
    connection_secret_ref = models.CharField(max_length=255)

    # Manager class imported from auto_prefetch.
    objects = Manager()

    class Meta(TimestampedModel.Meta):
        ordering = ("deployment__created_at", "kind")
        # At most one service of each kind per deployment.
        constraints = [
            models.UniqueConstraint(
                fields=("deployment", "kind"),
                name="runtime_service_unique_deployment_kind",
            ),
        ]
        # NOTE(review): service_deploy_kind_idx covers the same columns as the
        # unique constraint above; confirm it is not redundant.
        indexes = [
            models.Index(fields=("deployment", "kind"), name="service_deploy_kind_idx"),
            models.Index(fields=("kind", "status"), name="service_kind_status_idx"),
        ]

    def __str__(self) -> str:
        # Uses the raw foreign-key column, so no query for the deployment is needed.
        return f"{self.deployment_id}:{self.kind}"
|
||||
254
control_plane/observability.py
Normal file
254
control_plane/observability.py
Normal file
|
|
@ -0,0 +1,254 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import TYPE_CHECKING
|
||||
from urllib.error import HTTPError
|
||||
from urllib.error import URLError
|
||||
from urllib.request import urlopen
|
||||
|
||||
from django.utils import timezone
|
||||
|
||||
from control_plane.host_commands import HostCommandError
|
||||
from control_plane.host_commands import run_host_command
|
||||
from control_plane.local_test_runtime import build_test_django_container_names
|
||||
from control_plane.local_test_runtime import build_test_django_local_url
|
||||
from control_plane.local_test_runtime import build_test_django_project_root
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import DeploymentStatus
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from control_plane.models import RuntimeService
|
||||
|
||||
|
||||
# Cap on diagnostic log lines.
# NOTE(review): the code that reads this constant is outside this view —
# presumably the container log reader; confirm.
MAX_DIAGNOSTIC_LOG_LINES = 200
# Default timeout, in seconds, for probe_test_deployment_health's HTTP request.
DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS = 2.0

# Recursive aliases for values that json.dumps can serialize; used as the
# return type of the diagnostics and probe helpers in this module.
type JsonPrimitive = bool | int | float | str | None
type JsonValue = JsonPrimitive | list[JsonValue] | dict[str, JsonValue]
|
||||
|
||||
|
||||
def build_test_deployment_diagnostics_root(deployment: Deployment) -> Path:
    """Return the ``diagnostics`` directory that sits beside the deployment's project root."""
    project_root = build_test_django_project_root(deployment)
    return project_root.parent / "diagnostics"
|
||||
|
||||
|
||||
def build_test_deployment_diagnostics_snapshot_path(deployment: Deployment) -> Path:
    """Return the path of ``snapshot.json`` inside the deployment's diagnostics directory."""
    diagnostics_root = build_test_deployment_diagnostics_root(deployment)
    return diagnostics_root / "snapshot.json"
|
||||
|
||||
|
||||
def capture_test_deployment_diagnostics(deployment_id: str) -> None:
    """Collect a diagnostics snapshot for one deployment and write it to disk as JSON.

    The deployment is loaded with its hosted site, tenant, and runtime
    services; the snapshot file's parent directory is created if missing, and
    any previous snapshot is overwritten.
    """
    deployments = Deployment.objects.select_related("hosted_site__tenant").prefetch_related("runtime_services")
    deployment = deployments.get(pk=deployment_id)

    target = build_test_deployment_diagnostics_snapshot_path(deployment)
    target.parent.mkdir(parents=True, exist_ok=True)

    serialized = json.dumps(_build_diagnostics_snapshot(deployment), indent=2)
    target.write_text(serialized, encoding="utf-8")
|
||||
|
||||
|
||||
def load_test_deployment_diagnostics(deployment: Deployment) -> dict[str, JsonValue] | None:
    """Load the latest persisted diagnostics snapshot for one deployment.

    A snapshot that exists but cannot be decoded or parsed is reported as a
    small error payload instead of an exception, so callers can display it.

    Returns:
        Parsed diagnostics payload, or None when no snapshot has been captured yet.
        When the file is not valid UTF-8, not valid JSON, or not a JSON object,
        a dict with ``capture_error`` set and ``captured_at`` of None.
    """
    snapshot_path = build_test_deployment_diagnostics_snapshot_path(deployment)

    # Read directly instead of checking exists() first: the snapshot may be
    # removed between a check and the read.
    try:
        raw_snapshot = snapshot_path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # Same cases Path.exists() reports as "missing".
        return None
    except UnicodeDecodeError as error:
        return {
            "capture_error": f"Unable to parse diagnostics snapshot: {error}",
            "captured_at": None,
        }

    try:
        payload = json.loads(raw_snapshot)
    except json.JSONDecodeError as error:
        return {
            "capture_error": f"Unable to parse diagnostics snapshot: {error}",
            "captured_at": None,
        }

    if not isinstance(payload, dict):
        return {
            "capture_error": "Diagnostics snapshot is not a JSON object.",
            "captured_at": None,
        }

    return payload
|
||||
|
||||
|
||||
def probe_test_deployment_health(
    deployment: Deployment,
    *,
    timeout_seconds: float = DEFAULT_SENTINEL_PROBE_TIMEOUT_SECONDS,
) -> dict[str, JsonValue]:
    """Probe the generated deployment sentinel endpoint and return structured status.

    No request is made unless the deployment is in the running or booting
    state. Network, HTTP, decoding, and JSON errors are all reported in the
    result rather than raised.

    Args:
        deployment: Deployment whose sentinel URL is probed.
        timeout_seconds: Timeout passed to ``urlopen``.

    Returns:
        JSON-serializable probe state describing current sentinel reachability and payload.
        ``status`` is one of "not-running", "healthy", "unexpected-payload", or
        "unreachable". ``http_status`` is set whenever the server returned a
        response, including error responses.
    """
    sentinel_url = build_test_django_local_url(deployment)
    result: dict[str, JsonValue] = {
        "checked_at": timezone.now().isoformat(),
        "deployment_id": str(deployment.id),
        "deployment_status": deployment.status,
        "sentinel_url": sentinel_url,
        "ok": False,
        "status": "not-running",
        "label": "Not Running",
        "payload": None,
        "error": "",
        "http_status": None,
    }
    if deployment.status not in {DeploymentStatus.RUNNING.value, DeploymentStatus.BOOTING.value}:
        return result

    try:
        with urlopen(sentinel_url, timeout=timeout_seconds) as response:  # noqa: S310
            # Record the status before parsing so it survives a bad body.
            result["http_status"] = int(getattr(response, "status", 200))
            payload = json.loads(response.read().decode("utf-8"))
    except (HTTPError, URLError, OSError, json.JSONDecodeError, UnicodeDecodeError) as error:
        # UnicodeDecodeError covers a body that is not valid UTF-8; it is a
        # ValueError, not a JSONDecodeError, so it must be listed explicitly.
        if isinstance(error, HTTPError):
            # The server did answer, just with an error status; keep the code.
            result["http_status"] = error.code
        result["status"] = "unreachable"
        result["label"] = "Unreachable"
        result["error"] = str(error)
        return result

    if not isinstance(payload, dict):
        # Valid JSON but not an object: wrap it so "payload" stays a dict.
        result["payload"] = {"value": str(payload)}
        result["status"] = "unexpected-payload"
        result["label"] = "Unexpected"
        return result

    result["payload"] = payload
    if payload.get("status") == "ok":
        result["ok"] = True
        result["status"] = "healthy"
        result["label"] = "Healthy"
    else:
        result["status"] = "unexpected-payload"
        result["label"] = "Unexpected"

    return result
|
||||
|
||||
|
||||
def _build_diagnostics_snapshot(deployment: Deployment) -> dict[str, JsonValue]:
    """Assemble the JSON-serializable diagnostics payload for one deployment.

    Combines the control plane's recorded fields with live pod and container
    state. The pod name is taken from the ``network_name`` of the first
    runtime service (ordered by kind), or is empty when there are none.
    """
    hosted_site = deployment.hosted_site
    services = _ordered_runtime_services(deployment.runtime_services.all())
    django_container, _ = build_test_django_container_names(deployment)
    pod_name = services[0].network_name if services else ""

    # Keys are added in the order the snapshot is laid out; host commands run
    # pod first, then the Django container, then each runtime service.
    snapshot: dict[str, JsonValue] = {
        "captured_at": timezone.now().isoformat(),
        "deployment_id": str(deployment.id),
        "deployment_status": deployment.status,
        "tenant_slug": hosted_site.tenant.slug,
        "site_slug": hosted_site.slug,
        "guest_port": deployment.guest_port,
        "sentinel_url": build_test_django_local_url(deployment),
        "last_error": deployment.last_error,
    }
    snapshot["pod"] = _collect_pod_diagnostics(pod_name)
    snapshot["django"] = _collect_container_diagnostics(
        container_name=django_container,
        control_plane_status=deployment.status,
        label="django",
    )
    service_reports = []
    for service in services:
        service_reports.append(
            _collect_container_diagnostics(
                container_name=service.container_name,
                control_plane_status=service.status,
                label=service.kind,
            ),
        )
    snapshot["runtime_services"] = service_reports
    return snapshot
|
||||
|
||||
|
||||
def _ordered_runtime_services(runtime_services: Iterable[RuntimeService]) -> tuple[RuntimeService, ...]:
    """Return the given runtime services as a tuple sorted by their ``kind``.

    The sort is stable, so services sharing a kind keep their incoming order.
    """

    def _kind_of(service: RuntimeService) -> str:
        return service.kind

    ordered = list(runtime_services)
    ordered.sort(key=_kind_of)
    return tuple(ordered)
|
||||
|
||||
|
||||
def _collect_pod_diagnostics(pod_name: str) -> dict[str, JsonValue]:
    """Report the Podman pod state for one pod name.

    Returns:
        Mapping with ``name``, ``status`` and ``error`` keys. ``status`` is
        ``"missing"`` when no pod name is given or the inspect command fails,
        and ``"unknown"`` when the inspect command prints nothing.

    """

    def _report(status: str, error: str) -> dict[str, JsonValue]:
        return {"name": pod_name, "status": status, "error": error}

    if not pod_name:
        return _report("missing", "No runtime services are linked to this deployment yet.")

    try:
        inspection = run_host_command(
            command=("podman", "pod", "inspect", "--format", "{{.State}}", pod_name),
            timeout_seconds=20.0,
        )
    except HostCommandError as error:
        return _report("missing", _format_host_command_error(error))

    return _report(inspection.stdout.strip() or "unknown", "")
|
||||
|
||||
|
||||
def _collect_container_diagnostics(
    *,
    container_name: str,
    control_plane_status: str,
    label: str,
) -> dict[str, JsonValue]:
    """Gather Podman status and recent logs for one container.

    Returns:
        Mapping that pairs the control-plane status passed in with the status
        and logs read from Podman, plus any error text from either probe.

    """
    # Inspect first, then read logs, so the host commands run in a fixed order.
    status_probe = _inspect_container_status(container_name)
    logs_probe = _read_container_logs(container_name)
    return {
        "label": label,
        "container_name": container_name,
        "control_plane_status": control_plane_status,
        "container_status": status_probe[0],
        "logs": logs_probe[0],
        "inspect_error": status_probe[1],
        "log_error": logs_probe[1],
    }
|
||||
|
||||
|
||||
def _inspect_container_status(container_name: str) -> tuple[str, str]:
    """Read one container's health status, or plain state when no health data exists.

    Returns:
        ``(status, error)``. On a failed inspect the status is ``"missing"``
        and the error carries the formatted command failure; otherwise the
        error is empty and an empty command output is reported as ``"unknown"``.

    """
    # Go template: prefer the healthcheck status, fall back to the container state.
    status_template = "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}"
    try:
        inspection = run_host_command(
            command=("podman", "inspect", "--format", status_template, container_name),
            timeout_seconds=20.0,
        )
    except HostCommandError as error:
        return "missing", _format_host_command_error(error)
    else:
        return inspection.stdout.strip() or "unknown", ""
|
||||
|
||||
|
||||
def _read_container_logs(container_name: str) -> tuple[str, str]:
    """Read the last ``MAX_DIAGNOSTIC_LOG_LINES`` log lines of one container.

    Returns:
        ``(logs, error)``. Logs come from stdout, or from stderr when stdout
        is blank; on a failed command the logs are empty and the error holds
        the formatted failure.

    """
    tail_command = ("podman", "logs", "--tail", str(MAX_DIAGNOSTIC_LOG_LINES), container_name)
    try:
        completed = run_host_command(command=tail_command, timeout_seconds=20.0)
    except HostCommandError as error:
        return "", _format_host_command_error(error)
    else:
        captured = completed.stdout.strip()
        if not captured:
            captured = completed.stderr.strip()
        return captured, ""
|
||||
|
||||
|
||||
def _format_host_command_error(error: HostCommandError) -> str:
    """Pick the most useful text from a failed host command.

    Returns:
        Stripped stderr when it is non-blank, otherwise stripped stdout when
        that is non-blank, otherwise ``str(error)``.

    """
    stderr_text = error.stderr.strip()
    if stderr_text:
        return stderr_text

    stdout_text = error.stdout.strip()
    return stdout_text if stdout_text else str(error)
|
||||
366
control_plane/runtime_plans.py
Normal file
366
control_plane/runtime_plans.py
Normal file
|
|
@ -0,0 +1,366 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class PostgresContainerConfig:
|
||||
"""Input required to build a Podman command for a tenant PostgreSQL service."""
|
||||
|
||||
container_name: str
|
||||
network_name: str
|
||||
hostname: str
|
||||
username: str
|
||||
database_name: str
|
||||
data_directory: Path
|
||||
password_file: Path
|
||||
pod_name: str | None = None
|
||||
image_reference: str = "docker.io/library/postgres:17-alpine"
|
||||
memory_limit_mib: int = 512
|
||||
cpu_limit: float = 1.0
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class RedisContainerConfig:
|
||||
"""Input required to build a Podman command for a tenant Redis service."""
|
||||
|
||||
container_name: str
|
||||
network_name: str
|
||||
hostname: str
|
||||
data_directory: Path
|
||||
password_file: Path
|
||||
pod_name: str | None = None
|
||||
image_reference: str = "docker.io/library/redis:7.4-alpine"
|
||||
memory_limit_mib: int = 256
|
||||
cpu_limit: float = 0.5
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DjangoApplicationLaunchConfig:
|
||||
"""Input required to build a uv-driven Gunicorn command for a Django app."""
|
||||
|
||||
wsgi_module: str
|
||||
port: int = 8000
|
||||
bind_host: str = "127.0.0.1"
|
||||
workers: int = 2
|
||||
python_executable: Path | None = None
|
||||
uv_project_path: Path | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DjangoContainerImageBuildConfig:
|
||||
"""Input required to build the reusable local Django test image."""
|
||||
|
||||
image_reference: str
|
||||
containerfile_path: Path
|
||||
context_directory: Path
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DjangoContainerRuntimeConfig:
|
||||
"""Input required to run a local Django test container with Podman."""
|
||||
|
||||
container_name: str
|
||||
network_name: str
|
||||
hostname: str
|
||||
image_reference: str
|
||||
application_directory: Path
|
||||
pod_name: str | None = None
|
||||
host_port: int | None = None
|
||||
guest_port: int = 8000
|
||||
working_directory: str = "/srv/test-app"
|
||||
environment: tuple[tuple[str, str], ...] = ()
|
||||
secret_mounts: tuple[tuple[Path, str], ...] = ()
|
||||
labels: tuple[tuple[str, str], ...] = ()
|
||||
memory_limit_mib: int = 256
|
||||
cpu_limit: float = 1.0
|
||||
|
||||
|
||||
def build_postgres_container_command(
    config: PostgresContainerConfig,
) -> tuple[str, ...]:
    """Build a hardened Podman command for a deployment-scoped PostgreSQL service.

    The container joins ``config.pod_name`` when one is set; otherwise it is
    attached to ``config.network_name`` with ``config.hostname``.

    Args:
        config: Container identity, credentials, mounts, image and limits.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.

    """
    # Local import keeps this change self-contained within the function.
    import shlex

    if config.pod_name is None:
        placement: tuple[str, ...] = ("--network", config.network_name, "--hostname", config.hostname)
    else:
        placement = ("--pod", config.pod_name)

    # A string health command is run through a shell inside the container, so the
    # interpolated identifiers are shell-quoted. Plain identifiers are left unchanged.
    health_command = f"pg_isready -U {shlex.quote(config.username)} -d {shlex.quote(config.database_name)}"

    return (
        "podman",
        "run",
        "--detach",
        "--replace",
        "--name",
        config.container_name,
        *placement,
        # Drop every capability, then add back a minimal set.
        "--cap-drop=all",
        "--cap-add=CHOWN",
        "--cap-add=FOWNER",
        "--cap-add=SETUID",
        "--cap-add=SETGID",
        "--cap-add=DAC_OVERRIDE",
        "--security-opt=no-new-privileges",
        "--pids-limit=256",
        "--memory",
        f"{config.memory_limit_mib}m",
        "--cpus",
        str(config.cpu_limit),
        # Read-only root filesystem; writable paths are tmpfs or explicit volumes.
        "--read-only",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,noexec,size=64m",  # noqa: S108
        "--tmpfs",
        "/var/run/postgresql:rw,nosuid,nodev,noexec,size=16m",
        "--volume",
        f"{config.data_directory}:/var/lib/postgresql/data:Z,rw",
        "--volume",
        f"{config.password_file}:/run/secrets/postgres-password:Z,ro",
        "--env",
        f"POSTGRES_USER={config.username}",
        "--env",
        f"POSTGRES_DB={config.database_name}",
        "--env",
        "POSTGRES_PASSWORD_FILE=/run/secrets/postgres-password",
        "--health-cmd",
        health_command,
        "--health-interval",
        "10s",
        "--health-retries",
        "5",
        config.image_reference,
        "postgres",
        "-c",
        "listen_addresses=*",
        "-c",
        "password_encryption=scram-sha-256",
    )
|
||||
|
||||
|
||||
def build_redis_container_command(config: RedisContainerConfig) -> tuple[str, ...]:
    """Build a hardened Podman command for a deployment-scoped Redis service.

    The container joins ``config.pod_name`` when one is set; otherwise it is
    attached to ``config.network_name`` with ``config.hostname``.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.

    """
    if config.pod_name is None:
        placement: tuple[str, ...] = ("--network", config.network_name, "--hostname", config.hostname)
    else:
        placement = ("--pod", config.pod_name)

    # Both shell snippets read the password from the mounted secret file at run time.
    health_command = "sh -eu -c 'redis-cli --no-auth-warning -a \"$(cat /run/secrets/redis-password)\" ping'"
    server_script = 'redis_password=$(cat /run/secrets/redis-password) && exec redis-server --appendonly yes --protected-mode yes --requirepass "${redis_password}"'

    return (
        "podman",
        "run",
        "--detach",
        "--replace",
        "--name",
        config.container_name,
        *placement,
        "--cap-drop=all",
        "--security-opt=no-new-privileges",
        "--pids-limit=128",
        "--memory",
        f"{config.memory_limit_mib}m",
        "--cpus",
        str(config.cpu_limit),
        "--read-only",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,noexec,size=32m",  # noqa: S108
        "--volume",
        f"{config.data_directory}:/data:Z,rw",
        "--volume",
        f"{config.password_file}:/run/secrets/redis-password:Z,ro",
        "--health-cmd",
        health_command,
        "--health-interval",
        "10s",
        "--health-retries",
        "5",
        config.image_reference,
        "sh",
        "-eu",
        "-c",
        server_script,
    )
|
||||
|
||||
|
||||
def build_django_server_command(
    config: DjangoApplicationLaunchConfig,
) -> tuple[str, ...]:
    """Build the Gunicorn launch command for a hosted Django deployment.

    Gunicorn is started either as ``<python> -m gunicorn`` when a Python
    executable is configured, or through ``uv run`` (optionally with
    ``--project``) otherwise.

    Returns:
        Tuple of command arguments ready for subprocess execution.

    Raises:
        ValueError: If both direct-python and uv-project execution modes are requested.

    """
    direct_python = config.python_executable
    uv_project = config.uv_project_path
    if direct_python is not None and uv_project is not None:
        msg = "python_executable and uv_project_path are mutually exclusive"
        raise ValueError(msg)

    launcher: list[str]
    if direct_python is None:
        launcher = ["uv", "run"]
        if uv_project is not None:
            launcher += ["--project", str(uv_project)]
        launcher.append("gunicorn")
    else:
        launcher = [str(direct_python), "-m", "gunicorn"]

    gunicorn_options = (
        "--bind",
        f"{config.bind_host}:{config.port}",
        "--workers",
        str(config.workers),
        # "-" sends both access and error logs to the standard streams.
        "--access-logfile",
        "-",
        "--error-logfile",
        "-",
        "--capture-output",
        "--graceful-timeout",
        "30",
        "--timeout",
        "60",
        config.wsgi_module,
    )
    return (*launcher, *gunicorn_options)
|
||||
|
||||
|
||||
def build_django_container_image_command(
    config: DjangoContainerImageBuildConfig,
) -> tuple[str, ...]:
    """Build the ``podman build`` command for the reusable Django test image.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.

    """
    arguments = ["podman", "build", "--pull=missing"]
    arguments += ["--tag", config.image_reference]
    arguments += ["--file", str(config.containerfile_path)]
    # The build context directory is the final positional argument.
    arguments.append(str(config.context_directory))
    return tuple(arguments)
|
||||
|
||||
|
||||
def build_django_container_run_command(
    config: DjangoContainerRuntimeConfig,
    *,
    command: Sequence[str],
    detach: bool,
    remove: bool = False,
) -> tuple[str, ...]:
    """Build a hardened ``podman run`` command for a local Django test container.

    Args:
        config: Container identity, mounts, environment, labels and limits.
        command: Arguments executed inside the container, appended after the image.
        detach: When true, adds ``--detach`` and ``--replace``.
        remove: When true, adds ``--rm``.

    Returns:
        Tuple of Podman arguments ready for subprocess execution.

    Raises:
        ValueError: If the command sequence is empty.

    """
    if not command:
        msg = "command must not be empty"
        raise ValueError(msg)

    arguments: list[str] = ["podman", "run"]
    if detach:
        arguments += ["--detach", "--replace"]
    if remove:
        arguments += ["--rm"]
    arguments += ["--name", config.container_name]

    joins_pod = config.pod_name is not None
    if joins_pod:
        arguments += ["--pod", config.pod_name]
    else:
        arguments += ["--network", config.network_name, "--hostname", config.hostname]

    arguments += [
        "--workdir",
        config.working_directory,
        "--cap-drop=all",
        "--security-opt=no-new-privileges",
        "--pids-limit=256",
        "--memory",
        f"{config.memory_limit_mib}m",
        "--cpus",
        str(config.cpu_limit),
        "--read-only",
        "--tmpfs",
        "/tmp:rw,nosuid,nodev,noexec,size=64m",  # noqa: S108
        "--tmpfs",
        "/run:rw,nosuid,nodev,noexec,size=16m",
        "--volume",
        f"{config.application_directory}:{config.working_directory}:Z,ro",
    ]

    # A port is published here only for containers outside a pod.
    if not joins_pod and config.host_port is not None:
        arguments += ["--publish", f"127.0.0.1:{config.host_port}:{config.guest_port}"]

    arguments += [
        part
        for source, target in config.secret_mounts
        for part in ("--volume", f"{source}:{target}:Z,ro")
    ]
    arguments += [part for name, value in config.environment for part in ("--env", f"{name}={value}")]
    arguments += [part for name, value in config.labels for part in ("--label", f"{name}={value}")]

    return (*arguments, config.image_reference, *command)
|
||||
|
||||
|
||||
def build_django_migrate_command(
    uv_project_path: Path | None = None,
    *,
    python_executable: Path | None = None,
) -> tuple[str, ...]:
    """Build the ``manage.py migrate --noinput`` command for a hosted Django deployment.

    The command runs through the given Python executable when one is supplied,
    and through ``uv run`` (optionally with ``--project``) otherwise.

    Returns:
        Tuple of command arguments ready for subprocess execution.

    Raises:
        ValueError: If direct-python and uv-project execution modes are mixed.

    """
    migrate_arguments = ("manage.py", "migrate", "--noinput")

    if python_executable is None:
        project_option = () if uv_project_path is None else ("--project", str(uv_project_path))
        return ("uv", "run", *project_option, "python", *migrate_arguments)

    if uv_project_path is not None:
        msg = "python_executable and uv_project_path are mutually exclusive"
        raise ValueError(msg)

    return (str(python_executable), *migrate_arguments)
|
||||
656
control_plane/tasks.py
Normal file
656
control_plane/tasks.py
Normal file
|
|
@ -0,0 +1,656 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import secrets
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import NoReturn
|
||||
from urllib.request import urlopen
|
||||
|
||||
from celery import shared_task
|
||||
from django.conf import settings
|
||||
from django.db import transaction
|
||||
from django.utils import timezone
|
||||
|
||||
from control_plane.host_commands import HostCommandError
|
||||
from control_plane.host_commands import run_host_command
|
||||
from control_plane.local_test_runtime import TEST_DJANGO_CONTAINER_PORT
|
||||
from control_plane.local_test_runtime import TEST_DJANGO_WORKDIR
|
||||
from control_plane.local_test_runtime import build_test_django_container_context_path
|
||||
from control_plane.local_test_runtime import build_test_django_container_labels
|
||||
from control_plane.local_test_runtime import build_test_django_container_names
|
||||
from control_plane.local_test_runtime import build_test_django_containerfile_path
|
||||
from control_plane.local_test_runtime import build_test_django_environment
|
||||
from control_plane.local_test_runtime import build_test_django_image_reference
|
||||
from control_plane.local_test_runtime import build_test_django_local_url
|
||||
from control_plane.local_test_runtime import build_test_django_secret_mounts
|
||||
from control_plane.local_test_runtime import write_test_django_project
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import DeploymentStatus
|
||||
from control_plane.models import RuntimeService
|
||||
from control_plane.models import RuntimeServiceKind
|
||||
from control_plane.models import RuntimeServiceStatus
|
||||
from control_plane.observability import capture_test_deployment_diagnostics
|
||||
from control_plane.runtime_plans import DjangoApplicationLaunchConfig
|
||||
from control_plane.runtime_plans import DjangoContainerImageBuildConfig
|
||||
from control_plane.runtime_plans import DjangoContainerRuntimeConfig
|
||||
from control_plane.runtime_plans import PostgresContainerConfig
|
||||
from control_plane.runtime_plans import RedisContainerConfig
|
||||
from control_plane.runtime_plans import build_django_container_image_command
|
||||
from control_plane.runtime_plans import build_django_container_run_command
|
||||
from control_plane.runtime_plans import build_django_migrate_command
|
||||
from control_plane.runtime_plans import build_django_server_command
|
||||
from control_plane.runtime_plans import build_postgres_container_command
|
||||
from control_plane.runtime_plans import build_redis_container_command
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from celery.app.task import Task
|
||||
|
||||
type BoundControlPlaneTask = Task[..., str]
|
||||
|
||||
|
||||
# Module logger registered under an explicit dotted name.
logger = logging.getLogger("tussilago.control_plane.tasks")

# Default time budgets, in seconds, for the readiness polling helpers below.
DEFAULT_HTTP_READY_TIMEOUT_SECONDS = 45.0
DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS = 45.0

# Deployment statuses treated as terminal by the provisioning flow.
TERMINAL_DEPLOYMENT_STATES: frozenset[str] = frozenset(
    (
        DeploymentStatus.DESTROYED.value,
        DeploymentStatus.FAILED.value,
    ),
)

# Runtime service statuses treated as terminal (being destroyed or already destroyed).
TERMINAL_RUNTIME_SERVICE_STATES: frozenset[str] = frozenset(
    (
        RuntimeServiceStatus.DESTROYING.value,
        RuntimeServiceStatus.DESTROYED.value,
    ),
)
|
||||
|
||||
|
||||
def _runtime_service_root(runtime_service: RuntimeService) -> Path:
    """Return the directory holding one runtime service's local test artifacts.

    The path is ``<DATA_DIR>/runtime-services/<deployment id>/<service kind>``.
    """
    data_root = Path(settings.DATA_DIR)
    return data_root.joinpath(
        "runtime-services",
        str(runtime_service.deployment_id),
        runtime_service.kind,
    )
|
||||
|
||||
|
||||
def _mark_deployment_failed(*, deployment_id: str, message: str) -> None:
    """Record a deployment as failed, storing the error message and finish time.

    The row is locked with ``select_for_update`` inside a transaction before
    it is updated.
    """
    changed_fields = ["status", "last_error", "finished_at", "updated_at"]
    with transaction.atomic():
        locked = Deployment.objects.select_for_update().get(pk=deployment_id)
        locked.status = DeploymentStatus.FAILED.value
        locked.last_error = message
        locked.finished_at = timezone.now()
        locked.save(update_fields=changed_fields)
|
||||
|
||||
|
||||
def _capture_test_deployment_diagnostics_snapshot(deployment_id: str) -> None:
    """Capture a diagnostics snapshot on a best-effort basis.

    ``OSError``, ``ValueError`` and ``Deployment.DoesNotExist`` are logged
    with a traceback and swallowed so the deployment flow continues.
    """
    try:
        capture_test_deployment_diagnostics(deployment_id)
    except (OSError, ValueError, Deployment.DoesNotExist) as error:
        # Checked in this order so an exception matching several types gets the first message.
        if isinstance(error, OSError):
            template = "Failed to write diagnostics snapshot deployment_id=%s"
        elif isinstance(error, ValueError):
            template = "Invalid diagnostics snapshot state deployment_id=%s"
        else:
            template = "Diagnostics snapshot skipped for missing deployment_id=%s"
        logger.exception(template, deployment_id)
|
||||
|
||||
|
||||
def _ensure_test_django_image_exists(image_reference: str) -> None:
    """Build the reusable Django test image when Podman reports it as missing.

    Raises:
        HostCommandError: If Podman image inspection or build fails.

    """
    try:
        run_host_command(command=("podman", "image", "exists", image_reference))
    except HostCommandError as error:
        # Return code 1 is treated as "image missing"; anything else is a real failure.
        if error.returncode != 1:
            raise
    else:
        return

    build_config = DjangoContainerImageBuildConfig(
        image_reference=image_reference,
        containerfile_path=build_test_django_containerfile_path(),
        context_directory=build_test_django_container_context_path(),
    )
    run_host_command(
        command=build_django_container_image_command(build_config),
        timeout_seconds=300.0,
    )
|
||||
|
||||
|
||||
def _read_container_logs(container_name: str) -> str:
    """Return one container's logs for failure reporting, or ``""`` when unavailable.

    Stdout is preferred; stderr is used when stdout is blank. A failed
    ``podman logs`` command yields an empty string instead of raising.
    """
    try:
        completed = run_host_command(command=("podman", "logs", container_name))
    except HostCommandError:
        return ""
    else:
        captured = completed.stdout.strip()
        if captured:
            return captured
        return completed.stderr.strip()
|
||||
|
||||
|
||||
def _read_container_status(container_name: str) -> str:
    """Return one container's health status, or its plain state when no health data exists.

    A failure of the underlying ``podman inspect`` command is not handled here.
    """
    # Go template: prefer the healthcheck status, fall back to the container state.
    status_template = "{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}"
    inspection = run_host_command(
        command=("podman", "inspect", "--format", status_template, container_name),
    )
    return inspection.stdout.strip()
|
||||
|
||||
|
||||
def _wait_for_container_ready(
    runtime_service: RuntimeService,
    *,
    timeout_seconds: float = DEFAULT_CONTAINER_READY_TIMEOUT_SECONDS,
) -> None:
    """Poll one runtime service's container once per second until it reports healthy.

    Raises:
        RuntimeError: If the runtime service exits or becomes unhealthy before it is ready.
        TimeoutError: If the runtime service does not become ready before timeout.

    """
    failure_states = {"exited", "dead", "stopped", "unhealthy"}
    container_name = runtime_service.container_name
    give_up_at = time.monotonic() + timeout_seconds

    while time.monotonic() < give_up_at:
        current = _read_container_status(container_name)
        if current == "healthy":
            return

        if current not in failure_states:
            time.sleep(1.0)
            continue

        # The container failed: attach its logs to the error when any exist.
        details = [f"Runtime service {runtime_service.kind} failed to become ready: {current}."]
        captured_logs = _read_container_logs(container_name)
        if captured_logs:
            details.append(captured_logs)
        raise RuntimeError("\n".join(details))

    msg = f"Timed out waiting for runtime service {runtime_service.kind} to become healthy."
    raise TimeoutError(msg)
|
||||
|
||||
|
||||
def _wait_for_http_ready(
    url: str,
    *,
    timeout_seconds: float = DEFAULT_HTTP_READY_TIMEOUT_SECONDS,
) -> dict[str, str | int]:
    """Poll a sentinel endpoint until its JSON body reports ``"status": "ok"``.

    Connection errors, undecodable bodies, invalid JSON and unexpected
    payloads are all treated as "not ready yet" and retried until the
    deadline passes.

    Args:
        url: Sentinel endpoint to request.
        timeout_seconds: Overall polling budget in seconds.

    Returns:
        Parsed JSON response from the sentinel endpoint.

    Raises:
        TimeoutError: If the endpoint does not become healthy before timeout.
            The most recent failure, if any, is chained as the cause.

    """
    deadline = time.monotonic() + timeout_seconds
    last_error: Exception | None = None
    while time.monotonic() < deadline:
        try:
            with urlopen(url, timeout=2) as response:  # noqa: S310
                payload = json.loads(response.read().decode("utf-8"))
        except (OSError, json.JSONDecodeError, UnicodeDecodeError) as error:
            last_error = error
        else:
            # Valid JSON need not be an object; only a dict can carry the status key.
            if isinstance(payload, dict) and payload.get("status") == "ok":
                return payload
            last_error = ValueError(f"Unexpected sentinel payload from {url}: {payload!r}")

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(1.0, remaining))

    msg = f"Timed out waiting for healthy Django sentinel endpoint at {url}"
    raise TimeoutError(msg) from last_error
|
||||
|
||||
|
||||
def _build_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Load the runtime services of one deployment, ordered by kind.

    The hosted site and tenant relations are fetched in the same query.
    """
    queryset = RuntimeService.objects.select_related("deployment__hosted_site__tenant")
    queryset = queryset.filter(deployment=deployment).order_by("kind")
    return tuple(queryset)
|
||||
|
||||
|
||||
def _get_ready_django_runtime_services(deployment: Deployment) -> tuple[RuntimeService, ...]:
    """Return the deployment's runtime services once every one of them is ready.

    Raises:
        ValueError: If the deployment has no runtime services or any of them is not ready.

    """
    services = _build_django_runtime_services(deployment)
    ready_value = RuntimeServiceStatus.READY.value
    everything_ready = bool(services) and all(service.status == ready_value for service in services)
    if everything_ready:
        return services

    msg = "All runtime services must be ready before provisioning the Django test runtime."
    raise ValueError(msg)
|
||||
|
||||
|
||||
def _build_django_runtime_configs(
    deployment: Deployment,
    runtime_services: tuple[RuntimeService, ...],
    *,
    project_root: Path,
) -> tuple[str, DjangoContainerRuntimeConfig, DjangoContainerRuntimeConfig]:
    """Prepare the image reference and both container configs for one deployment.

    The migrate and server containers share image, mounts, environment and
    labels. Both join the pod named after the first runtime service's
    network name.

    Returns:
        Image reference plus migrate and server Podman runtime configs.

    """
    image_reference = build_test_django_image_reference()
    shared_environment = build_test_django_environment(deployment, runtime_services)
    shared_secret_mounts = build_test_django_secret_mounts(runtime_services)
    shared_labels = build_test_django_container_labels(deployment)
    server_name, migrate_name = build_test_django_container_names(deployment)
    pod_network = runtime_services[0].network_name

    server_config = DjangoContainerRuntimeConfig(
        container_name=server_name,
        network_name=pod_network,
        hostname="django.internal",
        image_reference=image_reference,
        application_directory=project_root,
        pod_name=pod_network,
        host_port=deployment.guest_port,
        guest_port=TEST_DJANGO_CONTAINER_PORT,
        working_directory=TEST_DJANGO_WORKDIR,
        environment=shared_environment,
        secret_mounts=shared_secret_mounts,
        labels=shared_labels,
    )
    # The migrate container sets no port fields and keeps the config defaults.
    migrate_config = DjangoContainerRuntimeConfig(
        container_name=migrate_name,
        network_name=pod_network,
        hostname="django-migrate.internal",
        image_reference=image_reference,
        application_directory=project_root,
        pod_name=pod_network,
        working_directory=TEST_DJANGO_WORKDIR,
        environment=shared_environment,
        secret_mounts=shared_secret_mounts,
        labels=shared_labels,
    )
    return image_reference, migrate_config, server_config
|
||||
|
||||
|
||||
def _launch_django_runtime(
    deployment: Deployment,
    *,
    image_reference: str,
    migrate_config: DjangoContainerRuntimeConfig,
    server_config: DjangoContainerRuntimeConfig,
) -> dict[str, str | int]:
    """Ensure the image exists, run migrations, start the server container, and wait for it.

    Returns:
        Parsed JSON sentinel payload from the running Django test app.

    """
    container_python = Path("/usr/local/bin/python")
    _ensure_test_django_image_exists(image_reference)

    # Step 1: migrations run in the foreground in a container removed afterwards.
    migrate_invocation = build_django_container_run_command(
        migrate_config,
        command=build_django_migrate_command(python_executable=container_python),
        detach=False,
        remove=True,
    )
    run_host_command(command=migrate_invocation, timeout_seconds=120.0)

    # Step 2: the server container is started detached.
    launch_config = DjangoApplicationLaunchConfig(
        wsgi_module=deployment.hosted_site.wsgi_module,
        bind_host="0.0.0.0",  # noqa: S104
        port=TEST_DJANGO_CONTAINER_PORT,
        workers=1,
        python_executable=container_python,
    )
    server_invocation = build_django_container_run_command(
        server_config,
        command=build_django_server_command(launch_config),
        detach=True,
    )
    run_host_command(command=server_invocation, timeout_seconds=120.0)

    # Step 3: block until the sentinel endpoint reports healthy.
    sentinel_url = build_test_django_local_url(deployment)
    return _wait_for_http_ready(sentinel_url)
|
||||
|
||||
|
||||
def _retry_or_fail_django_runtime(
    self: BoundControlPlaneTask,
    *,
    deployment: Deployment,
    error: HostCommandError | TimeoutError,
) -> NoReturn:
    """Schedule a task retry, or mark the deployment failed once retries run out.

    This function always raises: either the retry exception from
    ``self.retry`` with an exponential countdown capped at 300 seconds, or
    the original error after the failure has been persisted.
    """
    attempts_so_far = getattr(self.request, "retries", 0)
    logger.warning(
        "Django runtime provisioning retry deployment_id=%s retries=%s error=%s",
        deployment.id,
        attempts_so_far,
        error,
    )

    if attempts_so_far < self.max_retries:
        backoff_seconds = min(300, 2 ** (attempts_so_far + 1))
        raise self.retry(exc=error, countdown=backoff_seconds) from error

    # Retries exhausted: persist the failure together with any server container logs.
    server_container_name, _migrate_container_name = build_test_django_container_names(deployment)
    container_logs = _read_container_logs(server_container_name)
    failure_message = f"{error}\n{container_logs}" if container_logs else str(error)
    deployment_key = str(deployment.id)
    _mark_deployment_failed(deployment_id=deployment_key, message=failure_message)
    _capture_test_deployment_diagnostics_snapshot(deployment_key)
    logger.error("Django runtime provisioning failed deployment_id=%s", deployment.id)
    raise error
|
||||
|
||||
|
||||
def run_test_django_runtime_provisioning(deployment_id: str) -> str:
    """Provision the generated Django runtime for one deployment, inline.

    Deployments already running or in a terminal state are left untouched.
    Otherwise the test project is written, the containers are launched, and
    the deployment is marked running once the sentinel endpoint is healthy.

    Returns:
        Final deployment status for the processed deployment.

    """
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    current_status = deployment.status
    if current_status in TERMINAL_DEPLOYMENT_STATES or current_status == DeploymentStatus.RUNNING.value:
        return current_status

    ready_services = _get_ready_django_runtime_services(deployment)
    project_root = write_test_django_project(deployment, ready_services)
    image_reference, migrate_config, server_config = _build_django_runtime_configs(
        deployment,
        ready_services,
        project_root=project_root,
    )
    sentinel_payload = _launch_django_runtime(
        deployment,
        image_reference=image_reference,
        migrate_config=migrate_config,
        server_config=server_config,
    )

    running_status = DeploymentStatus.RUNNING.value
    with transaction.atomic():
        # Re-read under a row lock: the status may have changed while containers started.
        deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        if deployment.status in TERMINAL_DEPLOYMENT_STATES:
            return deployment.status

        deployment.status = running_status
        deployment.last_error = ""
        deployment.started_at = timezone.now()
        deployment.finished_at = None
        deployment.save(update_fields=["status", "last_error", "started_at", "finished_at", "updated_at"])

    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    hosted_site = deployment.hosted_site
    logger.info(
        "Django runtime ready deployment_id=%s tenant_slug=%s site_slug=%s postgres=%s redis=%s",
        deployment_id,
        hosted_site.tenant.slug,
        hosted_site.slug,
        sentinel_payload.get("postgres"),
        sentinel_payload.get("redis"),
    )
    return running_status
|
||||
|
||||
|
||||
def _ensure_secret_file(password_file: Path) -> None:
    """Create a password file for a test container unless one already exists.

    The file is created exclusively with owner-only permissions, so the
    secret is never readable by other users and an existing file is never
    overwritten. Parent directories are created as needed.

    Args:
        password_file: Destination path of the secret.

    """
    # Local import keeps this change self-contained within the function.
    import os

    password_file.parent.mkdir(parents=True, exist_ok=True)
    try:
        # O_EXCL makes "create only if missing" a single atomic step.
        descriptor = os.open(password_file, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o600)
    except FileExistsError:
        return

    with os.fdopen(descriptor, "w", encoding="utf-8") as handle:
        handle.write(f"{secrets.token_urlsafe(24)}\n")

    # The creation mode is filtered by the umask; set the exact mode explicitly.
    password_file.chmod(0o600)
|
||||
|
||||
|
||||
def _ensure_podman_pod(*, pod_name: str, host_port: int) -> None:
    """Create a Podman pod when Podman reports that it does not exist yet.

    The pod publishes ``TEST_DJANGO_CONTAINER_PORT`` on ``127.0.0.1:<host_port>``.

    Raises:
        HostCommandError: If Podman pod inspection or creation fails.

    """
    try:
        run_host_command(command=("podman", "pod", "exists", pod_name))
    except HostCommandError as error:
        # Return code 1 is treated as "pod missing"; anything else is a real failure.
        if error.returncode != 1:
            raise
    else:
        return

    port_mapping = f"127.0.0.1:{host_port}:{TEST_DJANGO_CONTAINER_PORT}"
    create_command = ("podman", "pod", "create", "--replace", "--name", pod_name, "--publish", port_mapping)
    run_host_command(command=create_command)
|
||||
|
||||
|
||||
def _build_runtime_service_command(
    runtime_service: RuntimeService,
    *,
    data_directory: Path,
    password_file: Path,
) -> tuple[str, ...]:
    """Translate one runtime service row into the Podman command that starts it.

    The service's ``network_name`` doubles as the pod name for both supported
    kinds (PostgreSQL and Redis).

    Args:
        runtime_service: Service whose kind and connection settings drive the command.
        data_directory: Host directory handed to the container config as its data directory.
        password_file: Host file handed to the container config as its password file.

    Returns:
        Podman command arguments for the runtime service.

    Raises:
        ValueError: If the runtime service kind or configuration is unsupported.
    """
    kind = runtime_service.kind

    if kind == RuntimeServiceKind.REDIS.value:
        redis_config = RedisContainerConfig(
            container_name=runtime_service.container_name,
            network_name=runtime_service.network_name,
            hostname=runtime_service.hostname,
            data_directory=data_directory,
            password_file=password_file,
            pod_name=runtime_service.network_name,
            image_reference=runtime_service.image_reference,
        )
        return build_redis_container_command(redis_config)

    if kind != RuntimeServiceKind.POSTGRESQL.value:
        msg = f"Unsupported runtime service kind: {runtime_service.kind}"
        raise ValueError(msg)

    # PostgreSQL cannot be started without both a role name and a database name.
    username = runtime_service.connection_username
    database_name = runtime_service.connection_database
    if not (username and database_name):
        msg = "PostgreSQL runtime service requires connection credentials."
        raise ValueError(msg)

    postgres_config = PostgresContainerConfig(
        container_name=runtime_service.container_name,
        network_name=runtime_service.network_name,
        hostname=runtime_service.hostname,
        username=username,
        database_name=database_name,
        data_directory=data_directory,
        password_file=password_file,
        pod_name=runtime_service.network_name,
        image_reference=runtime_service.image_reference,
    )
    return build_postgres_container_command(postgres_config)
|
||||
|
||||
|
||||
def _provision_runtime_service_container(runtime_service: RuntimeService) -> None:
    """Provision the local test container that backs one runtime service.

    Steps run in this order: prepare the data directory and password file under
    the service root, make sure the pod exists, run the Podman command built for
    the service kind, then wait for the container to report ready.
    """
    root = _runtime_service_root(runtime_service)
    data_directory = root.joinpath("data")
    password_file = root.joinpath("secrets", "password")

    data_directory.mkdir(parents=True, exist_ok=True)
    _ensure_secret_file(password_file)

    # The pod is named after the service's network and publishes the deployment's guest port.
    _ensure_podman_pod(
        pod_name=runtime_service.network_name,
        host_port=runtime_service.deployment.guest_port,
    )

    run_host_command(
        command=_build_runtime_service_command(
            runtime_service,
            data_directory=data_directory,
            password_file=password_file,
        ),
    )
    _wait_for_container_ready(runtime_service)
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    autoretry_for=(HostCommandError, TimeoutError),
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def provision_test_runtime_services(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Seed and provision runtime service test containers for one deployment.

    Deployments already in a terminal state are returned untouched. Otherwise the
    deployment's runtime services are ensured, and every service that is neither
    terminal nor already ready is provisioned in ``kind`` order. Each service is
    marked PROVISIONING before the attempt and READY after it succeeds; on
    failure it is marked FAILED, the error is logged, and the exception is
    re-raised.

    Returns:
        Final runtime service status for the processed deployment.

    Raises:
        HostCommandError: If Podman commands fail while provisioning backing services.
        RuntimeError: If a backing container exits or becomes unhealthy during startup.
        TimeoutError: If a backing container never becomes healthy.
        ValueError: If runtime service configuration is invalid.
    """
    del self
    deployment = Deployment.objects.select_related("hosted_site__tenant").get(pk=deployment_id)
    if deployment.status in TERMINAL_DEPLOYMENT_STATES:
        return deployment.status

    deployment.ensure_test_runtime_services()
    runtime_services = tuple(
        RuntimeService.objects
        .select_related("deployment__hosted_site__tenant")
        .filter(deployment=deployment)
        .order_by("kind"),
    )
    pending_runtime_services = tuple(
        runtime_service
        for runtime_service in runtime_services
        if runtime_service.status not in TERMINAL_RUNTIME_SERVICE_STATES
        and runtime_service.status != RuntimeServiceStatus.READY.value
    )
    if not pending_runtime_services:
        return RuntimeServiceStatus.READY.value

    for runtime_service in pending_runtime_services:
        runtime_service.status = RuntimeServiceStatus.PROVISIONING.value
        runtime_service.save(update_fields=["status", "updated_at"])

        try:
            _provision_runtime_service_container(runtime_service)
        # Parenthesized so the module also parses on interpreters older than Python 3.14.
        except (HostCommandError, RuntimeError, TimeoutError):
            # NOTE(review): HostCommandError and TimeoutError trigger a Celery retry. If FAILED is
            # part of TERMINAL_RUNTIME_SERVICE_STATES, the retry would skip this service and could
            # report READY - confirm against the constant's definition.
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            _capture_test_deployment_diagnostics_snapshot(deployment_id)
            logger.exception(
                "Runtime service provisioning failed deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise
        except ValueError:
            # Invalid configuration: no container was started, so no diagnostics snapshot is taken.
            runtime_service.status = RuntimeServiceStatus.FAILED.value
            runtime_service.save(update_fields=["status", "updated_at"])
            logger.exception(
                "Runtime service configuration invalid deployment_id=%s runtime_service_id=%s kind=%s",
                deployment_id,
                runtime_service.id,
                runtime_service.kind,
            )
            raise

        runtime_service.status = RuntimeServiceStatus.READY.value
        runtime_service.save(update_fields=["status", "updated_at"])

    _capture_test_deployment_diagnostics_snapshot(deployment_id)
    return RuntimeServiceStatus.READY.value
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_provisioning(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to the provisioning state unless that is unnecessary.

    The row is fetched with ``select_for_update`` inside a transaction. A
    deployment that is in a terminal state, or already provisioning, is left
    untouched; otherwise its status is updated and ``last_error`` is cleared.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    target_status = DeploymentStatus.PROVISIONING.value
    with transaction.atomic():
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        current_status = deployment.status
        needs_transition = current_status not in TERMINAL_DEPLOYMENT_STATES and current_status != target_status
        if needs_transition:
            deployment.status = target_status
            deployment.last_error = ""
            deployment.save(update_fields=["status", "last_error", "updated_at"])
        return deployment.status
|
||||
|
||||
|
||||
@shared_task(
    bind=True,
    retry_backoff=True,
    retry_backoff_max=300,
    retry_jitter=True,
    max_retries=5,
)
def mark_deployment_booting(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Transition a deployment to the booting state unless that is unnecessary.

    The row is fetched with ``select_for_update`` inside a transaction. A
    deployment that is in a terminal state, or already booting, is left
    untouched; otherwise only its status is updated.

    Returns:
        The deployment status after the transition attempt.
    """
    del self
    target_status = DeploymentStatus.BOOTING.value
    with transaction.atomic():
        deployment: Deployment = Deployment.objects.select_for_update().get(pk=deployment_id)
        current_status = deployment.status
        needs_transition = current_status not in TERMINAL_DEPLOYMENT_STATES and current_status != target_status
        if needs_transition:
            deployment.status = target_status
            deployment.save(update_fields=["status", "updated_at"])
        return deployment.status
|
||||
|
||||
|
||||
@shared_task(bind=True, max_retries=5)
def provision_test_django_runtime(self: BoundControlPlaneTask, deployment_id: str) -> str:
    """Run Django test-runtime provisioning for one deployment and handle its failures.

    Configuration errors mark the deployment as failed and are re-raised.
    Host-command and timeout errors are delegated, together with the bound task
    and a freshly loaded deployment, to ``_retry_or_fail_django_runtime``.

    Returns:
        Final deployment status for the processed deployment.

    Raises:
        ValueError: If required backing services are not ready.
    """
    try:
        final_status = run_test_django_runtime_provisioning(deployment_id)
    except ValueError as config_error:
        _mark_deployment_failed(deployment_id=deployment_id, message=str(config_error))
        logger.exception("Django runtime configuration invalid deployment_id=%s", deployment_id)
        raise
    except (HostCommandError, TimeoutError) as transient_error:
        deployments = Deployment.objects.select_related("hosted_site__tenant")
        deployment = deployments.get(pk=deployment_id)
        # NOTE(review): presumably this helper always raises (retry or final failure) - confirm,
        # since falling through here would return None from a task annotated ``-> str``.
        _retry_or_fail_django_runtime(self, deployment=deployment, error=transient_error)
    else:
        return final_status
|
||||
35
control_plane/templates/control_plane/base.html
Normal file
35
control_plane/templates/control_plane/base.html
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
{% load static %}
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
<meta name="description"
|
||||
content="Tussilago local deployment dashboard with runtime status, sentinel health, and captured container logs.">
|
||||
<meta name="keywords"
|
||||
content="Tussilago, deployment dashboard, podman, django, runtime logs">
|
||||
<title>
|
||||
{% block title %}Tussilago Deployments{% endblock %}
|
||||
</title>
|
||||
<link rel="stylesheet" href="{% static 'control_plane/dashboard.css' %}">
|
||||
</head>
|
||||
<body>
|
||||
<div class="page-shell">
|
||||
<header class="masthead">
|
||||
<div>
|
||||
<p class="eyebrow">Tussilago Local Runtime</p>
|
||||
<h1>
|
||||
<a href="{% url 'control_plane:deployment-dashboard' %}">Deployment Dashboard</a>
|
||||
</h1>
|
||||
</div>
|
||||
<nav class="top-nav">
|
||||
<a href="{% url 'control_plane:deployment-dashboard' %}">Deployments</a>
|
||||
<a href="{% url 'admin:index' %}">Admin</a>
|
||||
</nav>
|
||||
</header>
|
||||
<main>
|
||||
{% block content %}{% endblock %}
|
||||
</main>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
110
control_plane/templates/control_plane/deployment_dashboard.html
Normal file
110
control_plane/templates/control_plane/deployment_dashboard.html
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
{% extends 'control_plane/base.html' %}
|
||||
|
||||
{% block title %}Deployments · Tussilago{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<section class="hero-panel">
|
||||
<div>
|
||||
<p class="eyebrow">Control Plane Overview</p>
|
||||
<h2>See what is alive, what failed, and what to inspect next.</h2>
|
||||
<p class="hero-copy">
|
||||
Recent deployments, backing-service states, direct sentinel links, and fast paths into detailed logs.
|
||||
</p>
|
||||
</div>
|
||||
<div class="hero-metrics">
|
||||
<article class="metric-card">
|
||||
<span class="metric-label">Recent Deployments</span>
|
||||
<strong>{{ deployment_total }}</strong>
|
||||
</article>
|
||||
<article class="metric-card accent-good">
|
||||
<span class="metric-label">Running Now</span>
|
||||
<strong>{{ running_total }}</strong>
|
||||
</article>
|
||||
</div>
|
||||
</section>
|
||||
<section class="summary-grid">
|
||||
{% for summary in status_summaries %}
|
||||
<article class="summary-card status-{{ summary.status }}">
|
||||
<span class="status-chip status-{{ summary.status }}">{{ summary.label }}</span>
|
||||
<strong>{{ summary.total }}</strong>
|
||||
</article>
|
||||
{% empty %}
|
||||
<article class="summary-card empty-state">
|
||||
<strong>0</strong>
|
||||
<span>No deployments yet.</span>
|
||||
</article>
|
||||
{% endfor %}
|
||||
</section>
|
||||
<section class="deployment-grid">
|
||||
{% for card in deployment_cards %}
|
||||
<article class="deployment-card">
|
||||
<header class="card-header">
|
||||
<div>
|
||||
<p class="card-kicker">{{ card.deployment.hosted_site.tenant.slug }}</p>
|
||||
<h3>{{ card.deployment.hosted_site.slug }}</h3>
|
||||
<p class="card-meta">{{ card.deployment.id }}</p>
|
||||
</div>
|
||||
<span class="status-chip status-{{ card.deployment.status }}">{{ card.deployment.get_status_display }}</span>
|
||||
</header>
|
||||
<dl class="facts-grid compact-grid">
|
||||
<div>
|
||||
<dt>Created</dt>
|
||||
<dd>
|
||||
{{ card.deployment.created_at|date:'Y-m-d H:i:s' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Sentinel</dt>
|
||||
<dd>
|
||||
{{ card.sentinel_url }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Runtime Ready</dt>
|
||||
<dd>
|
||||
{{ card.runtime_ready_total }}/{{ card.runtime_services|length }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Runtime Failed</dt>
|
||||
<dd>
|
||||
{{ card.runtime_failed_total }}
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
<div class="service-pill-row">
|
||||
{% for runtime_service in card.runtime_services %}
|
||||
<span class="service-pill status-{{ runtime_service.status }}">
|
||||
{{ runtime_service.kind }} · {{ runtime_service.status }}
|
||||
</span>
|
||||
{% empty %}
|
||||
<span class="service-pill muted-pill">No runtime services yet</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% if card.deployment.last_error %}
|
||||
<section class="error-panel">
|
||||
<h4>Last Error</h4>
|
||||
<pre>{{ card.deployment.last_error }}</pre>
|
||||
</section>
|
||||
{% endif %}
|
||||
<div class="action-row">
|
||||
<a class="button-link"
|
||||
href="{% url 'control_plane:deployment-detail' card.deployment.id %}">Inspect deployment</a>
|
||||
<a class="button-link subtle"
|
||||
href="{{ card.sentinel_url }}"
|
||||
target="_blank"
|
||||
rel="noreferrer">Open sentinel</a>
|
||||
<a class="button-link subtle"
|
||||
href="{% url 'admin:control_plane_deployment_change' card.deployment.id %}">Admin row</a>
|
||||
</div>
|
||||
</article>
|
||||
{% empty %}
|
||||
<article class="deployment-card empty-state wide-card">
|
||||
<h3>No deployments captured yet</h3>
|
||||
<p>
|
||||
Run <code>uv run python manage.py create_test_deployment</code> to populate this dashboard.
|
||||
</p>
|
||||
</article>
|
||||
{% endfor %}
|
||||
</section>
|
||||
{% endblock content %}
|
||||
236
control_plane/templates/control_plane/deployment_detail.html
Normal file
236
control_plane/templates/control_plane/deployment_detail.html
Normal file
|
|
@ -0,0 +1,236 @@
|
|||
{% extends 'control_plane/base.html' %}
|
||||
|
||||
{% block title %}{{ deployment.hosted_site.slug }} · Tussilago{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<section class="hero-panel detail-hero">
|
||||
<div>
|
||||
<p class="eyebrow">Deployment Detail</p>
|
||||
<h2>{{ deployment.hosted_site.tenant.slug }}/{{ deployment.hosted_site.slug }}</h2>
|
||||
<p class="hero-copy">Deployment {{ deployment.id }} on localhost port {{ deployment.guest_port }}.</p>
|
||||
</div>
|
||||
<div class="hero-metrics">
|
||||
<article class="metric-card">
|
||||
<span class="metric-label">Control Plane</span>
|
||||
<strong class="status-chip status-{{ deployment.status }}">{{ deployment.get_status_display }}</strong>
|
||||
</article>
|
||||
<article class="metric-card accent-good">
|
||||
<span class="metric-label">Sentinel</span>
|
||||
<strong><a href="{{ sentinel_url }}" target="_blank" rel="noreferrer">Open</a></strong>
|
||||
</article>
|
||||
</div>
|
||||
</section>
|
||||
<section class="panel-grid two-up">
|
||||
<article class="panel-card"
|
||||
data-health-panel
|
||||
data-health-endpoint="{% url 'control_plane:deployment-health' deployment.id %}">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Live Health</p>
|
||||
<h3>Sentinel probe</h3>
|
||||
</div>
|
||||
<button class="button-link subtle" type="button" data-health-refresh>Refresh</button>
|
||||
</div>
|
||||
<div class="health-strip">
|
||||
<span class="status-chip health-{{ health_probe.status }}"
|
||||
data-health-badge>{{ health_probe.label }}</span>
|
||||
<span class="muted-copy" data-health-stamp>{{ health_probe.checked_at }}</span>
|
||||
</div>
|
||||
<p class="muted-copy" data-health-detail>
|
||||
{% if health_probe.error %}
|
||||
{{ health_probe.error }}
|
||||
{% elif health_probe.ok %}
|
||||
Sentinel responded with healthy payload.
|
||||
{% else %}
|
||||
Waiting for a healthy sentinel response.
|
||||
{% endif %}
|
||||
</p>
|
||||
<pre class="log-output compact-log" data-health-json>{% if health_probe.payload %}{{ health_probe.payload }}{% elif health_probe.error %}{{ health_probe.error }}{% else %}No payload yet.{% endif %}</pre>
|
||||
</article>
|
||||
<article class="panel-card">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Facts</p>
|
||||
<h3>Deployment metadata</h3>
|
||||
</div>
|
||||
</div>
|
||||
<dl class="facts-grid">
|
||||
<div>
|
||||
<dt>Sentinel URL</dt>
|
||||
<dd>
|
||||
{{ sentinel_url }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Idempotency Key</dt>
|
||||
<dd>
|
||||
{{ deployment.idempotency_key }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Created</dt>
|
||||
<dd>
|
||||
{{ deployment.created_at|date:'Y-m-d H:i:s' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Started</dt>
|
||||
<dd>
|
||||
{{ deployment.started_at|default:'-' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Finished</dt>
|
||||
<dd>
|
||||
{{ deployment.finished_at|default:'-' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Admin</dt>
|
||||
<dd>
|
||||
<a href="{% url 'admin:control_plane_deployment_change' deployment.id %}">Open admin change form</a>
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
{% if deployment.last_error %}
|
||||
<section class="error-panel top-gap">
|
||||
<h4>Last Error</h4>
|
||||
<pre>{{ deployment.last_error }}</pre>
|
||||
</section>
|
||||
{% endif %}
|
||||
</article>
|
||||
</section>
|
||||
<section class="panel-grid two-up">
|
||||
<article class="panel-card">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Runtime Services</p>
|
||||
<h3>Database and cache state</h3>
|
||||
</div>
|
||||
</div>
|
||||
<div class="service-grid">
|
||||
{% for runtime_service in runtime_services %}
|
||||
<article class="service-card">
|
||||
<span class="service-pill status-{{ runtime_service.status }}">{{ runtime_service.kind }}</span>
|
||||
<h4>{{ runtime_service.container_name }}</h4>
|
||||
<p class="muted-copy">{{ runtime_service.hostname }}:{{ runtime_service.internal_port }}</p>
|
||||
<p class="muted-copy">Control plane status: {{ runtime_service.status }}</p>
|
||||
</article>
|
||||
{% empty %}
|
||||
<p class="muted-copy">No runtime services recorded yet.</p>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</article>
|
||||
<article class="panel-card">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Diagnostics Snapshot</p>
|
||||
<h3>Persisted pod state and logs</h3>
|
||||
</div>
|
||||
</div>
|
||||
{% if diagnostics %}
|
||||
<dl class="facts-grid compact-grid">
|
||||
<div>
|
||||
<dt>Captured At</dt>
|
||||
<dd>
|
||||
{{ diagnostics.captured_at|default:'-' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Pod</dt>
|
||||
<dd>
|
||||
{{ diagnostics.pod.name|default:'-' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Pod Status</dt>
|
||||
<dd>
|
||||
{{ diagnostics.pod.status|default:'unknown' }}
|
||||
</dd>
|
||||
</div>
|
||||
<div>
|
||||
<dt>Snapshot Error</dt>
|
||||
<dd>
|
||||
{{ diagnostics.capture_error|default:'-' }}
|
||||
</dd>
|
||||
</div>
|
||||
</dl>
|
||||
{% if diagnostics.pod.error %}<p class="muted-copy top-gap">{{ diagnostics.pod.error }}</p>{% endif %}
|
||||
{% else %}
|
||||
<p class="muted-copy">No diagnostics snapshot has been captured yet.</p>
|
||||
{% endif %}
|
||||
</article>
|
||||
</section>
|
||||
<section class="panel-grid log-grid">
|
||||
{% if diagnostics %}
|
||||
<article class="panel-card wide-card">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">Django Container</p>
|
||||
<h3>{{ diagnostics.django.container_name }}</h3>
|
||||
</div>
|
||||
<span class="status-chip health-{{ diagnostics.django.container_status|default:'missing' }}">{{ diagnostics.django.container_status|default:'missing' }}</span>
|
||||
</div>
|
||||
{% if diagnostics.django.inspect_error %}<p class="muted-copy">{{ diagnostics.django.inspect_error }}</p>{% endif %}
|
||||
<pre class="log-output">{{ diagnostics.django.logs|default:'No Django logs captured yet.' }}</pre>
|
||||
</article>
|
||||
{% for runtime_service in diagnostics.runtime_services %}
|
||||
<article class="panel-card">
|
||||
<div class="panel-header">
|
||||
<div>
|
||||
<p class="eyebrow">{{ runtime_service.label }}</p>
|
||||
<h3>{{ runtime_service.container_name }}</h3>
|
||||
</div>
|
||||
<span class="status-chip health-{{ runtime_service.container_status|default:'missing' }}">{{ runtime_service.container_status|default:'missing' }}</span>
|
||||
</div>
|
||||
{% if runtime_service.inspect_error %}<p class="muted-copy">{{ runtime_service.inspect_error }}</p>{% endif %}
|
||||
{% if runtime_service.log_error %}<p class="muted-copy">{{ runtime_service.log_error }}</p>{% endif %}
|
||||
<pre class="log-output">{{ runtime_service.logs|default:'No logs captured yet.' }}</pre>
|
||||
</article>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
</section>
|
||||
<script>
|
||||
// Keeps the "Live Health" panel in sync with the deployment health endpoint:
// refreshes on button click and automatically every 8 seconds.
document.addEventListener("DOMContentLoaded", () => {
    const panel = document.querySelector("[data-health-panel]");
    if (!panel) {
        return;
    }

    const endpoint = panel.dataset.healthEndpoint;
    const badge = panel.querySelector("[data-health-badge]");
    const stamp = panel.querySelector("[data-health-stamp]");
    const detail = panel.querySelector("[data-health-detail]");
    const jsonTarget = panel.querySelector("[data-health-json]");
    const refreshButton = panel.querySelector("[data-health-refresh]");

    // Paint one probe result into the badge, timestamp, detail line and raw payload box.
    const renderPayload = (payload) => {
        badge.textContent = payload.label;
        badge.className = `status-chip health-${payload.status}`;
        stamp.textContent = payload.checked_at;
        detail.textContent = payload.error || (payload.ok ? "Sentinel responded with healthy payload." : "Waiting for a healthy sentinel response.");
        if (payload.payload) {
            jsonTarget.textContent = JSON.stringify(payload.payload, null, 2);
        } else if (payload.error) {
            jsonTarget.textContent = payload.error;
        } else {
            jsonTarget.textContent = "No payload yet.";
        }
    };

    // Fetch the latest probe result. Network failures, non-2xx responses and
    // non-JSON bodies are reported in the detail line instead of surfacing as
    // unhandled promise rejections; the last rendered probe state is kept.
    const refreshHealth = async () => {
        try {
            const response = await fetch(endpoint, {headers: {"X-Requested-With": "fetch"}});
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            renderPayload(await response.json());
        } catch (error) {
            detail.textContent = `Health refresh failed: ${error.message}`;
        }
    };

    refreshButton.addEventListener("click", () => {
        void refreshHealth();
    });

    window.setInterval(() => {
        void refreshHealth();
    }, 8000);
});
|
||||
</script>
|
||||
{% endblock content %}
|
||||
15
control_plane/urls.py
Normal file
15
control_plane/urls.py
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
from django.urls import path
|
||||
|
||||
from control_plane.views import DeploymentDashboardHomeView
|
||||
from control_plane.views import DeploymentDashboardView
|
||||
from control_plane.views import DeploymentDetailView
|
||||
from control_plane.views import DeploymentHealthView
|
||||
|
||||
# URL namespace; templates reverse these routes as ``control_plane:<name>``.
app_name = "control_plane"

urlpatterns = [
    # Site root, served by the dashboard alias view.
    path("", DeploymentDashboardHomeView.as_view(), name="deployment-home"),
    # Dashboard listing deployments.
    path("deployments/", DeploymentDashboardView.as_view(), name="deployment-dashboard"),
    # Detail page for one deployment; the ``uuid`` converter passes ``deployment_id`` as a UUID.
    path("deployments/<uuid:deployment_id>/", DeploymentDetailView.as_view(), name="deployment-detail"),
    # JSON health endpoint for one deployment.
    path("deployments/<uuid:deployment_id>/health/", DeploymentHealthView.as_view(), name="deployment-health"),
]
|
||||
163
control_plane/views.py
Normal file
163
control_plane/views.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
from uuid import UUID
|
||||
|
||||
from django.db.models import Count
|
||||
from django.db.models import Prefetch
|
||||
from django.http import JsonResponse
|
||||
from django.shortcuts import get_object_or_404
|
||||
from django.views.generic import TemplateView
|
||||
from django.views.generic import View
|
||||
|
||||
from control_plane.local_test_runtime import build_test_django_local_url
|
||||
from control_plane.models import Deployment
|
||||
from control_plane.models import DeploymentStatus
|
||||
from control_plane.models import RuntimeService
|
||||
from control_plane.observability import JsonValue
|
||||
from control_plane.observability import load_test_deployment_diagnostics
|
||||
from control_plane.observability import probe_test_deployment_health
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from django.db.models import QuerySet
|
||||
from django.http import HttpRequest
|
||||
|
||||
|
||||
# Type of the URL keyword arguments accepted by the views' ``get_context_data``.
type RouteKwarg = str | UUID
# Value types placed in the dashboard template context.
type DashboardContextValue = int | tuple[DeploymentCard, ...] | tuple[DeploymentStatusSummary, ...]
# Value types placed in the detail template context (a superset of the dashboard values).
type DetailContextValue = (
    DashboardContextValue | Deployment | tuple[RuntimeService, ...] | str | dict[str, JsonValue] | None
)
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
class DeploymentStatusSummary:
    """Aggregate deployments by lifecycle state for dashboard cards.

    One immutable instance is built per distinct deployment status and rendered
    as a summary card on the dashboard.
    """

    # Raw deployment status value; the dashboard template also uses it as a CSS class suffix.
    status: str
    # Human-readable label shown in the status chip.
    label: str
    # Number of deployments currently in this status.
    total: int
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
class DeploymentCard:
|
||||
"""Small view model used by the dashboard templates."""
|
||||
|
||||
deployment: Deployment
|
||||
sentinel_url: str
|
||||
runtime_services: tuple[RuntimeService, ...]
|
||||
|
||||
@property
|
||||
def runtime_ready_total(self) -> int:
|
||||
"""Return total runtime services currently marked ready."""
|
||||
return sum(runtime_service.status == "ready" for runtime_service in self.runtime_services)
|
||||
|
||||
@property
|
||||
def runtime_failed_total(self) -> int:
|
||||
"""Return total runtime services currently marked failed."""
|
||||
return sum(runtime_service.status == "failed" for runtime_service in self.runtime_services)
|
||||
|
||||
|
||||
def _deployment_queryset() -> QuerySet[Deployment]:
    """Return the base deployment queryset shared by the dashboard views.

    The hosted site and its tenant are joined in, and runtime services are
    prefetched ordered by ``kind``.
    """
    services_by_kind = Prefetch(
        "runtime_services",
        queryset=RuntimeService.objects.order_by("kind"),
    )
    deployments = Deployment.objects.select_related("hosted_site__tenant")
    return deployments.prefetch_related(services_by_kind)
|
||||
|
||||
|
||||
class DeploymentDashboardView(TemplateView):
    """Dashboard page listing recent deployments with per-status totals."""

    template_name = "control_plane/deployment_dashboard.html"

    def get_context_data(self, **kwargs: RouteKwarg) -> dict[str, DashboardContextValue]:
        """Assemble the dashboard context from the 24 most recently created deployments.

        ``running_total`` and ``deployment_total`` are computed over those recent
        deployments only, while ``status_summaries`` aggregates every deployment.

        Returns:
            Template context containing deployment cards and summary counters.
        """
        context = super().get_context_data(**kwargs)
        recent_deployments = tuple(_deployment_queryset().order_by("-created_at")[:24])

        context["deployment_cards"] = tuple(map(_build_deployment_card, recent_deployments))
        context["status_summaries"] = _build_status_summaries()
        running_value = DeploymentStatus.RUNNING.value
        context["running_total"] = len(
            [deployment for deployment in recent_deployments if deployment.status == running_value],
        )
        context["deployment_total"] = len(recent_deployments)
        return context
|
||||
|
||||
|
||||
class DeploymentDetailView(TemplateView):
    """Detail page for a single deployment: metadata, diagnostics, logs, and health."""

    template_name = "control_plane/deployment_detail.html"

    def get_context_data(self, **kwargs: RouteKwarg) -> dict[str, DetailContextValue]:
        """Load the deployment named in the URL and gather everything the page shows.

        A missing deployment results in a 404. The health probe is executed while
        the context is built, so the page renders with an initial probe result.

        Returns:
            Template context containing deployment metadata, diagnostics, and health state.
        """
        context: dict[str, Any] = super().get_context_data(**kwargs)
        deployment: Deployment = get_object_or_404(_deployment_queryset(), pk=self.kwargs["deployment_id"])

        context["deployment"] = deployment
        context["runtime_services"] = tuple(deployment.runtime_services.all())
        context["sentinel_url"] = build_test_django_local_url(deployment)
        context["diagnostics"] = load_test_deployment_diagnostics(deployment)
        context["health_probe"] = probe_test_deployment_health(deployment)
        return context
|
||||
|
||||
|
||||
class DeploymentHealthView(View):
    """Return live sentinel health JSON for one deployment."""

    def get(self, request: HttpRequest, deployment_id: UUID) -> JsonResponse:
        """Return JSON probe state for one deployment sentinel endpoint.

        Args:
            request: Incoming request; unused.
            deployment_id: Deployment primary key. The route uses the ``uuid``
                path converter, so the value arrives as a ``UUID``, not a string.

        Returns:
            The health probe result serialized as JSON. A missing deployment
            results in a 404 instead.
        """
        del request
        deployment = get_object_or_404(_deployment_queryset(), pk=deployment_id)
        return JsonResponse(probe_test_deployment_health(deployment))
|
||||
|
||||
|
||||
class DeploymentDashboardHomeView(DeploymentDashboardView):
    """Alias the dashboard at the site root for local testing convenience."""

    # Identical to the value inherited from DeploymentDashboardView.
    template_name = "control_plane/deployment_dashboard.html"
|
||||
|
||||
|
||||
def _build_deployment_card(deployment: Deployment) -> DeploymentCard:
    """Wrap a deployment in the card view model used by the dashboard template."""
    sentinel_url = build_test_django_local_url(deployment)
    runtime_services = tuple(deployment.runtime_services.all())
    return DeploymentCard(
        deployment=deployment,
        sentinel_url=sentinel_url,
        runtime_services=runtime_services,
    )
|
||||
|
||||
|
||||
def _build_status_summaries() -> tuple[DeploymentStatusSummary, ...]:
    """Count deployments per status and return one summary per status, ordered by status."""
    counts_by_status = Deployment.objects.values("status").annotate(total=Count("id")).order_by("status")
    summaries: list[DeploymentStatusSummary] = []
    for row in tuple(counts_by_status):
        status = row["status"]
        summaries.append(
            DeploymentStatusSummary(
                status=status,
                label=_resolve_status_label(status),
                total=int(row["total"]),
            ),
        )
    return tuple(summaries)
|
||||
|
||||
|
||||
def _resolve_status_label(status: str) -> str:
    """Return the display label for a deployment status value.

    A status matching a ``DeploymentStatus`` member yields that member's label.
    Any other value falls back to the raw status with dashes replaced by spaces
    and title-cased.
    """
    known_labels = (choice.label for choice in DeploymentStatus if choice.value == status)
    fallback_label = status.replace("-", " ").title()
    return next(known_labels, fallback_label)
|
||||
Loading…
Add table
Add a link
Reference in a new issue